// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XX                                                                           XX
XX                        Amd64/x86 Code Generator                           XX
XX                                                                           XX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
*/

#include "jitpch.h"
#ifdef _MSC_VER
#pragma hdrstop
#endif

#ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator.

#include "emit.h"
#include "codegen.h"
#include "gcinfo.h"
#include "gcinfoencoder.h"
// Get the register assigned to the given node
regNumber CodeGenInterface::genGetAssignedReg(GenTreePtr tree)
{
    return tree->gtRegNum;
}
//------------------------------------------------------------------------
// genSpillVar: Spill a local variable
//
// Arguments:
//    tree - the lclVar node for the variable being spilled
//
// Return Value:
//    None.
//
// Assumptions:
//    The lclVar must be a register candidate (lvRegCandidate)
void CodeGen::genSpillVar(GenTreePtr tree)
{
    unsigned   varNum = tree->gtLclVarCommon.gtLclNum;
    LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);

    assert(varDsc->lvIsRegCandidate());

    // We don't actually need to spill if it is already living in memory
    bool needsSpill = ((tree->gtFlags & GTF_VAR_DEF) == 0 && varDsc->lvIsInReg());
    if (needsSpill)
    {
        var_types lclTyp = varDsc->TypeGet();
        if (varDsc->lvNormalizeOnStore())
        {
            lclTyp = genActualType(lclTyp);
        }
        emitAttr size = emitTypeSize(lclTyp);

        bool restoreRegVar = false;
        if (tree->gtOper == GT_REG_VAR)
        {
            tree->SetOper(GT_LCL_VAR);
            restoreRegVar = true;
        }

        // mask off the flag to generate the right spill code, then bring it back
        tree->gtFlags &= ~GTF_REG_VAL;

        instruction storeIns = ins_Store(tree->TypeGet(), compiler->isSIMDTypeLocalAligned(varNum));
#if CPU_LONG_USES_REGPAIR
        if (varTypeIsMultiReg(tree))
        {
            assert(varDsc->lvRegNum == genRegPairLo(tree->gtRegPair));
            assert(varDsc->lvOtherReg == genRegPairHi(tree->gtRegPair));
            regNumber regLo = genRegPairLo(tree->gtRegPair);
            regNumber regHi = genRegPairHi(tree->gtRegPair);
            inst_TT_RV(storeIns, tree, regLo);
            inst_TT_RV(storeIns, tree, regHi, 4);
        }
        else
#endif // CPU_LONG_USES_REGPAIR
        {
            assert(varDsc->lvRegNum == tree->gtRegNum);
            inst_TT_RV(storeIns, tree, tree->gtRegNum, 0, size);
        }

        tree->gtFlags |= GTF_REG_VAL;

        if (restoreRegVar)
        {
            tree->SetOper(GT_REG_VAR);
        }

        genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(tree));
        gcInfo.gcMarkRegSetNpt(varDsc->lvRegMask());

        if (VarSetOps::IsMember(compiler, gcInfo.gcTrkStkPtrLcls, varDsc->lvVarIndex))
        {
#ifdef DEBUG
            if (!VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
            {
                JITDUMP("\t\t\t\t\t\t\tVar V%02u becoming live\n", varNum);
            }
            else
            {
                JITDUMP("\t\t\t\t\t\t\tVar V%02u continuing live\n", varNum);
            }
#endif // DEBUG
            VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
        }
    }

    tree->gtFlags &= ~GTF_SPILL;
    varDsc->lvRegNum = REG_STK;
    if (varTypeIsMultiReg(tree))
    {
        varDsc->lvOtherReg = REG_STK;
    }
}
// Update the lvRegNum for the given local variable to reflect its current register assignment
void CodeGenInterface::genUpdateVarReg(LclVarDsc* varDsc, GenTreePtr tree)
{
    assert(tree->OperIsScalarLocal() || (tree->gtOper == GT_COPY));
    varDsc->lvRegNum = tree->gtRegNum;
}
/*****************************************************************************/
/*****************************************************************************/

/*****************************************************************************
 *
 *  Generate code that will set the given register to the integer constant.
 */
void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type, insFlags flags)
{
    // Reg cannot be a FP reg
    assert(!genIsValidFloatReg(reg));

    // The only TYP_REF constant that can come down this path is a managed 'null' since it is not
    // relocatable. Other ref type constants (e.g. string objects) go through a different
    // code path.
    noway_assert(type != TYP_REF || val == 0);

    if (val == 0)
    {
        instGen_Set_Reg_To_Zero(emitActualTypeSize(type), reg, flags);
    }
    else
    {
        // TODO-XArch-CQ: needs all the optimized cases
        getEmitter()->emitIns_R_I(INS_mov, emitActualTypeSize(type), reg, val);
    }
}
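// For illustration (instruction forms are chosen by the emitter, shown here only as a
// sketch): genSetRegToIcon(REG_RCX, 42, TYP_INT) emits roughly "mov ecx, 42", while a
// zero value takes the instGen_Set_Reg_To_Zero path, which emits the smaller
// "xor ecx, ecx" idiom instead of a mov with a zero immediate.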
/*****************************************************************************
 *
 *  Generate code to check that the GS cookie wasn't thrashed by a buffer
 *  overrun. If pushReg is true, preserve all registers around the code sequence.
 *  Otherwise ECX could be modified.
 *
 *  Implementation Note: pushReg = true, in case of tail calls.
 */
void CodeGen::genEmitGSCookieCheck(bool pushReg)
{
    noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal);

    // Make sure that EAX is reported as live GC-ref so that any GC that kicks in while
    // executing the GS cookie check will not collect the object pointed to by EAX.
    if (!pushReg && (compiler->info.compRetType == TYP_REF))
    {
        gcInfo.gcRegGCrefSetCur |= RBM_INTRET;
    }

    regNumber regGSCheck;
    if (!pushReg)
    {
        // Non-tail call: we can use any callee trash register that is not
        // a return register or one that contains the 'this' pointer (which we must keep alive),
        // since we are generating the GS cookie check after a GT_RETURN block.
        if (compiler->lvaKeepAliveAndReportThis() && compiler->lvaTable[compiler->info.compThisArg].lvRegister &&
            (compiler->lvaTable[compiler->info.compThisArg].lvRegNum == REG_ECX))
        {
            regGSCheck = REG_RDX;
        }
        else
        {
            regGSCheck = REG_RCX;
        }
    }
    else
    {
#ifdef _TARGET_X86_
        NYI_X86("Tail calls from methods that need GS check");
#else  // !_TARGET_X86_
        // Tail calls from methods that need GS check: We need to preserve registers while
        // emitting the GS cookie check for a tail prefixed call or a jmp. To emit the GS cookie
        // check, we might need a register. This won't be an issue for jmp calls for the
        // reason mentioned below (see comment starting with "Jmp Calls:").
        //
        // The following are the possible solutions in case of tail prefixed calls:
        // 1) Use R11 - ignore tail prefix on calls that need to pass a param in R11 when
        //    present in methods that require GS cookie check. Rest of the tail calls that
        //    do not require R11 will be honored.
        // 2) Internal register - GT_CALL node reserves an internal register and emits the GS
        //    cookie check as part of tail call codegen. GenExitCode() needs to special case
        //    fast tail calls implemented as epilog+jmp, or such tail calls should always get
        //    dispatched via helper.
        // 3) Materialize the GS cookie check as a separate node hanging off the GT_CALL node in
        //    the right execution order during rationalization.
        //
        // There are two calls that use R11: VSD and calli pinvokes with cookie param. Tail
        // prefix on pinvokes is ignored. That is, options 2 and 3 will allow tail prefixed
        // VSD calls from methods that need GS check.
        //
        // Tail prefixed calls: Right now for Jit64 compat, a method requiring GS cookie check
        // ignores the tail prefix. In future, if we intend to support tail calls from such a
        // method, consider one of the options mentioned above. For now we add an assert that
        // we don't expect to see a tail call in a method that requires GS check.
        noway_assert(!compiler->compTailCallUsed);

        // Jmp calls: specify the method handle using which the JIT queries the VM for its entry
        // point address; hence such a call can neither be a VSD call nor a PInvoke calli with
        // cookie parameter. Therefore, in case of jmp calls it is safe to use R11.
        regGSCheck = REG_R11;
#endif // !_TARGET_X86_
    }

    if (compiler->gsGlobalSecurityCookieAddr == nullptr)
    {
        // If the GS cookie value fits within 32 bits we can use 'cmp mem64, imm32'.
        // Otherwise, load the value into a reg and use 'cmp mem64, reg64'.
        if ((int)compiler->gsGlobalSecurityCookieVal != (ssize_t)compiler->gsGlobalSecurityCookieVal)
        {
            genSetRegToIcon(regGSCheck, compiler->gsGlobalSecurityCookieVal, TYP_I_IMPL);
            getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, regGSCheck, compiler->lvaGSSecurityCookie, 0);
        }
        else
        {
            getEmitter()->emitIns_S_I(INS_cmp, EA_PTRSIZE, compiler->lvaGSSecurityCookie, 0,
                                      (int)compiler->gsGlobalSecurityCookieVal);
        }
    }
    else
    {
        // Ngen case - the GS cookie value needs to be accessed through an indirection.
        instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSCheck, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
        getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regGSCheck, regGSCheck, 0);
        getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, regGSCheck, compiler->lvaGSSecurityCookie, 0);
    }

    BasicBlock* gsCheckBlk = genCreateTempLabel();
    inst_JMP(genJumpKindForOper(GT_EQ, true), gsCheckBlk);
    genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN);
    genDefineTempLabel(gsCheckBlk);
}
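// For illustration, with a direct (non-indirected) cookie value that fits in 32 bits,
// the sequence emitted above is roughly (exact encodings chosen by the emitter):
//      cmp   qword ptr [<lvaGSSecurityCookie slot>], imm32
//      je    <gsCheckBlk>
//      call  CORINFO_HELP_FAIL_FAST
//   <gsCheckBlk>: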
/*****************************************************************************
 *
 *  Generate code for all the basic blocks in the function.
 */

void CodeGen::genCodeForBBlist()
{
    unsigned   varNum;
    LclVarDsc* varDsc;

    unsigned savedStkLvl;

#ifdef DEBUG
    genInterruptibleUsed = true;
    unsigned stmtNum     = 0;
    UINT64   totalCostEx = 0;
    UINT64   totalCostSz = 0;

    // You have to be careful if you create basic blocks from now on
    compiler->fgSafeBasicBlockCreation = false;

    // This stress mode is not compatible with fully interruptible GC
    if (genInterruptible && compiler->opts.compStackCheckOnCall)
    {
        compiler->opts.compStackCheckOnCall = false;
    }

    // This stress mode is not compatible with fully interruptible GC
    if (genInterruptible && compiler->opts.compStackCheckOnRet)
    {
        compiler->opts.compStackCheckOnRet = false;
    }
#endif // DEBUG
    // Prepare the blocks for exception handling codegen: mark the blocks that need labels.
    genPrepForEHCodegen();

    assert(!compiler->fgFirstBBScratch ||
           compiler->fgFirstBB == compiler->fgFirstBBScratch); // compiler->fgFirstBBScratch has to be first.

    /* Initialize the spill tracking logic */

    regSet.rsSpillBeg();

    /* Initialize the line# tracking logic */

#ifdef DEBUGGING_SUPPORT
    if (compiler->opts.compScopeInfo)
    {
        siInit();
    }
#endif

    // The current implementation of switch tables requires the first block to have a label so it
    // can generate offsets to the switch label targets.
    // TODO-XArch-CQ: remove this when switches have been re-implemented to not use this.
    if (compiler->fgHasSwitch)
    {
        compiler->fgFirstBB->bbFlags |= BBF_JMP_TARGET;
    }

    genPendingCallLabel = nullptr;

    /* Initialize the pointer tracking code */

    gcInfo.gcRegPtrSetInit();
    gcInfo.gcVarPtrSetInit();
    /* If any arguments live in registers, mark those regs as such */

    for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
    {
        /* Is this variable a parameter assigned to a register? */

        if (!varDsc->lvIsParam || !varDsc->lvRegister)
        {
            continue;
        }

        /* Is the argument live on entry to the method? */

        if (!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex))
        {
            continue;
        }

        /* Is this a floating-point argument? */

        if (varDsc->IsFloatRegType())
        {
            continue;
        }

        noway_assert(!varTypeIsFloating(varDsc->TypeGet()));

        /* Mark the register as holding the variable */

        regTracker.rsTrackRegLclVar(varDsc->lvRegNum, varNum);
    }
    unsigned finallyNesting = 0;

    // Make sure a set is allocated for compiler->compCurLife (in the long case), so we can set it to empty without
    // causing an allocation at the start of each basic block.
    VarSetOps::AssignNoCopy(compiler, compiler->compCurLife, VarSetOps::MakeEmpty(compiler));

    /*-------------------------------------------------------------------------
     *
     *  Walk the basic blocks and generate code for each one
     *
     */

    BasicBlock* block;
    BasicBlock* lblk; /* previous block */

    for (lblk = NULL, block = compiler->fgFirstBB; block != NULL; lblk = block, block = block->bbNext)
    {
#ifdef DEBUG
        if (compiler->verbose)
        {
            printf("\n=============== Generating ");
            block->dspBlockHeader(compiler, true, true);
            compiler->fgDispBBLiveness(block);
        }
#endif // DEBUG
        // Figure out which registers hold variables on entry to this block

        regSet.ClearMaskVars();
        gcInfo.gcRegGCrefSetCur = RBM_NONE;
        gcInfo.gcRegByrefSetCur = RBM_NONE;

        compiler->m_pLinearScan->recordVarLocationsAtStartOfBB(block);

        genUpdateLife(block->bbLiveIn);

        // Even if liveness didn't change, we need to update the registers containing GC references.
        // genUpdateLife will update the registers live due to liveness changes. But what about registers that didn't
        // change? We cleared them out above. Maybe we should just not clear them out, but update the ones that change
        // here. That would require handling the changes in recordVarLocationsAtStartOfBB().

        regMaskTP newLiveRegSet  = RBM_NONE;
        regMaskTP newRegGCrefSet = RBM_NONE;
        regMaskTP newRegByrefSet = RBM_NONE;
#ifdef DEBUG
        VARSET_TP VARSET_INIT_NOCOPY(removedGCVars, VarSetOps::MakeEmpty(compiler));
        VARSET_TP VARSET_INIT_NOCOPY(addedGCVars, VarSetOps::MakeEmpty(compiler));
#endif
        VARSET_ITER_INIT(compiler, iter, block->bbLiveIn, varIndex);
        while (iter.NextElem(compiler, &varIndex))
        {
            unsigned   varNum = compiler->lvaTrackedToVarNum[varIndex];
            LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);

            if (varDsc->lvIsInReg())
            {
                newLiveRegSet |= varDsc->lvRegMask();
                if (varDsc->lvType == TYP_REF)
                {
                    newRegGCrefSet |= varDsc->lvRegMask();
                }
                else if (varDsc->lvType == TYP_BYREF)
                {
                    newRegByrefSet |= varDsc->lvRegMask();
                }
#ifdef DEBUG
                if (verbose && VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex))
                {
                    VarSetOps::AddElemD(compiler, removedGCVars, varIndex);
                }
#endif // DEBUG
                VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex);
            }
            else if (compiler->lvaIsGCTracked(varDsc))
            {
#ifdef DEBUG
                if (verbose && !VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex))
                {
                    VarSetOps::AddElemD(compiler, addedGCVars, varIndex);
                }
#endif // DEBUG
                VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex);
            }
        }

        regSet.rsMaskVars = newLiveRegSet;
#ifdef DEBUG
        if (compiler->verbose)
        {
            if (!VarSetOps::IsEmpty(compiler, addedGCVars))
            {
                printf("\t\t\t\t\t\t\tAdded GCVars: ");
                dumpConvertedVarSet(compiler, addedGCVars);
                printf("\n");
            }
            if (!VarSetOps::IsEmpty(compiler, removedGCVars))
            {
                printf("\t\t\t\t\t\t\tRemoved GCVars: ");
                dumpConvertedVarSet(compiler, removedGCVars);
                printf("\n");
            }
        }
#endif // DEBUG

        gcInfo.gcMarkRegSetGCref(newRegGCrefSet DEBUG_ARG(true));
        gcInfo.gcMarkRegSetByref(newRegByrefSet DEBUG_ARG(true));
        /* Blocks with handlerGetsXcptnObj()==true use GT_CATCH_ARG to
           represent the exception object (TYP_REF).
           We mark REG_EXCEPTION_OBJECT as holding a GC object on entry
           to the block, since it will be the first thing evaluated
           (thanks to GTF_ORDER_SIDEEFF).
         */

        if (handlerGetsXcptnObj(block->bbCatchTyp))
        {
#if JIT_FEATURE_SSA_SKIP_DEFS
            GenTreePtr firstStmt = block->FirstNonPhiDef();
#else
            GenTreePtr firstStmt = block->bbTreeList;
#endif
            if (firstStmt != NULL)
            {
                GenTreePtr firstTree = firstStmt->gtStmt.gtStmtExpr;
                if (compiler->gtHasCatchArg(firstTree))
                {
                    gcInfo.gcMarkRegSetGCref(RBM_EXCEPTION_OBJECT);
                }
            }
        }
        /* Start a new code output block */

        genUpdateCurrentFunclet(block);

        if (genAlignLoops && block->bbFlags & BBF_LOOP_HEAD)
        {
            getEmitter()->emitLoopAlign();
        }

#ifdef DEBUG
        if (compiler->opts.dspCode)
        {
            printf("\n      L_M%03u_BB%02u:\n", Compiler::s_compMethodsCount, block->bbNum);
        }
#endif

        block->bbEmitCookie = NULL;

        if (block->bbFlags & (BBF_JMP_TARGET | BBF_HAS_LABEL))
        {
            /* Mark a label and update the current set of live GC refs */

            block->bbEmitCookie = getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
                                                             gcInfo.gcRegByrefSetCur, FALSE);
        }

        if (block == compiler->fgFirstColdBlock)
        {
#ifdef DEBUG
            if (compiler->verbose)
            {
                printf("\nThis is the start of the cold region of the method\n");
            }
#endif
            // We should never have a block that falls through into the Cold section
            noway_assert(!lblk->bbFallsThrough());

            // We require the block that starts the Cold section to have a label
            noway_assert(block->bbEmitCookie);
            getEmitter()->emitSetFirstColdIGCookie(block->bbEmitCookie);
        }
        /* Both stacks are always empty on entry to a basic block */

        genStackLevel = 0;

        savedStkLvl = genStackLevel;

        /* Tell everyone which basic block we're working on */

        compiler->compCurBB = block;

#ifdef DEBUGGING_SUPPORT
        siBeginBlock(block);

        // BBF_INTERNAL blocks don't correspond to any single IL instruction.
        // If the block is the distinguished first scratch block, then there is no need to emit
        // a NO_MAPPING entry immediately after the prolog.
        if (compiler->opts.compDbgInfo && (block->bbFlags & BBF_INTERNAL) && !compiler->fgBBisScratch(block))
        {
            genIPmappingAdd((IL_OFFSETX)ICorDebugInfo::NO_MAPPING, true);
        }

        bool firstMapping = true;
#endif // DEBUGGING_SUPPORT
        /*---------------------------------------------------------------------
         *
         *  Generate code for each statement-tree in the block
         *
         */

#if FEATURE_EH_FUNCLETS
        if (block->bbFlags & BBF_FUNCLET_BEG)
        {
            genReserveFuncletProlog(block);
        }
#endif // FEATURE_EH_FUNCLETS
        for (GenTreePtr stmt = block->FirstNonPhiDef(); stmt; stmt = stmt->gtNext)
        {
            noway_assert(stmt->gtOper == GT_STMT);

            if (stmt->AsStmt()->gtStmtIsEmbedded())
            {
                continue;
            }

            /* Get hold of the statement tree */
            GenTreePtr tree = stmt->gtStmt.gtStmtExpr;

#if defined(DEBUGGING_SUPPORT)

            /* Do we have a new IL-offset ? */

            if (stmt->gtStmt.gtStmtILoffsx != BAD_IL_OFFSET)
            {
                /* Create and append a new IP-mapping entry */
                genIPmappingAdd(stmt->gtStmt.gtStmtILoffsx, firstMapping);
                firstMapping = false;
            }

#endif // DEBUGGING_SUPPORT

#ifdef DEBUG
            noway_assert(stmt->gtStmt.gtStmtLastILoffs <= compiler->info.compILCodeSize ||
                         stmt->gtStmt.gtStmtLastILoffs == BAD_IL_OFFSET);

            if (compiler->opts.dspCode && compiler->opts.dspInstrs && stmt->gtStmt.gtStmtLastILoffs != BAD_IL_OFFSET)
            {
                while (genCurDispOffset <= stmt->gtStmt.gtStmtLastILoffs)
                {
                    genCurDispOffset += dumpSingleInstr(compiler->info.compCode, genCurDispOffset, ">    ");
                }
            }

            stmtNum++;
            if (compiler->verbose)
            {
                printf("\nGenerating BB%02u, stmt %u\t\t", block->bbNum, stmtNum);
                printf("Holding variables: ");
                dspRegMask(regSet.rsMaskVars);
                printf("\n\n");
                if (compiler->verboseTrees)
                {
                    compiler->gtDispTree(compiler->opts.compDbgInfo ? stmt : tree);
                    printf("\n");
                }
            }
            totalCostEx += ((UINT64)stmt->gtCostEx * block->getBBWeight(compiler));
            totalCostSz += (UINT64)stmt->gtCostSz;
#endif // DEBUG

            // Traverse the tree in linear order, generating code for each node in the
            // tree as we encounter it

            compiler->compCurLifeTree = NULL;
            compiler->compCurStmt     = stmt;
            for (GenTreePtr treeNode = stmt->gtStmt.gtStmtList; treeNode != NULL; treeNode = treeNode->gtNext)
            {
                genCodeForTreeNode(treeNode);
                if (treeNode->gtHasReg() && treeNode->gtLsraInfo.isLocalDefUse)
                {
                    genConsumeReg(treeNode);
                }
            }

#ifdef FEATURE_SIMD
            // If the next statement expr is a SIMDIntrinsicUpperRestore, don't call rsSpillChk because we
            // haven't yet restored spills from the most recent call.
            GenTree* nextTopLevelStmt = stmt->AsStmt()->gtStmtNextTopLevelStmt();
            if ((nextTopLevelStmt == nullptr) || (nextTopLevelStmt->AsStmt()->gtStmtExpr->OperGet() != GT_SIMD) ||
                (nextTopLevelStmt->AsStmt()->gtStmtExpr->gtSIMD.gtSIMDIntrinsicID != SIMDIntrinsicUpperRestore))
#endif // FEATURE_SIMD
            {
                regSet.rsSpillChk();
            }
#ifdef DEBUG
            /* Make sure we didn't bungle pointer register tracking */

            regMaskTP ptrRegs       = (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur);
            regMaskTP nonVarPtrRegs = ptrRegs & ~regSet.rsMaskVars;

            // If return is a GC-type, clear it.  Note that if a common
            // epilog is generated (genReturnBB) it has a void return
            // even though we might return a ref.  We can't use the compRetType
            // as the determiner because something we are tracking as a byref
            // might be used as a return value of an int function (which is legal)
            if (tree->gtOper == GT_RETURN && (varTypeIsGC(compiler->info.compRetType) ||
                                              (tree->gtOp.gtOp1 != 0 && varTypeIsGC(tree->gtOp.gtOp1->TypeGet()))))
            {
                nonVarPtrRegs &= ~RBM_INTRET;
            }

            // When profiling, the first statement in a catch block will be the
            // harmless "inc" instruction (does not interfere with the exception
            // object).
            if ((compiler->opts.eeFlags & CORJIT_FLG_BBINSTR) && (stmt == block->bbTreeList) &&
                handlerGetsXcptnObj(block->bbCatchTyp))
            {
                nonVarPtrRegs &= ~RBM_EXCEPTION_OBJECT;
            }

            if (nonVarPtrRegs)
            {
                printf("Regset after tree=");
                compiler->printTreeID(tree);
                printf(" BB%02u gcr=", block->bbNum);
                printRegMaskInt(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
                compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
                printf(", byr=");
                printRegMaskInt(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
                compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
                printf(", regVars=");
                printRegMaskInt(regSet.rsMaskVars);
                compiler->getEmitter()->emitDispRegSet(regSet.rsMaskVars);
                printf("\n");
            }

            noway_assert(nonVarPtrRegs == 0);

            for (GenTree* node = stmt->gtStmt.gtStmtList; node; node = node->gtNext)
            {
                assert(!(node->gtFlags & GTF_SPILL));
            }
#endif // DEBUG

            noway_assert(stmt->gtOper == GT_STMT);

#ifdef DEBUGGING_SUPPORT
            genEnsureCodeEmitted(stmt->gtStmt.gtStmtILoffsx);
#endif

        } //-------- END-FOR each statement-tree of the current block ---------
#if defined(DEBUG) && defined(LATE_DISASM) && defined(_TARGET_AMD64_)
        if (block->bbNext == nullptr)
        {
            // Unit testing of the AMD64 emitter: generate a bunch of instructions into the last block
            // (it's as good as any, but better than the prolog, which can only be a single instruction
            // group), then use COMPLUS_JitLateDisasm=* to see if the late disassembler
            // thinks the instructions are the same as we do.
            genAmd64EmitterUnitTests();
        }
#endif // defined(DEBUG) && defined(LATE_DISASM) && defined(_TARGET_AMD64_)
#ifdef DEBUGGING_SUPPORT

        if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0))
        {
            siEndBlock(block);

            /* Is this the last block, and are there any open scopes left ? */

            bool isLastBlockProcessed = (block->bbNext == NULL);
            if (block->isBBCallAlwaysPair())
            {
                isLastBlockProcessed = (block->bbNext->bbNext == NULL);
            }

            if (isLastBlockProcessed && siOpenScopeList.scNext)
            {
                /* This assert no longer holds, because we may insert a throw
                   block to demarcate the end of a try or finally region when they
                   are at the end of the method.  It would be nice if we could fix
                   our code so that this throw block will no longer be necessary. */

                // noway_assert(block->bbCodeOffsEnd != compiler->info.compILCodeSize);

                siCloseAllOpenScopes();
            }
        }

#endif // DEBUGGING_SUPPORT
        genStackLevel -= savedStkLvl;

#ifdef DEBUG
        // compCurLife should be equal to the liveOut set, except that we don't keep
        // it up to date for vars that are not register candidates
        // (it would be nice to have a xor set function)

        VARSET_TP VARSET_INIT_NOCOPY(extraLiveVars,
                                     VarSetOps::Diff(compiler, block->bbLiveOut, compiler->compCurLife));
        VarSetOps::UnionD(compiler, extraLiveVars, VarSetOps::Diff(compiler, compiler->compCurLife, block->bbLiveOut));
        VARSET_ITER_INIT(compiler, extraLiveVarIter, extraLiveVars, extraLiveVarIndex);
        while (extraLiveVarIter.NextElem(compiler, &extraLiveVarIndex))
        {
            unsigned   varNum = compiler->lvaTrackedToVarNum[extraLiveVarIndex];
            LclVarDsc* varDsc = compiler->lvaTable + varNum;
            assert(!varDsc->lvIsRegCandidate());
        }
#endif // DEBUG

        /* Both stacks should always be empty on exit from a basic block */
        noway_assert(genStackLevel == 0);
#ifdef _TARGET_AMD64_
        // On AMD64, we need to generate a NOP after a call that is the last instruction of the block, in several
        // situations, to support proper exception handling semantics. This is mostly to ensure that when the stack
        // walker computes an instruction pointer for a frame, that instruction pointer is in the correct EH region.
        // The document "X64 and ARM ABIs.docx" has more details. The situations:
        // 1. If the call instruction is in a different EH region than the instruction that follows it.
        // 2. If the call immediately precedes an OS epilog. (Note that what the JIT or VM consider an epilog might
        //    be slightly different from what the OS considers an epilog, and it is the OS-reported epilog that
        //    matters here.)
        // We handle case #1 here, and case #2 in the emitter.
        if (getEmitter()->emitIsLastInsCall())
        {
            // Ok, the last instruction generated is a call instruction. Do any of the other conditions hold?
            // Note: we may be generating a few too many NOPs for the case of call preceding an epilog. Technically,
            // if the next block is a BBJ_RETURN, an epilog will be generated, but there may be some instructions
            // generated before the OS epilog starts, such as a GS cookie check.
            if ((block->bbNext == nullptr) || !BasicBlock::sameEHRegion(block, block->bbNext))
            {
                // We only need the NOP if we're not going to generate any more code as part of the block end.

                switch (block->bbJumpKind)
                {
                case BBJ_ALWAYS:
                case BBJ_THROW:
                case BBJ_CALLFINALLY:
                case BBJ_EHCATCHRET:
                    // We're going to generate more code below anyway, so no need for the NOP.

                case BBJ_RETURN:
                case BBJ_EHFINALLYRET:
                case BBJ_EHFILTERRET:
                    // These are the "epilog follows" case, handled in the emitter.
                    break;

                case BBJ_NONE:
                    if (block->bbNext == nullptr)
                    {
                        // Call immediately before the end of the code; we should never get here.
                        instGen(INS_BREAKPOINT); // This should never get executed
                    }
                    else
                    {
                        // We need the NOP
                        instGen(INS_nop);
                    }
                    break;

                case BBJ_COND:
                case BBJ_SWITCH:
                    // These can't have a call as the last instruction!

                default:
                    noway_assert(!"Unexpected bbJumpKind");
                    break;
                }
            }
        }
#endif // _TARGET_AMD64_
        /* Do we need to generate a jump or return? */

        switch (block->bbJumpKind)
        {
        case BBJ_ALWAYS:
            inst_JMP(EJ_jmp, block->bbJumpDest);
            break;

        case BBJ_RETURN:
            genExitCode(block);
            break;

        case BBJ_THROW:
            // If we have a throw at the end of a function or funclet, we need to emit another instruction
            // afterwards to help the OS unwinder determine the correct context during unwind.
            // We insert an unexecuted breakpoint instruction in several situations
            // following a throw instruction:
            // 1. If the throw is the last instruction of the function or funclet. This helps
            //    the OS unwinder determine the correct context during an unwind from the
            //    thrown exception.
            // 2. If this is the last block of the hot section.
            // 3. If the subsequent block is a special throw block.
            // 4. On AMD64, if the next block is in a different EH region.
            if ((block->bbNext == NULL) || (block->bbNext->bbFlags & BBF_FUNCLET_BEG) ||
                !BasicBlock::sameEHRegion(block, block->bbNext) ||
                (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block->bbNext)) ||
                block->bbNext == compiler->fgFirstColdBlock)
            {
                instGen(INS_BREAKPOINT); // This should never get executed
            }
            break;
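            // For example, a method whose last block is a throw typically ends as:
            //      call  <throw helper>   ; never returns
            //      int3                   ; unexecuted padding: keeps the return address
            //                             ; (call site + 1) inside this function and EH
            //                             ; region so the OS unwinder picks the right context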
        case BBJ_CALLFINALLY:

#if FEATURE_EH_FUNCLETS

            // Generate a call to the finally, like this:
            //      mov         rcx,qword ptr [rbp + 20H]       // Load rcx with PSPSym
            //      call        finally-funclet
            //      jmp         finally-return                  // Only for non-retless finally calls
            // The jmp can be a NOP if we're going to the next block.

            getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_ARG_0, compiler->lvaPSPSym, 0);
            getEmitter()->emitIns_J(INS_call, block->bbJumpDest);

            if (block->bbFlags & BBF_RETLESS_CALL)
            {
                // We have a retless call, and the last instruction generated was a call.
                // If the next block is in a different EH region (or is the end of the code
                // block), then we need to generate a breakpoint here (since it will never
                // get executed) to get proper unwind behavior.

                if ((block->bbNext == nullptr) || !BasicBlock::sameEHRegion(block, block->bbNext))
                {
                    instGen(INS_BREAKPOINT); // This should never get executed
                }
            }
            else
            {
                // Because of the way the flowgraph is connected, the liveness info for this one instruction
                // after the call is not (and cannot be) correct in cases where a variable has a last use in the
                // handler.  So turn off GC reporting for this single instruction.
                getEmitter()->emitDisableGC();

                // Now go to where the finally funclet needs to return to.
                if (block->bbNext->bbJumpDest == block->bbNext->bbNext)
                {
                    // Fall-through.
                    // TODO-XArch-CQ: Can we get rid of this instruction, and just have the call return directly
                    // to the next instruction? This would depend on stack walking from within the finally
                    // handler working without this instruction being in this special EH region.
                    instGen(INS_nop);
                }
                else
                {
                    inst_JMP(EJ_jmp, block->bbNext->bbJumpDest);
                }

                getEmitter()->emitEnableGC();
            }

            // The BBJ_ALWAYS is used because the BBJ_CALLFINALLY can't point to the
            // jump target using bbJumpDest - that is already used to point
            // to the finally block. So just skip past the BBJ_ALWAYS unless the
            // block is RETLESS.
            if (!(block->bbFlags & BBF_RETLESS_CALL))
            {
                assert(block->isBBCallAlwaysPair());

                lblk  = block;
                block = block->bbNext;
            }

#else // !FEATURE_EH_FUNCLETS

            NYI_X86("EH for RyuJIT x86");

#endif // !FEATURE_EH_FUNCLETS

            break;
        case BBJ_EHCATCHRET:
            // Set EAX to the address the VM should return to after the catch.
            // Generate a RIP-relative
            //         lea reg, [rip + disp32] ; the RIP is implicit
            // which will be position-independent.
            getEmitter()->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, block->bbJumpDest, REG_INTRET);
            __fallthrough;

        case BBJ_EHFINALLYRET:
        case BBJ_EHFILTERRET:
#if FEATURE_EH_FUNCLETS
            genReserveFuncletEpilog(block);
#else // !FEATURE_EH_FUNCLETS
            NYI_X86("EH for RyuJIT x86");
#endif // !FEATURE_EH_FUNCLETS
            break;

        case BBJ_NONE:
        case BBJ_COND:
        case BBJ_SWITCH:
            break;

        default:
            noway_assert(!"Unexpected bbJumpKind");
            break;
        }
#ifdef DEBUG
        compiler->compCurBB = 0;
#endif

    } //------------------ END-FOR each block of the method -------------------

    /* Nothing is live at this point */
    genUpdateLife(VarSetOps::MakeEmpty(compiler));

    /* Finalize the spill tracking logic */

    regSet.rsSpillEnd();

    /* Finalize the temp tracking logic */

    compiler->tmpEnd();

#ifdef DEBUG
    if (compiler->verbose)
    {
        printf("totalCostEx = %6llu, totalCostSz = %5llu ", totalCostEx, totalCostSz);
        printf("%s\n", compiler->info.compFullName);
    }
#endif
}
// return the child that has the same reg as the dst (if any)
// other child returned (out param) in 'other'
GenTree* sameRegAsDst(GenTree* tree, GenTree*& other /*out*/)
{
    if (tree->gtRegNum == REG_NA)
    {
        other = nullptr;
        return NULL;
    }

    GenTreePtr op1 = tree->gtOp.gtOp1;
    GenTreePtr op2 = tree->gtOp.gtOp2;
    if (op1->gtRegNum == tree->gtRegNum)
    {
        other = op2;
        return op1;
    }
    if (op2->gtRegNum == tree->gtRegNum)
    {
        other = op1;
        return op2;
    }
    else
    {
        other = nullptr;
        return NULL;
    }
}
// Move an immediate value into an integer register
void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, regNumber reg, ssize_t imm, insFlags flags)
{
    // reg cannot be a FP register
    assert(!genIsValidFloatReg(reg));

    if (!compiler->opts.compReloc)
    {
        size = EA_SIZE(size); // Strip any Reloc flags from size if we aren't doing relocs
    }

    if ((imm == 0) && !EA_IS_RELOC(size))
    {
        instGen_Set_Reg_To_Zero(size, reg, flags);
    }
    else
    {
        if (genDataIndirAddrCanBeEncodedAsPCRelOffset(imm))
        {
            getEmitter()->emitIns_R_AI(INS_lea, EA_PTR_DSP_RELOC, reg, imm);
        }
        else
        {
            getEmitter()->emitIns_R_I(INS_mov, size, reg, imm);
        }
    }
    regTracker.rsTrackRegIntCns(reg, imm);
}
/***********************************************************************************
 *
 * Generate code to set a register 'targetReg' of type 'targetType' to the constant
 * specified by the constant (GT_CNS_INT or GT_CNS_DBL) in 'tree'. This does not call
 * genProduceReg() on the target register.
 */
void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTreePtr tree)
{
    switch (tree->gtOper)
    {
    case GT_CNS_INT:
    {
        // relocatable values tend to come down as a CNS_INT of native int type
        // so the line between these two opcodes is kind of blurry
        GenTreeIntConCommon* con    = tree->AsIntConCommon();
        ssize_t              cnsVal = con->IconValue();

        if (con->ImmedValNeedsReloc(compiler))
        {
            instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, targetReg, cnsVal);
            regTracker.rsTrackRegTrash(targetReg);
        }
        else
        {
            genSetRegToIcon(targetReg, cnsVal, targetType);
        }
    }
    break;

    case GT_CNS_DBL:
    {
        double constValue = tree->gtDblCon.gtDconVal;

        // Make sure we use "xorpd reg, reg" only for +ve zero constant (0.0) and not for -ve zero (-0.0)
        if (*(__int64*)&constValue == 0)
        {
            // A faster/smaller way to generate 0
            instruction ins = genGetInsForOper(GT_XOR, targetType);
            inst_RV_RV(ins, targetReg, targetReg, targetType);
        }
        else
        {
            GenTreePtr cns;
            if (targetType == TYP_FLOAT)
            {
                float f = forceCastToFloat(constValue);
                cns     = genMakeConst(&f, targetType, tree, false);
            }
            else
            {
                cns = genMakeConst(&constValue, targetType, tree, true);
            }

            inst_RV_TT(ins_Load(targetType), targetReg, cns);
        }
    }
    break;

    default:
        unreached();
    }
}
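// Note on the zero check above: +0.0 and -0.0 differ only in the sign bit
// (0x8000000000000000 for a double), so the all-zero-bits result of "xorpd reg, reg"
// is only a correct materialization of +0.0; -0.0 must be loaded from a memory constant.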
// Generate code to get the high N bits of a N*N=2N bit multiplication result
void CodeGen::genCodeForMulHi(GenTreeOp* treeNode)
{
    assert(!(treeNode->gtFlags & GTF_UNSIGNED));
    assert(!treeNode->gtOverflowEx());

    regNumber targetReg  = treeNode->gtRegNum;
    var_types targetType = treeNode->TypeGet();
    emitter*  emit       = getEmitter();
    emitAttr  size       = emitTypeSize(treeNode);
    GenTree*  op1        = treeNode->gtOp.gtOp1;
    GenTree*  op2        = treeNode->gtOp.gtOp2;

    // to get the high bits of the multiply, we are constrained to using the
    // 1-op form:  RDX:RAX = RAX * rm
    // The 3-op form (Rx = Ry * Rz) does not support it.

    genConsumeOperands(treeNode->AsOp());

    GenTree* regOp = op1;
    GenTree* rmOp  = op2;

    // Set rmOp to the contained memory operand (if any)
    //
    if (op1->isContained() || (!op2->isContained() && (op2->gtRegNum == targetReg)))
    {
        regOp = op2;
        rmOp  = op1;
    }
    assert(!regOp->isContained());

    // Setup targetReg when neither of the source operands was a matching register
    if (regOp->gtRegNum != targetReg)
    {
        inst_RV_RV(ins_Copy(targetType), targetReg, regOp->gtRegNum, targetType);
    }

    emit->emitInsBinary(INS_imulEAX, size, treeNode, rmOp);

    // Move the result to the desired register, if necessary
    if (targetReg != REG_RDX)
    {
        inst_RV_RV(INS_mov, targetReg, REG_RDX, targetType);
    }
}
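// For illustration, the one-operand multiply form used in genCodeForMulHi above
// computes a full-width product:
//      imul  <rm>            ; RDX:RAX = RAX * <rm>   (signed)
// after which the high half is copied from RDX into the target register if they differ.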
// generate code for a DIV or MOD operation
//
void CodeGen::genCodeForDivMod(GenTreeOp* treeNode)
{
    GenTree*   dividend   = treeNode->gtOp1;
    GenTree*   divisor    = treeNode->gtOp2;
    genTreeOps oper       = treeNode->OperGet();
    emitAttr   size       = emitTypeSize(treeNode);
    regNumber  targetReg  = treeNode->gtRegNum;
    var_types  targetType = treeNode->TypeGet();
    emitter*   emit       = getEmitter();

    // dividend is not contained.
    assert(!dividend->isContained());

    genConsumeOperands(treeNode->AsOp());
    if (varTypeIsFloating(targetType))
    {
        // divisor is either not contained or, if contained, is a memory op
        assert(!divisor->isContained() || divisor->isMemoryOp() || divisor->IsCnsFltOrDbl());

        // Floating point div/rem operation
        assert(oper == GT_DIV || oper == GT_MOD);

        if (dividend->gtRegNum == targetReg)
        {
            emit->emitInsBinary(genGetInsForOper(treeNode->gtOper, targetType), size, treeNode, divisor);
        }
        else if (divisor->gtRegNum == targetReg)
        {
            // It is not possible to generate 2-operand divss or divsd where reg2 = reg1 / reg2
            // because divss/divsd reg1, reg2 will overwrite reg1.  Therefore, in case of AMD64
            // LSRA has to make sure that such a register assignment is not generated for floating
            // point div/rem operations.
            noway_assert(
                !"GT_DIV/GT_MOD (float): case of reg2 = reg1 / reg2, LSRA should never generate such a reg assignment");
        }
        else
        {
            inst_RV_RV(ins_Copy(targetType), targetReg, dividend->gtRegNum, targetType);
            emit->emitInsBinary(genGetInsForOper(treeNode->gtOper, targetType), size, treeNode, divisor);
        }
    }
    else
    {
        // dividend must be in RAX
        if (dividend->gtRegNum != REG_RAX)
        {
            inst_RV_RV(INS_mov, REG_RAX, dividend->gtRegNum, targetType);
        }

        // zero or sign extend rax to rdx
        if (oper == GT_UMOD || oper == GT_UDIV)
        {
            instGen_Set_Reg_To_Zero(EA_PTRSIZE, REG_EDX);
        }
        else
        {
            emit->emitIns(INS_cdq, size);
            // the cdq instruction writes RDX, so clear the gcInfo for RDX
            gcInfo.gcMarkRegSetNpt(RBM_RDX);
        }
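        // For example, with EAX = -7, "cdq" sets EDX = 0xFFFFFFFF so that EDX:EAX holds the
        // sign-extended value -7 (for 64-bit operands the emitter produces "cqo", which does
        // the same for RDX:RAX); for unsigned div/mod the upper half is simply zeroed instead.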
        if (divisor->isContainedIntOrIImmed())
        {
            GenTreeIntConCommon* divImm = divisor->AsIntConCommon();
            assert(divImm->IsIntCnsFitsInI32());
            ssize_t imm = divImm->IconValue();
            assert(isPow2(abs(imm)));
            genCodeForPow2Div(treeNode->AsOp());
        }
        else
        {
            // Perform the 'targetType' (64-bit or 32-bit) divide instruction
            instruction ins;
            if (oper == GT_UMOD || oper == GT_UDIV)
            {
                ins = INS_div;
            }
            else
            {
                ins = INS_idiv;
            }

            emit->emitInsBinary(ins, size, treeNode, divisor);

            // Signed divide RDX:RAX by r/m64, with result
            //    stored in RAX := Quotient, RDX := Remainder.
            // Move the result to the desired register, if necessary
            if (oper == GT_DIV || oper == GT_UDIV)
            {
                if (targetReg != REG_RAX)
                {
                    inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType);
                }
            }
            else
            {
                assert((oper == GT_MOD) || (oper == GT_UMOD));
                if (targetReg != REG_RDX)
                {
                    inst_RV_RV(INS_mov, targetReg, REG_RDX, targetType);
                }
            }
        }
    }
    genProduceReg(treeNode);
}
//------------------------------------------------------------------------
// genCodeForBinary: Generate code for many binary arithmetic operators
//
// Arguments:
//    treeNode - The binary operation for which we are generating code.
//
// Return Value:
//    None.
//
// Notes:
//    Mul and div variants have special constraints on x64 so are not handled here.
//    See the assert below for the operators that are handled.
void CodeGen::genCodeForBinary(GenTree* treeNode)
{
    const genTreeOps oper       = treeNode->OperGet();
    regNumber        targetReg  = treeNode->gtRegNum;
    var_types        targetType = treeNode->TypeGet();
    emitter*         emit       = getEmitter();

#if defined(_TARGET_64BIT_)
    assert(oper == GT_OR || oper == GT_XOR || oper == GT_AND || oper == GT_ADD || oper == GT_SUB);
#else // !defined(_TARGET_64BIT_)
    assert(oper == GT_OR || oper == GT_XOR || oper == GT_AND ||
           oper == GT_ADD_HI || oper == GT_SUB_HI ||
           oper == GT_MUL_HI || oper == GT_DIV_HI || oper == GT_MOD_HI ||
           oper == GT_ADD || oper == GT_SUB);
#endif // !defined(_TARGET_64BIT_)

    GenTreePtr op1 = treeNode->gtGetOp1();
    GenTreePtr op2 = treeNode->gtGetOp2();

    // Commutative operations can mark op1 as contained to generate "op reg, memop/immed"
    if (op1->isContained())
    {
        assert(treeNode->OperIsCommutative());
        assert(op1->isMemoryOp() || op1->IsCnsNonZeroFltOrDbl() || op1->IsIntCnsFitsInI32());

        op1 = treeNode->gtGetOp2();
        op2 = treeNode->gtGetOp1();
    }

    instruction ins = genGetInsForOper(treeNode->OperGet(), targetType);

    // The arithmetic node must be sitting in a register (since it's not contained)
    noway_assert(targetReg != REG_NA);

    regNumber op1reg = op1->gtRegNum;
    regNumber op2reg = op2->gtRegNum;

    GenTreePtr dst;
    GenTreePtr src;

    genConsumeOperands(treeNode->AsOp());

    // This is the case of reg1 = reg1 op reg2
    // We're ready to emit the instruction without any moves
    if (op1reg == targetReg)
    {
        dst = op1;
        src = op2;
    }
    // We have reg1 = reg2 op reg1
    // In order for this operation to be correct,
    // the operation must be commutative so that
    // we can convert it into reg1 = reg1 op reg2 and emit
    // the same code as above
    else if (op2reg == targetReg)
    {
        noway_assert(GenTree::OperIsCommutative(oper));
        dst = op2;
        src = op1;
    }
    // now we know there are 3 different operands so attempt to use LEA
    else if (oper == GT_ADD && !varTypeIsFloating(treeNode) &&
             !treeNode->gtOverflowEx() // LEA does not set flags
             && (op2->isContainedIntOrIImmed() || !op2->isContained()))
    {
        if (op2->isContainedIntOrIImmed())
        {
            emit->emitIns_R_AR(INS_lea, emitTypeSize(treeNode), targetReg, op1reg,
                               (int)op2->AsIntConCommon()->IconValue());
        }
        else
        {
            assert(op2reg != REG_NA);
            emit->emitIns_R_ARX(INS_lea, emitTypeSize(treeNode), targetReg, op1reg, op2reg, 1, 0);
        }
        genProduceReg(treeNode);
        return;
    }
    // dest, op1 and op2 registers are different:
    // reg3 = reg1 op reg2
    // We can implement this by issuing a mov:
    // reg3 = reg1
    // reg3 = reg3 op reg2
    else
    {
        inst_RV_RV(ins_Copy(targetType), targetReg, op1reg, targetType);
        regTracker.rsTrackRegCopy(targetReg, op1reg);
        gcInfo.gcMarkRegPtrVal(targetReg, targetType);
        dst = treeNode;
        src = op2;
    }

    // try to use an inc or dec
    if (oper == GT_ADD && !varTypeIsFloating(treeNode) && src->isContainedIntOrIImmed() && !treeNode->gtOverflowEx())
    {
        if (src->gtIntConCommon.IconValue() == 1)
        {
            emit->emitIns_R(INS_inc, emitTypeSize(treeNode), targetReg);
            genProduceReg(treeNode);
            return;
        }
        else if (src->gtIntConCommon.IconValue() == -1)
        {
            emit->emitIns_R(INS_dec, emitTypeSize(treeNode), targetReg);
            genProduceReg(treeNode);
            return;
        }
    }
    regNumber r = emit->emitInsBinary(ins, emitTypeSize(treeNode), dst, src);
    noway_assert(r == targetReg);

    if (treeNode->gtOverflowEx())
    {
        assert(oper == GT_ADD || oper == GT_SUB);
        genCheckOverflow(treeNode);
    }
    genProduceReg(treeNode);
}
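// For illustration, the LEA path above lets "reg3 = reg1 + reg2" (or reg1 + imm) be
// emitted as a single instruction such as
//      lea   r8, [rcx + rdx]
// which needs no preparatory mov and leaves the flags untouched - which is also why it
// cannot be used when an overflow check (which reads the flags) is required.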
//------------------------------------------------------------------------
// isStructReturn: Returns whether the 'treeNode' is returning a struct.
//
// Arguments:
//    treeNode - The tree node to evaluate whether it is a struct return.
//
// Return Value:
//    For AMD64 *nix: returns true if the 'treeNode' is of type GT_RETURN and the
//    return type is a struct or it is an implicit retBuf struct return.
//    Otherwise returns false.
//    For other platforms always returns false.
//
bool CodeGen::isStructReturn(GenTreePtr treeNode)
{
    // This method could be called for a 'treeNode' of GT_RET_FILT or GT_RETURN.
    // For the GT_RET_FILT, the return is always
    // a bool or a void, for the end of a finally block.
    noway_assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT);
    if (treeNode->OperGet() != GT_RETURN)
    {
        return false;
    }

#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
    return varTypeIsStruct(treeNode) ||
           (treeNode->TypeGet() == TYP_VOID && compiler->info.compRetBuffArg != BAD_VAR_NUM);
#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
    assert(!varTypeIsStruct(treeNode));
    return false;
#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
}
//------------------------------------------------------------------------
// genStructReturn: Generates code for returning a struct.
//
// Arguments:
//    treeNode - The GT_RETURN tree node.
//
// Return Value:
//    None
//
void CodeGen::genStructReturn(GenTreePtr treeNode)
{
    assert(treeNode->OperGet() == GT_RETURN);
    GenTreePtr op1        = treeNode->gtGetOp1();
    var_types  targetType = treeNode->TypeGet();

#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
    if (targetType == TYP_VOID)
    {
        assert(op1 == nullptr);
        if (compiler->info.compRetBuffArg != BAD_VAR_NUM)
        {
            // The System V AMD64 spec requires that when a struct is returned by a hidden
            // argument, RAX should contain the value of the hidden retbuf arg.
            getEmitter()->emitIns_R_S(INS_mov, EA_BYREF, REG_RAX, compiler->info.compRetBuffArg, 0);
        }
    }
    else
    {
        noway_assert((op1->OperGet() == GT_LCL_VAR) || (op1->OperGet() == GT_CALL));

        if (op1->OperGet() == GT_LCL_VAR)
        {
            assert(op1->isContained());

            GenTreeLclVarCommon* lclVarPtr = op1->AsLclVarCommon();
            LclVarDsc*           varDsc    = &(compiler->lvaTable[lclVarPtr->gtLclNum]);
            assert(varDsc->lvIsMultiRegArgOrRet);

            CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
            assert(typeHnd != nullptr);

            SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
            compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc);
            assert(structDesc.passedInRegisters);
            assert(structDesc.eightByteCount == CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS);

            regNumber       retReg0 = REG_NA;
            unsigned __int8 offset0 = 0;
            regNumber       retReg1 = REG_NA;
            unsigned __int8 offset1 = 0;

            var_types type0 = TYP_UNKNOWN;
            var_types type1 = TYP_UNKNOWN;

            getStructTypeOffset(structDesc, &type0, &type1, &offset0, &offset1);
            getStructReturnRegisters(type0, type1, &retReg0, &retReg1);

            // Move the values into the return registers.
            //
            assert(retReg0 != REG_NA && retReg1 != REG_NA);

            getEmitter()->emitIns_R_S(ins_Load(type0), emitTypeSize(type0), retReg0, lclVarPtr->gtLclNum, offset0);
            getEmitter()->emitIns_R_S(ins_Load(type1), emitTypeSize(type1), retReg1, lclVarPtr->gtLclNum, offset1);
        }

        // Nothing to do if the op1 of the return statement is a GT_CALL. The call already has the return
        // values in the proper return registers.
        // This assumes that registers never get spilled. There is an Issue 2966 created to track the need
        // for handling the GT_CALL case of two register returns and handle it properly for stress modes
        // and potential other changes that may break this assumption.
    }
#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
    assert(!"unreached");
#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
}
//------------------------------------------------------------------------
// genReturn: Generates code for return statement.
//            In case of struct return, delegates to the genStructReturn method.
//
// Arguments:
//    treeNode - The GT_RETURN or GT_RETFILT tree node.
//
// Return Value:
//    None
//
void CodeGen::genReturn(GenTreePtr treeNode)
{
    assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT);
    GenTreePtr op1        = treeNode->gtGetOp1();
    var_types  targetType = treeNode->TypeGet();

    if (targetType == TYP_VOID)
    {
        assert(op1 == nullptr);
    }

#ifdef _TARGET_X86_
    if (treeNode->TypeGet() == TYP_LONG)
    {
        assert(op1 != nullptr);
        noway_assert(op1->OperGet() == GT_LONG);
        GenTree* loRetVal = op1->gtGetOp1();
        GenTree* hiRetVal = op1->gtGetOp2();
        noway_assert((loRetVal->gtRegNum != REG_NA) && (hiRetVal->gtRegNum != REG_NA));

        genConsumeReg(loRetVal);
        genConsumeReg(hiRetVal);
        if (loRetVal->gtRegNum != REG_LNGRET_LO)
        {
            inst_RV_RV(ins_Copy(targetType), REG_LNGRET_LO, loRetVal->gtRegNum, TYP_INT);
        }
        if (hiRetVal->gtRegNum != REG_LNGRET_HI)
        {
            inst_RV_RV(ins_Copy(targetType), REG_LNGRET_HI, hiRetVal->gtRegNum, TYP_INT);
        }
    }
    else
#endif // _TARGET_X86_
    if (isStructReturn(treeNode))
    {
        genStructReturn(treeNode);
    }
    else if (targetType != TYP_VOID)
    {
        assert(op1 != nullptr);
        noway_assert(op1->gtRegNum != REG_NA);

        // !! NOTE !! genConsumeReg will clear op1 as GC ref after it has
        // consumed a reg for the operand. This is because the variable
        // is dead after return. But we are issuing more instructions
        // like "profiler leave callback" after this consumption. So
        // if you are issuing more instructions after this point,
        // remember to keep the variable live up until the new method
        // exit point where it is actually dead.
        genConsumeReg(op1);

        regNumber retReg = varTypeIsFloating(treeNode) ? REG_FLOATRET : REG_INTRET;
#ifdef _TARGET_X86_
        if (varTypeIsFloating(treeNode))
        {
            if (genIsRegCandidateLocal(op1) && !compiler->lvaTable[op1->gtLclVarCommon.gtLclNum].lvRegister)
            {
                // Store local variable to its home location, if necessary.
                if ((op1->gtFlags & GTF_REG_VAL) != 0)
                {
                    op1->gtFlags &= ~GTF_REG_VAL;
                    inst_TT_RV(ins_Store(op1->gtType,
                                         compiler->isSIMDTypeLocalAligned(op1->gtLclVarCommon.gtLclNum)),
                               op1, op1->gtRegNum);
                }
                // Now, load it to the fp stack.
                getEmitter()->emitIns_S(INS_fld, emitTypeSize(op1), op1->AsLclVarCommon()->gtLclNum, 0);
            }
            else
            {
                // Spill the value, which should be in a register, then load it to the fp stack.
                // TODO-X86-CQ: Deal with things that are already in memory (don't call genConsumeReg yet).
                op1->gtFlags |= GTF_SPILL;
                regSet.rsSpillTree(op1->gtRegNum, op1);
                op1->gtFlags |= GTF_SPILLED;
                op1->gtFlags &= ~GTF_SPILL;

                TempDsc* t = regSet.rsUnspillInPlace(op1);
                inst_FS_ST(INS_fld, emitActualTypeSize(op1->gtType), t, 0);
                op1->gtFlags &= ~GTF_SPILLED;
                compiler->tmpRlsTemp(t);
            }
        }
        else
#endif // _TARGET_X86_
        {
            if (op1->gtRegNum != retReg)
            {
                inst_RV_RV(ins_Copy(targetType), retReg, op1->gtRegNum, targetType);
            }
        }
    }

#ifdef PROFILING_SUPPORTED
    // !! NOTE !!
    // TODO-AMD64-Unix: If the profiler hook is implemented on *nix, make sure that for structs
    //                  returned in two registers, RAX and RDX are kept alive. Make the necessary
    //                  changes in lowerxarch.cpp in the handling of the GT_RETURN statement.
    //                  Such structs containing GC pointers need to be handled by calling
    //                  gcInfo.gcMarkRegSetNpt for the return registers containing GC refs.
    //
    // There will be a single return block while generating profiler ELT callbacks.
    //
    // Reason for not materializing the Leave callback as a GT_PROF_HOOK node after GT_RETURN:
    // The flowgraph and other places assert that the last node of a block marked as
    // GT_RETURN is either a GT_RETURN or GT_JMP or a tail call. It would be nice to
    // maintain such an invariant irrespective of whether a profiler hook is needed or not.
    // Also, there is not much to be gained by materializing it as an explicit node.
    if (compiler->compCurBB == compiler->genReturnBB)
    {
        // !! NOTE !!
        // Since we are invalidating the assumption that we would slip into the epilog
        // right after the "return", we need to preserve the return reg's GC state
        // across the call until actual method return.
        if (varTypeIsGC(compiler->info.compRetType))
        {
            gcInfo.gcMarkRegPtrVal(REG_INTRET, compiler->info.compRetType);
        }

        genProfilingLeaveCallback();

        if (varTypeIsGC(compiler->info.compRetType))
        {
            gcInfo.gcMarkRegSetNpt(REG_INTRET);
        }
    }
#endif // PROFILING_SUPPORTED
}
/*****************************************************************************
 *
 * Generate code for a single node in the tree.
 * Preconditions: All operands have been evaluated
 *
 */
void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
{
    regNumber targetReg;
#if !defined(_TARGET_64BIT_)
    if (treeNode->TypeGet() == TYP_LONG)
    {
        // All long enregistered nodes will have been decomposed into their
        // constituent lo and hi nodes.
        regPairNo targetPair = treeNode->gtRegPair;
        noway_assert(targetPair == REG_PAIR_NONE);
        targetReg = REG_NA;
    }
    else
#endif // !defined(_TARGET_64BIT_)
    {
        targetReg = treeNode->gtRegNum;
    }

    var_types targetType = treeNode->TypeGet();
    emitter*  emit       = getEmitter();

#ifdef DEBUG
    // Validate that all the operands for the current node are consumed in order.
    // This is important because LSRA ensures that any necessary copies will be
    // handled correctly.
    lastConsumedNode = nullptr;
    if (compiler->verbose)
    {
        unsigned seqNum = treeNode->gtSeqNum; // Useful for setting a conditional break in Visual Studio
        printf("Generating: ");
        compiler->gtDispTree(treeNode, nullptr, nullptr, true);
    }
#endif // DEBUG

    // Is this a node whose value is already in a register?  LSRA denotes this by
    // setting the GTF_REUSE_REG_VAL flag.
    if (treeNode->IsReuseRegVal())
    {
        // For now, this is only used for constant nodes.
        assert((treeNode->OperIsConst()));
        JITDUMP("  TreeNode is marked ReuseReg\n");
        return;
    }

    // contained nodes are part of their parents for codegen purposes
    // ex : immediates, most LEAs
    if (treeNode->isContained())
    {
        return;
    }
    switch (treeNode->gtOper)
    {
    case GT_START_NONGC:
        getEmitter()->emitDisableGC();
        break;

    case GT_PROF_HOOK:
#ifdef PROFILING_SUPPORTED
        // We should be seeing this only if the profiler hook is needed
        noway_assert(compiler->compIsProfilerHookNeeded());

        // Right now this node is used only for tail calls. In future if
        // we intend to use it for Enter or Leave hooks, add a data member
        // to this node indicating the kind of profiler hook. For example,
        // helper number can be used.
        genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_TAILCALL);
#endif // PROFILING_SUPPORTED
        break;

    case GT_LCLHEAP:
        genLclHeap(treeNode);
        break;

    case GT_CNS_INT:
#ifdef _TARGET_X86_
        NYI_IF(treeNode->IsIconHandle(GTF_ICON_TLS_HDL), "TLS constants");
#endif // _TARGET_X86_
        __fallthrough;

    case GT_CNS_DBL:
        genSetRegToConst(targetReg, targetType, treeNode);
        genProduceReg(treeNode);
        break;
    case GT_NOT:
    case GT_NEG:
        if (varTypeIsFloating(targetType))
        {
            assert(treeNode->gtOper == GT_NEG);
            genSSE2BitwiseOp(treeNode);
        }
        else
        {
            GenTreePtr operand = treeNode->gtGetOp1();
            assert(!operand->isContained());
            regNumber operandReg = genConsumeReg(operand);

            if (operandReg != targetReg)
            {
                inst_RV_RV(INS_mov, targetReg, operandReg, targetType);
            }

            instruction ins = genGetInsForOper(treeNode->OperGet(), targetType);
            inst_RV(ins, targetReg, targetType);
        }
        genProduceReg(treeNode);
        break;

    case GT_OR:
    case GT_XOR:
    case GT_AND:
        assert(varTypeIsIntegralOrI(treeNode));
        __fallthrough;

#if !defined(_TARGET_64BIT_)
    case GT_ADD_HI:
    case GT_SUB_HI:
#endif // !defined(_TARGET_64BIT_)

    case GT_ADD:
    case GT_SUB:
        genCodeForBinary(treeNode);
        break;

    case GT_LSH:
    case GT_RSH:
    case GT_RSZ:
        genCodeForShift(treeNode->gtGetOp1(), treeNode->gtGetOp2(), treeNode);
        // genCodeForShift() calls genProduceReg()
        break;
    case GT_CAST:
#if !defined(_TARGET_64BIT_)
        // We will NYI in DecomposeNode() if we are cast TO a long type, but we do not
        // yet support casting FROM a long type either, and that's simpler to catch
        // here.
        NYI_IF(varTypeIsLong(treeNode->gtOp.gtOp1), "Casts from TYP_LONG");
#endif // !defined(_TARGET_64BIT_)

        if (varTypeIsFloating(targetType) && varTypeIsFloating(treeNode->gtOp.gtOp1))
        {
            // Casts float/double <--> double/float
            genFloatToFloatCast(treeNode);
        }
        else if (varTypeIsFloating(treeNode->gtOp.gtOp1))
        {
            // Casts float/double --> int32/int64
            genFloatToIntCast(treeNode);
        }
        else if (varTypeIsFloating(targetType))
        {
            // Casts int32/uint32/int64/uint64 --> float/double
            genIntToFloatCast(treeNode);
        }
        else
        {
            // Casts int <--> int
            genIntToIntCast(treeNode);
        }
        // The per-case functions call genProduceReg()
        break;
    case GT_LCL_VAR:
    {
        // lcl_vars are not defs
        assert((treeNode->gtFlags & GTF_VAR_DEF) == 0);

        GenTreeLclVarCommon* lcl            = treeNode->AsLclVarCommon();
        bool                 isRegCandidate = compiler->lvaTable[lcl->gtLclNum].lvIsRegCandidate();

        if (isRegCandidate && !(treeNode->gtFlags & GTF_VAR_DEATH))
        {
            assert((treeNode->InReg()) || (treeNode->gtFlags & GTF_SPILLED));
        }

        // If this is a register candidate that has been spilled, genConsumeReg() will
        // reload it at the point of use.  Otherwise, if it's not in a register, we load it here.

        if (!treeNode->InReg() && !(treeNode->gtFlags & GTF_SPILLED))
        {
            assert(!isRegCandidate);
            emit->emitIns_R_S(ins_Load(treeNode->TypeGet(), compiler->isSIMDTypeLocalAligned(lcl->gtLclNum)),
                              emitTypeSize(treeNode), treeNode->gtRegNum, lcl->gtLclNum, 0);
            genProduceReg(treeNode);
        }
    }
    break;
    case GT_LCL_FLD_ADDR:
    case GT_LCL_VAR_ADDR:
        // Address of a local var.  This by itself should never be allocated a register.
        // If it is worth storing the address in a register then it should be cse'ed into
        // a temp and that would be allocated a register.
        noway_assert(targetType == TYP_BYREF);
        noway_assert(!treeNode->InReg());

        inst_RV_TT(INS_lea, targetReg, treeNode, 0, EA_BYREF);
        genProduceReg(treeNode);
        break;
    case GT_LCL_FLD:
    {
        noway_assert(targetType != TYP_STRUCT);
        noway_assert(treeNode->gtRegNum != REG_NA);

#ifdef FEATURE_SIMD
        // Loading of TYP_SIMD12 (i.e. Vector3) field
        if (treeNode->TypeGet() == TYP_SIMD12)
        {
            genLoadLclFldTypeSIMD12(treeNode);
            break;
        }
#endif

        emitAttr size   = emitTypeSize(targetType);
        unsigned offs   = treeNode->gtLclFld.gtLclOffs;
        unsigned varNum = treeNode->gtLclVarCommon.gtLclNum;
        assert(varNum < compiler->lvaCount);

        emit->emitIns_R_S(ins_Move_Extend(targetType, treeNode->InReg()), size, targetReg, varNum, offs);
        genProduceReg(treeNode);
    }
    break;
    case GT_STORE_LCL_FLD:
    {
        if (!genStoreRegisterReturnInLclVar(treeNode))
        {
            noway_assert(targetType != TYP_STRUCT);
            noway_assert(!treeNode->InReg());
            assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));

#ifdef FEATURE_SIMD
            // storing of TYP_SIMD12 (i.e. Vector3) field
            if (treeNode->TypeGet() == TYP_SIMD12)
            {
                genStoreLclFldTypeSIMD12(treeNode);
                break;
            }
#endif

            GenTreePtr op1 = treeNode->gtOp.gtOp1;
            genConsumeRegs(op1);
            emit->emitInsBinary(ins_Store(targetType), emitTypeSize(treeNode), treeNode, op1);
        }
    }
    break;
    case GT_STORE_LCL_VAR:
    {
        if (!genStoreRegisterReturnInLclVar(treeNode))
        {
            noway_assert(targetType != TYP_STRUCT);
            assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));

            unsigned   lclNum = treeNode->AsLclVarCommon()->gtLclNum;
            LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);

            // Ensure that lclVar nodes are typed correctly.
            assert(!varDsc->lvNormalizeOnStore() || treeNode->TypeGet() == genActualType(varDsc->TypeGet()));

#if !defined(_TARGET_64BIT_)
            if (treeNode->TypeGet() == TYP_LONG)
            {
                genStoreLongLclVar(treeNode);
                break;
            }
#endif // !defined(_TARGET_64BIT_)

            GenTreePtr op1 = treeNode->gtOp.gtOp1;
            genConsumeRegs(op1);

            if (treeNode->gtRegNum == REG_NA)
            {
                // stack store
                emit->emitInsMov(ins_Store(targetType, compiler->isSIMDTypeLocalAligned(lclNum)),
                                 emitTypeSize(treeNode), treeNode);
                varDsc->lvRegNum = REG_STK;
            }
            else
            {
                bool containedOp1 = op1->isContained();
                // Look for the case where we have a constant zero which we've marked for reuse,
                // but which isn't actually in the register we want. In that case, it's better to create
                // zero in the target register, because an xor is smaller than a copy. Note that we could
                // potentially handle this in the register allocator, but we can't always catch it there
                // because the target may not have a register allocated for it yet.
                if (!containedOp1 && (op1->gtRegNum != treeNode->gtRegNum) && op1->IsZero())
                {
                    op1->gtRegNum = REG_NA;
                    op1->ResetReuseRegVal();
                    containedOp1 = true;
                }

                if (containedOp1)
                {
                    // Currently, we assume that the contained source of a GT_STORE_LCL_VAR writing to a register
                    // must be a constant. However, in the future we might want to support a contained memory op.
                    // This is a bit tricky because we have to decide it's contained before register allocation,
                    // and this would be a case where, once that's done, we need to mark that node as always
                    // requiring a register - which we always assume now anyway, but once we "optimize" that
                    // we'll have to take cases like this into account.
                    assert((op1->gtRegNum == REG_NA) && op1->OperIsConst());
                    genSetRegToConst(treeNode->gtRegNum, targetType, op1);
                }
                else if (op1->gtRegNum != treeNode->gtRegNum)
                {
                    assert(op1->gtRegNum != REG_NA);
                    emit->emitInsBinary(ins_Move_Extend(targetType, true), emitTypeSize(treeNode), treeNode, op1);
                }
            }
            if (treeNode->gtRegNum != REG_NA)
            {
                genProduceReg(treeNode);
            }
        }
    }
    break;
1998 // A void GT_RETFILT is the end of a finally. For non-void filter returns we need to load the result in
1999 // the return register, if it's not already there. The processing is the same as GT_RETURN.
2000 if (targetType != TYP_VOID)
2002 // For filters, the IL spec says the result is type int32. Further, the only specified legal values
2003 // are 0 or 1, with the use of other values "undefined".
2004 assert(targetType == TYP_INT);
2010 genReturn(treeNode);
2015 // if we are here, it is the case where there is an LEA that cannot
2016 // be folded into a parent instruction
2017 GenTreeAddrMode *lea = treeNode->AsAddrMode();
2018 genLeaInstruction(lea);
2020 // genLeaInstruction calls genProduceReg()
2025 // Handling of Vector3 type values loaded through indirection.
2026 if (treeNode->TypeGet() == TYP_SIMD12)
2028 genLoadIndTypeSIMD12(treeNode);
2031 #endif // FEATURE_SIMD
2033 genConsumeAddress(treeNode->AsIndir()->Addr());
2034 emit->emitInsMov(ins_Load(treeNode->TypeGet()), emitTypeSize(treeNode), treeNode);
2035 genProduceReg(treeNode);
2039 genCodeForMulHi(treeNode->AsOp());
2040 genProduceReg(treeNode);
2046 emitAttr size = emitTypeSize(treeNode);
2047 bool isUnsignedMultiply = ((treeNode->gtFlags & GTF_UNSIGNED) != 0);
2048 bool requiresOverflowCheck = treeNode->gtOverflowEx();
2050 GenTree *op1 = treeNode->gtOp.gtOp1;
2051 GenTree *op2 = treeNode->gtOp.gtOp2;
2053 // there are 3 forms of x64 multiply:
2054 //   1-op form with 128-bit result: RDX:RAX = RAX * rm
2055 // 2-op form: reg *= rm
2056 // 3-op form: reg = rm * imm
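// For example (a sketch; register names are illustrative only):
//   mul  rcx            ; 1-op: RDX:RAX = RAX * RCX
//   imul rax, rcx       ; 2-op: RAX = RAX * RCX
//   imul rax, rcx, 9    ; 3-op: RAX = RCX * 9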
2058 genConsumeOperands(treeNode->AsOp());
2060 // This matches the 'mul' lowering in Lowering::SetMulOpCounts()
2062 // immOp :: Only one operand can be an immediate
2063 // rmOp :: Only one operand can be a memory op.
2064 // regOp :: A register op (especially the operand that matches 'targetReg')
2065 // (can be nullptr when we have both a memory op and an immediate op)
2067 GenTree * immOp = nullptr;
2068 GenTree * rmOp = op1;
2071 if (op2->isContainedIntOrIImmed())
2075 else if (op1->isContainedIntOrIImmed())
2081 if (immOp != nullptr)
2083 // This must be a non-floating point operation.
2084 assert(!varTypeIsFloating(treeNode));
2086 // CQ: When possible use LEA for mul by imm 3, 5 or 9
2087 ssize_t imm = immOp->AsIntConCommon()->IconValue();
2089 if (!requiresOverflowCheck && !rmOp->isContained() && ((imm == 3) || (imm == 5) || (imm == 9)))
2091 // We will use the LEA instruction to perform this multiply
2092 // Note that an LEA with base=x, index=x and scale=(imm-1) computes x*imm when imm=3,5 or 9.
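// e.g. for imm == 5 this emits, roughly: lea targetReg, [x + x*4], where x is rmOp's register.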
2093 unsigned int scale = (unsigned int)(imm - 1);
2094 getEmitter()->emitIns_R_ARX(INS_lea, size, targetReg, rmOp->gtRegNum, rmOp->gtRegNum, scale, 0);
2098 // use the 3-op form with immediate
2099 ins = getEmitter()->inst3opImulForReg(targetReg);
2100 emit->emitInsBinary(ins, size, rmOp, immOp);
2103 else // we have no contained immediate operand
2108 regNumber mulTargetReg = targetReg;
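// The unsigned overflow-checked multiply must use the one-operand 'mul' form, which always
// writes its 128-bit result to RDX:RAX, so we retarget the multiply to RAX in that case.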
2109 if (isUnsignedMultiply && requiresOverflowCheck)
2112 mulTargetReg = REG_RAX;
2116 ins = genGetInsForOper(GT_MUL, targetType);
2119 // Set rmOp to the contained memory operand (if any),
2120 // or set regOp to op2 when it has the matching target register for our multiply op
2122 if (op1->isContained() || (!op2->isContained() && (op2->gtRegNum == mulTargetReg)))
2127 assert(!regOp->isContained());
2129 // Set up mulTargetReg when neither of the source operands was a matching register
2130 if (regOp->gtRegNum != mulTargetReg)
2132 inst_RV_RV(ins_Copy(targetType), mulTargetReg, regOp->gtRegNum, targetType);
2135 emit->emitInsBinary(ins, size, treeNode, rmOp);
2137 // Move the result to the desired register, if necessary
2138 if ((ins == INS_mulEAX) && (targetReg != REG_RAX))
2140 inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType);
2144 if (requiresOverflowCheck)
2146 // Overflow checking is only used for non-floating point types
2147 noway_assert(!varTypeIsFloating(treeNode));
2149 genCheckOverflow(treeNode);
2152 genProduceReg(treeNode);
2158 // We shouldn't be seeing GT_MOD on float/double args as it should get morphed into a
2159 // helper call by the front-end. Similarly we shouldn't be seeing GT_UDIV and GT_UMOD
2160 // on float/double args.
2161 noway_assert(!varTypeIsFloating(treeNode));
2165 genCodeForDivMod(treeNode->AsOp());
2169 genIntrinsic(treeNode);
2174 genSIMDIntrinsic(treeNode->AsSIMD());
2176 #endif // FEATURE_SIMD
2179 genCkfinite(treeNode);
2189 // TODO-XArch-CQ: Check if we can use the currently set flags.
2190 // TODO-XArch-CQ: Check for the case where we can simply transfer the carry bit to a register
2191 // (signed < or >= where targetReg != REG_NA)
2193 GenTreePtr op1 = treeNode->gtGetOp1();
2194 var_types op1Type = op1->TypeGet();
2196 if (varTypeIsFloating(op1Type))
2198 genCompareFloat(treeNode);
2200 #if !defined(_TARGET_64BIT_)
2201 // X86 Long comparison
2202 else if (varTypeIsLong(op1Type))
2204 genCompareLong(treeNode);
2206 #endif // !defined(_TARGET_64BIT_)
2209 genCompareInt(treeNode);
2216 GenTree *cmp = treeNode->gtOp.gtOp1;
2217 assert(cmp->OperIsCompare());
2218 assert(compiler->compCurBB->bbJumpKind == BBJ_COND);
2220 // Get the "kind" and type of the comparison. Note that whether it is an unsigned cmp
2221 // is governed by a flag NOT by the inherent type of the node
2222 // TODO-XArch-CQ: Check if we can use the currently set flags.
2223 emitJumpKind jumpKind[2];
2224 bool branchToTrueLabel[2];
2225 genJumpKindsForTree(cmp, jumpKind, branchToTrueLabel);
2227 BasicBlock* skipLabel = nullptr;
2228 if (jumpKind[0] != EJ_NONE)
2230 BasicBlock *jmpTarget;
2231 if (branchToTrueLabel[0])
2233 jmpTarget = compiler->compCurBB->bbJumpDest;
2237 // This case arises only for ordered GT_EQ right now
2238 assert((cmp->gtOper == GT_EQ) && ((cmp->gtFlags & GTF_RELOP_NAN_UN) == 0));
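// e.g. for an ordered GT_EQ on floating-point operands the branches are roughly:
//   jp  L_skip        ; PF set means unordered, and unordered operands are never equal
//   je  <bbJumpDest>
// L_skip: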
2239 skipLabel = genCreateTempLabel();
2240 jmpTarget = skipLabel;
2243 inst_JMP(jumpKind[0], jmpTarget);
2246 if (jumpKind[1] != EJ_NONE)
2248 // the second conditional branch always has to be to the true label
2249 assert(branchToTrueLabel[1]);
2250 inst_JMP(jumpKind[1], compiler->compCurBB->bbJumpDest);
2253 if (skipLabel != nullptr)
2254 genDefineTempLabel(skipLabel);
2260 // this is nothing but a conditional call to CORINFO_HELP_STOP_FOR_GC
2261 // based on the contents of 'data'
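// The sequence emitted below is roughly:
//   cmp  data, 0
//   je   L_skip
//   call CORINFO_HELP_STOP_FOR_GC
// L_skip: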
2263 GenTree *data = treeNode->gtOp.gtOp1;
2264 genConsumeRegs(data);
2265 GenTreeIntCon cns = intForm(TYP_INT, 0);
2266 emit->emitInsBinary(INS_cmp, emitTypeSize(TYP_INT), data, &cns);
2268 BasicBlock* skipLabel = genCreateTempLabel();
2270 inst_JMP(genJumpKindForOper(GT_EQ, true), skipLabel);
2272 // emit the call to the EE-helper that stops for GC (or other reasons)
2273 assert(treeNode->gtRsvdRegs != RBM_NONE);
2274 assert(genCountBits(treeNode->gtRsvdRegs) == 1);
2275 regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs);
2276 assert(genIsValidIntReg(tmpReg));
2278 genEmitHelperCall(CORINFO_HELP_STOP_FOR_GC, 0, EA_UNKNOWN, tmpReg);
2279 genDefineTempLabel(skipLabel);
2284 genStoreInd(treeNode);
2288 // This is handled at the time we call genConsumeReg() on the GT_COPY
2293 // Swap is only supported for lclVar operands that are enregistered
2294 // We do not consume or produce any registers. Both operands remain enregistered.
2295 // However, the gc-ness may change.
2296 assert(genIsRegCandidateLocal(treeNode->gtOp.gtOp1) && genIsRegCandidateLocal(treeNode->gtOp.gtOp2));
2298 GenTreeLclVarCommon* lcl1 = treeNode->gtOp.gtOp1->AsLclVarCommon();
2299 LclVarDsc* varDsc1 = &(compiler->lvaTable[lcl1->gtLclNum]);
2300 var_types type1 = varDsc1->TypeGet();
2301 GenTreeLclVarCommon* lcl2 = treeNode->gtOp.gtOp2->AsLclVarCommon();
2302 LclVarDsc* varDsc2 = &(compiler->lvaTable[lcl2->gtLclNum]);
2303 var_types type2 = varDsc2->TypeGet();
2305 // We must have both int or both fp regs
2306 assert(!varTypeIsFloating(type1) || varTypeIsFloating(type2));
2308 // FP swap is not yet implemented (and should have NYI'd in LSRA)
2309 assert(!varTypeIsFloating(type1));
2311 regNumber oldOp1Reg = lcl1->gtRegNum;
2312 regMaskTP oldOp1RegMask = genRegMask(oldOp1Reg);
2313 regNumber oldOp2Reg = lcl2->gtRegNum;
2314 regMaskTP oldOp2RegMask = genRegMask(oldOp2Reg);
2316 // We don't call genUpdateVarReg because we don't have a tree node with the new register.
2317 varDsc1->lvRegNum = oldOp2Reg;
2318 varDsc2->lvRegNum = oldOp1Reg;
2321 emitAttr size = EA_PTRSIZE;
2322 if (varTypeGCtype(type1) != varTypeGCtype(type2))
2324 // If the type specified to the emitter is a GC type, it will swap the GC-ness of the registers.
2325 // Otherwise it will leave them alone, which is correct if they have the same GC-ness.
2328 inst_RV_RV(INS_xchg, oldOp1Reg, oldOp2Reg, TYP_I_IMPL, size);
2330 // Update the gcInfo.
2331 // Manually remove these regs from the gc sets (mostly to avoid confusing duplicative dump output)
2332 gcInfo.gcRegByrefSetCur &= ~(oldOp1RegMask|oldOp2RegMask);
2333 gcInfo.gcRegGCrefSetCur &= ~(oldOp1RegMask|oldOp2RegMask);
2335 // gcMarkRegPtrVal will do the appropriate thing for non-gc types.
2336 // It will also dump the updates.
2337 gcInfo.gcMarkRegPtrVal(oldOp2Reg, type1);
2338 gcInfo.gcMarkRegPtrVal(oldOp1Reg, type2);
2348 genPutArgStk(treeNode);
2353 #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
2354 noway_assert(targetType != TYP_STRUCT);
2355 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
2356 // commas show up here commonly, as part of a nullchk operation
2357 GenTree *op1 = treeNode->gtOp.gtOp1;
2358 // If the child node is not already in the register we need, move it
2360 if (treeNode->gtRegNum != op1->gtRegNum)
2362 inst_RV_RV(ins_Copy(targetType), treeNode->gtRegNum, op1->gtRegNum, targetType);
2364 genProduceReg(treeNode);
2369 genCallInstruction(treeNode);
2373 genJmpMethod(treeNode);
2379 genLockedInstructions(treeNode);
2382 case GT_MEMORYBARRIER:
2383 instGen_MemoryBarrier();
2388 GenTreePtr location = treeNode->gtCmpXchg.gtOpLocation; // arg1
2389 GenTreePtr value = treeNode->gtCmpXchg.gtOpValue; // arg2
2390 GenTreePtr comparand = treeNode->gtCmpXchg.gtOpComparand; // arg3
2392 assert(location->gtRegNum != REG_NA && location->gtRegNum != REG_RAX);
2393 assert(value->gtRegNum != REG_NA && value->gtRegNum != REG_RAX);
2395 genConsumeReg(location);
2396 genConsumeReg(value);
2397 genConsumeReg(comparand);
2398 // comparand goes to RAX;
2399 // Note that we must issue this move after the genConsumeReg() calls above, in case any of them
2400 // have a GT_COPY from RAX.
2401 if (comparand->gtRegNum != REG_RAX)
2403 inst_RV_RV(ins_Copy(comparand->TypeGet()), REG_RAX, comparand->gtRegNum, comparand->TypeGet());
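// cmpxchg [location], value: if RAX equals [location] then [location] = value,
// otherwise RAX = [location]. Either way RAX ends up holding the original memory
// contents, which is this node's result.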
2410 emit->emitIns_AR_R(INS_cmpxchg, emitTypeSize(targetType), value->gtRegNum, location->gtRegNum, 0);
2413 if (targetReg != REG_RAX)
2415 inst_RV_RV(ins_Copy(targetType), targetReg, REG_RAX, targetType);
2418 genProduceReg(treeNode);
2422 // do nothing - reload is just a marker.
2423 // The parent node will call genConsumeReg on this which will trigger the unspill of this node's child
2424 // into the register specified in this node.
2431 if (treeNode->gtFlags & GTF_NO_OP_NO)
2433 noway_assert(!"GTF_NO_OP_NO should not be set");
2437 getEmitter()->emitIns_Nop(1);
2441 case GT_ARR_BOUNDS_CHECK:
2444 #endif // FEATURE_SIMD
2445 genRangeCheck(treeNode);
2449 if (treeNode->gtRegNum != treeNode->AsPhysReg()->gtSrcReg)
2451 inst_RV_RV(INS_mov, treeNode->gtRegNum, treeNode->AsPhysReg()->gtSrcReg, targetType);
2453 genTransferRegGCState(treeNode->gtRegNum, treeNode->AsPhysReg()->gtSrcReg);
2455 genProduceReg(treeNode);
2463 assert(!treeNode->gtOp.gtOp1->isContained());
2464 regNumber reg = genConsumeReg(treeNode->gtOp.gtOp1);
2465 emit->emitIns_AR_R(INS_cmp, EA_4BYTE, reg, reg, 0);
2471 noway_assert(handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp));
2473 /* Catch arguments get passed in a register. genCodeForBBlist()
2474    would have marked that register as holding a GC object, but the argument is not otherwise used. */
2476 noway_assert(gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT);
2477 genConsumeReg(treeNode);
2480 #if !FEATURE_EH_FUNCLETS
2482 NYI_X86("GT_END_LFIN codegen");
2485 case GT_PINVOKE_PROLOG:
2486 noway_assert(((gcInfo.gcRegGCrefSetCur|gcInfo.gcRegByrefSetCur) & ~RBM_ARG_REGS) == 0);
2488 // the runtime side requires the codegen here to be consistent
2489 emit->emitDisableRandomNops();
2493 genPendingCallLabel = genCreateTempLabel();
2494 treeNode->gtLabel.gtLabBB = genPendingCallLabel;
2495 emit->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, genPendingCallLabel, treeNode->gtRegNum);
2499 genCodeForCpObj(treeNode->AsCpObj());
2504 GenTreeCpBlk* cpBlkOp = treeNode->AsCpBlk();
2505 if (cpBlkOp->gtBlkOpGcUnsafe)
2507 getEmitter()->emitDisableGC();
2510 switch (cpBlkOp->gtBlkOpKind)
2512 #ifdef _TARGET_AMD64_
2513 case GenTreeBlkOp::BlkOpKindHelper:
2514 genCodeForCpBlk(cpBlkOp);
2516 #endif // _TARGET_AMD64_
2517 case GenTreeBlkOp::BlkOpKindRepInstr:
2518 genCodeForCpBlkRepMovs(cpBlkOp);
2520 case GenTreeBlkOp::BlkOpKindUnroll:
2521 genCodeForCpBlkUnroll(cpBlkOp);
2526 if (cpBlkOp->gtBlkOpGcUnsafe)
2528 getEmitter()->emitEnableGC();
2535 GenTreeInitBlk* initBlkOp = treeNode->AsInitBlk();
2536 switch (initBlkOp->gtBlkOpKind)
2538 case GenTreeBlkOp::BlkOpKindHelper:
2539 genCodeForInitBlk(initBlkOp);
2541 case GenTreeBlkOp::BlkOpKindRepInstr:
2542 genCodeForInitBlkRepStos(initBlkOp);
2544 case GenTreeBlkOp::BlkOpKindUnroll:
2545 genCodeForInitBlkUnroll(initBlkOp);
2554 genJumpTable(treeNode);
2557 case GT_SWITCH_TABLE:
2558 genTableBasedSwitch(treeNode);
2562 genCodeForArrIndex(treeNode->AsArrIndex());
2566 genCodeForArrOffset(treeNode->AsArrOffs());
2569 case GT_CLS_VAR_ADDR:
2570 getEmitter()->emitIns_R_C(INS_lea, EA_PTRSIZE, targetReg, treeNode->gtClsVar.gtClsVarHnd, 0);
2571 genProduceReg(treeNode);
2578 sprintf(message, "Unimplemented node type %s\n", GenTree::NodeName(treeNode->OperGet()));
2580 assert(!"Unknown node in codegen");
2586 //------------------------------------------------------------------------
2587 // genStoreRegisterReturnInLclVar: This method handles storing a struct value returned in two registers to its
2588 // local home on the stack frame.
2591 // treeNode - the tree which should be homed in local frame stack location.
2594 // On System V AMD64 systems it returns true if this is a struct and the storing of the returned
2595 // register value has been handled; it returns false otherwise.
2596 // For all other targets returns false.
2599 CodeGen::genStoreRegisterReturnInLclVar(GenTreePtr treeNode)
2601 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
2602 if (varTypeIsStruct(treeNode))
2604 GenTreeLclVarCommon* lclVarPtr = treeNode->AsLclVarCommon();
2606 // TODO-Cleanup: It is not reasonable to assume that a local store of TYP_STRUCT is always a register return.
2607 // There can be local SIMD references that are NOT args or returns.
2608 // Furthermore, this means that there are contextual semantics for these nodes,
2609 // which is very undesirable.
2611 if (varTypeIsSIMD(treeNode))
2613 noway_assert(treeNode->OperIsLocalStore());
2614 if (treeNode->gtGetOp1()->OperGet() != GT_CALL)
2620 noway_assert(!treeNode->InReg());
2622 LclVarDsc * varDsc = &(compiler->lvaTable[lclVarPtr->gtLclNum]);
2624 CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
2625 assert(typeHnd != nullptr);
2626 SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
2627 compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc);
2629 assert(structDesc.passedInRegisters);
2631 // TODO-Amd64-Unix: Have Lubo Review this change
2632 // Test case JIT.opt.ETW.TailCallCases.TailCallCases has eightByteCount == 1
2633 // This occurs with a TYP_STRUCT that is 3 bytes in size
2634 // commenting out this assert results in correct codegen
2636 // assert(structDesc.eightByteCount == CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS);
2638 GenTreePtr op1 = treeNode->gtOp.gtOp1;
2639 genConsumeRegs(op1);
2641 regNumber retReg0 = REG_NA;
2642 regNumber retReg1 = REG_NA;
2644 unsigned __int8 offset0 = 0;
2645 unsigned __int8 offset1 = 0;
2647 var_types type0 = TYP_UNKNOWN;
2648 var_types type1 = TYP_UNKNOWN;
2650 getStructTypeOffset(structDesc, &type0, &type1, &offset0, &offset1);
2651 getStructReturnRegisters(type0, type1, &retReg0, &retReg1);
2653 assert(retReg0 != REG_NA && retReg1 != REG_NA);
2655 getEmitter()->emitIns_S_R(ins_Store(type0), emitTypeSize(type0), retReg0, lclVarPtr->gtLclNum, offset0);
2656 getEmitter()->emitIns_S_R(ins_Store(type1), emitTypeSize(type1), retReg1, lclVarPtr->gtLclNum, offset1);
2660 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
2665 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
2666 //------------------------------------------------------------------------
2667 // getStructReturnRegisters: Returns the return registers for the given struct types.
2670 // type0 - the type of the first eightbyte to be returned.
2671 // type1 - the type of the second eightbyte to be returned.
2672 // retRegPtr0 - returns the register for the first eightbyte.
2673 // retRegPtr1 - returns the register for the second eightbyte.
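// For example, a struct whose eightbytes classify as { INTEGER, SSE } returns its first
// eightbyte in RAX (REG_INTRET) and its second in XMM0 (REG_FLOATRET).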
2677 CodeGen::getStructReturnRegisters(var_types type0,
2679 regNumber* retRegPtr0,
2680 regNumber* retRegPtr1)
2682 *retRegPtr0 = REG_NA;
2683 *retRegPtr1 = REG_NA;
2685 bool firstIntUsed = false;
2686 bool firstFloatUsed = false;
2688 if (type0 != TYP_UNKNOWN)
2690 if (varTypeIsIntegralOrI(type0))
2692 *retRegPtr0 = REG_INTRET;
2693 firstIntUsed = true;
2695 else if (varTypeIsFloating(type0))
2697 *retRegPtr0 = REG_FLOATRET;
2698 firstFloatUsed = true;
2706 if (type1 != TYP_UNKNOWN)
2708 if (varTypeIsIntegralOrI(type1))
2712 *retRegPtr1 = REG_INTRET_1;
2716 *retRegPtr1 = REG_INTRET;
2719 else if (varTypeIsFloating(type1))
2723 *retRegPtr1 = REG_FLOATRET_1;
2727 *retRegPtr1 = REG_FLOATRET;
2736 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
2738 // Generate code for division (or mod) by a power of two
2739 // or a negative power of two (meaning -1 times a power of two, not 2^(-1)).
2740 // Op2 must be a contained integer constant.
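// For example, a signed 'x / 8' (with the dividend already sign-extended into RDX:RAX)
// is emitted roughly as:
//   and rdx, 7     ; 7 if x is negative, 0 otherwise
//   add rax, rdx   ; bias negative dividends toward zero
//   sar rax, 3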
2742 CodeGen::genCodeForPow2Div(GenTreeOp* tree)
2744 GenTree *dividend = tree->gtOp.gtOp1;
2745 GenTree *divisor = tree->gtOp.gtOp2;
2746 genTreeOps oper = tree->OperGet();
2747 emitAttr size = emitTypeSize(tree);
2748 emitter *emit = getEmitter();
2749 regNumber targetReg = tree->gtRegNum;
2750 var_types targetType = tree->TypeGet();
2752 bool isSigned = oper == GT_MOD || oper == GT_DIV;
2754 // precondition: the sign-extended dividend is in RDX:RAX,
2755 // which means RDX is either all zeros or all ones
2757 noway_assert(divisor->isContained());
2758 GenTreeIntConCommon* divImm = divisor->AsIntConCommon();
2759 ssize_t imm = divImm->IconValue();
2760 ssize_t abs_imm = abs(imm);
2761 noway_assert(isPow2(abs_imm));
2770 if (targetReg != REG_RAX)
2771 inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType);
2775 assert(oper == GT_MOD);
2776 instGen_Set_Reg_To_Zero(size, targetReg);
2786 emit->emitIns_R_I(INS_and, size, REG_RAX, 1); // result is 0 or 1
2787 // xor with rdx will flip all bits if negative
2788 emit->emitIns_R_R(INS_xor, size, REG_RAX, REG_RDX); // 111.11110 or 0
2792 assert(oper == GT_DIV);
2793 // add 1 if it's negative
2794 emit->emitIns_R_R(INS_sub, size, REG_RAX, REG_RDX);
2799 // add imm-1 if negative
2800 emit->emitIns_R_I(INS_and, size, REG_RDX, abs_imm - 1);
2801 emit->emitIns_R_R(INS_add, size, REG_RAX, REG_RDX);
2806 unsigned shiftAmount = genLog2(unsigned(abs_imm));
2807 inst_RV_SH(INS_sar, size, REG_RAX, shiftAmount);
2811 emit->emitIns_R(INS_neg, size, REG_RAX);
2816 assert(oper == GT_MOD);
2819 emit->emitIns_R_I(INS_and, size, REG_RAX, abs_imm - 1);
2821 // RDX contains 'abs_imm - 1' if the dividend is negative
2822 emit->emitIns_R_R(INS_sub, size, REG_RAX, REG_RDX);
2825 if (targetReg != REG_RAX)
2827 inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType);
2834 if (targetReg != dividend->gtRegNum)
2836 inst_RV_RV(INS_mov, targetReg, dividend->gtRegNum, targetType);
2839 if (oper == GT_UDIV)
2841 inst_RV_SH(INS_shr, size, targetReg, genLog2(unsigned(imm)));
2845 assert(oper == GT_UMOD);
2847 emit->emitIns_R_I(INS_and, size, targetReg, imm - 1);
2853 /***********************************************************************************************
2854 * Generate code for localloc
2857 CodeGen::genLclHeap(GenTreePtr tree)
2859 NYI_X86("Localloc");
2860 assert(tree->OperGet() == GT_LCLHEAP);
2862 GenTreePtr size = tree->gtOp.gtOp1;
2863 noway_assert((genActualType(size->gtType) == TYP_INT) || (genActualType(size->gtType) == TYP_I_IMPL));
2865 regNumber targetReg = tree->gtRegNum;
2866 regMaskTP tmpRegsMask = tree->gtRsvdRegs;
2867 regNumber regCnt = REG_NA;
2868 regNumber pspSymReg = REG_NA;
2869 var_types type = genActualType(size->gtType);
2870 emitAttr easz = emitTypeSize(type);
2871 BasicBlock* endLabel = nullptr;
2875 if (compiler->opts.compStackCheckOnRet)
2877 noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC && compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister && compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
2878 getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
2880 BasicBlock * esp_check = genCreateTempLabel();
2881 inst_JMP(genJumpKindForOper(GT_EQ, true), esp_check);
2882 getEmitter()->emitIns(INS_BREAKPOINT);
2883 genDefineTempLabel(esp_check);
2887 noway_assert(isFramePointerUsed()); // localloc requires Frame Pointer to be established since SP changes
2888 noway_assert(genStackLevel == 0); // Can't have anything on the stack
2890 // Whether method has PSPSym.
2892 unsigned stackAdjustment = 0;
2893 BasicBlock* loop = NULL;
2894 #if FEATURE_EH_FUNCLETS
2895 hasPspSym = (compiler->lvaPSPSym != BAD_VAR_NUM);
2900 // Compute the amount of memory to allocate, properly aligned to STACK_ALIGN.
2902 if (size->IsCnsIntOrI())
2904 // If size is a constant, then it must be contained.
2905 assert(size->isContained());
2907 // If amount is zero then return null in targetReg
2908 amount = size->gtIntCon.gtIconVal;
2911 instGen_Set_Reg_To_Zero(EA_PTRSIZE, targetReg);
2915 // 'amount' is the total number of bytes to localloc, rounded up to STACK_ALIGN
2916 amount = AlignUp(amount, STACK_ALIGN);
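// e.g. with STACK_ALIGN == 16, a request for 24 bytes is rounded up to 32.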
2920 // If the size is 0, bail out by returning null in targetReg
2921 genConsumeRegAndCopy(size, targetReg);
2922 endLabel = genCreateTempLabel();
2923 getEmitter()->emitIns_R_R(INS_test, easz, targetReg, targetReg);
2924 inst_JMP(EJ_je, endLabel);
2926 // Compute the size of the block to allocate and perform alignment.
2927 // If the method has no PSPSym and compInitMem=true, we can reuse targetReg as regcnt,
2928 // since we don't need any internal registers.
2929 if (!hasPspSym && compiler->info.compInitMem)
2931 assert(genCountBits(tmpRegsMask) == 0);
2936 assert(genCountBits(tmpRegsMask) >= 1);
2937 regMaskTP regCntMask = genFindLowestBit(tmpRegsMask);
2938 tmpRegsMask &= ~regCntMask;
2939 regCnt = genRegNumFromMask(regCntMask);
2940 if (regCnt != targetReg)
2941 inst_RV_RV(INS_mov, regCnt, targetReg, size->TypeGet());
2944 // Align to STACK_ALIGN
2945 // regCnt will be the total number of bytes to localloc
2946 inst_RV_IV(INS_add, regCnt, (STACK_ALIGN - 1), emitActualTypeSize(type));
2947 inst_RV_IV(INS_AND, regCnt, ~(STACK_ALIGN - 1), emitActualTypeSize(type));
2950 #if FEATURE_EH_FUNCLETS
2951 // If we have a PSPSym, we need to relocate it after the localloc.
2954 stackAdjustment += STACK_ALIGN;
2956 // Save a copy of PSPSym
2957 assert(genCountBits(tmpRegsMask) >= 1);
2958 regMaskTP pspSymRegMask = genFindLowestBit(tmpRegsMask);
2959 tmpRegsMask &= ~pspSymRegMask;
2960 pspSymReg = genRegNumFromMask(pspSymRegMask);
2961 getEmitter()->emitIns_R_S(ins_Store(TYP_I_IMPL), EA_PTRSIZE, pspSymReg, compiler->lvaPSPSym, 0);
2966 #if FEATURE_FIXED_OUT_ARGS
2967 // If we have an outgoing arg area then we must adjust the SP by popping off the
2968 // outgoing arg area. We will restore it right before we return from this method.
2970 // Localloc is supposed to return stack space that is STACK_ALIGN'ed. The following
2971 // are the cases that needs to be handled:
2972 // i) Method has PSPSym + out-going arg area.
2973 // It is guaranteed that size of out-going arg area is STACK_ALIGNED (see fgMorphArgs).
2974 //     Therefore, we will pop the out-going arg area off RSP before locallocating.
2975 // We need to add padding to ensure RSP is STACK_ALIGN'ed while re-locating PSPSym + arg area.
2976 // ii) Method has no PSPSym but out-going arg area.
2977 // Almost same case as above without the requirement to pad for the final RSP to be STACK_ALIGN'ed.
2978 // iii) Method has PSPSym but no out-going arg area.
2979 //     Nothing to pop off the stack, but PSPSym needs to be relocated with SP padded.
2980 // iv) Method has neither PSPSym nor out-going arg area.
2981 //     Nothing needs to be popped off the stack nor relocated.
2982 if (compiler->lvaOutgoingArgSpaceSize > 0)
2984 assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) == 0); // This must be true for the stack to remain aligned
2985 inst_RV_IV(INS_add, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
2986 stackAdjustment += compiler->lvaOutgoingArgSpaceSize;
2989 if (size->IsCnsIntOrI())
2991 // We should reach here only for non-zero, constant size allocations.
2994 // For small allocations we will generate up to six 'push 0' instructions inline
2995 size_t cntPtrSizedWords = (amount >> STACK_ALIGN_SHIFT);
2996 if (cntPtrSizedWords <= 6)
2998 while (cntPtrSizedWords != 0)
3000 // push_hide means don't track the stack
3001 inst_IV(INS_push_hide, 0);
3007 else if (!compiler->info.compInitMem && (amount < CORINFO_PAGE_SIZE)) // must be < not <=
3009 // Since the size is a page or less, simply adjust ESP
3010 // ESP might already be in the guard page, must touch it BEFORE
3011 // the alloc, not after.
3012 getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
3013 inst_RV_IV(INS_sub, REG_SPBASE, amount, EA_PTRSIZE);
3017 // else, "mov regCnt, amount"
3018 // If the method has no PSPSym and compInitMem=true, we can reuse targetReg as regcnt.
3019 // Since size is a constant, regCnt is not yet initialized.
3020 assert(regCnt == REG_NA);
3021 if (!hasPspSym && compiler->info.compInitMem)
3023 assert(genCountBits(tmpRegsMask) == 0);
3028 assert(genCountBits(tmpRegsMask) >= 1);
3029 regMaskTP regCntMask = genFindLowestBit(tmpRegsMask);
3030 tmpRegsMask &= ~regCntMask;
3031 regCnt = genRegNumFromMask(regCntMask);
3033 genSetRegToIcon(regCnt, amount, ((int)amount == amount)? TYP_INT : TYP_LONG);
3036 loop = genCreateTempLabel();
3037 if (compiler->info.compInitMem)
3039 // At this point 'regCnt' is set to the total number of bytes to locAlloc.
3040 // Since we have to zero out the allocated memory AND ensure that RSP is always valid
3041 // by tickling the pages, we will just push 0's on the stack.
3043 // Note: regCnt is guaranteed to be even on Amd64 since STACK_ALIGN/TARGET_POINTER_SIZE = 2
3044 // and localloc size is a multiple of STACK_ALIGN.
3047 genDefineTempLabel(loop);
3049 // dec is a 2 byte instruction, but sub is 4 (could be 3 if
3050 // we knew the size were TYP_INT instead of TYP_I_IMPL).
3051 // Also, we know that we can only push 8 bytes at a time, but
3052 // the alignment is 16 bytes, so we can push twice and do a sub
3053 // for just a little bit of loop unrolling.
3054 inst_IV(INS_push_hide, 0); // --- push 0
3055 inst_IV(INS_push_hide, 0); // --- push 0
3057 // If not done, loop
3058 // Note that regCnt is the number of bytes to stack allocate.
3059 // Therefore we need to subtract 16 from regcnt here.
3060 assert(genIsValidIntReg(regCnt));
3061 inst_RV_IV(INS_sub, regCnt, 16, emitActualTypeSize(type));
3062 inst_JMP(EJ_jne, loop);
3066 // At this point 'regCnt' is set to the total number of bytes to locAlloc.
3068 // We don't need to zero out the allocated memory. However, we do have
3069 // to tickle the pages to ensure that ESP is always valid and is
3070 // in sync with the "stack guard page". Note that in the worst
3071 // case ESP is on the last byte of the guard page. Thus you must
3072 // touch ESP+0 first not ESP+0x1000.
3074 // Another subtlety is that you don't want ESP to be exactly on the
3075 // boundary of the guard page because PUSH is predecrement, thus
3076 // call setup would not touch the guard page but just beyond it.
3078 // Note that we go through a few hoops so that ESP never points to
3079 // illegal pages at any time during the tickling process.
3082 // add REGCNT, ESP // reg now holds ultimate ESP
3083 // jb loop // result is smaller than original ESP (no wrap around)
3084 // xor REGCNT, REGCNT, // Overflow, pick lowest possible number
3086 // test ESP, [ESP+0] // tickle the page
3088 // sub REGTMP, PAGE_SIZE
3095 inst_RV(INS_NEG, regCnt, TYP_I_IMPL);
3096 inst_RV_RV(INS_add, regCnt, REG_SPBASE, TYP_I_IMPL);
3097 inst_JMP(EJ_jb, loop);
3099 instGen_Set_Reg_To_Zero(EA_PTRSIZE, regCnt);
3101 genDefineTempLabel(loop);
3103 // Tickle the decremented value, and move it back to ESP.
3104 // Note that this has to be done BEFORE the update of ESP since
3105 // ESP might already be on the guard page. It is OK to leave
3106 // the final value of ESP on the guard page.
3107 getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
3109 // This is a harmless trick to avoid the emitter trying to track the
3110 // decrement of the ESP - we do the subtraction in another reg instead
3111 // of adjusting ESP directly.
3112 assert(tmpRegsMask != RBM_NONE);
3113 assert(genCountBits(tmpRegsMask) == 1);
3114 regNumber regTmp = genRegNumFromMask(tmpRegsMask);
3116 inst_RV_RV(INS_mov, regTmp, REG_SPBASE, TYP_I_IMPL);
3117 inst_RV_IV(INS_sub, regTmp, CORINFO_PAGE_SIZE, EA_PTRSIZE);
3118 inst_RV_RV(INS_mov, REG_SPBASE, regTmp, TYP_I_IMPL);
3120 inst_RV_RV(INS_cmp, REG_SPBASE, regCnt, TYP_I_IMPL);
3121 inst_JMP(EJ_jae, loop);
3123 // Move the final value to ESP
3124 inst_RV_RV(INS_mov, REG_SPBASE, regCnt);
3128 // Re-adjust SP to allocate PSPSym and out-going arg area
3129 if (stackAdjustment > 0)
3131 assert((stackAdjustment % STACK_ALIGN) == 0); // This must be true for the stack to remain aligned
3132 inst_RV_IV(INS_sub, REG_SPBASE, stackAdjustment, EA_PTRSIZE);
3134 #if FEATURE_EH_FUNCLETS
3135 // Write PSPSym to its new location.
3138 assert(genIsValidIntReg(pspSymReg));
3139 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, pspSymReg, compiler->lvaPSPSym, 0);
3144 // Return the stackalloc'ed address in result register.
3145 // TargetReg = RSP + stackAdjustment.
3146 getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, targetReg, REG_SPBASE, stackAdjustment);
3149 if (endLabel != nullptr)
3150 genDefineTempLabel(endLabel);
3152 // Write the lvaShadowSPfirst stack frame slot
3153 noway_assert(compiler->lvaLocAllocSPvar != BAD_VAR_NUM);
3154 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaLocAllocSPvar, 0);
3157 if (compiler->opts.compNeedStackProbes)
3159 genGenerateStackProbe();
3165 if (compiler->opts.compStackCheckOnRet)
3167 noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC && compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister && compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
3168 getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
3172 genProduceReg(tree);
3175 // Generate code for InitBlk using rep stos.
3177 // The size of the buffer must be a constant and also less than INITBLK_STOS_LIMIT bytes.
3178 // For any size larger than that, we'll use the helper even if both the
3179 // fill byte and the size are integer constants.
3180 void CodeGen::genCodeForInitBlkRepStos(GenTreeInitBlk* initBlkNode)
3182 // Make sure we got the arguments of the initblk/initobj operation in the right registers
3183 GenTreePtr blockSize = initBlkNode->Size();
3184 GenTreePtr dstAddr = initBlkNode->Dest();
3185 GenTreePtr initVal = initBlkNode->InitVal();
3188 assert(!dstAddr->isContained());
3189 assert(!initVal->isContained());
3190 assert(!blockSize->isContained());
3192 #ifdef _TARGET_AMD64_
3193 assert(blockSize->gtSkipReloadOrCopy()->IsCnsIntOrI());
3197 if (blockSize->gtSkipReloadOrCopy()->IsCnsIntOrI())
3200 size_t size = blockSize->gtIntCon.gtIconVal;
3201 if (initVal->IsCnsIntOrI())
3203 #ifdef _TARGET_AMD64_
3204 assert(size > CPBLK_UNROLL_LIMIT && size < CPBLK_MOVS_LIMIT);
3206 assert(size > CPBLK_UNROLL_LIMIT);
3213 genConsumeBlockOp(initBlkNode, REG_RDI, REG_RAX, REG_RCX);
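// rep stosb stores the byte in AL to [RDI], RCX times, incrementing RDI after each store.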
3214 instGen(INS_r_stosb);
3217 // Generate code for InitBlk by performing a loop unroll
3219 // a) Both the size and fill byte value are integer constants.
3220 // b) The size of the struct to initialize is smaller than INITBLK_UNROLL_LIMIT bytes.
3222 void CodeGen::genCodeForInitBlkUnroll(GenTreeInitBlk* initBlkNode)
3224 // Make sure we got the arguments of the initblk/initobj operation in the right registers
3225 GenTreePtr blockSize = initBlkNode->Size();
3226 GenTreePtr dstAddr = initBlkNode->Dest();
3227 GenTreePtr initVal = initBlkNode->InitVal();
3230 assert(!dstAddr->isContained());
3231 assert(!initVal->isContained());
3232 assert(blockSize->isContained());
3234 assert(blockSize->IsCnsIntOrI());
3237 size_t size = blockSize->gtIntCon.gtIconVal;
3239 assert(size <= INITBLK_UNROLL_LIMIT);
3240 assert(initVal->gtSkipReloadOrCopy()->IsCnsIntOrI());
3242 emitter *emit = getEmitter();
3244 genConsumeOperands(initBlkNode->gtGetOp1()->AsOp());
3246 // If the initVal was moved, or spilled and reloaded to a different register,
3247 // get the original initVal from below the GT_RELOAD, but only after capturing the valReg,
3248 // which needs to be the new register.
3249 regNumber valReg = initVal->gtRegNum;
3250 initVal = initVal->gtSkipReloadOrCopy();
3252 unsigned offset = 0;
3254 // Perform an unroll using SSE2 loads and stores.
3255 if (size >= XMM_REGSIZE_BYTES)
3257 regNumber tmpReg = genRegNumFromMask(initBlkNode->gtRsvdRegs);
3260 assert(initBlkNode->gtRsvdRegs != RBM_NONE);
3261 assert(genCountBits(initBlkNode->gtRsvdRegs) == 1);
3262 assert(genIsValidFloatReg(tmpReg));
3265 if (initVal->gtIntCon.gtIconVal != 0)
3267 emit->emitIns_R_R(INS_mov_i2xmm, EA_8BYTE, tmpReg, valReg);
3268 emit->emitIns_R_R(INS_punpckldq, EA_8BYTE, tmpReg, tmpReg);
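// punpckldq with itself interleaves the low two dwords (xmm = {d0, d0, d1, d1});
// since the fill pattern repeats in every dword, all 16 bytes now hold the fill value.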
3272 emit->emitIns_R_R(INS_xorpd, EA_8BYTE, tmpReg, tmpReg);
3275 // Determine how many 16 byte slots we're going to fill using SSE movs.
3276 size_t slots = size / XMM_REGSIZE_BYTES;
3280 emit->emitIns_AR_R(INS_movdqu, EA_8BYTE, tmpReg, dstAddr->gtRegNum, offset);
3281 offset += XMM_REGSIZE_BYTES;
3285 // Fill the remainder (or a < 16 byte sized struct)
3286 if ((size & 8) != 0)
3289 // TODO-X86-CQ: [1091735] Revisit block ops codegen. One example: use movq for 8 byte movs.
3290 emit->emitIns_AR_R(INS_mov, EA_4BYTE, valReg, dstAddr->gtRegNum, offset);
3292 emit->emitIns_AR_R(INS_mov, EA_4BYTE, valReg, dstAddr->gtRegNum, offset);
3294 #else // !_TARGET_X86_
3295 emit->emitIns_AR_R(INS_mov, EA_8BYTE, valReg, dstAddr->gtRegNum, offset);
3297 #endif // !_TARGET_X86_
3299 if ((size & 4) != 0)
3301 emit->emitIns_AR_R(INS_mov, EA_4BYTE, valReg, dstAddr->gtRegNum, offset);
3304 if ((size & 2) != 0)
3306 emit->emitIns_AR_R(INS_mov, EA_2BYTE, valReg, dstAddr->gtRegNum, offset);
3309 if ((size & 1) != 0)
3311 emit->emitIns_AR_R(INS_mov, EA_1BYTE, valReg, dstAddr->gtRegNum, offset);
3315 // Generates code for InitBlk by calling the VM memset helper function.
3317 // a) The size argument of the InitBlk is not an integer constant.
3318 // b) The size argument of the InitBlk is >= INITBLK_STOS_LIMIT bytes.
3319 void CodeGen::genCodeForInitBlk(GenTreeInitBlk* initBlkNode)
3321 #ifdef _TARGET_AMD64_
3322 // Make sure we got the arguments of the initblk operation in the right registers
3323 GenTreePtr blockSize = initBlkNode->Size();
3324 GenTreePtr dstAddr = initBlkNode->Dest();
3325 GenTreePtr initVal = initBlkNode->InitVal();
3328 assert(!dstAddr->isContained());
3329 assert(!initVal->isContained());
3330 assert(!blockSize->isContained());
3332 if (blockSize->IsCnsIntOrI())
3334 assert(blockSize->gtIntCon.gtIconVal >= INITBLK_STOS_LIMIT);
3338 genConsumeBlockOp(initBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2);
3340 genEmitHelperCall(CORINFO_HELP_MEMSET, 0, EA_UNKNOWN);
3341 #else // !_TARGET_AMD64_
3342 NYI_X86("Helper call for InitBlk");
3343 #endif // !_TARGET_AMD64_
3347 // Generate code for a load from some address + offset
3348 // baseNode: tree node which can be either a local address or arbitrary node
3349 // offset: distance from the baseNode from which to load
3350 void CodeGen::genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst, GenTree* baseNode, unsigned offset)
3352 emitter *emit = getEmitter();
3354 if (baseNode->OperIsLocalAddr())
3356 if (baseNode->gtOper == GT_LCL_FLD_ADDR)
3358 offset += baseNode->gtLclFld.gtLclOffs;
3360 emit->emitIns_R_S(ins, size, dst, baseNode->gtLclVarCommon.gtLclNum, offset);
3364 emit->emitIns_R_AR(ins, size, dst, baseNode->gtRegNum, offset);
3368 //------------------------------------------------------------------------
3369 // genCodeForStoreOffset: Generate code to store a reg to [base + offset].
3372 // ins - the instruction to generate.
3373 // size - the size that needs to be stored.
3374 // src - the register which needs to be stored.
3375 // baseNode - the base, relative to which to store the src register.
3376 // offset - the offset that is added to the baseNode to calculate the address to store into.
3378 void CodeGen::genCodeForStoreOffset(instruction ins, emitAttr size, regNumber src, GenTree* baseNode, unsigned offset)
3380 emitter *emit = getEmitter();
3382 if (baseNode->OperIsLocalAddr())
3384 if (baseNode->gtOper == GT_LCL_FLD_ADDR)
3386 offset += baseNode->gtLclFld.gtLclOffs;
3389 emit->emitIns_S_R(ins, size, src, baseNode->AsLclVarCommon()->GetLclNum(), offset);
3393 emit->emitIns_AR_R(ins, size, src, baseNode->gtRegNum, offset);
3398 // Generates CpBlk code by performing a loop unroll
3400 // The size argument of the CpBlk node is a constant and <= 64 bytes.
3401 // This may seem small but covers >95% of the cases in several framework assemblies.
3403 void CodeGen::genCodeForCpBlkUnroll(GenTreeCpBlk* cpBlkNode)
3405 // Make sure we got the arguments of the cpblk operation in the right registers
3406 GenTreePtr blockSize = cpBlkNode->Size();
3407 GenTreePtr dstAddr = cpBlkNode->Dest();
3408 GenTreePtr srcAddr = cpBlkNode->Source();
3410 assert(blockSize->IsCnsIntOrI());
3411 size_t size = blockSize->gtIntCon.gtIconVal;
3412 assert(size <= CPBLK_UNROLL_LIMIT);
3414 emitter *emit = getEmitter();
3416 if (!srcAddr->isContained())
3417 genConsumeReg(srcAddr);
3419 if (!dstAddr->isContained())
3420 genConsumeReg(dstAddr);
3422 unsigned offset = 0;
3424 // If the size of this struct is larger than 16 bytes
3425 // let's use SSE2 to be able to do 16 byte at a time
3426 // loads and stores.
3428 if (size >= XMM_REGSIZE_BYTES)
3430 assert(cpBlkNode->gtRsvdRegs != RBM_NONE);
3431 regNumber xmmReg = genRegNumFromMask(cpBlkNode->gtRsvdRegs & RBM_ALLFLOAT);
3432 assert(genIsValidFloatReg(xmmReg));
3433 size_t slots = size / XMM_REGSIZE_BYTES;
3435 // TODO: In the below code the load and store instructions are for 16 bytes, but the
3436 // type is EA_8BYTE. The movdqa/u are 16 byte instructions, so it works, but
3437 // this probably needs to be changed.
3441 genCodeForLoadOffset(INS_movdqu, EA_8BYTE, xmmReg, srcAddr, offset);
3443 genCodeForStoreOffset(INS_movdqu, EA_8BYTE, xmmReg, dstAddr, offset);
3444 offset += XMM_REGSIZE_BYTES;
3448 // Fill the remainder (15 bytes or less) if there's one.
3449 if ((size & 0xf) != 0)
3451 // Grab the integer temp register to emit the remaining loads and stores.
3452 regNumber tmpReg = genRegNumFromMask(cpBlkNode->gtRsvdRegs & RBM_ALLINT);
3454 if ((size & 8) != 0)
3457 // TODO-X86-CQ: [1091735] Revisit block ops codegen. One example: use movq for 8 byte movs.
3458 for (unsigned savedOffs = offset; offset < savedOffs + 8; offset += 4)
3460 genCodeForLoadOffset(INS_mov, EA_4BYTE, tmpReg, srcAddr, offset);
3461 genCodeForStoreOffset(INS_mov, EA_4BYTE, tmpReg, dstAddr, offset);
3463 #else // !_TARGET_X86_
3464 genCodeForLoadOffset(INS_mov, EA_8BYTE, tmpReg, srcAddr, offset);
3465 genCodeForStoreOffset(INS_mov, EA_8BYTE, tmpReg, dstAddr, offset);
3467 #endif // !_TARGET_X86_
3469 if ((size & 4) != 0)
3471 genCodeForLoadOffset(INS_mov, EA_4BYTE, tmpReg, srcAddr, offset);
3472 genCodeForStoreOffset(INS_mov, EA_4BYTE, tmpReg, dstAddr, offset);
3475 if ((size & 2) != 0)
3477 genCodeForLoadOffset(INS_mov, EA_2BYTE, tmpReg, srcAddr, offset);
3478 genCodeForStoreOffset(INS_mov, EA_2BYTE, tmpReg, dstAddr, offset);
3481 if ((size & 1) != 0)
3483 genCodeForLoadOffset(INS_mov, EA_1BYTE, tmpReg, srcAddr, offset);
3484 genCodeForStoreOffset(INS_mov, EA_1BYTE, tmpReg, dstAddr, offset);
3489 // Generate code for CpBlk by using rep movs
3491 // The size argument of the CpBlk is a constant and is between
3492 // CPBLK_UNROLL_LIMIT and CPBLK_MOVS_LIMIT bytes.
3493 void CodeGen::genCodeForCpBlkRepMovs(GenTreeCpBlk* cpBlkNode)
3495 // Make sure we got the arguments of the cpblk operation in the right registers
3496 GenTreePtr blockSize = cpBlkNode->Size();
3497 GenTreePtr dstAddr = cpBlkNode->Dest();
3498 GenTreePtr srcAddr = cpBlkNode->Source();
3501 assert(!dstAddr->isContained());
3502 assert(!srcAddr->isContained());
3503 assert(!blockSize->isContained());
3506 #ifdef _TARGET_AMD64_
3507 assert(blockSize->IsCnsIntOrI());
3511 if (blockSize->IsCnsIntOrI())
3514 size_t size = blockSize->gtIntCon.gtIconVal;
3517 assert(size > CPBLK_UNROLL_LIMIT && size < CPBLK_MOVS_LIMIT);
3519 assert(size > CPBLK_UNROLL_LIMIT);
3524 genConsumeBlockOp(cpBlkNode, REG_RDI, REG_RSI, REG_RCX);
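// rep movsb copies RCX bytes from [RSI] to [RDI], advancing both pointers as it goes.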
3525 instGen(INS_r_movsb);
3528 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
3530 //---------------------------------------------------------------------------------------------------------------//
3531 // genStructPutArgUnroll: Generates code for passing a struct arg on stack by value using loop unrolling.
3534 // putArgNode - the PutArgStk tree.
3535 // baseVarNum - the base var number, relative to which the by-val struct will be copied on the stack.
3537 // TODO-Amd64-Unix: Try to share code with copyblk.
3538 // Need refactoring of copyblk before it could be used for putarg_stk.
3539 // The difference for now is that a putarg_stk contains its children, while cpblk does not.
3540 // This creates differences in code. After some significant refactoring it could be reused.
3542 void CodeGen::genStructPutArgUnroll(GenTreePutArgStk* putArgNode, unsigned baseVarNum)
3544 // We will never call this method for SIMD types, which are stored directly
3545 // in genPutStructArgStk().
3546 noway_assert(putArgNode->TypeGet() == TYP_STRUCT);
3548 // Make sure we got the arguments of the cpblk operation in the right registers
3549 GenTreePtr dstAddr = putArgNode;
3550 GenTreePtr src = putArgNode->gtOp.gtOp1;
3552 size_t size = putArgNode->getArgSize();
3553 assert(size <= CPBLK_UNROLL_LIMIT);
3555 emitter *emit = getEmitter();
3556 unsigned putArgOffset = putArgNode->getArgOffset();
3558 assert(src->isContained());
3560 assert(src->gtOper == GT_LDOBJ);
3562 if (!src->gtOp.gtOp1->isContained())
3564 genConsumeReg(src->gtOp.gtOp1);
3567 unsigned offset = 0;
3569 // If the size of this struct is larger than 16 bytes
3570 // let's use SSE2 to be able to do 16 byte at a time
3571 // loads and stores.
3572 if (size >= XMM_REGSIZE_BYTES)
3574 assert(putArgNode->gtRsvdRegs != RBM_NONE);
3575 regNumber xmmReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLFLOAT);
3576 assert(genIsValidFloatReg(xmmReg));
3577 size_t slots = size / XMM_REGSIZE_BYTES;
3579 assert(putArgNode->gtGetOp1()->isContained());
3580 assert(putArgNode->gtGetOp1()->gtOp.gtOper == GT_LDOBJ);
3582 // TODO: In the below code the load and store instructions are for 16 bytes, but the
3583 // type is EA_8BYTE. The movdqa/u are 16 byte instructions, so it works, but
3584 // this probably needs to be changed.
3588 genCodeForLoadOffset(INS_movdqu, EA_8BYTE, xmmReg, src->gtGetOp1(), offset); // Load 16 bytes at 'offset' from the address held by the child of the GT_LDOBJ node.
3591 emit->emitIns_S_R(INS_movdqu,
3595 putArgOffset + offset);
3597 offset += XMM_REGSIZE_BYTES;
3601 // Fill the remainder (15 bytes or less) if there's one.
3602 if ((size & 0xf) != 0)
3604 // Grab the integer temp register to emit the remaining loads and stores.
3605 regNumber tmpReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLINT);
3606 assert(genIsValidIntReg(tmpReg));
3608 if ((size & 8) != 0)
3610 genCodeForLoadOffset(INS_mov, EA_8BYTE, tmpReg, src->gtOp.gtOp1, offset);
3612 emit->emitIns_S_R(INS_mov,
3616 putArgOffset + offset);
3621 if ((size & 4) != 0)
3623 genCodeForLoadOffset(INS_mov, EA_4BYTE, tmpReg, src->gtOp.gtOp1, offset);
3625 emit->emitIns_S_R(INS_mov,
3629 putArgOffset + offset);
3634 if ((size & 2) != 0)
3636 genCodeForLoadOffset(INS_mov, EA_2BYTE, tmpReg, src->gtOp.gtOp1, offset);
3638 emit->emitIns_S_R(INS_mov,
3642 putArgOffset + offset);
3647 if ((size & 1) != 0)
3649 genCodeForLoadOffset(INS_mov, EA_1BYTE, tmpReg, src->gtOp.gtOp1, offset);
3650 emit->emitIns_S_R(INS_mov,
3654 putArgOffset + offset);
3659 //------------------------------------------------------------------------
3660 // genStructPutArgRepMovs: Generates code for passing a struct arg by value on stack using Rep Movs.
3663 // putArgNode - the PutArgStk tree.
3664 // baseVarNum - the base var number, relative to which the by-val struct bits will go.
3667 // The size argument of the PutArgStk (for structs) is a constant and is between
3668 // CPBLK_UNROLL_LIMIT and CPBLK_MOVS_LIMIT bytes.
3670 void CodeGen::genStructPutArgRepMovs(GenTreePutArgStk* putArgNode, unsigned baseVarNum)
3672 assert(putArgNode->TypeGet() == TYP_STRUCT);
3673 assert(putArgNode->getArgSize() > CPBLK_UNROLL_LIMIT);
3674 assert(baseVarNum != BAD_VAR_NUM);
3676 // Make sure we got the arguments of the cpblk operation in the right registers
3677 GenTreePtr dstAddr = putArgNode;
3678 GenTreePtr srcAddr = putArgNode->gtGetOp1();
3681 assert(putArgNode->gtRsvdRegs == (RBM_RDI | RBM_RCX | RBM_RSI));
3682 assert(srcAddr->isContained());
3684 genConsumePutStructArgStk(putArgNode, REG_RDI, REG_RSI, REG_RCX, baseVarNum);
3685 instGen(INS_r_movsb);
3687 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3689 // Generate code for CpObj nodes which copy structs that have interleaved GC pointers.
3691 // This will generate a sequence of movsq instructions for the cases of non-gc members
3692 // and calls to the BY_REF_ASSIGN helper otherwise.
3693 void CodeGen::genCodeForCpObj(GenTreeCpObj* cpObjNode)
3695 // Make sure we got the arguments of the cpobj operation in the right registers
3696 GenTreePtr clsTok = cpObjNode->ClsTok();
3697 GenTreePtr dstAddr = cpObjNode->Dest();
3698 GenTreePtr srcAddr = cpObjNode->Source();
3700 bool dstOnStack = dstAddr->OperIsLocalAddr();
3703 bool isRepMovsqUsed = false;
3705 assert(!dstAddr->isContained());
3706 assert(!srcAddr->isContained());
3708 // If the GenTree node has data about GC pointers, this means we're dealing
3709 // with CpObj, so this requires special logic.
3710 assert(cpObjNode->gtGcPtrCount > 0);
3712 // The movsq instruction is used for copying non-gcref fields and it needs
3713 // src = RSI and dst = RDI.
3714 // Either these registers must not contain lclVars, or they must be dying or marked for spill.
3715 // This is because these registers are incremented as we go through the struct.
3716 GenTree* actualSrcAddr = srcAddr->gtSkipReloadOrCopy();
3717 GenTree* actualDstAddr = dstAddr->gtSkipReloadOrCopy();
3718 unsigned srcLclVarNum = BAD_VAR_NUM;
3719 unsigned dstLclVarNum = BAD_VAR_NUM;
3720 bool isSrcAddrLiveOut = false;
3721 bool isDstAddrLiveOut = false;
3722 if (genIsRegCandidateLocal(actualSrcAddr))
3724 srcLclVarNum = actualSrcAddr->AsLclVarCommon()->gtLclNum;
3725 isSrcAddrLiveOut = ((actualSrcAddr->gtFlags & (GTF_VAR_DEATH | GTF_SPILL)) == 0);
3727 if (genIsRegCandidateLocal(actualDstAddr))
3729 dstLclVarNum = actualDstAddr->AsLclVarCommon()->gtLclNum;
3730 isDstAddrLiveOut = ((actualDstAddr->gtFlags & (GTF_VAR_DEATH | GTF_SPILL)) == 0);
3732 assert((actualSrcAddr->gtRegNum != REG_RSI) ||
3733 !isSrcAddrLiveOut ||
3734 ((srcLclVarNum == dstLclVarNum) && !isDstAddrLiveOut));
3735 assert((actualDstAddr->gtRegNum != REG_RDI) ||
3736 !isDstAddrLiveOut ||
3737 ((srcLclVarNum == dstLclVarNum) && !isSrcAddrLiveOut));
3740 // Consume these registers.
3741 // They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing").
3742 genConsumeBlockOp(cpObjNode, REG_RDI, REG_RSI, REG_NA);
3743 gcInfo.gcMarkRegPtrVal(REG_RSI, srcAddr->TypeGet());
3744 gcInfo.gcMarkRegPtrVal(REG_RDI, dstAddr->TypeGet());
3746 unsigned slots = cpObjNode->gtSlots;
3748 // If we can prove it's on the stack we don't need to use the write barrier.
3751 if (slots >= CPOBJ_NONGC_SLOTS_LIMIT)
3754 // If the destination of the CpObj is on the stack
3755 // make sure we allocated RCX to emit rep movsq.
3756 regNumber tmpReg = genRegNumFromMask(cpObjNode->gtRsvdRegs & RBM_ALLINT);
3757 assert(tmpReg == REG_RCX);
3758 isRepMovsqUsed = true;
3761 getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, slots);
3762 instGen(INS_r_movsq);
3766 // For small structs, it's better to emit a sequence of movsq instructions
3767 // than to emit a single rep movsq instruction.
3777 BYTE* gcPtrs = cpObjNode->gtGcPtrs;
3778 unsigned gcPtrCount = cpObjNode->gtGcPtrCount;
3786 // Let's see if we can use rep movsq instead of a sequence of movsq instructions
3787 // to save cycles and code size.
3789 unsigned nonGcSlotCount = 0;
3795 } while (i < slots && gcPtrs[i] == TYPE_GC_NONE);
3797 // If we have a very small contiguous non-gc region, it's better just to
3798 // emit a sequence of movsq instructions
3799 if (nonGcSlotCount < CPOBJ_NONGC_SLOTS_LIMIT)
3801 while (nonGcSlotCount > 0)
3810 // Otherwise, we can save code-size and improve CQ by emitting a rep movsq.
3812 regNumber tmpReg = genRegNumFromMask(cpObjNode->gtRsvdRegs & RBM_ALLINT);
3813 assert(tmpReg == REG_RCX);
3814 isRepMovsqUsed = true;
3816 getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, nonGcSlotCount);
3817 instGen(INS_r_movsq);
3822 // We have a GC pointer; call the byref assignment write barrier helper.
3823 genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE);
3829 if (!isRepMovsqUsed)
3831 assert(clsTok->isContained());
3833 assert(gcPtrCount == 0);
3837 // Clear the gcInfo for RSI and RDI.
3838 // While we normally update GC info prior to the last instruction that uses them,
3839 // these actually live into the helper call.
3840 gcInfo.gcMarkRegSetNpt(RBM_RSI);
3841 gcInfo.gcMarkRegSetNpt(RBM_RDI);
3844 // Generate code for a CpBlk node by means of the VM memcpy helper call
3846 // a) The size argument of the CpBlk is not an integer constant
3847 // b) The size argument is a constant but is larger than CPBLK_MOVS_LIMIT bytes.
3848 void CodeGen::genCodeForCpBlk(GenTreeCpBlk* cpBlkNode)
3850 #ifdef _TARGET_AMD64_
3851 // Make sure we got the arguments of the cpblk operation in the right registers
3852 GenTreePtr blockSize = cpBlkNode->Size();
3853 GenTreePtr dstAddr = cpBlkNode->Dest();
3854 GenTreePtr srcAddr = cpBlkNode->Source();
3856 assert(!dstAddr->isContained());
3857 assert(!srcAddr->isContained());
3858 assert(!blockSize->isContained());
3861 if (blockSize->IsCnsIntOrI())
3863 assert(blockSize->gtIntCon.gtIconVal >= CPBLK_MOVS_LIMIT);
3867 genConsumeBlockOp(cpBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2);
3869 genEmitHelperCall(CORINFO_HELP_MEMCPY, 0, EA_UNKNOWN);
3870 #else // !_TARGET_AMD64_
3871 noway_assert(false && "Helper call for CpBlk is not needed.");
3872 #endif // !_TARGET_AMD64_
3875 // generate code to do a switch statement based on a table of ip-relative offsets
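// The emitted sequence is roughly:
//   mov  baseReg, dword ptr [baseReg + 4*idxReg] ; load the 32-bit offset from the table
//   lea  tmpReg, [start of fgFirstBB]            ; the base the offsets are relative to
//   add  baseReg, tmpReg
//   jmp  baseReg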
3877 CodeGen::genTableBasedSwitch(GenTree* treeNode)
3879 genConsumeOperands(treeNode->AsOp());
3880 regNumber idxReg = treeNode->gtOp.gtOp1->gtRegNum;
3881 regNumber baseReg = treeNode->gtOp.gtOp2->gtRegNum;
3883 regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs);
3885 // load the ip-relative offset (which is relative to start of fgFirstBB)
3886 getEmitter()->emitIns_R_ARX(INS_mov, EA_4BYTE, baseReg, baseReg, idxReg, 4, 0);
3888 // add it to the absolute address of fgFirstBB
3889 compiler->fgFirstBB->bbFlags |= BBF_JMP_TARGET;
3890 getEmitter()->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, compiler->fgFirstBB, tmpReg);
3891 getEmitter()->emitIns_R_R(INS_add, EA_PTRSIZE, baseReg, tmpReg);
3893 getEmitter()->emitIns_R(INS_i_jmp, emitTypeSize(TYP_I_IMPL), baseReg);
3897 // emits the table and an instruction to get the address of the first element
3899 CodeGen::genJumpTable(GenTree* treeNode)
3901 noway_assert(compiler->compCurBB->bbJumpKind == BBJ_SWITCH);
3902 assert(treeNode->OperGet() == GT_JMPTABLE);
3904 unsigned jumpCount = compiler->compCurBB->bbJumpSwt->bbsCount;
3905 BasicBlock** jumpTable = compiler->compCurBB->bbJumpSwt->bbsDstTab;
3906 unsigned jmpTabOffs;
3907 unsigned jmpTabBase;
3909 jmpTabBase = getEmitter()->emitBBTableDataGenBeg(jumpCount, true);
3913 JITDUMP("\n J_M%03u_DS%02u LABEL DWORD\n", Compiler::s_compMethodsCount, jmpTabBase);
3915 for (unsigned i=0; i<jumpCount; i++)
3917 BasicBlock* target = *jumpTable++;
3918 noway_assert(target->bbFlags & BBF_JMP_TARGET);
3920 JITDUMP(" DD L_M%03u_BB%02u\n", Compiler::s_compMethodsCount, target->bbNum);
3922 getEmitter()->emitDataGenData(i, target);
3925 getEmitter()->emitDataGenEnd();
3927 // Access to inline data is 'abstracted' by a special type of static member
3928 // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference
3929 // to constant data, not a real static field.
3930 getEmitter()->emitIns_R_C(INS_lea,
3931 emitTypeSize(TYP_I_IMPL),
3933 compiler->eeFindJitDataOffs(jmpTabBase),
3935 genProduceReg(treeNode);
3939 // generate code for the locked operations:
3940 // GT_LOCKADD, GT_XCHG, GT_XADD
3942 CodeGen::genLockedInstructions(GenTree* treeNode)
3944 GenTree* data = treeNode->gtOp.gtOp2;
3945 GenTree* addr = treeNode->gtOp.gtOp1;
3946 regNumber targetReg = treeNode->gtRegNum;
3947 regNumber dataReg = data->gtRegNum;
3948 regNumber addrReg = addr->gtRegNum;
3951 // all of these nodes implicitly do an indirection on op1
3952 // so create a temporary node to feed into the pattern matching
3953 GenTreeIndir i = indirForm(data->TypeGet(), addr);
3954 genConsumeReg(addr);
3956 // The register allocator should have extended the lifetime of the address
3957 // so that it is not used as the target.
3958 noway_assert(addrReg != targetReg);
3960 // If data is a lclVar that's not a last use, we'd better have allocated a register
3961 // for the result (except in the case of GT_LOCKADD which does not produce a register result).
3962 assert(targetReg != REG_NA || treeNode->OperGet() == GT_LOCKADD || !genIsRegCandidateLocal(data) || (data->gtFlags & GTF_VAR_DEATH) != 0);
3964 genConsumeIfReg(data);
3965 if (targetReg != REG_NA && dataReg != REG_NA && dataReg != targetReg)
3967 inst_RV_RV(ins_Copy(data->TypeGet()), targetReg, dataReg);
3968 data->gtRegNum = targetReg;
3970 // TODO-XArch-Cleanup: Consider whether it is worth it, for debugging purposes, to restore the
3971 // original gtRegNum on data, after calling emitInsBinary below.
3973 switch (treeNode->OperGet())
3980 // lock is implied by xchg
3990 getEmitter()->emitInsBinary(ins, emitTypeSize(data), &i, data);
3992 if (treeNode->gtRegNum != REG_NA)
3994 genProduceReg(treeNode);
3999 // generate code for BoundsCheck nodes
4001 CodeGen::genRangeCheck(GenTreePtr oper)
4004 noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK || oper->OperGet() == GT_SIMD_CHK);
4005 #else // !FEATURE_SIMD
4006 noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK);
4007 #endif // !FEATURE_SIMD
4009 GenTreeBoundsChk* bndsChk = oper->AsBoundsChk();
4011 GenTreePtr arrLen = bndsChk->gtArrLen;
4012 GenTreePtr arrIndex = bndsChk->gtIndex;
4013 GenTreePtr arrRef = NULL;
4016 GenTree *src1, *src2;
4017 emitJumpKind jmpKind;
4019 genConsumeRegs(arrLen);
4020 genConsumeRegs(arrIndex);
4022 if (arrIndex->isContainedIntOrIImmed())
4024 // arrIndex is a contained constant. In this case
4025 // we will generate one of the following
4026 // cmp [mem], immed (if arrLen is a memory op)
4027 // cmp reg, immed (if arrLen is in a reg)
4029 // That is, arrLen cannot be a contained immed.
4030 assert(!arrLen->isContainedIntOrIImmed());
4038 // arrIndex could either be a contained memory op or a reg
4039 // In this case we will generate one of the following
4040 // cmp [mem], immed (if arrLen is a constant)
4041 // cmp [mem], reg (if arrLen is in a reg)
4042 // cmp reg, immed (if arrIndex is in a reg)
4043 //      cmp  reg1, reg2     (if arrIndex is in reg1)
4044 // cmp reg, [mem] (if arrLen is a memory op)
4046 // That is, only one of arrIndex or arrLen can be a memory op.
4047 assert(!arrIndex->isContainedMemoryOp() || !arrLen->isContainedMemoryOp());
4054 var_types bndsChkType = src2->TypeGet();
4056 // Bounds checks can only be 32- or 64-bit sized comparisons.
4057 assert(bndsChkType == TYP_INT || bndsChkType == TYP_LONG);
4059 // The type of the bounds check should always be wide enough to compare against the index.
4060 assert(emitTypeSize(bndsChkType) >= emitTypeSize(src1->TypeGet()));
4063 getEmitter()->emitInsBinary(INS_cmp, emitTypeSize(bndsChkType), src1, src2);
4064 genJumpToThrowHlpBlk(jmpKind, bndsChk->gtThrowKind, bndsChk->gtIndRngFailBB);
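// For example (illustrative), with the index in a register and the array length
// in memory, the sequence emitted above is roughly:
//     cmp indexReg, [lenMem]
//     jae rngChkFailLabel    ; the unsigned compare also catches a negative index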
4068 //------------------------------------------------------------------------
4069 // genOffsetOfMDArrayLowerBound: Returns the offset from the Array object to the
4070 // lower bound for the given dimension.
4073 // elemType - the element type of the array
4074 // rank - the rank of the array
4075 // dimension - the dimension for which the lower bound offset will be returned.
4081 CodeGen::genOffsetOfMDArrayLowerBound(var_types elemType, unsigned rank, unsigned dimension)
4083 // Note that the lower bound and length fields of the Array object are always TYP_INT, even on 64-bit targets.
4084 return compiler->eeGetArrayDataOffset(elemType) + genTypeSize(TYP_INT) * (dimension + rank);
4087 //------------------------------------------------------------------------
4088 // genOffsetOfMDArrayDimensionSize: Returns the offset from the Array object to the
4089 // size for the given dimension.
4092 // elemType - the element type of the array
4093 // rank - the rank of the array
4094 // dimension - the dimension for which the size offset will be returned.
4100 CodeGen::genOffsetOfMDArrayDimensionSize(var_types elemType, unsigned rank, unsigned dimension)
4102 // Note that the lower bound and length fields of the Array object are always TYP_INT, even on 64-bit targets.
4103 return compiler->eeGetArrayDataOffset(elemType) + genTypeSize(TYP_INT) * dimension;
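// Together these two helpers encode the layout of the multi-dimensional array
// object (a sketch, assuming the usual CLR MD-array layout): starting at
// eeGetArrayDataOffset(elemType) there are 'rank' TYP_INT dimension sizes,
// followed by 'rank' TYP_INT lower bounds. For example, for a rank-2 array the
// size of dimension 1 is at dataOffset + 4, and the lower bound of dimension 0
// is at dataOffset + 8.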
4106 //------------------------------------------------------------------------
4107 // genCodeForArrIndex: Generates code to bounds check the index for one dimension of an array reference,
4108 // producing the effective index by subtracting the lower bound.
4111 // arrIndex - the node for which we're generating code
4118 CodeGen::genCodeForArrIndex(GenTreeArrIndex* arrIndex)
4120 GenTreePtr arrObj = arrIndex->ArrObj();
4121 GenTreePtr indexNode = arrIndex->IndexExpr();
4123 regNumber arrReg = genConsumeReg(arrObj);
4124 regNumber indexReg = genConsumeReg(indexNode);
4125 regNumber tgtReg = arrIndex->gtRegNum;
4127 unsigned dim = arrIndex->gtCurrDim;
4128 unsigned rank = arrIndex->gtArrRank;
4129 var_types elemType = arrIndex->gtArrElemType;
4131 noway_assert(tgtReg != REG_NA);
4133 // Subtract the lower bound for this dimension.
4134 // TODO-XArch-CQ: make this contained if it's an immediate that fits.
4135 if (tgtReg != indexReg)
4137 inst_RV_RV(INS_mov, tgtReg, indexReg, indexNode->TypeGet());
4139 getEmitter()->emitIns_R_AR(INS_sub,
4140 emitActualTypeSize(TYP_INT),
4143 genOffsetOfMDArrayLowerBound(elemType, rank, dim));
4144 getEmitter()->emitIns_R_AR(INS_cmp,
4145 emitActualTypeSize(TYP_INT),
4148 genOffsetOfMDArrayDimensionSize(elemType, rank, dim));
4149 genJumpToThrowHlpBlk(EJ_jae, SCK_RNGCHK_FAIL);
4151 genProduceReg(arrIndex);
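// For example (illustrative), for dimension 'dim' the sequence emitted above is
// roughly:
//     mov tgtReg, indexReg                  ; only if tgtReg != indexReg
//     sub tgtReg, [arrReg + lowerBoundOffs]
//     cmp tgtReg, [arrReg + dimSizeOffs]
//     jae rngChkFailLabel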
4154 //------------------------------------------------------------------------
4155 // genCodeForArrOffset: Generates code to compute the flattened array offset for
4156 // one dimension of an array reference:
4157 // result = (prevDimOffset * dimSize) + effectiveIndex
4158 // where dimSize is obtained from the arrObj operand
4161 // arrOffset - the node for which we're generating code
4167 // dimSize and effectiveIndex are always non-negative, the former by design,
4168 // and the latter because it has been normalized to be zero-based.
4171 CodeGen::genCodeForArrOffset(GenTreeArrOffs* arrOffset)
4173 GenTreePtr offsetNode = arrOffset->gtOffset;
4174 GenTreePtr indexNode = arrOffset->gtIndex;
4175 GenTreePtr arrObj = arrOffset->gtArrObj;
4177 regNumber tgtReg = arrOffset->gtRegNum;
4179 noway_assert(tgtReg != REG_NA);
4181 unsigned dim = arrOffset->gtCurrDim;
4182 unsigned rank = arrOffset->gtArrRank;
4183 var_types elemType = arrOffset->gtArrElemType;
4185 // We will use a temp register for the offset*scale+effectiveIndex computation.
4186 regMaskTP tmpRegMask = arrOffset->gtRsvdRegs;
4187 regNumber tmpReg = genRegNumFromMask(tmpRegMask);
4189 // First, consume the operands in the correct order.
4190 regNumber offsetReg = REG_NA;
4191 if (!offsetNode->IsZero())
4193 offsetReg = genConsumeReg(offsetNode);
4197 assert(offsetNode->isContained());
4199 regNumber indexReg = genConsumeReg(indexNode);
4200 // Although arrReg may not be used in the constant-index case, if we have generated
4201 // the value into a register, we must consume it, otherwise we will fail to end the
4202 // live range of the gc ptr.
4203 // TODO-CQ: Currently arrObj will always have a register allocated to it.
4204 // We could avoid allocating a register for it, which would be of value if the arrObj
4205 // is an on-stack lclVar.
4206 regNumber arrReg = REG_NA;
4207 if (arrObj->gtHasReg())
4209 arrReg = genConsumeReg(arrObj);
4212 if (!offsetNode->IsZero())
4214 // Evaluate tgtReg = offsetReg*dim_size + indexReg.
4215 // tmpReg is used to load dim_size and the result of the multiplication.
4216 // Note that dim_size will never be negative.
4218 getEmitter()->emitIns_R_AR(INS_mov,
4219 emitActualTypeSize(TYP_INT),
4222 genOffsetOfMDArrayDimensionSize(elemType, rank, dim));
4223 inst_RV_RV(INS_imul, tmpReg, offsetReg);
4225 if (tmpReg == tgtReg)
4227 inst_RV_RV(INS_add, tmpReg, indexReg);
4231 if (indexReg != tgtReg)
4233 inst_RV_RV(INS_mov, tgtReg, indexReg, TYP_I_IMPL);
4235 inst_RV_RV(INS_add, tgtReg, tmpReg);
4240 if (indexReg != tgtReg)
4242 inst_RV_RV(INS_mov, tgtReg, indexReg, TYP_INT);
4245 genProduceReg(arrOffset);
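// For example (illustrative), in the non-zero offset case the sequence emitted
// above is roughly:
//     mov  tmpReg, [arrReg + dimSizeOffs]
//     imul tmpReg, offsetReg
//     mov  tgtReg, indexReg               ; only if tgtReg != indexReg
//     add  tgtReg, tmpReg                 ; tgtReg = offset*dimSize + index
// with the mov/add commuted when tmpReg happens to be the target register.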
4248 // make a temporary indir we can feed to pattern matching routines
4249 // in cases where we don't want to instantiate all the indirs that would otherwise be created
4251 GenTreeIndir CodeGen::indirForm(var_types type, GenTree *base)
4253 GenTreeIndir i(GT_IND, type, base, nullptr);
4254 i.gtRegNum = REG_NA;
4255 // has to be nonnull (because contained nodes can't be the last in block)
4256 // but don't want it to be a valid pointer
4257 i.gtNext = (GenTree *)(-1);
4261 // make a temporary int we can feed to pattern matching routines
4262 // in cases where we don't want to instantiate an actual constant node
4264 GenTreeIntCon CodeGen::intForm(var_types type, ssize_t value)
4266 GenTreeIntCon i(type, value);
4267 i.gtRegNum = REG_NA;
4268 // has to be nonnull (because contained nodes can't be the last in block)
4269 // but don't want it to be a valid pointer
4270 i.gtNext = (GenTree *)(-1);
4275 instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type)
4279 // Operations on SIMD vectors shouldn't come down this path
4280 assert(!varTypeIsSIMD(type));
4281 if (varTypeIsFloating(type))
4283 return ins_MathOp(oper, type);
4288 case GT_ADD: ins = INS_add; break;
4289 case GT_AND: ins = INS_and; break;
4290 case GT_LSH: ins = INS_shl; break;
4291 case GT_MUL: ins = INS_imul; break;
4292 case GT_NEG: ins = INS_neg; break;
4293 case GT_NOT: ins = INS_not; break;
4294 case GT_OR: ins = INS_or; break;
4295 case GT_ROL: ins = INS_rol; break;
4296 case GT_ROR: ins = INS_ror; break;
4297 case GT_RSH: ins = INS_sar; break;
4298 case GT_RSZ: ins = INS_shr; break;
4299 case GT_SUB: ins = INS_sub; break;
4300 case GT_XOR: ins = INS_xor; break;
4301 #if !defined(_TARGET_64BIT_)
4302 case GT_ADD_HI: ins = INS_adc; break;
4303 case GT_SUB_HI: ins = INS_sbb; break;
4304 #endif // !defined(_TARGET_64BIT_)
4305 default: unreached();
4311 /** Generates the code sequence for a GenTree node that
4312 * represents a bit shift or rotate operation (<<, >>, >>>, rol, ror).
4314 * Arguments: operand: the value to be shifted or rotated by shiftBy bits.
4315 * shiftBy: the number of bits to shift or rotate the operand.
4316 * parent: the actual bitshift node (that specifies the
4317 * type of bitshift to perform).
4319 * Preconditions: a) All GenTrees are register allocated.
4320 * b) Either shiftBy is a contained constant or
4321 * it's an expression sitting in RCX.
4323 void CodeGen::genCodeForShift(GenTreePtr operand, GenTreePtr shiftBy,
4326 var_types targetType = parent->TypeGet();
4327 genTreeOps oper = parent->OperGet();
4328 instruction ins = genGetInsForOper(oper, targetType);
4329 GenTreePtr actualOperand = operand->gtSkipReloadOrCopy();
4331 bool isRMW = parent->gtOp.gtOp1->isContained();
4332 assert(parent->gtRegNum != REG_NA || isRMW);
4334 regNumber operandReg = REG_NA;
4335 regNumber indexReg = REG_NA;
4338 emitAttr attr = EA_UNKNOWN;
4339 bool isClsVarAddr = (operand->OperGet() == GT_CLS_VAR_ADDR);
4340 bool isLclVarAddr = (operand->OperGet() == GT_LCL_VAR_ADDR);
4341 bool isCnsIntOrIAndFitsWithinAddrBase = false;
4345 genConsumeOperands(parent->AsOp());
4346 operandReg = operand->gtRegNum;
4350 targetType = parent->gtOp.gtOp1->TypeGet();
4351 attr = EA_ATTR(genTypeSize(targetType));
4353 if (actualOperand->OperGet() == GT_LCL_VAR)
4355 operandReg = operand->gtRegNum;
4357 else if (actualOperand->OperGet() == GT_LEA)
4359 operandReg = actualOperand->gtOp.gtOp1->gtRegNum;
4360 GenTreeAddrMode* addrMode = actualOperand->AsAddrMode();
4361 offset = addrMode->gtOffset;
4362 if (addrMode->Index() != nullptr)
4364 indexReg = addrMode->Index()->gtRegNum;
4366 // GT_LEA with an indexReg is not supported for shift by immediate
4367 assert(!shiftBy->isContainedIntOrIImmed());
4370 else if (actualOperand->IsCnsIntOrI())
4372 GenTreeIntConCommon* intCon = actualOperand->AsIntConCommon();
4373 if (actualOperand->isContained())
4375 // Contained absolute address should fit within addr base
4376 assert(intCon->FitsInAddrBase(compiler));
4378 // Don't expect to see GT_COPY or GT_RELOAD
4379 assert(operand == actualOperand);
4381 isCnsIntOrIAndFitsWithinAddrBase = true;
4382 disp = intCon->IconValue();
4384 if (intCon->AddrNeedsReloc(compiler))
4386 attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
4391 operandReg = operand->gtRegNum;
4396 // The only other supported operands for RMW are GT_CLS_VAR_ADDR and GT_LCL_VAR_ADDR
4397 assert(actualOperand->OperGet() == GT_CLS_VAR_ADDR || actualOperand->OperGet() == GT_LCL_VAR_ADDR);
4399 // We don't expect to see GT_COPY or GT_RELOAD for GT_CLS_VAR_ADDR and GT_LCL_VAR_ADDR
4400 // so 'actualOperand' should be the same as 'operand'
4401 assert(operand == actualOperand);
4405 if (shiftBy->isContainedIntOrIImmed())
4407 int shiftByValue = (int)shiftBy->AsIntConCommon()->IconValue();
4411 // First, move the operand to the destination register and
4412 // later on perform the shift in-place.
4413 // (LSRA will try to avoid this situation through preferencing.)
4414 if (parent->gtRegNum != operandReg)
4416 inst_RV_RV(INS_mov, parent->gtRegNum, operandReg, targetType);
4419 inst_RV_SH(ins, emitTypeSize(parent), parent->gtRegNum, shiftByValue);
4423 if ((isClsVarAddr || isLclVarAddr) && shiftByValue == 1)
4443 // leave 'ins' unchanged
4449 getEmitter()->emitIns_C(ins, attr, operand->gtClsVar.gtClsVarHnd, 0);
4453 getEmitter()->emitIns_S(ins, attr, operand->gtLclVarCommon.gtLclNum, 0);
4476 // leave 'ins' unchanged
4481 getEmitter()->emitIns_C_I(ins, attr, operand->gtClsVar.gtClsVarHnd, 0, shiftByValue);
4483 else if (isLclVarAddr)
4485 getEmitter()->emitIns_S_I(ins, attr, operand->gtLclVarCommon.gtLclNum, 0, shiftByValue);
4487 else if (isCnsIntOrIAndFitsWithinAddrBase)
4489 getEmitter()->emitIns_I_AI(ins, attr, shiftByValue, disp);
4493 getEmitter()->emitIns_I_AR(ins, attr, shiftByValue, operandReg, offset);
4501 // We must have the number of bits to shift
4502 // stored in ECX, since we constrained this node to
4503 // sit in ECX. If this didn't happen, LSRA expects
4504 // the code generator to move it, since ECX is a single
4505 // register destination requirement.
4506 regNumber shiftReg = shiftBy->gtRegNum;
4507 if (shiftReg != REG_RCX)
4509 // Issue the mov to RCX:
4510 inst_RV_RV(INS_mov, REG_RCX, shiftReg, shiftBy->TypeGet());
4514 // The operand to be shifted must not be in ECX
4515 noway_assert(operandReg != REG_RCX);
4521 getEmitter()->emitIns_C_R(ins, attr, operand->gtClsVar.gtClsVarHnd, shiftReg, 0);
4523 else if (isLclVarAddr)
4525 getEmitter()->emitIns_S_R(ins, attr, shiftReg, operand->gtLclVarCommon.gtLclNum, 0);
4527 else if (isCnsIntOrIAndFitsWithinAddrBase)
4529 getEmitter()->emitIns_AI_R(ins, attr, shiftReg, disp);
4533 getEmitter()->emitIns_AR_R(ins, attr, indexReg, operandReg, (int) offset);
4538 if (parent->gtRegNum != operandReg)
4540 inst_RV_RV(INS_mov, parent->gtRegNum, operandReg, targetType);
4542 inst_RV_CL(ins, parent->gtRegNum, targetType);
4545 genProduceReg(parent);
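// For example (illustrative), for a non-RMW "x << y" where 'y' is not a
// contained constant, the sequence emitted above is roughly:
//     mov rcx, shiftReg          ; only if shiftBy is not already in RCX
//     mov dstReg, operandReg     ; only if dstReg != operandReg
//     shl dstReg, cl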
4548 void CodeGen::genUnspillRegIfNeeded(GenTree *tree)
4550 regNumber dstReg = tree->gtRegNum;
4552 GenTree* unspillTree = tree;
4553 if (tree->gtOper == GT_RELOAD)
4555 unspillTree = tree->gtOp.gtOp1;
4557 if (unspillTree->gtFlags & GTF_SPILLED)
4559 if (genIsRegCandidateLocal(unspillTree))
4561 // Reset spilled flag, since we are going to load a local variable from its home location.
4562 unspillTree->gtFlags &= ~GTF_SPILLED;
4564 GenTreeLclVarCommon* lcl = unspillTree->AsLclVarCommon();
4565 LclVarDsc* varDsc = &compiler->lvaTable[lcl->gtLclNum];
4567 // Load local variable from its home location.
4568 // In most cases the tree type will indicate the correct type to use for the load.
4569 // However, if it is NOT a normalizeOnLoad lclVar (i.e. NOT a small int that always gets
4570 // widened when loaded into a register), and its size is not the same as genActualType of
4571 // the type of the lclVar, then we need to change the type of the tree node when loading.
4572 // This situation happens due to "optimizations" that avoid a cast and
4573 // simply retype the node when using a long lclVar as an int.
4574 // While loading the int in that case would work for this use of the lclVar, if it is
4575 // later used as a long, we will have incorrectly truncated the long.
4576 // In the normalizeOnLoad case ins_Load will return an appropriate sign- or zero-extending load instruction.
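// For example (illustrative), if a TYP_LONG lclVar has been retyped to TYP_INT
// at this use, we still perform an 8-byte load here so that a later TYP_LONG
// use of the same value does not see a truncated long.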
4579 var_types treeType = unspillTree->TypeGet();
4580 if (treeType != genActualType(varDsc->lvType) &&
4581 !varTypeIsGC(treeType) &&
4582 !varDsc->lvNormalizeOnLoad())
4584 assert(!varTypeIsGC(varDsc));
4585 var_types spillType = genActualType(varDsc->lvType);
4586 unspillTree->gtType = spillType;
4587 inst_RV_TT(ins_Load(spillType, compiler->isSIMDTypeLocalAligned(lcl->gtLclNum)), dstReg, unspillTree);
4588 unspillTree->gtType = treeType;
4592 inst_RV_TT(ins_Load(treeType, compiler->isSIMDTypeLocalAligned(lcl->gtLclNum)), dstReg, unspillTree);
4595 unspillTree->SetInReg();
4597 // TODO-Review: We would like to call:
4598 // genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(tree));
4599 // instead of the following code, but this ends up hitting this assert:
4600 // assert((regSet.rsMaskVars & regMask) == 0);
4601 // due to issues with LSRA resolution moves.
4602 // So, just force it for now. This probably indicates a condition that creates a GC hole!
4604 // Extra note: I think we really want to call something like gcInfo.gcUpdateForRegVarMove,
4605 // because the variable is not really going live or dead, but that method is somewhat poorly
4606 // factored because it, in turn, updates rsMaskVars which is part of RegSet not GCInfo.
4607 // TODO-Cleanup: This code exists in other CodeGen*.cpp files, and should be moved to CodeGenCommon.cpp.
4609 // Don't update the variable's location if we are just re-spilling it again.
4611 if ((unspillTree->gtFlags & GTF_SPILL) == 0)
4613 genUpdateVarReg(varDsc, tree);
4615 if (VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
4617 JITDUMP("\t\t\t\t\t\t\tRemoving V%02u from gcVarPtrSetCur\n", lcl->gtLclNum);
4620 VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
4623 if (compiler->verbose)
4625 printf("\t\t\t\t\t\t\tV%02u in reg ", lcl->gtLclNum);
4626 varDsc->PrintVarReg();
4627 printf(" is becoming live ");
4628 compiler->printTreeID(unspillTree);
4633 regSet.AddMaskVars(genGetRegMask(varDsc));
4638 TempDsc* t = regSet.rsUnspillInPlace(unspillTree);
4639 getEmitter()->emitIns_R_S(ins_Load(unspillTree->gtType),
4640 emitActualTypeSize(unspillTree->gtType),
4644 compiler->tmpRlsTemp(t);
4646 unspillTree->gtFlags &= ~GTF_SPILLED;
4647 unspillTree->SetInReg();
4650 gcInfo.gcMarkRegPtrVal(dstReg, unspillTree->TypeGet());
4654 // Do liveness update for a subnode that is being consumed by codegen,
4655 // including the logic for reload if needed, and also take care
4656 // of placing the value in the desired register.
4657 void CodeGen::genConsumeRegAndCopy(GenTree *tree, regNumber needReg)
4659 if (needReg == REG_NA)
4663 regNumber treeReg = genConsumeReg(tree);
4664 if (treeReg != needReg)
4666 inst_RV_RV(INS_mov, needReg, treeReg, tree->TypeGet());
4670 void CodeGen::genRegCopy(GenTree* treeNode)
4672 assert(treeNode->OperGet() == GT_COPY);
4673 var_types targetType = treeNode->TypeGet();
4674 regNumber targetReg = treeNode->gtRegNum;
4675 assert(targetReg != REG_NA);
4677 GenTree* op1 = treeNode->gtOp.gtOp1;
4679 // Check whether this node and the node from which we're copying the value have the same register type.
4681 // This can happen if (currently iff) we have a SIMD vector type that fits in an integer
4682 // register, in which case it is passed as an argument, or returned from a call,
4683 // in an integer register and must be copied if it's in an xmm register.
4685 bool srcFltReg = (varTypeIsFloating(op1) || varTypeIsSIMD(op1));
4686 bool tgtFltReg = (varTypeIsFloating(treeNode) || varTypeIsSIMD(treeNode));
4687 if (srcFltReg != tgtFltReg)
4694 ins = ins_CopyIntToFloat(op1->TypeGet(), treeNode->TypeGet());
4696 intReg = op1->gtRegNum;
4700 ins = ins_CopyFloatToInt(op1->TypeGet(), treeNode->TypeGet());
4702 fpReg = op1->gtRegNum;
4704 inst_RV_RV(ins, fpReg, intReg, targetType);
4708 inst_RV_RV(ins_Copy(targetType), targetReg, genConsumeReg(op1), targetType);
4713 // The lclVar will never be a def.
4714 // If it is a last use, the lclVar will be killed by genConsumeReg(), as usual, and genProduceReg will
4715 // appropriately set the gcInfo for the copied value.
4716 // If not, there are two cases we need to handle:
4717 // - If this is a TEMPORARY copy (indicated by the GTF_VAR_DEATH flag) the variable
4718 // will remain live in its original register.
4719 // genProduceReg() will appropriately set the gcInfo for the copied value,
4720 // and genConsumeReg will reset it.
4721 // - Otherwise, we need to update register info for the lclVar.
4723 GenTreeLclVarCommon* lcl = op1->AsLclVarCommon();
4724 assert((lcl->gtFlags & GTF_VAR_DEF) == 0);
4726 if ((lcl->gtFlags & GTF_VAR_DEATH) == 0 && (treeNode->gtFlags & GTF_VAR_DEATH) == 0)
4728 LclVarDsc* varDsc = &compiler->lvaTable[lcl->gtLclNum];
4730 // If we didn't just spill it (in genConsumeReg, above), then update the register info
4731 if (varDsc->lvRegNum != REG_STK)
4733 // The old location is dying
4734 genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(op1));
4736 gcInfo.gcMarkRegSetNpt(genRegMask(op1->gtRegNum));
4738 genUpdateVarReg(varDsc, treeNode);
4740 // The new location is going live
4741 genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(treeNode));
4745 genProduceReg(treeNode);
4748 // Check that registers are consumed in the right order for the current node being generated.
4750 void CodeGen::genCheckConsumeNode(GenTree* treeNode)
4752 // GT_PUTARG_REG is consumed out of order.
4753 if (treeNode->gtSeqNum != 0 && treeNode->OperGet() != GT_PUTARG_REG)
4755 if (lastConsumedNode != nullptr)
4757 if (treeNode == lastConsumedNode)
4761 printf("Node was consumed twice:\n ");
4762 compiler->gtDispTree(treeNode, nullptr, nullptr, true);
4767 if (verbose && (lastConsumedNode->gtSeqNum > treeNode->gtSeqNum))
4769 printf("Nodes were consumed out-of-order:\n");
4770 compiler->gtDispTree(lastConsumedNode, nullptr, nullptr, true);
4771 compiler->gtDispTree(treeNode, nullptr, nullptr, true);
4773 // assert(lastConsumedNode->gtSeqNum < treeNode->gtSeqNum);
4776 lastConsumedNode = treeNode;
4781 // Do liveness update for a subnode that is being consumed by codegen.
4782 regNumber CodeGen::genConsumeReg(GenTree *tree)
4784 if (tree->OperGet() == GT_COPY)
4788 // Handle the case where we have a lclVar that needs to be copied before use (i.e. because it
4789 // interferes with one of the other sources (or the target, if it's a "delayed use" register)).
4790 // TODO-Cleanup: This is a special copyReg case in LSRA - consider eliminating these and
4791 // always using GT_COPY to make the lclVar location explicit.
4792 // Note that we have to do this before calling genUpdateLife because otherwise if we spill it
4793 // the lvRegNum will be set to REG_STK and we will lose track of what register currently holds
4794 // the lclVar (normally when a lclVar is spilled it is then used from its former register
4795 // location, which matches the gtRegNum on the node).
4796 // (Note that it doesn't matter if we call this before or after genUnspillRegIfNeeded
4797 // because if it's on the stack it will always get reloaded into tree->gtRegNum).
4798 if (genIsRegCandidateLocal(tree))
4800 GenTreeLclVarCommon *lcl = tree->AsLclVarCommon();
4801 LclVarDsc* varDsc = &compiler->lvaTable[lcl->GetLclNum()];
4802 if (varDsc->lvRegNum != REG_STK && varDsc->lvRegNum != tree->gtRegNum)
4804 inst_RV_RV(INS_mov, tree->gtRegNum, varDsc->lvRegNum);
4808 genUnspillRegIfNeeded(tree);
4810 // genUpdateLife() will also spill local var if marked as GTF_SPILL by calling CodeGen::genSpillVar
4811 genUpdateLife(tree);
4813 assert(tree->gtRegNum != REG_NA);
4815 // there are three cases where consuming a reg means clearing the bit in the live mask
4816 // 1. it was not produced by a local
4817 // 2. it was produced by a local that is going dead
4818 // 3. it was produced by a local that does not live in that reg (like one allocated on the stack)
4820 if (genIsRegCandidateLocal(tree))
4822 GenTreeLclVarCommon *lcl = tree->AsLclVarCommon();
4823 LclVarDsc* varDsc = &compiler->lvaTable[lcl->GetLclNum()];
4824 assert(varDsc->lvLRACandidate);
4826 if ((tree->gtFlags & GTF_VAR_DEATH) != 0)
4828 gcInfo.gcMarkRegSetNpt(genRegMask(varDsc->lvRegNum));
4830 else if (varDsc->lvRegNum == REG_STK)
4832 // We have loaded this into a register only temporarily
4833 gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum));
4838 gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum));
4841 genCheckConsumeNode(tree);
4842 return tree->gtRegNum;
4845 // Do liveness update for an address tree: one of GT_LEA, GT_LCL_VAR, or GT_CNS_INT (for call indirect).
4846 void CodeGen::genConsumeAddress(GenTree* addr)
4848 if (addr->OperGet() == GT_LEA)
4850 genConsumeAddrMode(addr->AsAddrMode());
4852 else if (!addr->isContained())
4854 genConsumeReg(addr);
4858 // do liveness update for a subnode that is being consumed by codegen
4859 void CodeGen::genConsumeAddrMode(GenTreeAddrMode *addr)
4861 genConsumeOperands(addr);
4864 void CodeGen::genConsumeRegs(GenTree* tree)
4866 #if !defined(_TARGET_64BIT_)
4867 if (tree->OperGet() == GT_LONG)
4869 genConsumeRegs(tree->gtGetOp1());
4870 genConsumeRegs(tree->gtGetOp2());
4873 #endif // !defined(_TARGET_64BIT_)
4875 if (tree->isContained())
4877 if (tree->isIndir())
4879 genConsumeAddress(tree->AsIndir()->Addr());
4881 else if (tree->OperGet() == GT_AND)
4883 // This is the special contained GT_AND that we created in Lowering::LowerCmp()
4884 // Now we need to consume the operands of the GT_AND node.
4885 genConsumeOperands(tree->AsOp());
4889 assert(tree->OperIsLeaf());
4894 genConsumeReg(tree);
4898 //------------------------------------------------------------------------
4899 // genConsumeOperands: Do liveness update for the operands of a unary or binary tree
4902 // tree - the GenTreeOp whose operands will have their liveness updated.
4908 // Note that this logic is localized here because we must do the liveness update in
4909 // the correct execution order. This is important because we may have two operands
4910 // that involve the same lclVar, and if one is marked "lastUse" we must handle it in execution order.
4913 void CodeGen::genConsumeOperands(GenTreeOp* tree)
4915 GenTree* firstOp = tree->gtOp1;
4916 GenTree* secondOp = tree->gtOp2;
4917 if ((tree->gtFlags & GTF_REVERSE_OPS) != 0)
4919 assert(secondOp != nullptr);
4921 secondOp = tree->gtOp1;
4923 if (firstOp != nullptr)
4925 genConsumeRegs(firstOp);
4927 if (secondOp != nullptr)
4929 genConsumeRegs(secondOp);
4933 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4934 //------------------------------------------------------------------------
4935 // genConsumePutStructArgStk: Do liveness update for the operands of a PutArgStk node.
4936 // Also loads the addresses of the src/dst into the right registers
4937 // for the rep mov operation.
4940 // putArgNode - the PUTARG_STK tree.
4941 // dstReg - the dstReg for the rep move operation.
4942 // srcReg - the srcReg for the rep move operation.
4943 // sizeReg - the sizeReg for the rep move operation.
4944 // baseVarNum - the base var num for placing the "by-value" args on the stack.
4949 // Note: sizeReg can be REG_NA when this function is used to consume the dstReg and srcReg
4950 // for copying a struct with references onto the stack.
4952 void CodeGen::genConsumePutStructArgStk(GenTreePutArgStk* putArgNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg, unsigned baseVarNum)
4954 assert(varTypeIsStruct(putArgNode));
4955 assert(baseVarNum != BAD_VAR_NUM);
4957 // The putArgNode children are always contained. We should not consume any registers.
4958 assert(putArgNode->gtGetOp1()->isContained());
4960 GenTree* dst = putArgNode;
4962 // Get the GT_ADDR node, which is GT_LCL_VAR_ADDR (asserted below).
4963 GenTree* src = putArgNode->gtGetOp1();
4964 assert((src->gtOper == GT_LDOBJ) || ((src->gtOper == GT_IND && varTypeIsSIMD(src))));
4965 src = src->gtGetOp1();
4967 size_t size = putArgNode->getArgSize();
4974 assert(dstReg != REG_NA);
4975 assert(srcReg != REG_NA);
4977 // Consume the registers only if they are not contained or set to REG_NA.
4978 if (op2->gtRegNum != REG_NA)
4983 // If the op1 is already in the dstReg - nothing to do.
4984 // Otherwise load the op1 (GT_ADDR) into the dstReg to copy the struct on the stack by value.
4985 if (op1->gtRegNum != dstReg)
4987 // Generate a LEA instruction to load the address of the outgoing arg var + SlotNum offset (or the incoming arg area for tail calls) into RDI.
4988 getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, dstReg, baseVarNum, putArgNode->getArgOffset());
4991 if (op2->gtRegNum != srcReg)
4993 if (src->OperIsLocalAddr())
4995 // The OperIsLocalAddr node is always contained.
4996 assert(src->isContained());
4997 GenTreeLclVarCommon* lclNode = src->AsLclVarCommon();
4999 // Generate LEA instruction to load the LclVar address in RSI.
5000 getEmitter()->emitIns_R_S(INS_lea, emitTypeSize(src), srcReg, lclNode->gtLclNum, 0);
5004 assert(src->gtRegNum != REG_NA);
5005 getEmitter()->emitIns_R_R(INS_mov, emitTypeSize(src), srcReg, src->gtRegNum);
5009 if (sizeReg != REG_NA)
5011 inst_RV_IV(INS_mov, sizeReg, size, EA_8BYTE);
5014 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
5016 void CodeGen::genConsumeBlockOp(GenTreeBlkOp* blkNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg)
5018 // We have to consume the registers, and perform any copies, in the actual execution order.
5019 // The nominal order is: dst, src, size. However this may have been changed
5020 // with reverse flags on either the GT_LIST or the block node itself.
5021 // Note that the register allocator ensures that the registers ON THE NODES will not interfere
5022 // with one another if consumed (i.e. reloaded or moved to their ASSIGNED reg) in execution order.
5023 // Further, it ensures that they will not interfere with one another if they are then copied
5024 // to the REQUIRED register (if a fixed register requirement) in execution order. This requires,
5025 // then, that we first consume all the operands, then do any necessary moves.
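// For example (illustrative), with no reverse flags the consumption order is
// dst, src, size; with GTF_REVERSE_OPS on the GT_LIST it is src, dst, size;
// and with GTF_REVERSE_OPS on the block node itself the size is consumed first.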
5027 GenTree* dst = blkNode->Dest();
5028 GenTree* src = blkNode->gtOp.gtOp1->gtOp.gtOp2;
5029 GenTree* size = blkNode->gtOp.gtOp2;
5033 regNumber reg1, reg2, reg3;
5034 if (!blkNode->IsReverseOp() && !blkNode->gtOp1->IsReverseOp())
5043 else if (!blkNode->IsReverseOp())
5045 // We know that the operands for the GT_LIST node 'blkNode->gtOp.gtOp1' are reversed.
5053 else if (!blkNode->gtOp1->IsReverseOp())
5055 // We know from above that the operands to 'blkNode' are reversed.
5065 // They are BOTH reversed.
5085 if ((reg1 != REG_NA) && (op1->gtRegNum != reg1))
5087 inst_RV_RV(INS_mov, reg1, op1->gtRegNum, op1->TypeGet());
5089 if ((reg2 != REG_NA) && (op2->gtRegNum != reg2))
5091 inst_RV_RV(INS_mov, reg2, op2->gtRegNum, op2->TypeGet());
5093 if ((reg3 != REG_NA) && (op3->gtRegNum != reg3))
5095 inst_RV_RV(INS_mov, reg3, op3->gtRegNum, op3->TypeGet());
5099 // do liveness update for register produced by the current node in codegen
5100 void CodeGen::genProduceReg(GenTree *tree)
5102 if (tree->gtFlags & GTF_SPILL)
5104 if (genIsRegCandidateLocal(tree))
5106 // Store local variable to its home location.
5107 tree->gtFlags &= ~GTF_REG_VAL;
5108 // Ensure that lclVar stores are typed correctly.
5109 unsigned varNum = tree->gtLclVarCommon.gtLclNum;
5110 assert(!compiler->lvaTable[varNum].lvNormalizeOnStore() || (tree->TypeGet() == genActualType(compiler->lvaTable[varNum].TypeGet())));
5111 inst_TT_RV(ins_Store(tree->gtType, compiler->isSIMDTypeLocalAligned(varNum)), tree, tree->gtRegNum);
5116 regSet.rsSpillTree(tree->gtRegNum, tree);
5117 tree->gtFlags |= GTF_SPILLED;
5118 tree->gtFlags &= ~GTF_SPILL;
5119 gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum));
5124 genUpdateLife(tree);
5126 // If we've produced a register, mark it as a pointer, as needed.
5127 if (tree->gtHasReg())
5129 // We only mark the register in the following cases:
5130 // 1. It is not a register candidate local. In this case, we're producing a
5131 // register from a local, but the local is not a register candidate. Thus,
5132 // we must be loading it as a temp register, and any "last use" flag on
5133 // the register wouldn't be relevant.
5134 // 2. The register candidate local is going dead. There's no point to mark
5135 // the register as live, with a GC pointer, if the variable is dead.
5136 if (!genIsRegCandidateLocal(tree) ||
5137 ((tree->gtFlags & GTF_VAR_DEATH) == 0))
5139 gcInfo.gcMarkRegPtrVal(tree->gtRegNum, tree->TypeGet());
5145 // transfer gc/byref status of src reg to dst reg
5146 void CodeGen::genTransferRegGCState(regNumber dst, regNumber src)
5148 regMaskTP srcMask = genRegMask(src);
5149 regMaskTP dstMask = genRegMask(dst);
5151 if (gcInfo.gcRegGCrefSetCur & srcMask)
5153 gcInfo.gcMarkRegSetGCref(dstMask);
5155 else if (gcInfo.gcRegByrefSetCur & srcMask)
5157 gcInfo.gcMarkRegSetByref(dstMask);
5161 gcInfo.gcMarkRegSetNpt(dstMask);
5165 // generates an ip-relative call or indirect call via reg ('call reg')
5166 // pass in 'addr' for a relative call or 'base' for an indirect register call
5167 // methHnd - optional, only used for pretty printing
5168 // retSize - emitter type of return for GC purposes, should be EA_BYREF, EA_GCREF, or EA_PTRSIZE (not GC)
5169 void CodeGen::genEmitCall(int callType,
5170 CORINFO_METHOD_HANDLE methHnd,
5171 INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo)
5173 X86_ARG(ssize_t argSize),
5175 IL_OFFSETX ilOffset,
5180 #if !defined(_TARGET_X86_)
5181 ssize_t argSize = 0;
5182 #endif // !defined(_TARGET_X86_)
5183 getEmitter()->emitIns_Call(emitter::EmitCallType(callType),
5185 INDEBUG_LDISASM_COMMA(sigInfo)
5189 gcInfo.gcVarPtrSetCur,
5190 gcInfo.gcRegGCrefSetCur,
5191 gcInfo.gcRegByrefSetCur,
5195 emitter::emitNoGChelper(compiler->eeGetHelperNum(methHnd)));
5198 // generates an indirect call via addressing mode (call []) given an indir node
5199 // methHnd - optional, only used for pretty printing
5200 // retSize - emitter type of return for GC purposes, should be EA_BYREF, EA_GCREF, or EA_PTRSIZE (not GC)
5201 void CodeGen::genEmitCall(int callType,
5202 CORINFO_METHOD_HANDLE methHnd,
5203 INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo)
5205 X86_ARG(ssize_t argSize),
5207 IL_OFFSETX ilOffset)
5209 #if !defined(_TARGET_X86_)
5210 ssize_t argSize = 0;
5211 #endif // !defined(_TARGET_X86_)
5212 genConsumeAddress(indir->Addr());
5214 getEmitter()->emitIns_Call(emitter::EmitCallType(callType),
5216 INDEBUG_LDISASM_COMMA(sigInfo)
5220 gcInfo.gcVarPtrSetCur,
5221 gcInfo.gcRegGCrefSetCur,
5222 gcInfo.gcRegByrefSetCur,
5224 indir->Base() ? indir->Base()->gtRegNum : REG_NA,
5225 indir->Index() ? indir->Index()->gtRegNum : REG_NA,
5231 //------------------------------------------------------------------------
5232 // genStoreInd: Generate code for a GT_STOREIND node.
5235 // treeNode - The GT_STOREIND node for which to generate code.
5240 void CodeGen::genStoreInd(GenTreePtr node)
5242 assert(node->OperGet() == GT_STOREIND);
5245 // Storing Vector3 of size 12 bytes through indirection
5246 if (node->TypeGet() == TYP_SIMD12)
5248 genStoreIndTypeSIMD12(node);
5251 #endif //FEATURE_SIMD
5253 GenTreeStoreInd* storeInd = node->AsStoreInd();
5254 GenTree* data = storeInd->Data();
5255 GenTree* addr = storeInd->Addr();
5256 var_types targetType = node->TypeGet();
5258 assert(!varTypeIsFloating(targetType) || (targetType == data->TypeGet()));
5260 GCInfo::WriteBarrierForm writeBarrierForm = gcInfo.gcIsWriteBarrierCandidate(node, data);
5261 if (writeBarrierForm != GCInfo::WBF_NoBarrier)
5263 // data and addr must be in registers.
5264 // Consume both registers so that any copies of interfering registers are taken care of.
5265 genConsumeOperands(node->AsOp());
5267 if (genEmitOptimizedGCWriteBarrier(writeBarrierForm, addr, data))
5270 // At this point, we should not have any interference.
5271 // That is, 'data' must not be in REG_ARG_0, as that is where 'addr' must go.
5272 noway_assert(data->gtRegNum != REG_ARG_0);
5274 // addr goes in REG_ARG_0
5275 if (addr->gtRegNum != REG_ARG_0)
5277 inst_RV_RV(INS_mov, REG_ARG_0, addr->gtRegNum, addr->TypeGet());
5280 // data goes in REG_ARG_1
5281 if (data->gtRegNum != REG_ARG_1)
5283 inst_RV_RV(INS_mov, REG_ARG_1, data->gtRegNum, data->TypeGet());
5286 genGCWriteBarrier(node, writeBarrierForm);
5290 bool reverseOps = ((node->gtFlags & GTF_REVERSE_OPS) != 0);
5291 bool dataIsUnary = false;
5292 bool isRMWMemoryOp = storeInd->IsRMWMemoryOp();
5293 GenTree* rmwSrc = nullptr;
5295 // We must consume the operands in the proper execution order, so that liveness is
5296 // updated appropriately.
5299 genConsumeAddress(addr);
5302 // If storeInd represents a RMW memory op then its data is a non-leaf node marked as contained,
5303 // and the non-indir operand of data is the source of the RMW memory op.
5306 assert(data->isContained() && !data->OperIsLeaf());
5308 GenTreePtr rmwDst = nullptr;
5310 dataIsUnary = (GenTree::OperIsUnary(data->OperGet()) != 0);
5313 if (storeInd->IsRMWDstOp1())
5315 rmwDst = data->gtGetOp1();
5316 rmwSrc = data->gtGetOp2();
5320 assert(storeInd->IsRMWDstOp2());
5321 rmwDst = data->gtGetOp2();
5322 rmwSrc = data->gtGetOp1();
5327 // For unary RMW ops, the src and dst of the RMW memory op are the same.
5328 assert(storeInd->IsRMWDstOp1());
5329 rmwSrc = data->gtGetOp1();
5330 rmwDst = data->gtGetOp1();
5331 assert(rmwSrc->isContained());
5334 assert(rmwSrc != nullptr);
5335 assert(rmwDst != nullptr);
5336 assert(Lowering::IndirsAreEquivalent(rmwDst, node));
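// For example (an illustrative sketch), a STOREIND whose data is
// ADD(IND(addr), 4) is a binary RMW memory op: rmwDst is the IND(addr),
// rmwSrc is the constant 4, and the code emitted below is a single
// "add [addr], 4".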
5338 genConsumeRegs(rmwSrc);
5342 genConsumeRegs(data);
5347 genConsumeAddress(addr);
5354 // generate code for unary RMW memory ops like neg/not
5355 getEmitter()->emitInsRMW(genGetInsForOper(data->OperGet(), data->TypeGet()), emitTypeSize(node), node);
5359 if (data->OperGet() == GT_LSH ||
5360 data->OperGet() == GT_RSH ||
5361 data->OperGet() == GT_RSZ ||
5362 data->OperGet() == GT_ROL ||
5363 data->OperGet() == GT_ROR)
5365 // generate code for shift RMW memory ops
5366 genCodeForShift(addr, rmwSrc, data);
5370 // generate code for remaining binary RMW memory ops like add/sub/and/or/xor
5371 getEmitter()->emitInsRMW(genGetInsForOper(data->OperGet(), data->TypeGet()), emitTypeSize(node), node, rmwSrc);
5377 getEmitter()->emitInsMov(ins_Store(data->TypeGet()), emitTypeSize(node), node);
5383 //------------------------------------------------------------------------
5384 // genEmitOptimizedGCWriteBarrier: Generate write barrier store using the optimized
5385 // helper functions.
5388 // writeBarrierForm - the write barrier form to use
5389 // addr - the address at which to do the store
5390 // data - the data to store
5393 // true if an optimized write barrier form was used, false if not. If this
5394 // function returns false, the caller must emit a "standard" write barrier.
5396 bool CodeGen::genEmitOptimizedGCWriteBarrier(GCInfo::WriteBarrierForm writeBarrierForm, GenTree* addr, GenTree* data)
5398 assert(writeBarrierForm != GCInfo::WBF_NoBarrier);
5400 #if defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
5401 bool useOptimizedWriteBarriers = true;
5404 useOptimizedWriteBarriers = (writeBarrierForm != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug); // This one is always a call to a C++ method.
5407 if (!useOptimizedWriteBarriers)
5412 const static int regToHelper[2][8] =
5414 // If the target is known to be in managed memory
5416 CORINFO_HELP_ASSIGN_REF_EAX,
5417 CORINFO_HELP_ASSIGN_REF_ECX,
5419 CORINFO_HELP_ASSIGN_REF_EBX,
5421 CORINFO_HELP_ASSIGN_REF_EBP,
5422 CORINFO_HELP_ASSIGN_REF_ESI,
5423 CORINFO_HELP_ASSIGN_REF_EDI,
5426 // Don't know if the target is in managed memory
5428 CORINFO_HELP_CHECKED_ASSIGN_REF_EAX,
5429 CORINFO_HELP_CHECKED_ASSIGN_REF_ECX,
5431 CORINFO_HELP_CHECKED_ASSIGN_REF_EBX,
5433 CORINFO_HELP_CHECKED_ASSIGN_REF_EBP,
5434 CORINFO_HELP_CHECKED_ASSIGN_REF_ESI,
5435 CORINFO_HELP_CHECKED_ASSIGN_REF_EDI,
5439 noway_assert(regToHelper[0][REG_EAX] == CORINFO_HELP_ASSIGN_REF_EAX);
5440 noway_assert(regToHelper[0][REG_ECX] == CORINFO_HELP_ASSIGN_REF_ECX);
5441 noway_assert(regToHelper[0][REG_EBX] == CORINFO_HELP_ASSIGN_REF_EBX);
5442 noway_assert(regToHelper[0][REG_ESP] == -1);
5443 noway_assert(regToHelper[0][REG_EBP] == CORINFO_HELP_ASSIGN_REF_EBP);
5444 noway_assert(regToHelper[0][REG_ESI] == CORINFO_HELP_ASSIGN_REF_ESI);
5445 noway_assert(regToHelper[0][REG_EDI] == CORINFO_HELP_ASSIGN_REF_EDI);
5447 noway_assert(regToHelper[1][REG_EAX] == CORINFO_HELP_CHECKED_ASSIGN_REF_EAX);
5448 noway_assert(regToHelper[1][REG_ECX] == CORINFO_HELP_CHECKED_ASSIGN_REF_ECX);
5449 noway_assert(regToHelper[1][REG_EBX] == CORINFO_HELP_CHECKED_ASSIGN_REF_EBX);
5450 noway_assert(regToHelper[1][REG_ESP] == -1);
5451 noway_assert(regToHelper[1][REG_EBP] == CORINFO_HELP_CHECKED_ASSIGN_REF_EBP);
5452 noway_assert(regToHelper[1][REG_ESI] == CORINFO_HELP_CHECKED_ASSIGN_REF_ESI);
5453 noway_assert(regToHelper[1][REG_EDI] == CORINFO_HELP_CHECKED_ASSIGN_REF_EDI);
5455 regNumber reg = data->gtRegNum;
5456 noway_assert((reg != REG_ESP) && (reg != REG_WRITE_BARRIER));
5458 // Generate the following code:
5460 // call write_barrier_helper_reg
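//
// For example (illustrative, assuming REG_WRITE_BARRIER is EDX on x86 and the
// data value lives in EAX), the checked form expands to:
//     mov  edx, addrReg
//     call CORINFO_HELP_CHECKED_ASSIGN_REF_EAX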
5462 // addr goes in REG_WRITE_BARRIER
5463 if (addr->gtRegNum != REG_WRITE_BARRIER) // REVIEW: can it ever not already be in this register?
5465 inst_RV_RV(INS_mov, REG_WRITE_BARRIER, addr->gtRegNum, addr->TypeGet());
5468 unsigned tgtAnywhere = 0;
5469 if (writeBarrierForm != GCInfo::WBF_BarrierUnchecked)
5474 // We might want to call a modified version of genGCWriteBarrier() to get the benefit of
5475 // the FEATURE_COUNT_GC_WRITE_BARRIERS code there, but that code doesn't look like it works
5476 // with rationalized RyuJIT IR. So, for now, just emit the helper call directly here.
5478 genEmitHelperCall(regToHelper[tgtAnywhere][reg],
5480 EA_PTRSIZE); // retSize
5483 #else // !defined(_TARGET_X86_) || !NOGC_WRITE_BARRIERS
5485 #endif // !defined(_TARGET_X86_) || !NOGC_WRITE_BARRIERS
5488 // Produce code for a GT_CALL node
5489 void CodeGen::genCallInstruction(GenTreePtr node)
5491 GenTreeCall *call = node->AsCall();
5493 assert(call->gtOper == GT_CALL);
5495 gtCallTypes callType = (gtCallTypes)call->gtCallType;
5497 IL_OFFSETX ilOffset = BAD_IL_OFFSET;
5499 // all virtuals should have been expanded into a control expression
5500 assert (!call->IsVirtual() || call->gtControlExpr || call->gtCallAddr);
5502 // Consume all the arg regs
5503 for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
5505 assert(list->IsList());
5507 GenTreePtr argNode = list->Current();
5509 fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode->gtSkipReloadOrCopy());
5510 assert(curArgTabEntry);
5512 if (curArgTabEntry->regNum == REG_STK)
5515 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
5516 // Deal with multi register passed struct args.
5517 if (argNode->OperGet() == GT_LIST)
5519 GenTreeArgList* argListPtr = argNode->AsArgList();
5520 unsigned iterationNum = 0;
5521 for (; argListPtr != nullptr; argListPtr = argListPtr->Rest(), iterationNum++)
5523 GenTreePtr putArgRegNode = argListPtr->gtOp.gtOp1;
5524 assert(putArgRegNode->gtOper == GT_PUTARG_REG);
5525 regNumber argReg = REG_NA;
5526 if (iterationNum == 0)
5528 argReg = curArgTabEntry->regNum;
5530 else if (iterationNum == 1)
5532 argReg = curArgTabEntry->otherRegNum;
5536 assert(false); // Illegal state.
5539 genConsumeReg(putArgRegNode);
5540 if (putArgRegNode->gtRegNum != argReg)
5542 inst_RV_RV(ins_Move_Extend(putArgRegNode->TypeGet(), putArgRegNode->InReg()), argReg, putArgRegNode->gtRegNum);
5547 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
5549 regNumber argReg = curArgTabEntry->regNum;
5550 genConsumeReg(argNode);
5551 if (argNode->gtRegNum != argReg)
5553 inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), argNode->InReg()), argReg, argNode->gtRegNum);
5558 // In the case of a varargs call,
5559 // the ABI dictates that if we have floating point args,
5560 // we must pass the enregistered arguments in both the
5561 // integer and floating point registers, so let's do that.
5562 if (call->IsVarargs() && varTypeIsFloating(argNode))
5564 regNumber targetReg = compiler->getCallArgIntRegister(argNode->gtRegNum);
5565 instruction ins = ins_CopyFloatToInt(argNode->TypeGet(), TYP_LONG);
5566 inst_RV_RV(ins, argNode->gtRegNum, targetReg);
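// For example (illustrative), under the Windows x64 varargs ABI a double
// passed in XMM2 is also copied into R8 with a movd/movq-style copy.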
5568 #endif // FEATURE_VARARG
5571 #if defined(_TARGET_X86_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
5572 // The call will pop its arguments.
5573 // for each putarg_stk:
5574 ssize_t stackArgBytes = 0;
5575 GenTreePtr args = call->gtCallArgs;
5578 GenTreePtr arg = args->gtOp.gtOp1;
5579 if (arg->OperGet() != GT_ARGPLACE && !(arg->gtFlags & GTF_LATE_ARG))
5581 #if defined(_TARGET_X86_)
5582 assert((arg->OperGet() == GT_PUTARG_STK) || (arg->OperGet() == GT_LONG));
5583 if (arg->OperGet() == GT_LONG)
5585 assert((arg->gtGetOp1()->OperGet() == GT_PUTARG_STK) && (arg->gtGetOp2()->OperGet() == GT_PUTARG_STK));
5587 #endif // defined(_TARGET_X86_)
5589 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
5590 if (genActualType(arg->TypeGet()) == TYP_STRUCT)
5592 assert(arg->OperGet() == GT_PUTARG_STK);
5594 GenTreeLdObj* ldObj = arg->gtGetOp1()->AsLdObj();
5595 stackArgBytes = compiler->info.compCompHnd->getClassSize(ldObj->gtClass);
5598 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
5600 stackArgBytes += genTypeSize(genActualType(arg->TypeGet()));
5602 args = args->gtOp.gtOp2;
5604 #endif // defined(_TARGET_X86_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
5606 // Insert a null check on "this" pointer if asked.
5607 if (call->NeedsNullCheck())
5609 const regNumber regThis = genGetThisArgReg(call);
5610 getEmitter()->emitIns_AR_R(INS_cmp, EA_4BYTE, regThis, regThis, 0);
5613 // Either gtControlExpr != null or gtCallAddr != null or it is a direct non-virtual call to a user or helper method.
5614 CORINFO_METHOD_HANDLE methHnd;
5615 GenTree* target = call->gtControlExpr;
5616 if (callType == CT_INDIRECT)
5618 assert(target == nullptr);
5619 target = call->gtCall.gtCallAddr;
5624 methHnd = call->gtCallMethHnd;
5627 CORINFO_SIG_INFO* sigInfo = nullptr;
5629 // Pass the call signature information down into the emitter so the emitter can associate
5630 // native call sites with the signatures they were generated from.
5631 if (callType != CT_HELPER)
5633 sigInfo = call->callSig;
5637 // If fast tail call, then we are done. In this case we set up the args (both reg args
5638 // and stack args in the incoming arg area) and the call target in rax. The epilog sequence
5639 // will generate "jmp rax".
5640 if (call->IsFastTailCall())
5642 // Don't support fast tail calling JIT helpers
5643 assert(callType != CT_HELPER);
5645 // Fast tail calls materialize call target either in gtControlExpr or in gtCallAddr.
5646 assert(target != nullptr);
5648 genConsumeReg(target);
5649 if (target->gtRegNum != REG_RAX)
5651 inst_RV_RV(INS_mov, REG_RAX, target->gtRegNum);
5656 // For a pinvoke to unmanaged code we emit a label to clear
5657 // the GC pointer state before the callsite.
5658 // We can't utilize the typical lazy killing of GC pointers
5659 // at (or inside) the callsite.
5660 if (call->IsUnmanaged())
5662 genDefineTempLabel(genCreateTempLabel());
5665 // Determine return value size.
5666 emitAttr retSize = EA_PTRSIZE;
5667 if (call->gtType == TYP_REF ||
5668 call->gtType == TYP_ARRAY)
5672 else if (call->gtType == TYP_BYREF)
5677 bool fPossibleSyncHelperCall = false;
5678 CorInfoHelpFunc helperNum = CORINFO_HELP_UNDEF;
5680 #ifdef DEBUGGING_SUPPORT
5681 // We need to propagate the IL offset information to the call instruction, so we can emit
5682 // an IL to native mapping record for the call, to support managed return value debugging.
5683 // We don't want tail call helper calls that were converted from normal calls to get a record,
5684 // so we skip this hash table lookup logic in that case.
5685 if (compiler->opts.compDbgInfo && compiler->genCallSite2ILOffsetMap != nullptr && !call->IsTailCall())
5687 (void)compiler->genCallSite2ILOffsetMap->Lookup(call, &ilOffset);
5689 #endif // DEBUGGING_SUPPORT
5691 #if defined(_TARGET_X86_)
5692 // If the callee pops the arguments, we pass a positive value as the argSize, and the emitter will
5693 // adjust its stack level accordingly.
5694 // If the caller needs to explicitly pop its arguments, we must pass a negative value, and then do the
5695 // pop when we're done.
5696 ssize_t argSizeForEmitter = stackArgBytes;
5697 if ((call->gtFlags & GTF_CALL_POP_ARGS) != 0)
5699 argSizeForEmitter = -stackArgBytes;
5702 #endif // defined(_TARGET_X86_)
5704 if (target != nullptr)
5706 if (target->isContainedIndir())
5708 if (target->AsIndir()->HasBase() && target->AsIndir()->Base()->isContainedIntOrIImmed())
5710 // Note that if gtControlExpr is an indir of an absolute address, we mark it as
5711 // contained only if it can be encoded as PC-relative offset.
5712 assert(target->AsIndir()->Base()->AsIntConCommon()->FitsInAddrBase(compiler));
5714 genEmitCall(emitter::EC_FUNC_TOKEN_INDIR,
5716 INDEBUG_LDISASM_COMMA(sigInfo)
5717 (void*) target->AsIndir()->Base()->AsIntConCommon()->IconValue()
5718 X86_ARG(argSizeForEmitter),
5724 GenTree* addr = target->gtGetOp1();
5725 genConsumeAddress(addr);
5726 genEmitCall(emitter::EC_INDIR_ARD,
5728 INDEBUG_LDISASM_COMMA(sigInfo)
5730 X86_ARG(argSizeForEmitter),
5737 // We have already generated code for gtControlExpr evaluating it into a register.
5738 // We just need to emit "call reg" in this case.
5739 assert(genIsValidIntReg(target->gtRegNum));
5740 genEmitCall(emitter::EC_INDIR_R,
5742 INDEBUG_LDISASM_COMMA(sigInfo)
5744 X86_ARG(argSizeForEmitter),
5747 genConsumeReg(target));
5750 #ifdef FEATURE_READYTORUN_COMPILER
5751 else if (call->gtEntryPoint.addr != nullptr)
5753 genEmitCall((call->gtEntryPoint.accessType == IAT_VALUE) ? emitter::EC_FUNC_TOKEN : emitter::EC_FUNC_TOKEN_INDIR,
5755 INDEBUG_LDISASM_COMMA(sigInfo)
5756 (void*) call->gtEntryPoint.addr
5757 X86_ARG(argSizeForEmitter),
5764 // Generate a direct call to a non-virtual user defined or helper method
5765 assert(callType == CT_HELPER || callType == CT_USER_FUNC);
5767 void *addr = nullptr;
5768 if (callType == CT_HELPER)
5770 // Direct call to a helper method.
5771 helperNum = compiler->eeGetHelperNum(methHnd);
5772 noway_assert(helperNum != CORINFO_HELP_UNDEF);
5774 void *pAddr = nullptr;
5775 addr = compiler->compGetHelperFtn(helperNum, (void **)&pAddr);
5777 if (addr == nullptr)
5782 // tracking of region protected by the monitor in synchronized methods
5783 if (compiler->info.compFlags & CORINFO_FLG_SYNCH)
5785 fPossibleSyncHelperCall = true;
5790 // Direct call to a non-virtual user function.
5791 CORINFO_ACCESS_FLAGS aflags = CORINFO_ACCESS_ANY;
5792 if (call->IsSameThis())
5794 aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_THIS);
5797 if ((call->NeedsNullCheck()) == 0)
5799 aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_NONNULL);
5802 CORINFO_CONST_LOOKUP addrInfo;
5803 compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo, aflags);
5805 addr = addrInfo.addr;
5808 // Non-virtual direct calls to known addresses
5809 genEmitCall(emitter::EC_FUNC_TOKEN,
5811 INDEBUG_LDISASM_COMMA(sigInfo)
5813 X86_ARG(argSizeForEmitter),
5818 // if it was a pinvoke we may have needed to get the address of a label
5819 if (genPendingCallLabel)
5821 assert(call->IsUnmanaged());
5822 genDefineTempLabel(genPendingCallLabel);
5823 genPendingCallLabel = nullptr;
5826 #if defined(_TARGET_X86_)
5827 // The call will pop its arguments.
5828 genStackLevel -= stackArgBytes;
5829 #endif // defined(_TARGET_X86_)
5832 // All Callee arg registers are trashed and no longer contain any GC pointers.
5833 // TODO-XArch-Bug?: As a matter of fact shouldn't we be killing all of callee trashed regs here?
5834 // For now we will assert that other than arg regs gc ref/byref set doesn't contain any other
5835 // registers from RBM_CALLEE_TRASH.
5836 assert((gcInfo.gcRegGCrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0);
5837 assert((gcInfo.gcRegByrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0);
5838 gcInfo.gcRegGCrefSetCur &= ~RBM_ARG_REGS;
5839 gcInfo.gcRegByrefSetCur &= ~RBM_ARG_REGS;
5841 var_types returnType = call->TypeGet();
5842 if (returnType != TYP_VOID)
5845 if (varTypeIsFloating(returnType))
5847 // Spill the value from the fp stack.
5848 // Then, load it into the target register.
5849 call->gtFlags |= GTF_SPILL;
5850 regSet.rsSpillFPStack(call);
5851 call->gtFlags |= GTF_SPILLED;
5852 call->gtFlags &= ~GTF_SPILL;
5854 genUnspillRegIfNeeded(call);
5857 #endif // _TARGET_X86_
5859 regNumber returnReg;
5860 // TODO-Cleanup: For UNIX AMD64, we should not be allocating a return register for struct
5861 // returns that are on stack.
5862 // For the SIMD case, however, we do want a "return register", as the consumer of the call
5863 // will want the value in a register. In future we should flexibly allocate this return
5864 // register, but that should be done with a general cleanup of the allocation of return
5865 // registers for structs.
5866 if (varTypeIsFloating(returnType)
5867 FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY( || varTypeIsSIMD(returnType)))
5869 returnReg = REG_FLOATRET;
5873 returnReg = REG_INTRET;
5875 if (call->gtRegNum != returnReg)
5877 inst_RV_RV(ins_Copy(returnType), call->gtRegNum, returnReg, returnType);
5879 genProduceReg(call);
5883 // If there is nothing next, that means the result is thrown away, so this value is not live.
5884 // However, for minopts or debuggable code, we keep it live to support managed return value debugging.
5885 if ((call->gtNext == nullptr) && !compiler->opts.MinOpts() && !compiler->opts.compDbgCode)
5887 gcInfo.gcMarkRegSetNpt(RBM_INTRET);
5890 #if defined(_TARGET_X86_)
5891 //-------------------------------------------------------------------------
5892 // Create a label for tracking of region protected by the monitor in synchronized methods.
5893 // This needs to be here, rather than above where fPossibleSyncHelperCall is set,
5894 // so the GC state vars have been updated before creating the label.
5896 if (fPossibleSyncHelperCall)
5898 switch (helperNum) {
5899 case CORINFO_HELP_MON_ENTER:
5900 case CORINFO_HELP_MON_ENTER_STATIC:
5901 noway_assert(compiler->syncStartEmitCookie == NULL);
5902 compiler->syncStartEmitCookie = getEmitter()->emitAddLabel(
5903 gcInfo.gcVarPtrSetCur,
5904 gcInfo.gcRegGCrefSetCur,
5905 gcInfo.gcRegByrefSetCur);
5906 noway_assert(compiler->syncStartEmitCookie != NULL);
5908 case CORINFO_HELP_MON_EXIT:
5909 case CORINFO_HELP_MON_EXIT_STATIC:
5910 noway_assert(compiler->syncEndEmitCookie == NULL);
5911 compiler->syncEndEmitCookie = getEmitter()->emitAddLabel(
5912 gcInfo.gcVarPtrSetCur,
5913 gcInfo.gcRegGCrefSetCur,
5914 gcInfo.gcRegByrefSetCur);
5915 noway_assert(compiler->syncEndEmitCookie != NULL);
5922 // Is the caller supposed to pop the arguments?
5923 if (((call->gtFlags & GTF_CALL_POP_ARGS) != 0) && (stackArgBytes != 0))
5925 genAdjustSP(stackArgBytes);
5927 #endif // _TARGET_X86_
5930 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
5931 //------------------------------------------------------------------------
5932 // getStructTypeOffset: Gets the type, size and offset of the eightbytes of a struct for System V systems.
5935 // 'structDesc' struct description
5936 // 'type0' returns the type of the first eightbyte.
5937 // 'type1' returns the type of the second eightbyte.
5938 // 'offset0' returns the offset of the first eightbyte.
5939 // 'offset1' returns the offset of the second eightbyte.
5942 void CodeGen::getStructTypeOffset(const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR& structDesc,
5945 unsigned __int8* offset0,
5946 unsigned __int8* offset1)
5948 *offset0 = structDesc.eightByteOffsets[0];
5949 *offset1 = structDesc.eightByteOffsets[1];
5951 *type0 = TYP_UNKNOWN;
5952 *type1 = TYP_UNKNOWN;
5954 // Set the first eightbyte data
5955 if (structDesc.eightByteCount >= 1)
5957 *type0 = compiler->getEightByteType(structDesc, 0);
5960 // Set the second eightbyte data
5961 if (structDesc.eightByteCount == 2)
5963 *type1 = compiler->getEightByteType(structDesc, 1);
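// For example (an illustrative sketch), a struct { double d; void* p; } would be
// described by two eightbytes: eightByteCount == 2, with *type0 == TYP_DOUBLE at
// offset 0 and *type1 == TYP_I_IMPL at offset 8.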
5966 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
5968 // Produce code for a GT_JMP node.
5969 // The arguments of the caller need to be transferred to the callee before exiting the caller.
5970 // The actual jump to the callee is generated as part of the caller's epilog sequence.
5971 // Therefore the codegen of GT_JMP is to ensure that the callee arguments are correctly set up.
5972 void CodeGen::genJmpMethod(GenTreePtr jmp)
5974 assert(jmp->OperGet() == GT_JMP);
5975 assert(compiler->compJmpOpUsed);
5977 // If no arguments, nothing to do
5978 if (compiler->info.compArgsCount == 0)
5983 // Make sure register arguments are in their initial registers
5984 // and stack arguments are put back as well.
5988 // First move any enregistered stack arguments back to the stack.
5989 // At the same time any reg arg not in the correct reg is moved back to its stack location.
5991 // We are not strictly required to spill reg args that are not in the desired reg for a jmp call.
5992 // But that would require us to deal with circularity while moving values around. Spilling
5993 // to stack makes the implementation simple, which is not a bad trade-off given Jmp calls
5994 // are not frequent.
5995 for (varNum = 0; (varNum < compiler->info.compArgsCount); varNum++)
5997 varDsc = compiler->lvaTable + varNum;
5999 if (varDsc->lvPromoted)
6001 noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
6003 unsigned fieldVarNum = varDsc->lvFieldLclStart;
6004 varDsc = compiler->lvaTable + fieldVarNum;
6006 noway_assert(varDsc->lvIsParam);
6008 if (varDsc->lvIsRegArg && (varDsc->lvRegNum != REG_STK))
6010 // Skip reg args which are already in the right register for the jmp call.
6011 // If not, we will spill such args to their stack locations.
6013 // If we need to generate a tail call profiler hook, then spill all
6014 // arg regs to free them up for the callback.
6015 if (!compiler->compIsProfilerHookNeeded() && (varDsc->lvRegNum == varDsc->lvArgReg))
6018 else if (varDsc->lvRegNum == REG_STK)
6020 // Skip args which are currently living on the stack.
6024 // If we came here it means either a reg argument not in the right register or
6025 // a stack argument currently living in a register. In either case the following
6026 // assert should hold.
6027 assert(varDsc->lvRegNum != REG_STK);
6029 var_types loadType = varDsc->lvaArgType();
6030 getEmitter()->emitIns_S_R(ins_Store(loadType), emitTypeSize(loadType), varDsc->lvRegNum, varNum, 0);
6032 // Update lvRegNum life and GC info to indicate lvRegNum is dead and varDsc stack slot is going live.
6033 // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it.
6034 // Therefore manually update life of varDsc->lvRegNum.
6035 regMaskTP tempMask = varDsc->lvRegMask();
6036 regSet.RemoveMaskVars(tempMask);
6037 gcInfo.gcMarkRegSetNpt(tempMask);
6038 if (compiler->lvaIsGCTracked(varDsc))
6041 if (!VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
6043 JITDUMP("\t\t\t\t\t\t\tVar V%02u becoming live\n", varNum);
6047 JITDUMP("\t\t\t\t\t\t\tVar V%02u continuing live\n", varNum);
6051 VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
6055 #ifdef PROFILING_SUPPORTED
6056 // At this point all arg regs are free.
6057 // Emit tail call profiler callback.
6058 genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_TAILCALL);
6061 // Next move any un-enregistered register arguments back to their register.
6062 regMaskTP fixedIntArgMask = RBM_NONE; // tracks the int arg regs occupying fixed args in case of a vararg method.
6063 unsigned firstArgVarNum = BAD_VAR_NUM; // varNum of the first argument in case of a vararg method.
6064 for (varNum = 0; (varNum < compiler->info.compArgsCount); varNum++)
6066 varDsc = compiler->lvaTable + varNum;
6067 if (varDsc->lvPromoted)
6069 noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
6071 unsigned fieldVarNum = varDsc->lvFieldLclStart;
6072 varDsc = compiler->lvaTable + fieldVarNum;
6074 noway_assert(varDsc->lvIsParam);
6076 // Skip if arg not passed in a register.
6077 if (!varDsc->lvIsRegArg)
6080 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
6081 if (varTypeIsStruct(varDsc))
6083 CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
6084 assert(typeHnd != nullptr);
6086 SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
6087 compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc);
6088 assert(structDesc.passedInRegisters);
6090 unsigned __int8 offset0 = 0;
6091 unsigned __int8 offset1 = 0;
6092 var_types type0 = TYP_UNKNOWN;
6093 var_types type1 = TYP_UNKNOWN;
6095 // Get the eightbyte data
6096 getStructTypeOffset(structDesc, &type0, &type1, &offset0, &offset1);
6098 // Move the values into the right registers.
6101 // Update varDsc->lvArgReg and lvOtherArgReg life and GC Info to indicate varDsc stack slot is dead and argReg is going live.
6102 // Note that we cannot modify varDsc->lvRegNum and lvOtherArgReg here because another basic block may not be expecting it.
6103 // Therefore manually update life of argReg. Note that GT_JMP marks the end of the basic block
6104 // and after which reg life and gc info will be recomputed for the new block in genCodeForBBList().
6105 if (type0 != TYP_UNKNOWN)
6107 getEmitter()->emitIns_R_S(ins_Load(type0), emitTypeSize(type0), varDsc->lvArgReg, varNum, offset0);
6108 regSet.rsMaskVars |= genRegMask(varDsc->lvArgReg);
6109 gcInfo.gcMarkRegPtrVal(varDsc->lvArgReg, type0);
6112 if (type1 != TYP_UNKNOWN)
6114 getEmitter()->emitIns_R_S(ins_Load(type1), emitTypeSize(type1), varDsc->lvOtherArgReg, varNum, offset1);
6115 regSet.rsMaskVars |= genRegMask(varDsc->lvOtherArgReg);
6116 gcInfo.gcMarkRegPtrVal(varDsc->lvOtherArgReg, type1);
6119 if (varDsc->lvTracked)
6121 VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
6125 #endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
6127 // Register argument
6128 noway_assert(isRegParamType(genActualType(varDsc->TypeGet())));
6130 // Is register argument already in the right register?
6131 // If not load it from its stack location.
6132 var_types loadType = varDsc->lvaArgType();
6133 regNumber argReg = varDsc->lvArgReg; // incoming arg register
6135 if (varDsc->lvRegNum != argReg)
6137 assert(genIsValidReg(argReg));
6138 getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, 0);
6140 // Update argReg life and GC Info to indicate varDsc stack slot is dead and argReg is going live.
6141 // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it.
6142 // Therefore manually update life of argReg. Note that GT_JMP marks the end of the basic block
6143 // and after which reg life and gc info will be recomputed for the new block in genCodeForBBList().
6144 regSet.AddMaskVars(genRegMask(argReg));
6145 gcInfo.gcMarkRegPtrVal(argReg, loadType);
6146 if (compiler->lvaIsGCTracked(varDsc))
6149 if (VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
6151 JITDUMP("\t\t\t\t\t\t\tVar V%02u becoming dead\n", varNum);
6155 JITDUMP("\t\t\t\t\t\t\tVar V%02u continuing dead\n", varNum);
6159 VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
6165 // In case of a jmp call to a vararg method also pass the float/double arg in the corresponding int arg register.
6166 if (compiler->info.compIsVarArgs)
6168 regNumber intArgReg;
6169 var_types loadType = varDsc->lvaArgType();
6170 regNumber argReg = varDsc->lvArgReg; // incoming arg register
6172 if (varTypeIsFloating(loadType))
6174 intArgReg = compiler->getCallArgIntRegister(argReg);
6175 instruction ins = ins_CopyFloatToInt(loadType, TYP_LONG);
6176 inst_RV_RV(ins, argReg, intArgReg, loadType);
6183 fixedIntArgMask |= genRegMask(intArgReg);
6185 if (intArgReg == REG_ARG_0)
6187 assert(firstArgVarNum == BAD_VAR_NUM);
6188 firstArgVarNum = varNum;
6191 #endif // FEATURE_VARARG
6195 // Jmp call to a vararg method - if the method has fewer than 4 fixed arguments,
6196 // load the remaining arg registers (both int and float) from the corresponding
6197 // shadow stack slots. This is for the reason that we don't know the number and type
6198 // of non-fixed params passed by the caller, therefore we have to assume the worst case
6199 // of caller passing float/double args both in int and float arg regs.
6201 // The caller could have passed gc-ref/byref type var args. Since these are var args
6202 // the callee no way of knowing their gc-ness. Therefore, mark the region that loads
6203 // remaining arg registers from shadow stack slots as non-gc interruptible.
6204 if (fixedIntArgMask != RBM_NONE)
6206 assert(compiler->info.compIsVarArgs);
6207 assert(firstArgVarNum != BAD_VAR_NUM);
6209 regMaskTP remainingIntArgMask = RBM_ARG_REGS & ~fixedIntArgMask;
6210 if (remainingIntArgMask != RBM_NONE)
6212 instruction insCopyIntToFloat = ins_CopyIntToFloat(TYP_LONG, TYP_DOUBLE);
6213 getEmitter()->emitDisableGC();
6214 for (int argNum = 0, argOffset=0; argNum < MAX_REG_ARG; ++argNum)
6216 regNumber argReg = intArgRegs[argNum];
6217 regMaskTP argRegMask = genRegMask(argReg);
6219 if ((remainingIntArgMask & argRegMask) != 0)
6221 remainingIntArgMask &= ~argRegMask;
6222 getEmitter()->emitIns_R_S(INS_mov, EA_8BYTE, argReg, firstArgVarNum, argOffset);
6224 // also load it in corresponding float arg reg
6225 regNumber floatReg = compiler->getCallArgFloatRegister(argReg);
6226 inst_RV_RV(insCopyIntToFloat, floatReg, argReg);
6229 argOffset += REGSIZE_BYTES;
6231 getEmitter()->emitEnableGC();
6234 #endif // FEATURE_VARARG
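
// Illustrative example (a sketch, not from the original sources): if only REG_ARG_0 holds
// a fixed arg, the loop above reloads the remaining shadow slots and mirrors them into the
// float arg regs inside the non-GC-interruptible region, roughly:
//     mov  rdx, qword ptr [<firstArgVarNum slot> + 8]
//     movd xmm1, rdx
//     mov  r8,  qword ptr [<firstArgVarNum slot> + 16]
//     movd xmm2, r8
//     mov  r9,  qword ptr [<firstArgVarNum slot> + 24]
//     movd xmm3, r9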
// produce code for a GT_LEA subnode
void CodeGen::genLeaInstruction(GenTreeAddrMode* lea)
{
    emitAttr size = emitTypeSize(lea);
    genConsumeOperands(lea);

    if (lea->Base() && lea->Index())
    {
        regNumber baseReg  = lea->Base()->gtRegNum;
        regNumber indexReg = lea->Index()->gtRegNum;
        getEmitter()->emitIns_R_ARX(INS_lea, size, lea->gtRegNum, baseReg, indexReg, lea->gtScale, lea->gtOffset);
    }
    else if (lea->Base())
    {
        getEmitter()->emitIns_R_AR(INS_lea, size, lea->gtRegNum, lea->Base()->gtRegNum, lea->gtOffset);
    }
    else if (lea->Index())
    {
        getEmitter()->emitIns_R_ARX(INS_lea, size, lea->gtRegNum, REG_NA, lea->Index()->gtRegNum, lea->gtScale,
                                    lea->gtOffset);
    }

    genProduceReg(lea);
}
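
// For illustration, the three address modes handled above produce (sketch):
//     lea reg, [base + index*scale + offset]   ; both Base() and Index()
//     lea reg, [base + offset]                 ; Base() only
//     lea reg, [index*scale + offset]          ; Index() only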
/*****************************************************************************
 *  The conditions to use for (the jmp/set for) the given type of compare operation are
 *  returned in the 'jmpKind' array. The corresponding elements of jmpToTrueLabel indicate
 *  the branch target when the condition is true.
 *
 *  jmpToTrueLabel[i] = true  implies branch to the target when the compare operation is true.
 *  jmpToTrueLabel[i] = false implies branch to the target when the compare operation is false.
 */
void CodeGen::genJumpKindsForTree(GenTreePtr   cmpTree,
                                  emitJumpKind jmpKind[2],
                                  bool         jmpToTrueLabel[2])
{
    // Except for BEQ (= ordered GT_EQ) both jumps are to the true label.
    jmpToTrueLabel[0] = true;
    jmpToTrueLabel[1] = true;

    // For integer comparisons just use genJumpKindForOper
    if (!varTypeIsFloating(cmpTree->gtOp.gtOp1->gtEffectiveVal()))
    {
        jmpKind[0] = genJumpKindForOper(cmpTree->gtOper, (cmpTree->gtFlags & GTF_UNSIGNED) != 0);
        jmpKind[1] = EJ_NONE;
    }
    else
    {
        assert(cmpTree->OperIsCompare());

        // For details on how we arrived at this mapping, see the comment block in genCodeForTreeNode()
        // while generating code for compare operators (e.g. GT_EQ etc).
        if ((cmpTree->gtFlags & GTF_RELOP_NAN_UN) != 0)
        {
            // Must branch if we have a NaN, unordered
            switch (cmpTree->gtOper)
            {
            case GT_LT:
            case GT_GT:
                jmpKind[0] = EJ_jb;
                jmpKind[1] = EJ_NONE;
                break;

            case GT_LE:
            case GT_GE:
                jmpKind[0] = EJ_jbe;
                jmpKind[1] = EJ_NONE;
                break;

            case GT_NE:
                jmpKind[0] = EJ_jpe;
                jmpKind[1] = EJ_jne;
                break;

            case GT_EQ:
                jmpKind[0] = EJ_je;
                jmpKind[1] = EJ_NONE;
                break;

            default:
                unreached();
            }
        }
        else
        {
            // Do not branch if we have a NaN, unordered
            switch (cmpTree->gtOper)
            {
            case GT_LT:
            case GT_GT:
                jmpKind[0] = EJ_ja;
                jmpKind[1] = EJ_NONE;
                break;

            case GT_LE:
            case GT_GE:
                jmpKind[0] = EJ_jae;
                jmpKind[1] = EJ_NONE;
                break;

            case GT_NE:
                jmpKind[0] = EJ_jne;
                jmpKind[1] = EJ_NONE;
                break;

            case GT_EQ:
                jmpKind[0] = EJ_jpe;
                jmpKind[1] = EJ_je;
                jmpToTrueLabel[0] = false;
                break;

            default:
                unreached();
            }
        }
    }
}
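
// Illustrative example (a sketch, not from the original sources): an ordered GT_EQ of
// floats yields jmpKind = { EJ_jpe, EJ_je } with jmpToTrueLabel = { false, true }, i.e.
// a consumer emits roughly:
//     ucomiss xmm0, xmm1
//     jpe     <false label>   ; unordered (NaN) => not equal
//     je      <true label>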
#if !defined(_TARGET_64BIT_)
//------------------------------------------------------------------------
// genJumpKindsForTreeLongHi: Generate the jump types for compare
// operators of the high parts of a compare with long type operands
// on x86
//
// Arguments:
//    cmpTree        - The GT_CMP node
//    jmpKind        - Return array of jump kinds
//    jmpToTrueLabel - Return array of whether the jump goes to the true label
//
// Return Value:
//    None.
//
void CodeGen::genJumpKindsForTreeLongHi(GenTreePtr   cmpTree,
                                        emitJumpKind jmpKind[2],
                                        bool         jmpToTrueLabel[2])
{
    jmpToTrueLabel[0] = true;
    jmpToTrueLabel[1] = true;

    assert(cmpTree->OperIsCompare());

    bool isUnsigned = (cmpTree->gtFlags & GTF_UNSIGNED) != 0;

    // For comparison of longs on x86, GT_LT, GT_LE, GT_GT, and GT_GE need two jump cases, since
    // only if the hi operators are equal will we fall through.
    switch (cmpTree->gtOper)
    {
    case GT_LT:
    case GT_LE:
    case GT_GT:
    case GT_GE:
        if (isUnsigned)
        {
            jmpKind[0] = EJ_ja;
        }
        else
        {
            jmpKind[0] = EJ_jg;
        }
        jmpKind[1] = EJ_jne;
        break;

    case GT_NE:
    case GT_EQ:
        jmpKind[0] = EJ_jne;
        jmpKind[1] = EJ_NONE;
        break;

    default:
        unreached();
    }
}
//------------------------------------------------------------------------
// genJumpKindsForTreeLongLo: Generate the jump types for compare
// operators of the low parts of a compare with long type operands
// on x86
//
// Arguments:
//    cmpTree        - The GT_CMP node
//    jmpKind        - Return array of jump kinds
//    jmpToTrueLabel - Return array of whether the jump goes to the true label
//
// Return Value:
//    None.
//
void CodeGen::genJumpKindsForTreeLongLo(GenTreePtr   cmpTree,
                                        emitJumpKind jmpKind[2],
                                        bool         jmpToTrueLabel[2])
{
    jmpToTrueLabel[0] = true;
    jmpToTrueLabel[1] = true;

    assert(cmpTree->OperIsCompare());
    jmpKind[0] = genJumpKindForOper(cmpTree->gtOper, true);
    jmpKind[1] = EJ_NONE;
}
//------------------------------------------------------------------------
// genCompareLong: Generate code for comparing two longs on x86
//
// Arguments:
//    treeNode - the compare tree
//
// Return Value:
//    None.
//
// Comments:
// For long compares, we need to compare the high parts of operands first, then the low parts.
// If the high compare is false, we do not need to compare the low parts. For less than and
// greater than, if the high compare is true, we can assume the entire compare is true.
//
//    Opcode            x86 equivalent          Comment
//    ------            --------------          -------
//    GT_EQ             cmp hiOp1,hiOp2         If any part is not equal, the entire compare
//                      jne label               is false.
//                      cmp loOp1,loOp2
//                      label: sete
//
//    GT_NE             cmp hiOp1,hiOp2         If any part is not equal, the entire compare
//                      jne label               is true.
//                      cmp loOp1,loOp2
//                      label: setne
//
//    GT_LT             cmp hiOp1,hiOp2         If hiOp1 is greater than hiOp2, the entire compare
//                      ja label                is false. If hiOp1 is less than hiOp2, the entire
//                      jne label               compare is true.
//                      cmp loOp1,loOp2
//                      label: setb
//
//    GT_LE             cmp hiOp1,hiOp2         If hiOp1 is greater than hiOp2, the entire compare
//                      ja label                is false. If hiOp1 is less than hiOp2, the entire
//                      jne label               compare is true.
//                      cmp loOp1,loOp2
//                      label: setbe
//
//    GT_GT             cmp hiOp1,hiOp2         If hiOp1 is greater than hiOp2, the entire compare
//                      ja label                is true. If hiOp1 is less than hiOp2, the entire
//                      jne label               compare is false.
//                      cmp loOp1,loOp2
//                      label: seta
//
//    GT_GE             cmp hiOp1,hiOp2         If hiOp1 is greater than hiOp2, the entire compare
//                      ja label                is true. If hiOp1 is less than hiOp2, the entire
//                      jne label               compare is false.
//                      cmp loOp1,loOp2
//                      label: setae
//
// TODO-X86-CQ: Check if hi or lo parts of op2 are 0 and change the compare to a test.
void CodeGen::genCompareLong(GenTreePtr treeNode)
{
    assert(treeNode->OperIsCompare());

    GenTreeOp* tree = treeNode->AsOp();
    GenTreePtr op1  = tree->gtOp1;
    GenTreePtr op2  = tree->gtOp2;

    genConsumeOperands(tree);

    assert(varTypeIsLong(op1->TypeGet()) && varTypeIsLong(op2->TypeGet()));

    regNumber targetReg = treeNode->gtRegNum;

    GenTreePtr loOp1 = op1->gtGetOp1();
    GenTreePtr hiOp1 = op1->gtGetOp2();
    GenTreePtr loOp2 = op2->gtGetOp1();
    GenTreePtr hiOp2 = op2->gtGetOp2();

    // Create compare for the high parts
    instruction ins     = INS_cmp;
    var_types   cmpType = TYP_INT;
    emitAttr    cmpAttr = emitTypeSize(cmpType);

    // Emit the compare instruction
    getEmitter()->emitInsBinary(ins, cmpAttr, hiOp1, hiOp2);

    // Generate the first jump for the high compare
    emitJumpKind jumpKind[2];
    bool         branchToTrueLabel[2];
    genJumpKindsForTreeLongHi(tree, jumpKind, branchToTrueLabel);

    BasicBlock* label = genCreateTempLabel();
    inst_JMP(jumpKind[0], label);

    // Generate the second jump for LE, LT, GT, and GE. We only do the lower compare if
    // the hi parts are equal
    if (jumpKind[1] != EJ_NONE)
    {
        assert(branchToTrueLabel[1]);
        inst_JMP(jumpKind[1], label);
    }

    // Now create compare for low parts
    ins     = INS_cmp;
    cmpType = TYP_INT;
    cmpAttr = emitTypeSize(cmpType);

    // Emit the comparison
    getEmitter()->emitInsBinary(ins, cmpAttr, loOp1, loOp2);

    // Define the label for the hi jump target here. If we have jumped here, we want to set
    // the target register based on the jump kind of the lower half (the actual compare
    // type). If we have fallen through, then we are doing a normal int compare for the
    // lower parts.

    genDefineTempLabel(label);
    if (targetReg != REG_NA)
    {
        emitJumpKind jumpKindLo[2];
        bool         branchToTrueLabelLo[2];

        // The low set must be unsigned
        genJumpKindsForTreeLongLo(tree, jumpKindLo, branchToTrueLabelLo);
        inst_SET(jumpKindLo[0], targetReg);

        // Set the higher bytes to 0
        inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), targetReg, targetReg, TYP_UBYTE, emitTypeSize(TYP_UBYTE));
        genProduceReg(tree);
    }
}
#endif //!defined(_TARGET_64BIT_)
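
// Illustrative example (a sketch, not from the original sources): a signed GT_LT of two
// longs materialized into a register is emitted roughly as:
//     cmp   hiOp1, hiOp2
//     jg    label           ; hi parts decide the result
//     jne   label           ; hi parts decide the result
//     cmp   loOp1, loOp2    ; hi parts equal: unsigned low compare decides
//   label:
//     setb  al
//     movzx eax, al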
//------------------------------------------------------------------------
// genCompareFloat: Generate code for comparing two floating point values
//
// Arguments:
//    treeNode - the compare tree
//
// Return Value:
//    None.
//
// Comments:
// The SSE2 instruction ucomis[s|d] performs unordered comparison and
// updates the rFLAGS register as follows.
//        Result of compare         ZF  PF  CF
//        -----------------        ------------
//        Unordered                 1   1   1     <-- this result implies one of operands of compare is a NaN.
//        Greater                   0   0   0
//        Less Than                 0   0   1
//        Equal                     1   0   0
//
// From the above table the following equalities follow. As per ECMA spec *.UN opcodes perform
// unordered comparison of floating point values. That is, *.UN comparisons result in true when
// one of the operands is a NaN whereas ordered comparisons result in false.
//
//    Opcode          Amd64 equivalent         Comment
//    ------          -----------------        --------
//    BLT.UN(a,b)     ucomis[s|d] a, b         jb branches if CF=1, which means either a<b or unordered from the
//                    jb                       above table.
//
//    BLT(a,b)        ucomis[s|d] b, a         ja branches if CF=0 and ZF=0, which means b>a that in turn implies a<b.
//                    ja
//
//    BGT.UN(a,b)     ucomis[s|d] b, a         branch if b<a or unordered ==> branch if a>b or unordered
//                    jb
//
//    BGT(a, b)       ucomis[s|d] a, b         branch if a>b
//                    ja
//
//    BLE.UN(a,b)     ucomis[s|d] a, b         jbe branches if CF=1 or ZF=1, which implies a<=b or unordered
//                    jbe
//
//    BLE(a,b)        ucomis[s|d] b, a         jae branches if CF=0, which means b>=a or a<=b
//                    jae
//
//    BGE.UN(a,b)     ucomis[s|d] b, a         branch if b<=a or unordered ==> branch if a>=b or unordered
//                    jbe
//
//    BGE(a,b)        ucomis[s|d] a, b         branch if a>=b
//                    jae
//
//    BEQ.UN(a,b)     ucomis[s|d] a, b         branch if a==b or unordered. There is no BEQ.UN opcode in ECMA spec.
//                    je                       This case is given for completeness, in case the JIT generates such
//                                             a gentree internally.
//
//    BEQ(a,b)        ucomis[s|d] a, b         From the above table, PF=0 and ZF=1 corresponds to a==b.
//                    jpe L1
//                    je <true label>
//                L1:
//
//    BNE(a,b)        ucomis[s|d] a, b         branch if a!=b. There is no BNE opcode in ECMA spec. This case is
//                    jne                      given for completeness, in case the JIT generates such a gentree
//                                             internally.
//
//    BNE.UN(a,b)     ucomis[s|d] a, b         From the above table, PF=1 or ZF=0 implies unordered or a!=b.
//                    jpe <true label>
//                    jne <true label>
//
// As we can see from the above equalities, the operands of a compare operator need to be
// reversed in case of BLT/CLT, BGT.UN/CGT.UN, BLE/CLE, BGE.UN/CGE.UN.
void CodeGen::genCompareFloat(GenTreePtr treeNode)
{
    assert(treeNode->OperIsCompare());

    GenTreeOp* tree    = treeNode->AsOp();
    GenTreePtr op1     = tree->gtOp1;
    GenTreePtr op2     = tree->gtOp2;
    var_types  op1Type = op1->TypeGet();
    var_types  op2Type = op2->TypeGet();

    genConsumeOperands(tree);

    assert(varTypeIsFloating(op1Type));
    assert(op1Type == op2Type);

    regNumber   targetReg = treeNode->gtRegNum;
    instruction ins;
    emitAttr    cmpAttr;
    bool        reverseOps;

    if ((tree->gtFlags & GTF_RELOP_NAN_UN) != 0)
    {
        // Unordered comparison case
        reverseOps = (tree->gtOper == GT_GT || tree->gtOper == GT_GE);
    }
    else
    {
        reverseOps = (tree->gtOper == GT_LT || tree->gtOper == GT_LE);
    }

    if (reverseOps)
    {
        GenTreePtr tmp = op1;
        op1 = op2;
        op2 = tmp;
    }

    ins     = ins_FloatCompare(op1Type);
    cmpAttr = emitTypeSize(op1Type);

    getEmitter()->emitInsBinary(ins, cmpAttr, op1, op2);

    // Are we evaluating this into a register?
    if (targetReg != REG_NA)
    {
        genSetRegToCond(targetReg, tree);
        genProduceReg(tree);
    }
}
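
// Illustrative example (a sketch, not from the original sources): for an ordered GT_LE
// the operands are reversed per the table above, so "a <= b" becomes roughly:
//     ucomiss xmm_b, xmm_a
//     setae   target        ; CF==0 means b >= a, i.e. a <= b
//     movzx   target, target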
//------------------------------------------------------------------------
// genCompareInt: Generate code for comparing ints or, on amd64, longs.
//
// Arguments:
//    treeNode - the compare tree
//
// Return Value:
//    None.
//
void CodeGen::genCompareInt(GenTreePtr treeNode)
{
    assert(treeNode->OperIsCompare());

    GenTreeOp* tree    = treeNode->AsOp();
    GenTreePtr op1     = tree->gtOp1;
    GenTreePtr op2     = tree->gtOp2;
    var_types  op1Type = op1->TypeGet();
    var_types  op2Type = op2->TypeGet();

    genConsumeOperands(tree);

    instruction ins;
    emitAttr    cmpAttr;

    regNumber targetReg = treeNode->gtRegNum;
    assert(!op1->isContainedIntOrIImmed()); // We no longer support swapping op1 and op2 to generate cmp reg, imm
    assert(!varTypeIsFloating(op2Type));

#ifdef _TARGET_X86_
    assert(!varTypeIsLong(op1Type) && !varTypeIsLong(op2Type));
#endif // _TARGET_X86_

    // By default we use an int32 sized cmp instruction
    //
    ins = INS_cmp;
    var_types cmpType = TYP_INT;

    // In the if/then/else statement below we may change the
    // 'cmpType' and/or 'ins' to generate a smaller instruction

    // Are we comparing two values that are the same size?
    //
    if (genTypeSize(op1Type) == genTypeSize(op2Type))
    {
        if (op1Type == op2Type)
        {
            // If both types are exactly the same we can use that type
            cmpType = op1Type;
        }
        else if (genTypeSize(op1Type) == 8)
        {
            // If we have two different int64 types we need to use a long compare
            cmpType = TYP_LONG;
        }

        cmpAttr = emitTypeSize(cmpType);
    }
    else // Here we know that (op1Type != op2Type)
    {
        // Do we have a short compare against a constant in op2?
        //
        // We checked for this case in LowerCmp() and if we can perform a small
        // compare immediate we labeled this compare with a GTF_RELOP_SMALL
        // and for unsigned small non-equality compares the GTF_UNSIGNED flag.
        //
        if (op2->isContainedIntOrIImmed() && ((tree->gtFlags & GTF_RELOP_SMALL) != 0))
        {
            assert(varTypeIsSmall(op1Type));
            cmpType = op1Type;
        }
#ifdef _TARGET_AMD64_
        else // compare two different sized operands
        {
            // For this case we don't want any memory operands, only registers or immediates
            //
            assert(!op1->isContainedMemoryOp());
            assert(!op2->isContainedMemoryOp());

            // Check for the case where one operand is an int64 type
            // Lower should have placed the 32-bit operand in a register;
            // for signed comparisons we will sign extend the 32-bit value in place.
            //
            bool op1Is64Bit = (genTypeSize(op1Type) == 8);
            bool op2Is64Bit = (genTypeSize(op2Type) == 8);
            if (op1Is64Bit)
            {
                cmpType = TYP_LONG;
                if (!(tree->gtFlags & GTF_UNSIGNED) && !op2Is64Bit)
                {
                    assert(op2->gtRegNum != REG_NA);
                    inst_RV_RV(INS_movsxd, op2->gtRegNum, op2->gtRegNum, op2Type);
                }
            }
            else if (op2Is64Bit)
            {
                cmpType = TYP_LONG;
                if (!(tree->gtFlags & GTF_UNSIGNED) && !op1Is64Bit)
                {
                    assert(op1->gtRegNum != REG_NA);
                    inst_RV_RV(INS_movsxd, op1->gtRegNum, op1->gtRegNum, op1Type);
                }
            }
        }
#endif // _TARGET_AMD64_

        cmpAttr = emitTypeSize(cmpType);
    }

    // See if we can generate a "test" instruction instead of a "cmp".
    // For this to generate the correct conditional branch we must have
    // a compare against zero.
    //
    if (op2->IsZero())
    {
        if (op1->isContained())
        {
            // op1 can be a contained memory op
            // or the special contained GT_AND that we created in Lowering::LowerCmp()
            //
            if ((op1->OperGet() == GT_AND))
            {
                noway_assert(op1->gtOp.gtOp2->isContainedIntOrIImmed());

                ins = INS_test;        // we will generate "test andOp1, andOp2CnsVal"
                op2 = op1->gtOp.gtOp2; // must assign op2 before we overwrite op1
                op1 = op1->gtOp.gtOp1; // overwrite op1

                if (op1->isContainedMemoryOp())
                {
                    // use the size of andOp1 if it is a contained memoryop.
                    cmpAttr = emitTypeSize(op1->TypeGet());
                }
                // fallthrough to emit->emitInsBinary(ins, cmpAttr, op1, op2);
            }
        }
        else // op1 is not contained thus it must be in a register
        {
            ins = INS_test;
            op2 = op1; // we will generate "test reg1,reg1"
            // fallthrough to emit->emitInsBinary(ins, cmpAttr, op1, op2);
        }
    }

    getEmitter()->emitInsBinary(ins, cmpAttr, op1, op2);

    // Are we evaluating this into a register?
    if (targetReg != REG_NA)
    {
        genSetRegToCond(targetReg, tree);
        genProduceReg(tree);
    }
}
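
// Illustrative example (a sketch, not from the original sources): a compare of a register
// value against zero is emitted as
//     test eax, eax         ; instead of cmp eax, 0
// and the contained GT_AND form created by LowerCmp() becomes roughly
//     test byte ptr [mem], 0x80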
// Generate code to materialize a condition into a register
// (the condition codes must already have been appropriately set)

void CodeGen::genSetRegToCond(regNumber dstReg, GenTreePtr tree)
{
    noway_assert((genRegMask(dstReg) & RBM_BYTE_REGS) != 0);

    emitJumpKind jumpKind[2];
    bool         branchToTrueLabel[2];
    genJumpKindsForTree(tree, jumpKind, branchToTrueLabel);

    if (jumpKind[1] == EJ_NONE)
    {
        // Set (lower byte of) reg according to the flags
        inst_SET(jumpKind[0], dstReg);
    }
    else
    {
        // jmpKind[1] != EJ_NONE implies BEQ and BNE.UN of floating point values.
        // These are represented by two conditions.

#ifdef DEBUG
        if (tree->gtOper == GT_EQ)
        {
            // This must be an ordered comparison.
            assert((tree->gtFlags & GTF_RELOP_NAN_UN) == 0);
        }
        else
        {
            // This must be BNE.UN
            assert((tree->gtOper == GT_NE) && ((tree->gtFlags & GTF_RELOP_NAN_UN) != 0));
        }
#endif

        // Here is the sample code generated in each case:
        // BEQ == cmp, jpe <false label>, je <true label>
        // That is, to materialize the comparison reg needs to be set if PF=0 and ZF=1
        //      setnp reg  // if (PF==0) reg = 1 else reg = 0
        //      jpe L1     // Jmp if PF==1
        //      sete reg
        //  L1:
        //
        // BNE.UN == cmp, jpe <true label>, jne <true label>
        // That is, to materialize the comparison reg needs to be set if either PF=1 or ZF=0;
        //      setp reg
        //      jpe L1
        //      setne reg
        //  L1:

        // reverse the jmpkind condition before setting dstReg if it is to the false label.
        inst_SET(branchToTrueLabel[0] ? jumpKind[0] : emitter::emitReverseJumpKind(jumpKind[0]), dstReg);

        BasicBlock* label = genCreateTempLabel();
        inst_JMP(jumpKind[0], label);

        // second branch is always to the true label
        assert(branchToTrueLabel[1]);
        inst_SET(jumpKind[1], dstReg);
        genDefineTempLabel(label);
    }

    var_types treeType = tree->TypeGet();
    if (treeType == TYP_INT || treeType == TYP_LONG)
    {
        // Set the higher bytes to 0
        inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), dstReg, dstReg, TYP_UBYTE, emitTypeSize(TYP_UBYTE));
    }
    else
    {
        // FP types have been converted to flow
        noway_assert(treeType == TYP_BYTE);
    }
}
//------------------------------------------------------------------------
// genIntToIntCast: Generate code for an integer cast
//    This method handles integer overflow checking casts
//    as well as ordinary integer casts.
//
// Arguments:
//    treeNode - The GT_CAST node
//
// Return Value:
//    None.
//
// Assumptions:
//    The treeNode is not a contained node and must have an assigned register.
//    For a signed convert from byte, the source must be in a byte-addressable register.
//    Neither the source nor target type can be a floating point type.
//
// TODO-XArch-CQ: Allow castOp to be a contained node without an assigned register.
// TODO: refactor to use getCastDescription
//
void CodeGen::genIntToIntCast(GenTreePtr treeNode)
{
    assert(treeNode->OperGet() == GT_CAST);

    GenTreePtr castOp        = treeNode->gtCast.CastOp();
    regNumber  targetReg     = treeNode->gtRegNum;
    regNumber  sourceReg     = castOp->gtRegNum;
    var_types  dstType       = treeNode->CastToType();
    bool       isUnsignedDst = varTypeIsUnsigned(dstType);
    var_types  srcType       = genActualType(castOp->TypeGet());
    bool       isUnsignedSrc = varTypeIsUnsigned(srcType);

    // if necessary, force the srcType to unsigned when the GT_UNSIGNED flag is set
    if (!isUnsignedSrc && (treeNode->gtFlags & GTF_UNSIGNED) != 0)
    {
        srcType = genUnsignedType(srcType);
        isUnsignedSrc = true;
    }

    bool requiresOverflowCheck = false;
    bool needAndAfter          = false;

    assert(genIsValidIntReg(targetReg));
    assert(genIsValidIntReg(sourceReg));

    instruction ins  = INS_invalid;
    emitAttr    size = EA_UNKNOWN;

    if (genTypeSize(srcType) < genTypeSize(dstType))
    {
        // Widening cast

        // Is this an Overflow checking cast?
        // We only need to handle one case, as the other casts can never overflow.
        //   cast from TYP_INT to TYP_ULONG
        //
        if (treeNode->gtOverflow() && (srcType == TYP_INT) && (dstType == TYP_ULONG))
        {
            requiresOverflowCheck = true;
            size = EA_ATTR(genTypeSize(srcType));
            ins  = INS_mov;
        }
        else
        {
            // we need the source size
            size = EA_ATTR(genTypeSize(srcType));
            noway_assert(size < EA_PTRSIZE);

            ins = ins_Move_Extend(srcType, castOp->InReg());

            /*
                Special case: ins_Move_Extend assumes the destination type is no bigger
                than TYP_INT. movsx and movzx can already extend all the way to
                64-bit, and a regular 32-bit mov clears the high 32 bits (like the non-existent movzxd),
                but for a sign extension from TYP_INT to TYP_LONG, we need to use the movsxd opcode.
            */
            if (!isUnsignedSrc && !isUnsignedDst && (size == EA_4BYTE) && (genTypeSize(dstType) > EA_4BYTE))
            {
#ifdef _TARGET_X86_
                NYI_X86("Cast to 64 bit for x86/RyuJIT");
#else // !_TARGET_X86_
                ins = INS_movsxd;
#endif // !_TARGET_X86_
            }

            /*
                Special case: for a cast of byte to char we first
                have to expand the byte (w/ sign extension), then
                mask off the high bits.
                Use 'movsx' followed by 'and'
            */
            if (!isUnsignedSrc && isUnsignedDst && (genTypeSize(dstType) < EA_4BYTE))
            {
                noway_assert(genTypeSize(dstType) == EA_2BYTE && size == EA_1BYTE);
                needAndAfter = true;
            }
        }
    }
    else
    {
        // Narrowing cast, or sign-changing cast
        noway_assert(genTypeSize(srcType) >= genTypeSize(dstType));

        // Is this an Overflow checking cast?
        if (treeNode->gtOverflow())
        {
            requiresOverflowCheck = true;
            size = EA_ATTR(genTypeSize(srcType));
            ins  = INS_mov;
        }
        else
        {
            size = EA_ATTR(genTypeSize(dstType));
            ins  = ins_Move_Extend(dstType, castOp->InReg());
        }
    }

    noway_assert(ins != INS_invalid);

    genConsumeReg(castOp);

    if (requiresOverflowCheck)
    {
        ssize_t typeMin        = 0;
        ssize_t typeMax        = 0;
        ssize_t typeMask       = 0;
        bool    needScratchReg = false;
        bool    signCheckOnly  = false;

        /* Do we need to compare the value, or just check masks */

        switch (dstType)
        {
        case TYP_BYTE:
            typeMask = ssize_t((int)0xFFFFFF80);
            typeMin  = SCHAR_MIN;
            typeMax  = SCHAR_MAX;
            break;

        case TYP_UBYTE:
            typeMask = ssize_t((int)0xFFFFFF00L);
            break;

        case TYP_SHORT:
            typeMask = ssize_t((int)0xFFFF8000);
            typeMin  = SHRT_MIN;
            typeMax  = SHRT_MAX;
            break;

        case TYP_CHAR:
            typeMask = ssize_t((int)0xFFFF0000L);
            break;

        case TYP_INT:
            if (srcType == TYP_UINT)
            {
                signCheckOnly = true;
            }
            else
            {
                typeMask = 0xFFFFFFFF80000000LL;
                typeMin  = INT_MIN;
                typeMax  = INT_MAX;
            }
            break;

        case TYP_UINT:
            if (srcType == TYP_INT)
            {
                signCheckOnly = true;
            }
            else
            {
                needScratchReg = true;
            }
            break;

        case TYP_LONG:
            noway_assert(srcType == TYP_ULONG);
            signCheckOnly = true;
            break;

        case TYP_ULONG:
            noway_assert((srcType == TYP_LONG) || (srcType == TYP_INT));
            signCheckOnly = true;
            break;

        default:
            NO_WAY("Unknown type");
        }

        if (signCheckOnly)
        {
            // We only need to check for a negative value in sourceReg
            inst_RV_IV(INS_cmp, sourceReg, 0, size);
            genJumpToThrowHlpBlk(EJ_jl, SCK_OVERFLOW);
            if (dstType == TYP_ULONG)
            {
                // cast from TYP_INT to TYP_ULONG
                // The upper bits on sourceReg will already be zero by definition (x64)
                srcType = TYP_ULONG;
                size    = EA_8BYTE;
            }
        }
        else
        {
            regNumber tmpReg = REG_NA;

            if (needScratchReg)
            {
                // We need an additional temp register
                // Make sure we have exactly one allocated.
                assert(treeNode->gtRsvdRegs != RBM_NONE);
                assert(genCountBits(treeNode->gtRsvdRegs) == 1);
                tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs);
            }

            // When we are converting from unsigned or to unsigned, we
            // will only have to check for any bits set using 'typeMask'
            if (isUnsignedSrc || isUnsignedDst)
            {
                if (needScratchReg)
                {
                    inst_RV_RV(INS_mov, tmpReg, sourceReg, TYP_LONG);      // Move the 64-bit value to a writeable temp reg
                    inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, size, tmpReg, 32); // Shift right by 32 bits
                    genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW);            // Throw if result of shift is non-zero
                }
                else
                {
                    noway_assert(typeMask != 0);
                    inst_RV_IV(INS_TEST, sourceReg, typeMask, size);
                    genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW);
                }
            }
            else
            {
                // For a narrowing signed cast
                //
                // We must check the value is in a signed range.

                // Compare with the MAX

                noway_assert((typeMin != 0) && (typeMax != 0));

                inst_RV_IV(INS_cmp, sourceReg, typeMax, size);
                genJumpToThrowHlpBlk(EJ_jg, SCK_OVERFLOW);

                // Compare with the MIN

                inst_RV_IV(INS_cmp, sourceReg, typeMin, size);
                genJumpToThrowHlpBlk(EJ_jl, SCK_OVERFLOW);
            }
        }

        if (targetReg != sourceReg)
            inst_RV_RV(ins, targetReg, sourceReg, srcType, size);
    }
    else // non-overflow checking cast
    {
        noway_assert(size < EA_PTRSIZE || srcType == dstType);

        // We may have code transformations that result in casts where srcType is the same as dstType.
        // e.g. Bug 824281, in which a comma is split by the rationalizer, leaving an assignment of a
        // long constant to a long lclVar.
        if (srcType == dstType)
        {
            ins = INS_mov;
        }
        /* Is the value sitting in a non-byte-addressable register? */
        else if (castOp->InReg() && (size == EA_1BYTE) && !isByteReg(sourceReg))
        {
            if (isUnsignedDst)
            {
                // for unsigned values we can AND, so it need not be a byte register
                ins = INS_AND;
            }
            else
            {
                // Move the value into a byte register
                noway_assert(!"Signed byte convert from non-byte-addressable register");
            }

            /* Generate "mov targetReg, castOp->gtReg */
            if (targetReg != sourceReg)
            {
                inst_RV_RV(INS_mov, targetReg, sourceReg, srcType);
            }
        }
        else if (treeNode->gtSetFlags() && isUnsignedDst && castOp->InReg() && (targetReg == sourceReg))
        {
            // if we (might) need to set the flags and the value is in the same register
            // and we have an unsigned value then use AND instead of MOVZX
            noway_assert(ins == INS_movzx || ins == INS_mov);
            ins = INS_AND;
        }

        if (ins == INS_AND)
        {
            noway_assert((needAndAfter == false) && isUnsignedDst);

            /* Generate "and reg, MASK */
            insFlags flags = treeNode->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
            unsigned fillPattern;
            if (size == EA_1BYTE)
                fillPattern = 0xff;
            else if (size == EA_2BYTE)
                fillPattern = 0xffff;
            else
                fillPattern = 0xffffffff;

            inst_RV_IV(INS_AND, targetReg, fillPattern, EA_4BYTE, flags);
        }
#ifdef _TARGET_AMD64_
        else if (ins == INS_movsxd)
        {
            noway_assert(!needAndAfter);
            inst_RV_RV(ins, targetReg, sourceReg, srcType, size);
        }
#endif // _TARGET_AMD64_
        else if (ins == INS_mov)
        {
            noway_assert(!needAndAfter);
            if (targetReg != sourceReg)
            {
                inst_RV_RV(ins, targetReg, sourceReg, srcType, size);
            }
        }
        else
        {
            noway_assert(ins == INS_movsx || ins == INS_movzx);

            /* Generate "mov targetReg, castOp->gtReg */
            inst_RV_RV(ins, targetReg, sourceReg, srcType, size);

            /* Mask off high bits for cast from byte to char */
            if (needAndAfter)
            {
                noway_assert(genTypeSize(dstType) == 2 && ins == INS_movsx);
                insFlags flags = treeNode->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
                inst_RV_IV(INS_AND, targetReg, 0xFFFF, EA_4BYTE, flags);
            }
        }
    }

    genProduceReg(treeNode);
}
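
// Illustrative example (a sketch, not from the original sources): an overflow-checking
// cast from TYP_INT to TYP_BYTE takes the narrowing signed path above and emits roughly:
//     cmp eax, 127          ; typeMax == SCHAR_MAX
//     jg  <overflow throw block>
//     cmp eax, -128         ; typeMin == SCHAR_MIN
//     jl  <overflow throw block>
//     mov target, eax       ; only if targetReg != sourceReg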
//------------------------------------------------------------------------
// genFloatToFloatCast: Generate code for a cast between float and double
//
// Arguments:
//    treeNode - The GT_CAST node
//
// Return Value:
//    None.
//
// Assumptions:
//    Cast is a non-overflow conversion.
//    The treeNode must have an assigned register.
//    The cast is between float and double or vice versa.
//
void
CodeGen::genFloatToFloatCast(GenTreePtr treeNode)
{
    // float <--> double conversions are always non-overflow ones
    assert(treeNode->OperGet() == GT_CAST);
    assert(!treeNode->gtOverflow());

    regNumber targetReg = treeNode->gtRegNum;
    assert(genIsValidFloatReg(targetReg));

    GenTreePtr op1 = treeNode->gtOp.gtOp1;
#ifdef DEBUG
    // If not contained, must be a valid float reg.
    if (!op1->isContained())
    {
        assert(genIsValidFloatReg(op1->gtRegNum));
    }
#endif

    var_types dstType = treeNode->CastToType();
    var_types srcType = op1->TypeGet();
    assert(varTypeIsFloating(srcType) && varTypeIsFloating(dstType));

    genConsumeOperands(treeNode->AsOp());
    if (srcType == dstType && targetReg == op1->gtRegNum)
    {
        // source and destination types are the same and also reside in the same register.
        // we just need to consume and produce the reg in this case.
        ;
    }
    else
    {
        instruction ins = ins_FloatConv(dstType, srcType);
        getEmitter()->emitInsBinary(ins, emitTypeSize(dstType), treeNode, op1);
    }

    genProduceReg(treeNode);
}
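
// For illustration, the two conversions emitted above are (sketch):
//     cvtss2sd xmm0, xmm1   ; float  -> double
//     cvtsd2ss xmm0, xmm1   ; double -> float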
//------------------------------------------------------------------------
// genIntToFloatCast: Generate code to cast an int/long to float/double
//
// Arguments:
//    treeNode - The GT_CAST node
//
// Return Value:
//    None.
//
// Assumptions:
//    Cast is a non-overflow conversion.
//    The treeNode must have an assigned register.
//    SrcType = int32/uint32/int64/uint64 and DstType = float/double.
//
void
CodeGen::genIntToFloatCast(GenTreePtr treeNode)
{
    // int type --> float/double conversions are always non-overflow ones
    assert(treeNode->OperGet() == GT_CAST);
    assert(!treeNode->gtOverflow());

    regNumber targetReg = treeNode->gtRegNum;
    assert(genIsValidFloatReg(targetReg));

    GenTreePtr op1 = treeNode->gtOp.gtOp1;
#ifdef DEBUG
    if (!op1->isContained())
    {
        assert(genIsValidIntReg(op1->gtRegNum));
    }
#endif

    var_types dstType = treeNode->CastToType();
    var_types srcType = op1->TypeGet();
    assert(!varTypeIsFloating(srcType) && varTypeIsFloating(dstType));

#if !defined(_TARGET_64BIT_)
    NYI_IF(varTypeIsLong(srcType), "Conversion from long to float");
#endif // !defined(_TARGET_64BIT_)

    // Since the xarch emitter doesn't handle reporting gc-info correctly while casting away gc-ness, we
    // ensure srcType of a cast is a non gc-type. Codegen should never see BYREF as a source type except
    // for GT_LCL_VAR_ADDR and GT_LCL_FLD_ADDR that represent stack addresses and can be considered
    // as TYP_I_IMPL. In all other cases where the src operand is a gc-type and not known to be on the stack,
    // the front-end (see fgMorphCast()) ensures this by assigning the gc-type local to a non gc-type
    // temp and using the temp as the operand of the cast operation.
    if (srcType == TYP_BYREF)
    {
        noway_assert(op1->OperGet() == GT_LCL_VAR_ADDR || op1->OperGet() == GT_LCL_FLD_ADDR);
        srcType = TYP_I_IMPL;
    }

    // force the srcType to unsigned if GT_UNSIGNED flag is set
    if (treeNode->gtFlags & GTF_UNSIGNED)
    {
        srcType = genUnsignedType(srcType);
    }

    noway_assert(!varTypeIsGC(srcType));

    // We should never be seeing srcType whose size is not sizeof(int) nor sizeof(long).
    // For conversions from byte/sbyte/int16/uint16 to float/double, we would expect
    // either the front-end or the lowering phase to have generated two levels of cast.
    // The first one is for widening the smaller int type to int32 and the second one is
    // to the float/double.
    emitAttr srcSize = EA_ATTR(genTypeSize(srcType));
    noway_assert((srcSize == EA_ATTR(genTypeSize(TYP_INT))) ||
                 (srcSize == EA_ATTR(genTypeSize(TYP_LONG))));

    // Also we don't expect to see uint32 -> float/double and uint64 -> float conversions
    // here since they should have been lowered appropriately.
    noway_assert(srcType != TYP_UINT);
    noway_assert((srcType != TYP_ULONG) || (dstType != TYP_FLOAT));

    // To convert an int to a float/double, the cvtsi2ss/sd SSE2 instruction is used,
    // which does a partial write to the lower 4/8 bytes of the xmm register, keeping the
    // upper bytes unmodified. If "cvtsi2ss/sd xmmReg, r32/r64" occurs inside a loop,
    // the partial write could introduce a false dependency and could cause a stall
    // if there are further uses of xmmReg. We have such a case occurring with a
    // customer reported version of the SpectralNorm benchmark, resulting in a 2x perf
    // regression. To avoid the false dependency, we emit "xorps xmmReg, xmmReg" before
    // the cvtsi2ss/sd instruction.

    genConsumeOperands(treeNode->AsOp());
    getEmitter()->emitIns_R_R(INS_xorps, EA_4BYTE, treeNode->gtRegNum, treeNode->gtRegNum);

    // Note that here we need to specify srcType since it will determine
    // the size of the source reg/mem operand and the rex.w prefix.
    instruction ins = ins_FloatConv(dstType, TYP_INT);
    getEmitter()->emitInsBinary(ins, emitTypeSize(srcType), treeNode, op1);

    // Handle the case of srcType = TYP_ULONG. The SSE2 conversion instruction
    // will interpret a ULONG value as LONG. Hence we need to adjust the
    // result if the sign-bit of srcType is set.
    if (srcType == TYP_ULONG)
    {
        assert(dstType == TYP_DOUBLE);
        assert(!op1->isContained());

        // Set the flags without modifying op1.
        // test op1Reg, op1Reg
        inst_RV_RV(INS_test, op1->gtRegNum, op1->gtRegNum, srcType);

        // No need to adjust result if op1 >= 0 i.e. positive
        BasicBlock* label = genCreateTempLabel();
        inst_JMP(EJ_jge, label);

        // Adjust the result
        // result = result + 0x43f00000 00000000
        // addsd resultReg, 0x43f00000 00000000
        GenTreePtr* cns = &u8ToDblBitmask;
        if (*cns == nullptr)
        {
            double d;
            static_assert_no_msg(sizeof(double) == sizeof(__int64));
            *((__int64 *)&d) = 0x43f0000000000000LL;

            *cns = genMakeConst(&d, dstType, treeNode, true);
        }
        inst_RV_TT(INS_addsd, treeNode->gtRegNum, *cns);

        genDefineTempLabel(label);
    }

    genProduceReg(treeNode);
}
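
// Illustrative example (a sketch, not from the original sources): (double)(ulong)rax is
// emitted roughly as:
//     xorps    xmm0, xmm0               ; break the false dependency
//     cvtsi2sd xmm0, rax                ; converts rax as a *signed* long
//     test     rax, rax
//     jge      L_done                   ; sign bit clear => result already correct
//     addsd    xmm0, [u8ToDblBitmask]   ; adjust by 2^64 (0x43f0000000000000)
//   L_done: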
//------------------------------------------------------------------------
// genFloatToIntCast: Generate code to cast float/double to int/long
//
// Arguments:
//    treeNode - The GT_CAST node
//
// Return Value:
//    None.
//
// Assumptions:
//    Cast is a non-overflow conversion.
//    The treeNode must have an assigned register.
//    SrcType = float/double and DstType = int32/uint32/int64/uint64.
//
// TODO-XArch-CQ: (Low-pri) - generate in-line code when DstType = uint64
//
void
CodeGen::genFloatToIntCast(GenTreePtr treeNode)
{
    // we don't expect to see overflow detecting float/double --> int type conversions here
    // as they should have been converted into helper calls by the front-end.
    assert(treeNode->OperGet() == GT_CAST);
    assert(!treeNode->gtOverflow());

    regNumber targetReg = treeNode->gtRegNum;
    assert(genIsValidIntReg(targetReg));

    GenTreePtr op1 = treeNode->gtOp.gtOp1;
#ifdef DEBUG
    if (!op1->isContained())
    {
        assert(genIsValidFloatReg(op1->gtRegNum));
    }
#endif

    var_types dstType = treeNode->CastToType();
    var_types srcType = op1->TypeGet();
    assert(varTypeIsFloating(srcType) && !varTypeIsFloating(dstType));

    // We should never be seeing dstType whose size is neither sizeof(TYP_INT) nor sizeof(TYP_LONG).
    // For conversions to byte/sbyte/int16/uint16 from float/double, we would expect the
    // front-end or lowering phase to have generated two levels of cast. The first one is
    // for float or double to int32/uint32 and the second one for narrowing int32/uint32 to
    // the required smaller int type.
    emitAttr dstSize = EA_ATTR(genTypeSize(dstType));
    noway_assert((dstSize == EA_ATTR(genTypeSize(TYP_INT))) ||
                 (dstSize == EA_ATTR(genTypeSize(TYP_LONG))));

    // We shouldn't be seeing uint64 here as it should have been converted
    // into a helper call by either the front-end or lowering phase.
    noway_assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG))));

    // If the dstType is TYP_UINT, we have 32-bits to encode the
    // float number. Any of the 33rd or above bits can be the sign bit.
    // To achieve it we pretend as if we are converting it to a long.
    if (varTypeIsUnsigned(dstType) && (dstSize == EA_ATTR(genTypeSize(TYP_INT))))
    {
        dstType = TYP_LONG;
    }

    // Note that we need to specify dstType here so that it will determine
    // the size of the destination integer register and also the rex.w prefix.
    genConsumeOperands(treeNode->AsOp());
    instruction ins = ins_FloatConv(TYP_INT, srcType);
    getEmitter()->emitInsBinary(ins, emitTypeSize(dstType), treeNode, op1);
    genProduceReg(treeNode);
}
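
// Illustrative example (a sketch, not from the original sources): for dstType = TYP_UINT
// the cast is emitted as if converting to TYP_LONG, e.g.
//     cvttsd2si rax, xmm0   ; rex.w form; the low 32 bits hold the uint result
// whereas a plain TYP_INT cast emits "cvttsd2si eax, xmm0".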
//------------------------------------------------------------------------
// genCkfinite: Generate code for ckfinite opcode.
//
// Arguments:
//    treeNode - The GT_CKFINITE node
//
// Return Value:
//    None.
//
// Assumptions:
//    GT_CKFINITE node has reserved an internal register.
//
// TODO-XArch-CQ - mark the operand as contained if known to be in
// memory (e.g. field or an array element).
//
void
CodeGen::genCkfinite(GenTreePtr treeNode)
{
    assert(treeNode->OperGet() == GT_CKFINITE);

    GenTreePtr op1 = treeNode->gtOp.gtOp1;
    var_types targetType = treeNode->TypeGet();
    int expMask = (targetType == TYP_FLOAT) ? 0x7F800000 : 0x7FF00000; // Bit mask to extract exponent.

    // Extract exponent into a register.
    assert(treeNode->gtRsvdRegs != RBM_NONE);
    assert(genCountBits(treeNode->gtRsvdRegs) == 1);
    regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs);

    instruction ins = ins_CopyFloatToInt(targetType, (targetType == TYP_FLOAT) ? TYP_INT : TYP_LONG);
    inst_RV_RV(ins, genConsumeReg(op1), tmpReg, targetType);
    if (targetType == TYP_DOUBLE)
    {
        // right shift by 32 bits to get to the exponent.
        inst_RV_SH(INS_shr, EA_8BYTE, tmpReg, 32);
    }

    // Mask off the exponent and check whether the exponent is all 1's
    inst_RV_IV(INS_and, tmpReg, expMask, EA_4BYTE);
    inst_RV_IV(INS_cmp, tmpReg, expMask, EA_4BYTE);

    // If exponent is all 1's, throw ArithmeticException
    genJumpToThrowHlpBlk(EJ_je, SCK_ARITH_EXCPN);

    // if it is a finite value copy it to targetReg
    if (treeNode->gtRegNum != op1->gtRegNum)
    {
        inst_RV_RV(ins_Copy(targetType), treeNode->gtRegNum, op1->gtRegNum, targetType);
    }
    genProduceReg(treeNode);
}
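
// Illustrative example (a sketch, not from the original sources): ckfinite of a double in
// xmm0 with tmpReg = rcx emits roughly:
//     movd rcx, xmm0                              ; copy bits to the reserved int reg
//     shr  rcx, 32
//     and  ecx, 0x7FF00000
//     cmp  ecx, 0x7FF00000
//     je   <arithmetic exception throw block>     ; exponent all 1's => NaN or infinity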
#ifdef _TARGET_AMD64_
int CodeGenInterface::genSPtoFPdelta()
{
    int delta;

    // As per the Amd64 ABI, the RBP offset from initial RSP can be between 0 and 240 if
    // RBP needs to be reported in unwind codes. This case would arise for methods
    // with localloc.
    if (compiler->compLocallocUsed)
    {
        // We cannot base delta computation on compLclFrameSize since it changes from
        // tentative to final frame layout and hence there is a possibility of
        // under-estimating offset of vars from FP, which in turn results in under-
        // estimating instruction size.
        //
        // To be predictive and so as never to under-estimate offset of vars from FP
        // we will always position FP at min(240, outgoing arg area size).
        delta = Min(240, (int)compiler->lvaOutgoingArgSpaceSize);
    }
    else if (compiler->opts.compDbgEnC)
    {
        // vm assumption on EnC methods is that rsp and rbp are equal
        delta = 0;
    }
    else
    {
        delta = genTotalFrameSize();
    }

    return delta;
}

//---------------------------------------------------------------------
// genTotalFrameSize - return the total size of the stack frame, including local size,
// callee-saved register size, etc. For AMD64, this does not include the caller-pushed
// return address.
//
// Return value:
//    Total frame size
//
int CodeGenInterface::genTotalFrameSize()
{
    assert(!IsUninitialized(compiler->compCalleeRegsPushed));

    int totalFrameSize = compiler->compCalleeRegsPushed * REGSIZE_BYTES +
                         compiler->compLclFrameSize;

    assert(totalFrameSize >= 0);
    return totalFrameSize;
}

//---------------------------------------------------------------------
// genCallerSPtoFPdelta - return the offset from Caller-SP to the frame pointer.
// This number is going to be negative, since the Caller-SP is at a higher
// address than the frame pointer.
//
// There must be a frame pointer to call this function!
//
// We can't compute this directly from the Caller-SP, since the frame pointer
// is based on a maximum delta from Initial-SP, so first we find SP, then
// compute the FP offset.
//
int CodeGenInterface::genCallerSPtoFPdelta()
{
    assert(isFramePointerUsed());
    int callerSPtoFPdelta;

    callerSPtoFPdelta = genCallerSPtoInitialSPdelta() + genSPtoFPdelta();

    assert(callerSPtoFPdelta <= 0);
    return callerSPtoFPdelta;
}

//---------------------------------------------------------------------
// genCallerSPtoInitialSPdelta - return the offset from Caller-SP to Initial SP.
//
// This number will be negative.
//
int CodeGenInterface::genCallerSPtoInitialSPdelta()
{
    int callerSPtoSPdelta = 0;

    callerSPtoSPdelta -= genTotalFrameSize();
    callerSPtoSPdelta -= REGSIZE_BYTES; // caller-pushed return address

    // compCalleeRegsPushed does not account for the frame pointer
    // TODO-Cleanup: shouldn't this be part of genTotalFrameSize?
    if (isFramePointerUsed())
    {
        callerSPtoSPdelta -= REGSIZE_BYTES;
    }

    assert(callerSPtoSPdelta <= 0);
    return callerSPtoSPdelta;
}
#endif // _TARGET_AMD64_
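
// Worked example (illustrative, not from the original sources): with compCalleeRegsPushed = 3,
// compLclFrameSize = 0x40, an RBP frame, and neither localloc nor EnC:
//     genTotalFrameSize()           = 3 * 8 + 0x40     = 0x58
//     genSPtoFPdelta()              = 0x58
//     genCallerSPtoInitialSPdelta() = -(0x58 + 8 + 8)  = -0x68   (return address + pushed RBP)
//     genCallerSPtoFPdelta()        = -0x68 + 0x58     = -0x10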
//-----------------------------------------------------------------------------------------
// genSSE2BitwiseOp - generate SSE2 code for the given oper as "Operand BitWiseOp BitMask"
//
// Arguments:
//    treeNode - tree node
//
// Return value:
//    None
//
// Assumptions:
//     i) tree oper is one of GT_NEG or GT_INTRINSIC Abs()
//    ii) tree type is floating point type.
//   iii) caller of this routine needs to call genProduceReg()
void
CodeGen::genSSE2BitwiseOp(GenTreePtr treeNode)
{
    regNumber targetReg = treeNode->gtRegNum;
    var_types targetType = treeNode->TypeGet();
    assert(varTypeIsFloating(targetType));

    float f;
    double d;
    GenTreePtr* bitMask = nullptr;
    instruction ins = INS_invalid;
    void* cnsAddr = nullptr;
    bool dblAlign = false;

    switch (treeNode->OperGet())
    {
    case GT_NEG:
        // Neg(x) = flip the sign bit.
        // Neg(f) = f ^ 0x80000000
        // Neg(d) = d ^ 0x8000000000000000
        ins = genGetInsForOper(GT_XOR, targetType);
        if (targetType == TYP_FLOAT)
        {
            bitMask = &negBitmaskFlt;

            static_assert_no_msg(sizeof(float) == sizeof(int));
            *((int *)&f) = 0x80000000;
            cnsAddr = &f;
        }
        else
        {
            bitMask = &negBitmaskDbl;

            static_assert_no_msg(sizeof(double) == sizeof(__int64));
            *((__int64*)&d) = 0x8000000000000000LL;
            cnsAddr = &d;
            dblAlign = true;
        }
        break;

    case GT_INTRINSIC:
        assert(treeNode->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Abs);

        // Abs(x) = set sign-bit to zero
        // Abs(f) = f & 0x7fffffff
        // Abs(d) = d & 0x7fffffffffffffff
        ins = genGetInsForOper(GT_AND, targetType);
        if (targetType == TYP_FLOAT)
        {
            bitMask = &absBitmaskFlt;

            static_assert_no_msg(sizeof(float) == sizeof(int));
            *((int *)&f) = 0x7fffffff;
            cnsAddr = &f;
        }
        else
        {
            bitMask = &absBitmaskDbl;

            static_assert_no_msg(sizeof(double) == sizeof(__int64));
            *((__int64*)&d) = 0x7fffffffffffffffLL;
            cnsAddr = &d;
            dblAlign = true;
        }
        break;

    default:
        assert(!"genSSE2: unsupported oper");
        break;
    }

    if (*bitMask == nullptr)
    {
        assert(cnsAddr != nullptr);
        *bitMask = genMakeConst(cnsAddr, targetType, treeNode, dblAlign);
    }

    // We need an additional register for the bitmask.
    // Make sure we have one allocated.
    assert(treeNode->gtRsvdRegs != RBM_NONE);
    assert(genCountBits(treeNode->gtRsvdRegs) == 1);
    regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs);

    // Move operand into targetReg only if the reg reserved for
    // internal purpose is not the same as targetReg.
    GenTreePtr op1 = treeNode->gtOp.gtOp1;
    assert(!op1->isContained());
    regNumber operandReg = genConsumeReg(op1);
    if (tmpReg != targetReg)
    {
        if (operandReg != targetReg)
        {
            inst_RV_RV(ins_Copy(targetType), targetReg, operandReg, targetType);
        }

        operandReg = tmpReg;
    }

    inst_RV_TT(ins_Load(targetType, false), tmpReg, *bitMask);
    assert(ins != INS_invalid);
    inst_RV_RV(ins, targetReg, operandReg, targetType);
}
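
// Illustrative example (a sketch, not from the original sources): Abs() of a double with
// targetReg = xmm0 and tmpReg = xmm1 emits roughly:
//     movsd xmm1, [absBitmaskDbl]   ; 0x7fffffffffffffff
//     andpd xmm0, xmm1              ; clear the sign bit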
//---------------------------------------------------------------------
// genIntrinsic - generate code for a given intrinsic
//
// Arguments:
//    treeNode - the GT_INTRINSIC node
//
// Return value:
//    None
//
void
CodeGen::genIntrinsic(GenTreePtr treeNode)
{
    // Right now only Sqrt/Abs are treated as math intrinsics.
    switch (treeNode->gtIntrinsic.gtIntrinsicId)
    {
    case CORINFO_INTRINSIC_Sqrt:
        noway_assert(treeNode->TypeGet() == TYP_DOUBLE);
        genConsumeOperands(treeNode->AsOp());
        getEmitter()->emitInsBinary(ins_FloatSqrt(treeNode->TypeGet()), emitTypeSize(treeNode), treeNode,
                                    treeNode->gtOp.gtOp1);
        break;

    case CORINFO_INTRINSIC_Abs:
        genSSE2BitwiseOp(treeNode);
        break;

    default:
        assert(!"genIntrinsic: Unsupported intrinsic");
        unreached();
    }

    genProduceReg(treeNode);
}
//------------------------------------------------------------------------------------------------ //
// getFirstArgWithStackSlot - returns the first argument with stack slot on the caller's frame.
//
// Return value:
//    The number of the first argument with stack slot on the caller's frame.
//
// Note:
//    On Windows the caller always creates slots (homing space) in its frame for the
//    first 4 arguments of a callee (register passed args). So, the variable number
//    (lclNum) for the first argument with a stack slot is always 0.
//    For System V systems there is no such calling convention requirement, and the code needs to find
//    the first stack passed argument from the caller. This is done by iterating over
//    all the lvParam variables and finding the first with lvArgReg equal to REG_STK.
//
unsigned
CodeGen::getFirstArgWithStackSlot()
{
#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
    unsigned baseVarNum = compiler->lvaFirstStackIncomingArgNum;

    if (compiler->lvaFirstStackIncomingArgNum != BAD_VAR_NUM)
    {
        baseVarNum = compiler->lvaFirstStackIncomingArgNum;
    }
    else
    {
        // Iterate over all the local variables in the lclvartable.
        // They contain all the implicit arguments - thisPtr, retBuf,
        // generic context, PInvoke cookie, var arg cookie, non-standard args, etc.
        LclVarDsc* varDsc = nullptr;
        for (unsigned i = 0; i < compiler->lvaCount; i++)
        {
            varDsc = &(compiler->lvaTable[i]);

            // We are iterating over the arguments only.
            assert(varDsc->lvIsParam);

            if (varDsc->lvArgReg == REG_STK)
            {
                baseVarNum = compiler->lvaFirstStackIncomingArgNum = i;
                break;
            }
        }
        assert(varDsc != nullptr);
    }

    return baseVarNum;
#elif defined(_TARGET_AMD64_)
    return 0;
#else
    // Not implemented for x86.
    NYI_X86("getFirstArgWithStackSlot not yet implemented for x86.");
    return BAD_VAR_NUM;
#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
}
//--------------------------------------------------------------------- //
// getBaseVarForPutArgStk - returns the baseVarNum for passing a stack arg.
//
// Arguments:
//    treeNode - the GT_PUTARG_STK node
//
// Return value:
//    The number of the base variable.
//
// Note:
//    If this is a tail call, the outgoing args are placed in the caller's incoming arg stack space.
//    Otherwise, they go in the outgoing arg area on the current frame.
//
//    On Windows the caller always creates slots (homing space) in its frame for the
//    first 4 arguments of a callee (register passed args). So, the baseVarNum is always 0.
//    For System V systems there is no such calling convention requirement, and the code needs to find
//    the first stack passed argument from the caller. This is done by iterating over
//    all the lvParam variables and finding the first with lvArgReg equal to REG_STK.
//
unsigned
CodeGen::getBaseVarForPutArgStk(GenTreePtr treeNode)
{
    assert(treeNode->OperGet() == GT_PUTARG_STK);

    unsigned baseVarNum;
#if FEATURE_FASTTAILCALL
    bool putInIncomingArgArea = treeNode->AsPutArgStk()->putInIncomingArgArea;
#else
    const bool putInIncomingArgArea = false;
#endif
    // Whether to setup stk arg in incoming or out-going arg area?
    // Fast tail calls implemented as epilog+jmp = stk arg is setup in incoming arg area.
    // All other calls - stk arg is setup in out-going arg area.
    if (putInIncomingArgArea)
    {
        // See the note in the function header re: finding the first stack passed argument.
        baseVarNum = getFirstArgWithStackSlot();
        assert(baseVarNum != BAD_VAR_NUM);

#ifdef DEBUG
        // This must be a fast tail call.
        assert(treeNode->AsPutArgStk()->gtCall->AsCall()->IsFastTailCall());

        // Since it is a fast tail call, the existence of the first incoming arg is guaranteed
        // because a fast tail call requires that the in-coming arg area of the caller is >= the out-going
        // arg area required for the tail call.
        LclVarDsc* varDsc = &(compiler->lvaTable[baseVarNum]);
        assert(varDsc != nullptr);

#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
        assert(!varDsc->lvIsRegArg && varDsc->lvArgReg == REG_STK);
#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
        // On Windows this assert is always true. The first argument will always be in REG_ARG_0 or REG_FLTARG_0.
        assert(varDsc->lvIsRegArg && (varDsc->lvArgReg == REG_ARG_0 || varDsc->lvArgReg == REG_FLTARG_0));
#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
#endif // DEBUG
    }
    else
    {
#if FEATURE_FIXED_OUT_ARGS
        baseVarNum = compiler->lvaOutgoingArgSpaceVar;
#else // !FEATURE_FIXED_OUT_ARGS
        NYI_X86("Stack args for x86/RyuJIT");
        baseVarNum = BAD_VAR_NUM;
#endif // !FEATURE_FIXED_OUT_ARGS
    }

    return baseVarNum;
}
//--------------------------------------------------------------------- //
// genPutArgStk - generate code for passing an arg on the stack.
//
// Arguments:
//    treeNode   - the GT_PUTARG_STK node
//    targetType - the type of the treeNode
//
// Return value:
//    None
//
void
CodeGen::genPutArgStk(GenTreePtr treeNode)
{
    var_types targetType = treeNode->TypeGet();
#ifdef _TARGET_X86_
    noway_assert(targetType != TYP_STRUCT);

    // The following logic is applicable for x86 arch.
    assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));

    GenTreePtr data = treeNode->gtOp.gtOp1;

    // On a 32-bit target, all of the long arguments have been decomposed into
    // a separate putarg_stk for each of the upper and lower halves.
    noway_assert(targetType != TYP_LONG);

    int argSize = genTypeSize(genActualType(targetType));
    genStackLevel += argSize;

    // TODO-Cleanup: Handle this in emitInsMov() in emitXArch.cpp?
    if (data->isContainedIntOrIImmed())
    {
        if (data->IsIconHandle())
        {
            inst_IV_handle(INS_push, data->gtIntCon.gtIconVal);
        }
        else
        {
            inst_IV(INS_push, data->gtIntCon.gtIconVal);
        }
    }
    else if (data->isContained())
    {
        NYI_X86("Contained putarg_stk of non-constant");
    }
    else
    {
        genConsumeReg(data);
        if (varTypeIsIntegralOrI(targetType))
        {
            inst_RV(INS_push, data->gtRegNum, targetType);
        }
        else
        {
            // Decrement SP.
            inst_RV_IV(INS_sub, REG_SPBASE, argSize, emitActualTypeSize(TYP_I_IMPL));
            getEmitter()->emitIns_AR_R(ins_Store(targetType), emitTypeSize(targetType), data->gtRegNum, REG_SPBASE, 0);
        }
    }
#else // !_TARGET_X86_

    unsigned baseVarNum = getBaseVarForPutArgStk(treeNode);

#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING

    if (varTypeIsStruct(targetType))
    {
        genPutStructArgStk(treeNode, baseVarNum);
        return;
    }
#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING

    noway_assert(targetType != TYP_STRUCT);
    assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));

    // Get argument offset on stack.
    // Here we cross check that the argument offset hasn't changed from lowering to codegen since
    // we are storing the arg slot number in the GT_PUTARG_STK node in the lowering phase.
    int argOffset = treeNode->AsPutArgStk()->getArgOffset();

#ifdef DEBUG
    fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(treeNode->AsPutArgStk()->gtCall, treeNode);
    assert(curArgTabEntry);
    assert(argOffset == (int)curArgTabEntry->slotNum * TARGET_POINTER_SIZE);
#endif

    GenTreePtr data = treeNode->gtGetOp1();

    if (data->isContained())
    {
        getEmitter()->emitIns_S_I(ins_Store(targetType),
                                  emitTypeSize(targetType),
                                  baseVarNum,
                                  argOffset,
                                  (int)data->AsIntConCommon()->IconValue());
    }
    else
    {
        genConsumeReg(data);
        getEmitter()->emitIns_S_R(ins_Store(targetType), emitTypeSize(targetType), data->gtRegNum, baseVarNum,
                                  argOffset);
    }
#endif // !_TARGET_X86_
}
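
// Illustrative example (a sketch, not from the original sources): on amd64 a non-contained
// int arg destined for the third outgoing stack slot is stored roughly as
//     mov qword ptr [<outgoing arg space> + 0x10], rax   ; argOffset == 2 * TARGET_POINTER_SIZE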
8030 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
8032 //---------------------------------------------------------------------
8033 // genPutStructArgStk - generate code for copying a struct arg on the stack by value.
8034 // In case there are references to heap object in the struct,
8035 // it generates the gcinfo as well.
8038 // treeNode - the GT_PUTARG_STK node
8039 // baseVarNum - the variable number relative to which to put the argument on the stack.
8040 // For tail calls this is 0.
8041 // For non-tail calls this is the outgoing arg space (lvaOutgoingArgSpaceVar).
8047 void CodeGen::genPutStructArgStk(GenTreePtr treeNode, unsigned baseVarNum)
8049 assert(treeNode->OperGet() == GT_PUTARG_STK);
8050 assert(baseVarNum != BAD_VAR_NUM);
8052 var_types targetType = treeNode->TypeGet();
8054 if (varTypeIsSIMD(targetType))
8056 regNumber srcReg = genConsumeReg(treeNode->gtGetOp1());
8057 assert((srcReg != REG_NA) && (genIsValidFloatReg(srcReg)));
8058 getEmitter()->emitIns_S_R(ins_Store(targetType),
8059 emitTypeSize(targetType),
8062 treeNode->AsPutArgStk()->getArgOffset());
8066 assert(targetType == TYP_STRUCT);
8068 GenTreePutArgStk* putArgStk = treeNode->AsPutArgStk();
8069 if (putArgStk->gtNumberReferenceSlots == 0)
8071 switch (putArgStk->gtPutArgStkKind)
8073 case GenTreePutArgStk::PutArgStkKindRepInstr:
8074 genStructPutArgRepMovs(putArgStk, baseVarNum);
8076 case GenTreePutArgStk::PutArgStkKindUnroll:
8077 genStructPutArgUnroll(putArgStk, baseVarNum);
8085 // No need to disable GC the way COPYOBJ does, because here the refs are always copied with atomic operations.
8087 // Consume these registers.
8088 // They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing").
8089 genConsumePutStructArgStk(putArgStk, REG_RDI, REG_RSI, REG_NA, baseVarNum);
8090 GenTreePtr dstAddr = putArgStk;
8091 GenTreePtr src = putArgStk->gtOp.gtOp1;
8092 assert(src->OperGet() == GT_LDOBJ);
8093 GenTreePtr srcAddr = src->gtGetOp1();
8095 gcInfo.gcMarkRegPtrVal(REG_RSI, srcAddr->TypeGet());
8097 unsigned slots = putArgStk->gtNumSlots;
8099 // Since we are always copying to the stack, we don't need to use the write barrier.
8100 BYTE* gcPtrs = putArgStk->gtGcPtrs;
8101 unsigned gcPtrCount = putArgStk->gtNumberReferenceSlots;
8104 unsigned copiedSlots = 0;
8110 // Let's see if we can use rep movsq instead of a sequence of movsq instructions
8111 // to save cycles and code size.
8113 unsigned nonGcSlotCount = 0;
8119 } while (i < slots && gcPtrs[i] == TYPE_GC_NONE);
8121 // If we have a very small contiguous non-gc region, it's better just to
8122 // emit a sequence of movsq instructions
8123 if (nonGcSlotCount < CPOBJ_NONGC_SLOTS_LIMIT)
8125 copiedSlots += nonGcSlotCount;
8126 while (nonGcSlotCount > 0)
8134 getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, nonGcSlotCount);
8135 copiedSlots += nonGcSlotCount;
8136 instGen(INS_r_movsq);
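// To make the tradeoff concrete (an illustrative sketch; the slot counts are
// made up): a short non-gc run of 3 slots is unrolled as
//
//   movsq
//   movsq
//   movsq
//
// whereas a longer run, say 8 slots, takes the rep-prefixed form emitted above:
//
//   mov rcx, 8
//   rep movsq
//
// Either way each movsq copies 8 bytes from [rsi] to [rdi] and advances both pointers.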
8141 case TYPE_GC_REF: // Is an object ref
8142 case TYPE_GC_BYREF: // Is an interior pointer - promote it but don't scan it
8144 // We have a GC (byref or ref) pointer
8145 // TODO-Amd64-Unix: Here a better solution (for code size and CQ) would be to use the movsq instruction,
8146 // but the logic for emitting a GC info record is not available (it is internal to the emitter only).
8147 // See the emitGCVarLiveUpd function. If we could call it separately, we could do instGen(INS_movsq); and emit the GC info.
8149 getEmitter()->emitIns_R_AR(ins_Load(srcAddr->TypeGet()), emitTypeSize(srcAddr), REG_RCX, REG_RSI, 0);
8150 getEmitter()->emitIns_S_R(ins_Store(srcAddr->TypeGet()),
8151 emitTypeSize(srcAddr),
8154 ((copiedSlots + putArgStk->gtSlotNum) * TARGET_POINTER_SIZE));
8156 getEmitter()->emitIns_R_I(INS_add, emitTypeSize(srcAddr), REG_RSI, TARGET_POINTER_SIZE);
8157 getEmitter()->emitIns_R_I(INS_add, EA_8BYTE, REG_RDI, TARGET_POINTER_SIZE);
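// Net effect for one GC slot (an illustrative sketch; 'disp' stands for the
// computed outgoing-arg-space displacement): the four emitIns calls above yield
//
//   mov rcx, qword ptr [rsi]        ; load the ref
//   mov qword ptr [rsp+disp], rcx   ; frame-relative store via emitIns_S_R, so
//                                   ; the emitter can record the GC liveness update
//   add rsi, 8
//   add rdi, 8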
8170 assert(gcPtrCount == 0);
8171 gcInfo.gcMarkRegSetNpt(RBM_RSI);
8174 #endif //defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
8176 /*****************************************************************************
8178 * Create and record GC Info for the function.
8180 #ifdef _TARGET_AMD64_
8181 void
8182 #else // !_TARGET_AMD64_
8183 void*
8184 #endif // !_TARGET_AMD64_
8185 CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, unsigned prologSize, unsigned epilogSize DEBUG_ARG(void* codePtr))
8187 #ifdef JIT32_GCENCODER
8188 return genCreateAndStoreGCInfoJIT32(codeSize, prologSize, epilogSize DEBUG_ARG(codePtr));
8189 #else // !JIT32_GCENCODER
8190 genCreateAndStoreGCInfoX64(codeSize, prologSize DEBUG_ARG(codePtr));
8191 #endif // !JIT32_GCENCODER
8194 #ifdef JIT32_GCENCODER
8195 void* CodeGen::genCreateAndStoreGCInfoJIT32(unsigned codeSize, unsigned prologSize, unsigned epilogSize DEBUG_ARG(void* codePtr))
8204 compiler->compInfoBlkSize = gcInfo.gcInfoBlockHdrSave(headerBuf,
8212 size_t argTabOffset = 0;
8213 size_t ptrMapSize = gcInfo.gcPtrTableSize(header, codeSize, &argTabOffset);
8217 if (genInterruptible)
8219 gcHeaderISize += compiler->compInfoBlkSize;
8220 gcPtrMapISize += ptrMapSize;
8224 gcHeaderNSize += compiler->compInfoBlkSize;
8225 gcPtrMapNSize += ptrMapSize;
8228 #endif // DISPLAY_SIZES
8230 compiler->compInfoBlkSize += ptrMapSize;
8232 /* Allocate the info block for the method */
8234 compiler->compInfoBlkAddr = (BYTE *) compiler->info.compCompHnd->allocGCInfo(compiler->compInfoBlkSize);
8236 #if 0 // VERBOSE_SIZES
8237 // TODO-X86-Cleanup: 'dataSize', below, is not defined
8239 // if (compiler->compInfoBlkSize > codeSize && compiler->compInfoBlkSize > 100)
8241 printf("[%7u VM, %7u+%7u/%7u x86 %03u/%03u%%] %s.%s\n",
8242 compiler->info.compILCodeSize,
8243 compiler->compInfoBlkSize,
8244 codeSize + dataSize,
8245 codeSize + dataSize - prologSize - epilogSize,
8246 100 * (codeSize + dataSize) / compiler->info.compILCodeSize,
8247 100 * (codeSize + dataSize + compiler->compInfoBlkSize) / compiler->info.compILCodeSize,
8248 compiler->info.compClassName,
8249 compiler->info.compMethodName);
8254 /* Fill in the info block and return it to the caller */
8256 void* infoPtr = compiler->compInfoBlkAddr;
8258 /* Create the method info block: header followed by GC tracking tables */
8260 compiler->compInfoBlkAddr += gcInfo.gcInfoBlockHdrSave(compiler->compInfoBlkAddr, -1,
8267 assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + headerSize);
8268 compiler->compInfoBlkAddr = gcInfo.gcPtrTableSave(compiler->compInfoBlkAddr, header, codeSize, &argTabOffset);
8269 assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + headerSize + ptrMapSize);
8275 BYTE * temp = (BYTE *)infoPtr;
8276 unsigned size = compiler->compInfoBlkAddr - temp;
8277 BYTE * ptab = temp + headerSize;
8279 noway_assert(size == headerSize + ptrMapSize);
8281 printf("Method info block - header [%u bytes]:", headerSize);
8283 for (unsigned i = 0; i < size; i++)
8287 printf("\nMethod info block - ptrtab [%u bytes]:", ptrMapSize);
8288 printf("\n %04X: %*c", i & ~0xF, 3*(i&0xF), ' ');
8293 printf("\n %04X: ", i);
8296 printf("%02X ", *temp++);
8306 if (compiler->opts.dspGCtbls)
8308 const BYTE *base = (BYTE *)infoPtr;
8310 unsigned methodSize;
8313 printf("GC Info for method %s\n", compiler->info.compFullName);
8314 printf("GC info size = %3u\n", compiler->compInfoBlkSize);
8316 size = gcInfo.gcInfoBlockHdrDump(base, &dumpHeader, &methodSize);
8317 // printf("size of header encoding is %3u\n", size);
8320 if (compiler->opts.dspGCtbls)
8323 size = gcInfo.gcDumpPtrTable(base, dumpHeader, methodSize);
8324 // printf("size of pointer table is %3u\n", size);
8326 noway_assert(compiler->compInfoBlkAddr == (base+size));
8332 if (jitOpts.testMask & 128)
8334 for (unsigned offs = 0; offs < codeSize; offs++)
8336 gcInfo.gcFindPtrsInFrame(infoPtr, codePtr, offs);
8340 #endif // DUMP_GC_TABLES
8342 /* Make sure we ended up generating the expected number of bytes */
8344 noway_assert(compiler->compInfoBlkAddr == (BYTE *)infoPtr + compiler->compInfoBlkSize);
8349 #else // !JIT32_GCENCODER
8351 void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize DEBUG_ARG(void* codePtr))
8353 IAllocator* allowZeroAlloc = new (compiler, CMK_GC) AllowZeroAllocator(compiler->getAllocatorGC());
8354 GcInfoEncoder* gcInfoEncoder = new (compiler, CMK_GC) GcInfoEncoder(compiler->info.compCompHnd, compiler->info.compMethodInfo, allowZeroAlloc);
8355 assert(gcInfoEncoder);
8357 // Follow the code pattern of the x86 gc info encoder (genCreateAndStoreGCInfoJIT32).
8358 gcInfo.gcInfoBlockHdrSave(gcInfoEncoder, codeSize, prologSize);
8360 // First we figure out the encoder IDs for the stack slots and registers.
8361 gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_ASSIGN_SLOTS);
8362 // Now we've requested all the slots we'll need; "finalize" these (make more compact data structures for them).
8363 gcInfoEncoder->FinalizeSlotIds();
8364 // Now we can actually use those slot IDs to declare live ranges.
8365 gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK);
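// A minimal sketch of the two-phase GcInfoEncoder contract that
// gcMakeRegPtrTable follows internally (the slot id, offsets, and code offset
// below are made up for illustration):
//
//   GcSlotId id = gcInfoEncoder->GetStackSlotId(-8, GC_SLOT_BASE, GC_FRAMEREG_REL); // phase 1: assign slots
//   gcInfoEncoder->FinalizeSlotIds();                                              // no new slots after this
//   gcInfoEncoder->SetSlotState(codeOffset, id, GC_SLOT_LIVE);                     // phase 2: report liveness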
8367 #if defined(DEBUGGING_SUPPORT)
8368 if (compiler->opts.compDbgEnC)
8370 // what we have to preserve is called the "frame header" (see comments in VM\eetwain.cpp)
8374 // -saved 'this' pointer and bool for synchronized methods
8376 // 4 slots for RBP + return address + RSI + RDI
8377 int preservedAreaSize = 4 * REGSIZE_BYTES;
8379 if (compiler->info.compFlags & CORINFO_FLG_SYNCH)
8381 if (!(compiler->info.compFlags & CORINFO_FLG_STATIC))
8382 preservedAreaSize += REGSIZE_BYTES;
8384 // bool in synchronized methods that tracks whether the lock has been taken (takes 4 bytes on stack)
8385 preservedAreaSize += 4;
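// Worked example: for a synchronized instance method on amd64 the preserved
// area is 4 * REGSIZE_BYTES (RBP, return address, RSI, RDI) + REGSIZE_BYTES
// (the saved 'this') + 4 (the lock-taken bool) = 32 + 8 + 4 = 44 bytes.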
8388 // This both signals that the method is compiled for EnC and reports the size of the preserved block at the top of the frame.
8389 gcInfoEncoder->SetSizeOfEditAndContinuePreservedArea(preservedAreaSize);
8393 gcInfoEncoder->Build();
8395 // The GC Encoder automatically puts the GC info in the right spot using ICorJitInfo::allocGCInfo(size_t).
8396 // Let's save the values anyway for debugging purposes.
8397 compiler->compInfoBlkAddr = gcInfoEncoder->Emit();
8398 compiler->compInfoBlkSize = 0; // not exposed by the GCEncoder interface
8400 #endif // !JIT32_GCENCODER
8402 /*****************************************************************************
8403 * Emit a call to a helper function.
8407 void CodeGen::genEmitHelperCall(unsigned helper,
8410 #ifndef LEGACY_BACKEND
8411 ,regNumber callTargetReg /*= REG_NA */
8412 #endif // !LEGACY_BACKEND
8415 void* addr = nullptr;
8416 void* pAddr = nullptr;
8418 emitter::EmitCallType callType = emitter::EC_FUNC_TOKEN;
8419 addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, &pAddr);
8420 regNumber callTarget = REG_NA;
8421 regMaskTP killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper);
8425 assert(pAddr != nullptr);
8427 // Absolute indirect call addr
8428 // Note: The order of checks is important. Always check for a pc-relative encoding first and a
8429 // zero-relative encoding second, because the former encoding is 1 byte smaller than the latter.
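// Concretely (for illustration, using the amd64 encodings): a pc-relative
// indirect call is "call qword ptr [rip+disp32]" (FF 15 disp32, 6 bytes),
// while a zero-relative one is "call qword ptr [disp32]" (FF 14 25 disp32, 7 bytes).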
8430 if (genCodeIndirAddrCanBeEncodedAsPCRelOffset((size_t)pAddr) ||
8431 genCodeIndirAddrCanBeEncodedAsZeroRelOffset((size_t)pAddr))
8433 // generate call whose target is specified by 32-bit offset relative to PC or zero.
8434 callType = emitter::EC_FUNC_TOKEN_INDIR;
8439 #ifdef _TARGET_AMD64_
8440 // If this indirect address cannot be encoded as a 32-bit offset relative to PC or zero,
8441 // load it into REG_HELPER_CALL_TARGET and use a register-indirect addressing mode to make the call.
8446 if (callTargetReg == REG_NA)
8448 // If a callTargetReg has not been explicitly provided, we will use REG_DEFAULT_HELPER_CALL_TARGET, but
8449 // this is only a valid assumption if the helper call is known to kill REG_DEFAULT_HELPER_CALL_TARGET.
8450 callTargetReg = REG_DEFAULT_HELPER_CALL_TARGET;
8451 regMaskTP callTargetMask = genRegMask(callTargetReg);
8452 noway_assert((callTargetMask & killMask) == callTargetMask);
8456 // An explicitly provided call target register may not be in the kill set for the call,
8457 // but it must not overwrite any live variable.
8458 regMaskTP callTargetMask = genRegMask(callTargetReg);
8459 noway_assert((callTargetMask & regSet.rsMaskVars) == RBM_NONE);
8463 callTarget = callTargetReg;
8464 CodeGen::genSetRegToIcon(callTarget, (ssize_t) pAddr, TYP_I_IMPL);
8465 callType = emitter::EC_INDIR_ARD;
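// The resulting sequence (illustrative; the register and address are made up)
// loads pAddr into the call target register and then calls through it:
//
//   mov  rax, 0x7FF812345678      ; pAddr
//   call qword ptr [rax]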
8469 getEmitter()->emitIns_Call(callType,
8470 compiler->eeFindHelper(helper),
8471 INDEBUG_LDISASM_COMMA(nullptr)
8475 gcInfo.gcVarPtrSetCur,
8476 gcInfo.gcRegGCrefSetCur,
8477 gcInfo.gcRegByrefSetCur,
8478 BAD_IL_OFFSET, /* IL offset */
8479 callTarget, /* ireg */
8480 REG_NA, 0, 0, /* xreg, xmul, disp */
8482 emitter::emitNoGChelper(helper));
8485 regTracker.rsTrashRegSet(killMask);
8486 regTracker.rsTrashRegsForGCInterruptability();
8489 #if !defined(_TARGET_64BIT_)
8490 //-----------------------------------------------------------------------------
8492 // Code Generation for Long integers
8494 //-----------------------------------------------------------------------------
8496 //------------------------------------------------------------------------
8497 // genStoreLongLclVar: Generate code to store a non-enregistered long lclVar
8500 // treeNode - A TYP_LONG lclVar node.
8506 // 'treeNode' must be a TYP_LONG lclVar node for a lclVar that has NOT been promoted.
8507 // Its operand must be a GT_LONG node.
8509 void CodeGen::genStoreLongLclVar(GenTree* treeNode)
8511 emitter* emit = getEmitter();
8513 GenTreeLclVarCommon* lclNode = treeNode->AsLclVarCommon();
8514 unsigned lclNum = lclNode->gtLclNum;
8515 LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
8516 assert(varDsc->TypeGet() == TYP_LONG);
8517 assert(!varDsc->lvPromoted);
8518 GenTreePtr op1 = treeNode->gtOp.gtOp1;
8519 noway_assert(op1->OperGet() == GT_LONG);
8520 genConsumeRegs(op1);
8522 // Definitions of register candidates will have been lowered to 2 int lclVars.
8523 assert(!treeNode->InReg());
8525 GenTreePtr loVal = op1->gtGetOp1();
8526 GenTreePtr hiVal = op1->gtGetOp2();
8527 // NYI: Contained immediates.
8528 NYI_IF((loVal->gtRegNum == REG_NA) || (hiVal->gtRegNum == REG_NA), "Store of long lclVar with contained immediate");
8529 emit->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, loVal->gtRegNum, lclNum, 0);
8530 emit->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, hiVal->gtRegNum, lclNum, genTypeSize(TYP_INT));
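// For example (illustrative): with the lo half in eax, the hi half in edx,
// and the lclVar at [ebp-8], the two stores above become:
//
//   mov dword ptr [ebp-8], eax    ; lower 4 bytes
//   mov dword ptr [ebp-4], edx    ; upper 4 bytes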
8532 #endif // !defined(_TARGET_64BIT_)
8534 /*****************************************************************************
8535 * Unit testing of the XArch emitter: generate a bunch of instructions into the prolog
8536 * (it's as good a place as any), then use COMPLUS_JitLateDisasm=* to see if the late
8537 * disassembler thinks the instructions are the same as we do.
8540 // Uncomment "#define ALL_XARCH_EMITTER_UNIT_TESTS" to run all the unit tests here.
8541 // After adding a unit test, and verifying it works, put it under this #ifdef, so we don't see it run every time.
8542 //#define ALL_XARCH_EMITTER_UNIT_TESTS
8544 #if defined(DEBUG) && defined(LATE_DISASM) && defined(_TARGET_AMD64_)
8545 void CodeGen::genAmd64EmitterUnitTests()
8552 if (!compiler->opts.altJit)
8554 // No point doing this in a "real" JIT.
8558 // Mark the "fake" instructions in the output.
8559 printf("*************** In genAmd64EmitterUnitTests()\n");
8562 // genDefineTempLabel(genCreateTempLabel());
8563 // to create artificial labels to help separate groups of tests.
8569 #ifdef ALL_XARCH_EMITTER_UNIT_TESTS
8570 #ifdef FEATURE_AVX_SUPPORT
8571 genDefineTempLabel(genCreateTempLabel());
8573 // vhaddpd ymm0,ymm1,ymm2
8574 getEmitter()->emitIns_R_R_R(INS_haddpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8575 // vaddss xmm0,xmm1,xmm2
8576 getEmitter()->emitIns_R_R_R(INS_addss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8577 // vaddsd xmm0,xmm1,xmm2
8578 getEmitter()->emitIns_R_R_R(INS_addsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8579 // vaddps xmm0,xmm1,xmm2
8580 getEmitter()->emitIns_R_R_R(INS_addps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8581 // vaddps ymm0,ymm1,ymm2
8582 getEmitter()->emitIns_R_R_R(INS_addps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8583 // vaddpd xmm0,xmm1,xmm2
8584 getEmitter()->emitIns_R_R_R(INS_addpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8585 // vaddpd ymm0,ymm1,ymm2
8586 getEmitter()->emitIns_R_R_R(INS_addpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8587 // vsubss xmm0,xmm1,xmm2
8588 getEmitter()->emitIns_R_R_R(INS_subss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8589 // vsubsd xmm0,xmm1,xmm2
8590 getEmitter()->emitIns_R_R_R(INS_subsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8591 // vsubps xmm0,xmm1,xmm2
8592 getEmitter()->emitIns_R_R_R(INS_subps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8593 // vsubps ymm0,ymm1,ymm2
8594 getEmitter()->emitIns_R_R_R(INS_subps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8595 // vsubpd xmm0,xmm1,xmm2
8596 getEmitter()->emitIns_R_R_R(INS_subpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8597 // vsubpd ymm0,ymm1,ymm2
8598 getEmitter()->emitIns_R_R_R(INS_subpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8599 // vmulss xmm0,xmm1,xmm2
8600 getEmitter()->emitIns_R_R_R(INS_mulss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8601 // vmulsd xmm0,xmm1,xmm2
8602 getEmitter()->emitIns_R_R_R(INS_mulsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8603 // vmulps xmm0,xmm1,xmm2
8604 getEmitter()->emitIns_R_R_R(INS_mulps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8605 // vmulpd xmm0,xmm1,xmm2
8606 getEmitter()->emitIns_R_R_R(INS_mulpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8607 // vmulps ymm0,ymm1,ymm2
8608 getEmitter()->emitIns_R_R_R(INS_mulps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8609 // vmulpd ymm0,ymm1,ymm2
8610 getEmitter()->emitIns_R_R_R(INS_mulpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8611 // vandps xmm0,xmm1,xmm2
8612 getEmitter()->emitIns_R_R_R(INS_andps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8613 // vandpd xmm0,xmm1,xmm2
8614 getEmitter()->emitIns_R_R_R(INS_andpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8615 // vandps ymm0,ymm1,ymm2
8616 getEmitter()->emitIns_R_R_R(INS_andps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8617 // vandpd ymm0,ymm1,ymm2
8618 getEmitter()->emitIns_R_R_R(INS_andpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8619 // vorps xmm0,xmm1,xmm2
8620 getEmitter()->emitIns_R_R_R(INS_orps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8621 // vorpd xmm0,xmm1,xmm2
8622 getEmitter()->emitIns_R_R_R(INS_orpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8623 // vorps ymm0,ymm1,ymm2
8624 getEmitter()->emitIns_R_R_R(INS_orps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8625 // vorpd ymm0,ymm1,ymm2
8626 getEmitter()->emitIns_R_R_R(INS_orpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8627 // vdivss xmm0,xmm1,xmm2
8628 getEmitter()->emitIns_R_R_R(INS_divss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8629 // vdivsd xmm0,xmm1,xmm2
8630 getEmitter()->emitIns_R_R_R(INS_divsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8636 // vcvtss2sd xmm0,xmm1,xmm2
8637 getEmitter()->emitIns_R_R_R(INS_cvtss2sd, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8638 // vcvtsd2ss xmm0,xmm1,xmm2
8639 getEmitter()->emitIns_R_R_R(INS_cvtsd2ss, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
8640 #endif // FEATURE_AVX_SUPPORT
8641 #endif // ALL_XARCH_EMITTER_UNIT_TESTS
8642 printf("*************** End of genAmd64EmitterUnitTests()\n");
8645 #endif // defined(DEBUG) && defined(LATE_DISASM) && defined(_TARGET_AMD64_)
8648 /*****************************************************************************/
8649 #ifdef DEBUGGING_SUPPORT
8650 /*****************************************************************************
8653 * Called by the main genSetScopeInfo() for every scope-info piece to be recorded.
8656 void CodeGen::genSetScopeInfo (unsigned which,
8657 UNATIVE_OFFSET startOffs,
8658 UNATIVE_OFFSET length,
8662 Compiler::siVarLoc& varLoc)
8664 /* We need to do some mapping while reporting back these variables */
8666 unsigned ilVarNum = compiler->compMap2ILvarNum(varNum);
8667 noway_assert((int)ilVarNum != ICorDebugInfo::UNKNOWN_ILNUM);
8669 VarName name = nullptr;
8673 for (unsigned scopeNum = 0; scopeNum < compiler->info.compVarScopesCount; scopeNum++)
8675 if (LVnum == compiler->info.compVarScopes[scopeNum].vsdLVnum)
8677 name = compiler->info.compVarScopes[scopeNum].vsdName;
8681 // Hang on to this info.
8683 TrnslLocalVarInfo &tlvi = genTrnslLocalVarInfo[which];
8685 tlvi.tlviVarNum = ilVarNum;
8686 tlvi.tlviLVnum = LVnum;
8687 tlvi.tlviName = name;
8688 tlvi.tlviStartPC = startOffs;
8689 tlvi.tlviLength = length;
8690 tlvi.tlviAvailable = avail;
8691 tlvi.tlviVarLoc = varLoc;
8695 compiler->eeSetLVinfo(which, startOffs, length, ilVarNum, LVnum, name, avail, varLoc);
8697 #endif // DEBUGGING_SUPPORT
8699 #endif // _TARGET_AMD64_
8701 #endif // !LEGACY_BACKEND