// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XX Postconditions (for the nodes currently handled):                          XX
XX    - All operands requiring a register are explicit in the graph           XX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
*/

#ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator

#if !defined(_TARGET_64BIT_)
#include "decomposelongs.h"
#endif // !defined(_TARGET_64BIT_)
//------------------------------------------------------------------------
// MakeSrcContained: Make "childNode" a contained node
//
// Arguments:
//    parentNode - is a non-leaf node that can contain its 'childNode'
//    childNode  - is an op that will now be contained by its parent.
//
// Notes:
//    If 'childNode' has any existing sources, they will now be sources for the parent.
//
void Lowering::MakeSrcContained(GenTreePtr parentNode, GenTreePtr childNode)
    assert(!parentNode->OperIsLeaf());
    int srcCount = childNode->gtLsraInfo.srcCount;
    assert(srcCount >= 0);
    m_lsra->clearOperandCounts(childNode);
    assert(parentNode->gtLsraInfo.srcCount > 0);
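    // Informally: the contained child no longer occupies a source slot of its own,
    // but its own sources now count as sources of the parent, hence the net
    // adjustment of (srcCount - 1) below.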
    parentNode->gtLsraInfo.srcCount += srcCount - 1;
//------------------------------------------------------------------------
// CheckImmedAndMakeContained: Checks if the 'childNode' is a containable immediate
//    and, if so, makes it contained.
//
// Arguments:
//    parentNode - is any non-leaf node
//    childNode  - is a child op of 'parentNode'
//
// Return value:
//    true if we are able to make childNode a contained immediate
//
bool Lowering::CheckImmedAndMakeContained(GenTree* parentNode, GenTree* childNode)
    assert(!parentNode->OperIsLeaf());

    // If childNode is a containable immediate
    if (IsContainableImmed(parentNode, childNode))
        // then make it contained within the parentNode
        MakeSrcContained(parentNode, childNode);
//------------------------------------------------------------------------
// IsSafeToContainMem: Checks for conflicts between childNode and parentNode,
// and returns 'true' iff memory operand childNode can be contained in parentNode.
//
// Arguments:
//    parentNode - a non-leaf binary node
//    childNode  - a memory op that is a child op of 'parentNode'
//
// Return value:
//    true if it is safe to make childNode a contained memory operand.
//
bool Lowering::IsSafeToContainMem(GenTree* parentNode, GenTree* childNode)
    assert(parentNode->OperIsBinary());
    assert(childNode->isMemoryOp());

    unsigned int childFlags = (childNode->gtFlags & GTF_ALL_EFFECT);
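    // Walk the nodes between childNode and parentNode in execution order: containing the
    // memory operand defers its evaluation to the parent's position, so any intervening
    // node that could interfere (an impure call, or a store that may alias the operand)
    // makes containment unsafe.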
    for (node = childNode; node != parentNode; node = node->gtNext)
        assert(node != nullptr);

        if ((childFlags != 0) && node->IsCall())
            bool isPureHelper = (node->gtCall.gtCallType == CT_HELPER) &&
                                comp->s_helperCallProperties.IsPure(comp->eeGetHelperNum(node->gtCall.gtCallMethHnd));
            if (!isPureHelper && ((node->gtFlags & childFlags & GTF_ALL_EFFECT) != 0))
        else if (node->OperIsStore() && comp->fgNodesMayInterfere(node, childNode))
//------------------------------------------------------------------------
// This is the main entry point for Lowering.
GenTree* Lowering::LowerNode(GenTree* node)
    assert(node != nullptr);
    switch (node->gtOper)
            TryCreateAddrMode(LIR::Use(BlockRange(), &node->gtOp.gtOp1, node), true);
            return LowerAdd(node);
            LowerUnsignedDivOrMod(node);
            return LowerSignedDivOrMod(node);
            return LowerSwitch(node);
            LowerJmpMethod(node);
            return LowerArrElem(node);

        case GT_STORE_DYN_BLK:
            LowerBlockStore(node->AsBlk());
            if (node->TypeGet() == TYP_SIMD12)
                // A GT_SIMD node required to produce TYP_SIMD12 in fact
                // produces a TYP_SIMD16 result
                node->gtType = TYP_SIMD16;

        case GT_STORE_LCL_VAR:
            if (node->TypeGet() == TYP_SIMD12)
#ifdef _TARGET_64BIT_
                // The RyuJit backend depends on the assumption that on 64-bit targets the Vector3 size is rounded up
                // to TARGET_POINTER_SIZE, and hence Vector3 locals on the stack can be treated as TYP_SIMD16 for
                // reading and writing purposes.
                //
                // The RyuJit backend makes another implicit assumption: when Vector3 type args are passed in
                // registers or on the stack, the uppermost 4 bytes will be zero.
                //
                // For P/Invoke return and Reverse P/Invoke argument passing, the native compiler doesn't guarantee
                // that the upper 4 bytes of a Vector3 type struct are zero initialized, and hence assumption 2 is
                // invalid.
                //
                // RyuJIT x64 Windows: arguments are treated as passed by ref and hence read/written just 12
                // bytes. In case of Vector3 returns, the Caller allocates a zero-initialized Vector3 local and
                // passes it as the retBuf arg, and the Callee method writes only 12 bytes to retBuf. For this
                // reason, there is no need to clear the upper 4 bytes of Vector3 type args.
                //
                // RyuJIT x64 Unix: arguments are treated as passed by value and read/written as if TYP_SIMD16.
                // Vector3 return values are returned in two return registers and the Caller assembles them into a
                // single xmm reg. Hence RyuJIT explicitly generates code to clear the upper 4 bytes of Vector3
                // type args in the prolog, and of the Vector3 return value of a call.
                node->gtType = TYP_SIMD16;
#else
                NYI("Lowering of TYP_SIMD12 locals");
#endif // _TARGET_64BIT_
#endif // FEATURE_SIMD
        case GT_STORE_LCL_FLD:
            // TODO-1stClassStructs: Once we remove the requirement that all struct stores
            // are block stores (GT_STORE_BLK or GT_STORE_OBJ), here is where we would put the local
            // store under a block store if codegen will require it.
            if (node->OperIsStore() && (node->TypeGet() == TYP_STRUCT) && (node->gtGetOp1()->OperGet() != GT_PHI))
#if FEATURE_MULTIREG_RET
                GenTree* src = node->gtGetOp1();
                assert((src->OperGet() == GT_CALL) && src->AsCall()->HasMultiRegRetVal());
#else  // !FEATURE_MULTIREG_RET
                assert(!"Unexpected struct local store in Lowering");
#endif // !FEATURE_MULTIREG_RET
/** -- Switch Lowering --
 * The main idea of switch lowering is to make the register requirements of this node
 * transparent to LSRA downstream. The switch instruction is inherently a control statement, but in the JIT
 * it is represented as a simple tree node; at the time we actually generate code for it we end up
 * emitting instructions that modify the flow of execution, which imposes complicated
 * register requirements and lifetimes.
 *
 * So, for the purpose of LSRA, we want a more detailed specification of what a switch node actually
 * means and, more importantly, which registers we need and when, for each instruction we want to issue,
 * so that they can be correctly allocated downstream.
 *
 * For this purpose, this procedure performs switch lowering in two different ways:
 *
 * a) Represent the switch statement as a zero-index jump table construct. This means that for every destination
 *    of the switch, we will store this destination in an array of addresses and the code generator will issue
 *    a data section where this array will live, and will emit code that, based on the switch index, will indirect
 *    and jump to the destination specified in the jump table.
 *
 *    For this transformation we introduce a new GT node called GT_SWITCH_TABLE that is a specialization of the
 *    switch node for jump table based switches.
 *    The overall structure of a GT_SWITCH_TABLE is:
 *        |_________ localVar  (a temporary local that holds the switch index)
 *        |_________ jumpTable (this is a special node that holds the address of the jump table array)
 *
 *    Now, the way we morph a GT_SWITCH node into this lowered switch table node form is the following:
 *
 *    Input: GT_SWITCH (inside a basic block whose Branch Type is BBJ_SWITCH)
 *        |_____ expr (an arbitrarily complex GT_NODE that represents the switch index)
 *
 *    This gets transformed into the following statements inside a BBJ_COND basic block (the target would be
 *    the default case of the switch in case the conditional is evaluated to true).
 *
 *    ----- original block, transformed
 *        |_____ tempLocal (a new temporary local variable used to store the switch index)
 *        |_____ expr      (the index expression)
 *        |___ Int_Constant (This constant is the index of the default case
 *                           that happens to be the highest index in the jump table).
 *        |___ tempLocal    (The local variable where we stored the index expression).
 *
 *    ----- new basic block
 *        |_____ jumpTable (a new jump table node that now LSRA can allocate registers for explicitly
 *                          and LinearCodeGen will be responsible to generate downstream).
 *
 *    This way there are no implicit temporaries.
 *
 * b) For small-sized switches, we will actually morph them into a series of conditionals of the form
 *        if (case falls into the default) { goto jumpTable[size]; // last entry in the jump table is the default case }
 *        (For the default case conditional, we'll be constructing the exact same code as the jump table case one.)
 *        else if (case == firstCase) { goto jumpTable[1]; }
 *        else if (case == secondCase) { goto jumpTable[2]; } and so on.
 *
 *    This transformation is of course made in JIT-IR, not downstream at the CodeGen level, so we no longer
 *    require internal temporaries to hold the index we're evaluating, and we reuse existing code from
 *    LinearCodeGen to implement this instead of implementing all the control flow constructs using InstrDscs and
 *    InstrGroups downstream.
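 *
 * Informally (a sketch, not the exact IR produced), for a switch with three cases plus a default,
 * the compare/branch form in (b) behaves like:
 *        if (temp > 2)  goto default;   // the shared default-case guard (an unsigned compare)
 *        if (temp == 0) goto case0;
 *        if (temp == 1) goto case1;
 *        goto case2;                    // the last case needs no compare when nothing falls through
 */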
GenTree* Lowering::LowerSwitch(GenTree* node)
    unsigned     jumpCnt;
    unsigned     targetCnt;
    BasicBlock** jumpTab;

    assert(node->gtOper == GT_SWITCH);
    // The first step is to build the default case conditional construct that is
    // shared between both kinds of expansion of the switch node.

    // To avoid confusion, we'll alias m_block to originalSwitchBB
    // that represents the node we're morphing.
    BasicBlock* originalSwitchBB = m_block;
    LIR::Range& switchBBRange    = LIR::AsRange(originalSwitchBB);

    // jumpCnt is the number of elements in the jump table array.
    // jumpTab is the actual pointer to the jump table array.
    // targetCnt is the number of unique targets in the jump table array.
    jumpCnt   = originalSwitchBB->bbJumpSwt->bbsCount;
    jumpTab   = originalSwitchBB->bbJumpSwt->bbsDstTab;
    targetCnt = originalSwitchBB->NumSucc(comp);

    // GT_SWITCH must be a top-level node with no use.
    assert(!switchBBRange.TryGetUse(node, &use));

    JITDUMP("Lowering switch BB%02u, %d cases\n", originalSwitchBB->bbNum, jumpCnt);
    // Handle a degenerate case: if the switch has only a default case, just convert it
    // to an unconditional branch. This should only happen in minopts or with debuggable
    // code.
        JITDUMP("Lowering switch BB%02u: single target; converting to BBJ_ALWAYS\n", originalSwitchBB->bbNum);
        noway_assert(comp->opts.MinOpts() || comp->opts.compDbgCode);
        if (originalSwitchBB->bbNext == jumpTab[0])
            originalSwitchBB->bbJumpKind = BBJ_NONE;
            originalSwitchBB->bbJumpDest = nullptr;
            originalSwitchBB->bbJumpKind = BBJ_ALWAYS;
            originalSwitchBB->bbJumpDest = jumpTab[0];

        // Remove extra predecessor links if there was more than one case.
        for (unsigned i = 1; i < jumpCnt; ++i)
            (void)comp->fgRemoveRefPred(jumpTab[i], originalSwitchBB);

        // We have to get rid of the GT_SWITCH node but a child might have side effects so just assign
        // the result of the child subtree to a temp.
        GenTree* rhs = node->gtOp.gtOp1;

        unsigned lclNum                 = comp->lvaGrabTemp(true DEBUGARG("Lowering is creating a new local variable"));
        comp->lvaSortAgain              = true;
        comp->lvaTable[lclNum].lvType   = rhs->TypeGet();
        comp->lvaTable[lclNum].lvRefCnt = 1;

        GenTreeLclVar* store =
            new (comp, GT_STORE_LCL_VAR) GenTreeLclVar(GT_STORE_LCL_VAR, rhs->TypeGet(), lclNum, BAD_IL_OFFSET);
        store->gtFlags = (rhs->gtFlags & GTF_COMMON_MASK);
        store->gtFlags |= GTF_VAR_DEF;

        switchBBRange.InsertAfter(node, store);
        switchBBRange.Remove(node);
    noway_assert(jumpCnt >= 2);

    // Spill the argument to the switch node into a local so that it can be used later.
    unsigned blockWeight = originalSwitchBB->getBBWeight(comp);

    LIR::Use use(switchBBRange, &(node->gtOp.gtOp1), node);
    use.ReplaceWithLclVar(comp, blockWeight);

    // GT_SWITCH(indexExpression) is now two statements:
    //   1. a statement containing 'asg' (for temp = indexExpression)
    //   2. and a statement with GT_SWITCH(temp)

    assert(node->gtOper == GT_SWITCH);
    GenTreePtr temp = node->gtOp.gtOp1;
    assert(temp->gtOper == GT_LCL_VAR);
    unsigned   tempLclNum  = temp->gtLclVarCommon.gtLclNum;
    LclVarDsc* tempVarDsc  = comp->lvaTable + tempLclNum;
    var_types  tempLclType = tempVarDsc->TypeGet();

    BasicBlock* defaultBB   = jumpTab[jumpCnt - 1];
    BasicBlock* followingBB = originalSwitchBB->bbNext;

    /* Is the number of cases right for a test and jump switch? */
    const bool fFirstCaseFollows = (followingBB == jumpTab[0]);
    const bool fDefaultFollows   = (followingBB == defaultBB);
    unsigned minSwitchTabJumpCnt = 2; // table is better than just 2 cmp/jcc

    // This means really just a single cmp/jcc (aka a simple if/else)
    if (fFirstCaseFollows || fDefaultFollows)
        minSwitchTabJumpCnt++;
#if defined(_TARGET_ARM_)
    // On ARM for small switch tables we will
    // generate a sequence of compare and branch instructions
    // because the code to load the base of the switch
    // table is huge and hideous due to the relocation... :(
    minSwitchTabJumpCnt += 2;
#endif // _TARGET_ARM_
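    // (A sketch of the arithmetic: starting from 2, plus 1 when the first case or the
    // default falls through, plus 2 on ARM, the jump table form is only chosen once
    // jumpCnt reaches the resulting threshold of 4 or 5 entries on ARM.)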
    // Once we have the temporary variable, we construct the conditional branch for
    // the default case. As stated above, this conditional is being shared between
    // both GT_SWITCH lowering code paths.
    // This condition is of the form: if (temp > jumpTableLength - 2) { goto jumpTable[jumpTableLength - 1]; }
    GenTreePtr gtDefaultCaseCond = comp->gtNewOperNode(GT_GT, TYP_INT, comp->gtNewLclvNode(tempLclNum, tempLclType),
                                                       comp->gtNewIconNode(jumpCnt - 2, TYP_INT));

    // Make sure we perform an unsigned comparison, just in case the switch index in 'temp'
    // is now less than zero (that would also hit the default case).
    gtDefaultCaseCond->gtFlags |= GTF_UNSIGNED;
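    // E.g., with jumpCnt == 5 the guard is "temp > 3" compared unsigned: an index of -1
    // wraps to 0xFFFFFFFF, which is greater than 3, so negative indices also take the
    // default case as intended.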
    /* Increment the lvRefCnt and lvRefCntWtd for temp */
    tempVarDsc->incRefCnts(blockWeight, comp);

    GenTreePtr gtDefaultCaseJump = comp->gtNewOperNode(GT_JTRUE, TYP_VOID, gtDefaultCaseCond);
    gtDefaultCaseJump->gtFlags   = node->gtFlags;
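    // LIR::SeqTree linearizes the newly built tree into execution order so that it can be
    // appended to the block's LIR range.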
    LIR::Range condRange = LIR::SeqTree(comp, gtDefaultCaseJump);
    switchBBRange.InsertAtEnd(std::move(condRange));
    BasicBlock* afterDefaultCondBlock = comp->fgSplitBlockAfterNode(originalSwitchBB, condRange.LastNode());

    // afterDefaultCondBlock is now the switch, and all the switch targets have it as a predecessor.
    // originalSwitchBB is now a BBJ_NONE, and there is a predecessor edge in afterDefaultCondBlock
    // representing the fall-through flow from originalSwitchBB.
    assert(originalSwitchBB->bbJumpKind == BBJ_NONE);
    assert(originalSwitchBB->bbNext == afterDefaultCondBlock);
    assert(afterDefaultCondBlock->bbJumpKind == BBJ_SWITCH);
    assert(afterDefaultCondBlock->bbJumpSwt->bbsHasDefault);
    assert(afterDefaultCondBlock->isEmpty()); // Nothing here yet.
    // The GT_SWITCH code is still in originalSwitchBB (it will be removed later).

    // Turn originalSwitchBB into a BBJ_COND.
    originalSwitchBB->bbJumpKind = BBJ_COND;
    originalSwitchBB->bbJumpDest = jumpTab[jumpCnt - 1];

    // Fix the pred for the default case: the default block target still has originalSwitchBB
    // as a predecessor, but fgSplitBlockAfterNode() moved all predecessors to point
    // to afterDefaultCondBlock.
    flowList* oldEdge = comp->fgRemoveRefPred(jumpTab[jumpCnt - 1], afterDefaultCondBlock);
    comp->fgAddRefPred(jumpTab[jumpCnt - 1], originalSwitchBB, oldEdge);
    // If we originally had 2 unique successors, check to see whether there is a unique
    // non-default case, in which case we can eliminate the switch altogether.
    // Note that the single unique successor case is handled above.
    BasicBlock* uniqueSucc = nullptr;
        uniqueSucc = jumpTab[0];
        noway_assert(jumpCnt >= 2);
        for (unsigned i = 1; i < jumpCnt - 1; i++)
            if (jumpTab[i] != uniqueSucc)
                uniqueSucc = nullptr;
    if (uniqueSucc != nullptr)
        // If the unique successor immediately follows this block, we have nothing to do -
        // it will simply fall-through after we remove the switch, below.
        // Otherwise, make this a BBJ_ALWAYS.
        // Now, fixup the predecessor links to uniqueSucc. In the original jumpTab:
        //   jumpTab[i-1] was the default target, which we handled above,
        //   jumpTab[0] is the first target, and we'll leave that predecessor link.
        // Remove any additional predecessor links to uniqueSucc.
        for (unsigned i = 1; i < jumpCnt - 1; ++i)
            assert(jumpTab[i] == uniqueSucc);
            (void)comp->fgRemoveRefPred(uniqueSucc, afterDefaultCondBlock);
        if (afterDefaultCondBlock->bbNext == uniqueSucc)
            afterDefaultCondBlock->bbJumpKind = BBJ_NONE;
            afterDefaultCondBlock->bbJumpDest = nullptr;
            afterDefaultCondBlock->bbJumpKind = BBJ_ALWAYS;
            afterDefaultCondBlock->bbJumpDest = uniqueSucc;
    // If the number of possible destinations is small enough, we proceed to expand the switch
    // into a series of conditional branches, otherwise we follow the jump table based switch
    // transformation.
    else if (jumpCnt < minSwitchTabJumpCnt)
        // Lower the switch into a series of compare and branch IR trees.
        //
        // In this case we will morph the node in the following way:
        // 1. Generate a JTRUE statement to evaluate the default case. (This happens above.)
        // 2. Start splitting the switch basic block into subsequent basic blocks, each of which will contain
        //    a statement that is responsible for performing a comparison of the table index and a conditional
        //    branch to the matching case target.

        JITDUMP("Lowering switch BB%02u: using compare/branch expansion\n", originalSwitchBB->bbNum);

        // We'll use 'afterDefaultCondBlock' for the first conditional. After that, we'll add new
        // blocks. If we end up not needing it at all (say, if all the non-default cases just fall through),
        // we'll delete it.
        bool fUsedAfterDefaultCondBlock = false;
        BasicBlock* currentBlock   = afterDefaultCondBlock;
        LIR::Range* currentBBRange = &LIR::AsRange(currentBlock);

        // Walk the entries from 0 to jumpCnt - 2 (all cases except the default). If a case target
        // follows the switch, ignore it and let it fall through. If no case target follows, the last
        // one doesn't need to be a compare/branch: it can be an unconditional branch.
        bool fAnyTargetFollows = false;
        for (unsigned i = 0; i < jumpCnt - 1; ++i)
            assert(currentBlock != nullptr);
            // Remove the switch from the predecessor list of this case target's block.
            // We'll add the proper new predecessor edge later.
            flowList* oldEdge = comp->fgRemoveRefPred(jumpTab[i], afterDefaultCondBlock);

            if (jumpTab[i] == followingBB)
                // This case label follows the switch; let it fall through.
                fAnyTargetFollows = true;

            // We need a block to put in the new compare and/or branch.
            // If we haven't used the afterDefaultCondBlock yet, then use that.
            if (fUsedAfterDefaultCondBlock)
                BasicBlock* newBlock = comp->fgNewBBafter(BBJ_NONE, currentBlock, true);
                comp->fgAddRefPred(newBlock, currentBlock); // The fall-through predecessor.
                currentBlock   = newBlock;
                currentBBRange = &LIR::AsRange(currentBlock);
                assert(currentBlock == afterDefaultCondBlock);
                fUsedAfterDefaultCondBlock = true;

            // We're going to have a branch, either a conditional or unconditional,
            // to the target. Set the target.
            currentBlock->bbJumpDest = jumpTab[i];

            // Wire up the predecessor list for the "branch" case.
            comp->fgAddRefPred(jumpTab[i], currentBlock, oldEdge);

            if (!fAnyTargetFollows && (i == jumpCnt - 2))
                // We're processing the last one, and there is no fall through from any case
                // to the following block, so we can use an unconditional branch to the final
                // case: there is no need to compare against the case index, since it's
                // guaranteed to be taken (since the default case was handled first, above).
                currentBlock->bbJumpKind = BBJ_ALWAYS;
                // Otherwise, it's a conditional branch. Set the branch kind, then add the
                // condition statement.
                currentBlock->bbJumpKind = BBJ_COND;
                // Now, build the conditional statement for the current case that is
                // being evaluated:
                // GT_JTRUE
                //   |__ GT_EQ
                //          |____ (switchIndex) (The temp variable)
                //          |____ (ICon)        (The actual case constant)
                GenTreePtr gtCaseCond =
                    comp->gtNewOperNode(GT_EQ, TYP_INT, comp->gtNewLclvNode(tempLclNum, tempLclType),
                                        comp->gtNewIconNode(i, TYP_INT));
                /* Increment the lvRefCnt and lvRefCntWtd for temp */
                tempVarDsc->incRefCnts(blockWeight, comp);

                GenTreePtr gtCaseBranch = comp->gtNewOperNode(GT_JTRUE, TYP_VOID, gtCaseCond);
                LIR::Range caseRange    = LIR::SeqTree(comp, gtCaseBranch);
                currentBBRange->InsertAtEnd(std::move(caseRange));
        if (fAnyTargetFollows)
            // There is a fall-through to the following block. In the loop
            // above, we deleted all the predecessor edges from the switch.
            // In this case, we need to add one back.
            comp->fgAddRefPred(currentBlock->bbNext, currentBlock);

        if (!fUsedAfterDefaultCondBlock)
            // All the cases were fall-through! We don't need this block.
            // Convert it from BBJ_SWITCH to BBJ_NONE and unset the BBF_DONT_REMOVE flag
            // so fgRemoveBlock() doesn't complain.
            JITDUMP("Lowering switch BB%02u: all switch cases were fall-through\n", originalSwitchBB->bbNum);
            assert(currentBlock == afterDefaultCondBlock);
            assert(currentBlock->bbJumpKind == BBJ_SWITCH);
            currentBlock->bbJumpKind = BBJ_NONE;
            currentBlock->bbFlags &= ~BBF_DONT_REMOVE;
            comp->fgRemoveBlock(currentBlock, /* unreachable */ false); // It's an empty block.
        // Lower the switch into an indirect branch using a jump table:
        //
        // 1. Create the constant for the default case
        // 2. Generate a GT_GE condition to compare to the default case
        // 3. Generate a GT_JTRUE to jump.
        // 4. Load the jump table address into a local (presumably the just
        //    created constant for GT_SWITCH).
        // 5. Create a new node for the lowered switch, this will both generate
        //    the branch table and also will be responsible for the indirect
        //    branch.

        JITDUMP("Lowering switch BB%02u: using jump table expansion\n", originalSwitchBB->bbNum);
        GenTreePtr gtTableSwitch =
            comp->gtNewOperNode(GT_SWITCH_TABLE, TYP_VOID, comp->gtNewLclvNode(tempLclNum, tempLclType),
                                comp->gtNewJmpTableNode());
        /* Increment the lvRefCnt and lvRefCntWtd for temp */
        tempVarDsc->incRefCnts(blockWeight, comp);

        // this block no longer branches to the default block
        afterDefaultCondBlock->bbJumpSwt->removeDefault();
        comp->fgInvalidateSwitchDescMapEntry(afterDefaultCondBlock);

        LIR::Range& afterDefaultCondBBRange = LIR::AsRange(afterDefaultCondBlock);
        afterDefaultCondBBRange.InsertAtEnd(LIR::SeqTree(comp, gtTableSwitch));

    GenTree* next = node->gtNext;

    // Get rid of the GT_SWITCH(temp).
    switchBBRange.Remove(node->gtOp.gtOp1);
    switchBBRange.Remove(node);
// NOTE: this method deliberately does not update the call arg table. It must only
// be used by NewPutArg and LowerArg; these functions are responsible for updating
// the call arg table as necessary.
void Lowering::ReplaceArgWithPutArgOrCopy(GenTree** argSlot, GenTree* putArgOrCopy)
    assert(argSlot != nullptr);
    assert(*argSlot != nullptr);
    assert(putArgOrCopy->OperGet() == GT_PUTARG_REG || putArgOrCopy->OperGet() == GT_PUTARG_STK ||
           putArgOrCopy->OperGet() == GT_COPY);

    GenTree* arg = *argSlot;

    // Replace the argument with the putarg/copy
    *argSlot                 = putArgOrCopy;
    putArgOrCopy->gtOp.gtOp1 = arg;

    // Insert the putarg/copy into the block
    BlockRange().InsertAfter(arg, putArgOrCopy);
//------------------------------------------------------------------------
// NewPutArg: rewrites the tree to put an arg in a register or on the stack.
//
// Arguments:
//    call - the call whose arg is being rewritten.
//    arg  - the arg being rewritten.
//    info - the ArgTabEntry information for the argument.
//    type - the type of the argument.
//
// Return Value:
//    The new tree that was created to put the arg in the right place
//    or the incoming arg if the arg tree was not rewritten.
//
// Assumptions:
//    call, arg, and info must be non-null.
//
// Notes:
//    For System V systems with native struct passing (i.e. FEATURE_UNIX_AMD64_STRUCT_PASSING defined)
//    this method allocates a single GT_PUTARG_REG for one-eightbyte structs and a GT_LIST of two GT_PUTARG_REGs
//    for two-eightbyte structs.
//
//    For STK-passed structs the method generates a GT_PUTARG_STK tree. For System V systems with native struct
//    passing (i.e. FEATURE_UNIX_AMD64_STRUCT_PASSING defined) this method also sets the GC pointer count and the
//    pointer layout object, so the codegen of the GT_PUTARG_STK can use this to optimize copying to the stack by
//    value (using block copy primitives for non-GC pointers and a single TARGET_POINTER_SIZE copy with GC info
//    recorded).
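//
//    In short (informally): register-passed args get a GT_PUTARG_REG (or a GT_LIST of them
//    for multi-reg structs), stack-passed args get a GT_PUTARG_STK; TYP_LONG args on 32-bit
//    targets are handled by the caller, LowerArg, which places a putarg under each half of
//    the GT_LONG.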
GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryPtr info, var_types type)
    assert(call != nullptr);
    assert(arg != nullptr);
    assert(info != nullptr);

    GenTreePtr putArg         = nullptr;
    bool       updateArgTable = true;

#if !defined(_TARGET_64BIT_)
    if (varTypeIsLong(type))
        // For TYP_LONG, we leave the GT_LONG as the arg, and put the putArg below it.
        // Therefore, we don't update the arg table entry.
        updateArgTable = false;
#endif // !defined(_TARGET_64BIT_)

    bool isOnStack = true;
#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
    if (varTypeIsStruct(type))
        isOnStack = !info->structDesc.passedInRegisters;
        isOnStack = info->regNum == REG_STK;
#else  // !FEATURE_UNIX_AMD64_STRUCT_PASSING
    isOnStack = info->regNum == REG_STK;
#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING

    // TYP_SIMD8 is passed in an integer register. We need the putArg node to be of the int type.
    if (type == TYP_SIMD8 && genIsValidIntReg(info->regNum))
#endif // FEATURE_SIMD
#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
            // The following code makes sure a register-passed struct arg is moved to
            // the register before the call is made.
            // There are two cases (comments added in the code below.)
            // 1. The struct is of size one eightbyte:
            //    In this case a new tree is created that is GT_PUTARG_REG
            //    with op1 being the original argument.
            // 2. The struct is contained in 2 eightbytes:
            //    In this case the arg comes as a GT_LIST of two GT_LCL_FLDs - the two eightbytes of the struct.
            //    The code creates a GT_PUTARG_REG node for each GT_LCL_FLD in the GT_LIST
            //    and splices it in the list with the corresponding original GT_LCL_FLD tree as op1.
            assert(info->structDesc.eightByteCount != 0);

            if (info->structDesc.eightByteCount == 1)
                // Case 1 above: Create a GT_PUTARG_REG node with op1 of the original tree.
                //
                // Here is the IR for this operation:
                //
                // N001 (3,  2) [000017] -------N---- /--* &lclVar  byref  V00 loc0
                // N003 (6,  5) [000052] *--XG------- /--* indir    int
                // N004 (3,  2) [000046] -------N---- +--* &lclVar  byref  V02 tmp0
                //      (13,11) [000070] ---XG--R---- arg0 in out+00 /--* storeIndir int
                // N009 (3,  4) [000054] -------N---- arg0 in rdi    +--* lclFld   int    V02 tmp0 [+0] (last use)
                // N011 (33,21) [000018] --CXG------- * call      void   Test.Foo.test1
                //
                // lowering arg : (13,11) [000070] ---XG--R---- * storeIndir int
                //
                // lowering arg : N009 (3, 4) [000054] -------N---- * lclFld int V02 tmp0 [+0] (last use)
                // new node is  :      (3, 4) [000071] ------------ * putarg_reg int RV
                //
                // N001 (3,  2) [000017] -------N---- /--* &lclVar  byref  V00 loc0
                // N003 (6,  5) [000052] *--XG------- /--* indir    int
                // N004 (3,  2) [000046] -------N---- +--* &lclVar  byref  V02 tmp0
                //      (13,11) [000070] ---XG--R---- arg0 in out+00 /--* storeIndir int
                // N009 (3,  4) [000054] -------N----      |         /--* lclFld   int    V02 tmp0 [+0] (last use)
                //      (3,  4) [000071] ------------ arg0 in rdi    +--* putarg_reg int RV
                // N011 (33,21) [000018] --CXG------- * call      void   Test.Foo.test1

                putArg = comp->gtNewOperNode(GT_PUTARG_REG, type, arg);
            else if (info->structDesc.eightByteCount == 2)
                // Case 2 above: Convert the LCL_FLDs to PUTARG_REG
                //
                // N001 (3,  2) [000025] -------N---- Source      /--* &lclVar  byref  V01 loc1
                // N003 (3,  2) [000056] -------N---- Destination +--* &lclVar  byref  V03 tmp1
                // N006 (1,  1) [000058] ------------             +--* const    int    16
                // N007 (12,12) [000059] -A--G----L-- arg0 SETUP  /--* copyBlk  void
                // N009 (3,  4) [000061] -------N---- arg0 in rdi +--* lclFld   long   V03 tmp1 [+0]
                // N010 (3,  4) [000063] ------------ arg0 in rsi +--* lclFld   long   V03 tmp1 [+8] (last use)
                // N014 (40,31) [000026] --CXG------- * call      void   Test.Foo.test2
                //
                // lowering arg : N007 (12,12) [000059] -A--G----L-- * copyBlk void
                //
                // lowering arg : N012 (11,13) [000065] ------------ * <list> struct
                //
                // N001 (3,  2) [000025] -------N---- Source      /--* &lclVar  byref  V01 loc1
                // N003 (3,  2) [000056] -------N---- Destination +--* &lclVar  byref  V03 tmp1
                // N006 (1,  1) [000058] ------------             +--* const    int    16
                // N007 (12,12) [000059] -A--G----L-- arg0 SETUP  /--* copyBlk  void
                // N009 (3,  4) [000061] -------N----      |       /--* lclFld   long   V03 tmp1 [+0]
                //      (3,  4) [000072] ------------ arg0 in rdi  +--* putarg_reg long
                // N010 (3,  4) [000063] ------------      |       /--* lclFld   long   V03 tmp1 [+8] (last use)
                //      (3,  4) [000073] ------------ arg0 in rsi  +--* putarg_reg long
                // N014 (40,31) [000026] --CXG------- * call      void   Test.Foo.test2
                assert(arg->OperGet() == GT_LIST);

                GenTreeArgList* argListPtr = arg->AsArgList();
                assert(argListPtr->IsAggregate());

                for (unsigned ctr = 0; argListPtr != nullptr; argListPtr = argListPtr->Rest(), ctr++)
                    // Create a new GT_PUTARG_REG node with op1 the original GT_LCL_FLD.
                    GenTreePtr newOper = comp->gtNewOperNode(
                        GT_PUTARG_REG,
                        comp->GetTypeFromClassificationAndSizes(info->structDesc.eightByteClassifications[ctr],
                                                                info->structDesc.eightByteSizes[ctr]),
                        argListPtr->gtOp.gtOp1);

                    // Splice in the new GT_PUTARG_REG node in the GT_LIST
                    ReplaceArgWithPutArgOrCopy(&argListPtr->gtOp.gtOp1, newOper);

                // Just return arg. The GT_LIST is not replaced.
                // Nothing more to do.
                       "Illegal count of eightbytes for the CLR type system"); // No more than 2 eightbytes for the CLR.
#else // not defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
#if FEATURE_MULTIREG_ARGS
        if ((info->numRegs > 1) && (arg->OperGet() == GT_LIST))
            assert(arg->OperGet() == GT_LIST);

            GenTreeArgList* argListPtr = arg->AsArgList();
            assert(argListPtr->IsAggregate());

            for (unsigned ctr = 0; argListPtr != nullptr; argListPtr = argListPtr->Rest(), ctr++)
                GenTreePtr curOp  = argListPtr->gtOp.gtOp1;
                var_types  curTyp = curOp->TypeGet();

                // Create a new GT_PUTARG_REG node with op1
                GenTreePtr newOper = comp->gtNewOperNode(GT_PUTARG_REG, curTyp, curOp);

                // Splice in the new GT_PUTARG_REG node in the GT_LIST
                ReplaceArgWithPutArgOrCopy(&argListPtr->gtOp.gtOp1, newOper);

            // Just return arg. The GT_LIST is not replaced.
            // Nothing more to do.
#endif // FEATURE_MULTIREG_ARGS
#endif // not defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)

        putArg = comp->gtNewOperNode(GT_PUTARG_REG, type, arg);
        // Mark this one as a tail call arg if it is a fast tail call.
        // This provides the info to put this argument in the incoming arg area slot
        // instead of in the outgoing arg area slot.

        FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY(assert(info->isStruct == varTypeIsStruct(type))); // Make sure state is
                                                                                                 // correct.
#if FEATURE_FASTTAILCALL
        putArg = new (comp, GT_PUTARG_STK)
            GenTreePutArgStk(GT_PUTARG_STK, type, arg,
                             info->slotNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(info->numSlots)
                                 FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(info->isStruct),
                             call->IsFastTailCall() DEBUGARG(call));
#else
        putArg = new (comp, GT_PUTARG_STK)
            GenTreePutArgStk(GT_PUTARG_STK, type, arg,
                             info->slotNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(info->numSlots)
                                 FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(info->isStruct) DEBUGARG(call));
#endif // FEATURE_FASTTAILCALL
#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
        // If the ArgTabEntry indicates that this arg is a struct,
        // get and store the number of slots that are references.
        // This is later used in the codegen for the PUT_ARG_STK implementation
        // for structs to decide whether, and how many, single eight-byte copies
        // need to be done (only for reference slots), so gcinfo is emitted.
        // For non-reference slots faster/smaller-size instructions are used -
        // pair copying using XMM registers or rep mov instructions.
            unsigned numRefs  = 0;
            BYTE*    gcLayout = new (comp, CMK_Codegen) BYTE[info->numSlots];
            // We use GT_OBJ for non-SIMD struct arguments. However, for
            // SIMD arguments the GT_OBJ has already been transformed.
            if (arg->gtOper != GT_OBJ)
                assert(varTypeIsSIMD(arg));
                assert(!varTypeIsSIMD(arg));
                numRefs = comp->info.compCompHnd->getClassGClayout(arg->gtObj.gtClass, gcLayout);

            putArg->AsPutArgStk()->setGcPointers(numRefs, gcLayout);
#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
    else if (info->isStruct)
        if (info->structDesc.passedInRegisters)
    JITDUMP("new node is : ");

    if (arg->gtFlags & GTF_LATE_ARG)
        putArg->gtFlags |= GTF_LATE_ARG;
    else if (updateArgTable)
//------------------------------------------------------------------------
// LowerArg: Lower one argument of a call. This entails splicing a "putarg" node between
// the argument evaluation and the call. This is the point at which the source is
// consumed and the value transitions from control of the register allocator to the calling
// convention.
//
// Arguments:
//    call  - The call node
//    ppArg - Pointer to the call argument pointer. We might replace the call argument by
//            changing *ppArg.
//
void Lowering::LowerArg(GenTreeCall* call, GenTreePtr* ppArg)
    GenTreePtr arg = *ppArg;

    JITDUMP("lowering arg : ");

    // No assignments should remain by Lowering.
    assert(!arg->OperIsAssignment());
    assert(!arg->OperIsPutArgStk());

    // Assignments/stores at this level are not really placing an argument.
    // They are setting up temporary locals that will later be placed into
    // outgoing regs or stack.
    if (arg->OperIsStore() || arg->IsArgPlaceHolderNode() || arg->IsNothingNode() || arg->OperIsCopyBlkOp())

    fgArgTabEntryPtr info = comp->gtArgEntryByNode(call, arg);
    assert(info->node == arg);
    bool      isReg = (info->regNum != REG_STK);
    var_types type  = arg->TypeGet();

    if (varTypeIsSmall(type))
        // Normalize 'type', it represents the item that we will be storing in the Outgoing Args

    // If we hit this we are probably double-lowering.
    assert(!arg->OperIsPutArg());
#if !defined(_TARGET_64BIT_)
    if (varTypeIsLong(type))
            NYI("Lowering of long register argument");

            // For longs, we will create two PUTARG_STKs below the GT_LONG. The hi argument needs to
            // be pushed first, so the hi PUTARG_STK will precede the lo PUTARG_STK in execution order.
            noway_assert(arg->OperGet() == GT_LONG);
            GenTreePtr argLo = arg->gtGetOp1();
            GenTreePtr argHi = arg->gtGetOp2();

            GenTreePtr putArgLo = NewPutArg(call, argLo, info, type);
            GenTreePtr putArgHi = NewPutArg(call, argHi, info, type);

            arg->gtOp.gtOp1 = putArgLo;
            arg->gtOp.gtOp2 = putArgHi;

            BlockRange().InsertBefore(arg, putArgHi, putArgLo);

            // The execution order now looks like this:
            // argLoPrev <-> argLoFirst ... argLo <-> argHiFirst ... argHi <-> putArgHi <-> putArgLo <-> arg(GT_LONG)

            assert((arg->gtFlags & GTF_REVERSE_OPS) == 0);
            arg->gtFlags |= GTF_REVERSE_OPS; // We consume the high arg (op2) first.
#endif // !defined(_TARGET_64BIT_)
#ifdef _TARGET_ARM64_
        // For vararg call, reg args should be all integer.
        // Insert a copy to move float value to integer register.
        if (call->IsVarargs() && varTypeIsFloating(type))
            var_types  intType = (type == TYP_DOUBLE) ? TYP_LONG : TYP_INT;
            GenTreePtr intArg  = comp->gtNewOperNode(GT_COPY, intType, arg);

            info->node = intArg;
            ReplaceArgWithPutArgOrCopy(ppArg, intArg);

            // Update arg/type with new ones.

        putArg = NewPutArg(call, arg, info, type);
        // In the case of a register-passable struct (in one or two registers)
        // the NewPutArg returns a new node (GT_PUTARG_REG or a GT_LIST with two GT_PUTARG_REGs.)
        // If an extra node is returned, splice it in the right place in the tree.
            ReplaceArgWithPutArgOrCopy(ppArg, putArg);
// do lowering steps for each arg of a call
void Lowering::LowerArgsForCall(GenTreeCall* call)
    JITDUMP("objp:\n======\n");
    if (call->gtCallObjp)
        LowerArg(call, &call->gtCallObjp);

    GenTreeArgList* args = call->gtCallArgs;

    JITDUMP("\nargs:\n======\n");
    for (; args; args = args->Rest())
        LowerArg(call, &args->Current());

    JITDUMP("\nlate:\n======\n");
    for (args = call->gtCallLateArgs; args; args = args->Rest())
        LowerArg(call, &args->Current());
// helper that creates a node representing a relocatable physical address computation
// (optionally specifying the register to place it in)
GenTree* Lowering::AddrGen(ssize_t addr, regNumber reg)
    // this should end up in codegen as : instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg, addr)
    GenTree* result = comp->gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR);

    result->gtRegNum = reg;

// variant that takes a void*
GenTree* Lowering::AddrGen(void* addr, regNumber reg)
    return AddrGen((ssize_t)addr, reg);
// do lowering steps for a call; this includes:
//   - adding the placement nodes (either stack or register variety) for arguments
//   - lowering the expression that calculates the target address
//   - adding nodes for other operations that occur after the call sequence starts and before
//     control transfer occurs (profiling and tail call helpers, pinvoke incantations)
void Lowering::LowerCall(GenTree* node)
    GenTreeCall* call = node->AsCall();

    JITDUMP("lowering call (before):\n");
    DISPTREERANGE(BlockRange(), call);

    LowerArgsForCall(call);

// RyuJIT arm is not set up for lowered call control
#ifndef _TARGET_ARM_

    // note that everything generated from this point on runs AFTER the outgoing args are placed
    GenTree* result = nullptr;

    // for x86, this is where we record ESP for checking later to make sure stack is balanced

    // Check for Delegate.Invoke(). If so, we inline it. We get the
    // target-object and target-function from the delegate-object, and do
    // an indirect call.
    if (call->IsDelegateInvoke())
        result = LowerDelegateInvoke(call);

        // Virtual and interface calls
        switch (call->gtFlags & GTF_CALL_VIRT_KIND_MASK)
            case GTF_CALL_VIRT_STUB:
                result = LowerVirtualStubCall(call);

            case GTF_CALL_VIRT_VTABLE:
                // stub dispatching is off or this is not a virtual call (could be a tailcall)
                result = LowerVirtualVtableCall(call);

            case GTF_CALL_NONVIRT:
                if (call->IsUnmanaged())
                    result = LowerNonvirtPinvokeCall(call);
                else if (call->gtCallType == CT_INDIRECT)
                    result = LowerIndirectNonvirtCall(call);
                    result = LowerDirectCall(call);

                noway_assert(!"strange call type");
    if (call->IsTailCallViaHelper())
        // Either controlExpr or gtCallAddr must contain real call target.
        if (result == nullptr)
            assert(call->gtCallType == CT_INDIRECT);
            assert(call->gtCallAddr != nullptr);
            result = call->gtCallAddr;

        result = LowerTailCallViaHelper(call, result);
    else if (call->IsFastTailCall())
        LowerFastTailCall(call);

    if (result != nullptr)
        LIR::Range resultRange = LIR::SeqTree(comp, result);

        JITDUMP("results of lowering call:\n");
        DISPRANGE(resultRange);

        GenTree* insertionPoint = call;
        if (!call->IsTailCallViaHelper())
            // The controlExpr should go before the gtCallCookie and the gtCallAddr, if they exist
            //
            // TODO-LIR: find out what's really required here, as this is currently a tree order
            // dependency.
            if (call->gtCallType == CT_INDIRECT)
                bool isClosed = false;
                if (call->gtCallCookie != nullptr)
                    GenTree* firstCallAddrNode = BlockRange().GetTreeRange(call->gtCallAddr, &isClosed).FirstNode();
                    assert(call->gtCallCookie->Precedes(firstCallAddrNode));

                    insertionPoint = BlockRange().GetTreeRange(call->gtCallCookie, &isClosed).FirstNode();
                else if (call->gtCallAddr != nullptr)
                    insertionPoint = BlockRange().GetTreeRange(call->gtCallAddr, &isClosed).FirstNode();

        BlockRange().InsertBefore(insertionPoint, std::move(resultRange));

        call->gtControlExpr = result;
#endif //!_TARGET_ARM_
    if (comp->opts.IsJit64Compat())
        CheckVSQuirkStackPaddingNeeded(call);

    JITDUMP("lowering call (after):\n");
    DISPTREERANGE(BlockRange(), call);
// Though the below-described issue gets fixed in the intellitrace dll of VS2015 (a.k.a Dev14),
// we still need this quirk for desktop so that older versions of VS (e.g. VS2010/2012)
// continue to work.
// This quirk is excluded from other targets that have no back-compat burden.
//
// Quirk for VS debug-launch scenario to work:
// See if this is a PInvoke call with exactly one param that is the address of a struct local.
// In such a case indicate to frame-layout logic to add 16 bytes of padding
// between the save-reg area and locals. This is to protect against the buffer
// overrun bug in microsoft.intellitrace.11.0.0.dll!ProfilerInterop.InitInterop().
//
// A work-around to this bug is to disable IntelliTrace debugging
// (VS->Tools->Options->IntelliTrace->Enable IntelliTrace - uncheck this option).
// The reason why this works on Jit64 is that at the point of AV the call stack is
//
//   GetSystemInfo() Native call
//   IL_Stub generated for PInvoke declaration.
//   ProfilerInterface::InitInterop()
//   ProfilerInterface.Cctor()
//
// The cctor body has just the call to InitInterop(). The VM asm worker is holding
// something in rbx that is used immediately after the Cctor call. The Jit64-generated
// InitInterop() method is pushing the registers in the following order
//
// Due to the buffer overrun, rbx doesn't get impacted. Whereas RyuJIT-jitted code of
// the same method is pushing regs in the following order
//
// Therefore as a fix, we add padding between the save-reg area and locals to
// make this scenario work against JB.
//
// Note: If this quirk gets broken due to other JIT optimizations, we should consider
// a more tolerant fix. One such fix is to pad the struct.
void Lowering::CheckVSQuirkStackPaddingNeeded(GenTreeCall* call)
    assert(comp->opts.IsJit64Compat());

#ifdef _TARGET_AMD64_
    // Confine this to IL stub calls which aren't marked as unmanaged.
    if (call->IsPInvoke() && !call->IsUnmanaged())
        bool       paddingNeeded  = false;
        GenTreePtr firstPutArgReg = nullptr;
        for (GenTreeArgList* args = call->gtCallLateArgs; args; args = args->Rest())
            GenTreePtr tmp = args->Current();
            if (tmp->OperGet() == GT_PUTARG_REG)
                if (firstPutArgReg == nullptr)
                    firstPutArgReg = tmp;
                    GenTreePtr op1 = firstPutArgReg->gtOp.gtOp1;

                    if (op1->OperGet() == GT_LCL_VAR_ADDR)
                        unsigned lclNum = op1->AsLclVarCommon()->GetLclNum();
                        // TODO-1stClassStructs: This is here to duplicate previous behavior,
                        // but is not needed because the scenario being quirked did not involve
                        // a SIMD or enregisterable struct.
                        // if(comp->lvaTable[lclNum].TypeGet() == TYP_STRUCT)
                        if (varTypeIsStruct(comp->lvaTable[lclNum].TypeGet()))
                            // First arg is addr of a struct local.
                            paddingNeeded = true;
                            // Not a struct local.
                            assert(paddingNeeded == false);
                        // First arg is not a local var addr.
                        assert(paddingNeeded == false);
                // Has more than one arg.
                paddingNeeded = false;

        comp->compVSQuirkStackPaddingNeeded = VSQUIRK_STACK_PAD;
#endif // _TARGET_AMD64_
// Inserts profiler hook, GT_PROF_HOOK for a tail call node.
//
// We need to insert this after all nested calls, but before all the arguments to this call have been set up.
// To do this, we look for the first GT_PUTARG_STK or GT_PUTARG_REG, and insert the hook immediately before
// that. If there are no args, then it should be inserted before the call node.
//
// For example:
//              *  stmtExpr  void  (top level) (IL 0x000...0x010)
// arg0 SETUP   |  /--*  argPlace  ref    REG NA $c5
// this in rcx  |  |     /--*  argPlace  ref    REG NA $c1
//              |  |     |  /--*  call      ref    System.Globalization.CultureInfo.get_InvariantCulture $c2
// arg1 SETUP   |  |     +--*  st.lclVar ref    V02 tmp1          REG NA $c2
//              |  |     |  /--*  lclVar    ref    V02 tmp1         u:2 (last use) REG NA $c2
// arg1 in rdx  |  |     +--*  putarg_reg ref    REG NA
//              |  |     |  /--*  lclVar    ref    V00 arg0         u:2 (last use) REG NA $80
// this in rcx  |  |     +--*  putarg_reg ref    REG NA
//              |  |  /--*  call nullcheck ref    System.String.ToLower $c5
//              |  |  {  *  stmtExpr  void  (embedded)(IL 0x000... ???)
//              |  |  {  \--*  prof_hook void   REG NA
// arg0 in rcx  |  +--*  putarg_reg ref    REG NA
// control expr |  +--*  const(h)  long   0x7ffe8e910e98 ftn REG NA
//              \--*  call      void   System.Runtime.Remoting.Identity.RemoveAppNameOrAppGuidIfNecessary $VN.Void
//
// In this case, the GT_PUTARG_REG src is a nested call. We need to put the instructions after that call
// (as shown). We assume that of all the GT_PUTARG_*, only the first one can have a nested call.
//
// Arguments:
//    callNode       - tail call node
//    insertionPoint - caller-provided insertion point; if null, the
//                     profiler hook is inserted before args are set up
//                     but after all arg side effects are computed.
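//
// Note: the search below therefore scans gtCallArgs for the first GT_PUTARG_STK, then
// gtCallLateArgs for the first GT_PUTARG_REG or GT_PUTARG_STK, and finally falls back
// to the call node itself.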
void Lowering::InsertProfTailCallHook(GenTreeCall* call, GenTree* insertionPoint)
    assert(call->IsTailCall());
    assert(comp->compIsProfilerHookNeeded());

    if (insertionPoint == nullptr)
        GenTreePtr tmp = nullptr;
        for (GenTreeArgList* args = call->gtCallArgs; args; args = args->Rest())
            tmp = args->Current();
            assert(tmp->OperGet() != GT_PUTARG_REG); // We don't expect to see these in gtCallArgs
            if (tmp->OperGet() == GT_PUTARG_STK)
                insertionPoint = tmp;

        if (insertionPoint == nullptr)
            for (GenTreeArgList* args = call->gtCallLateArgs; args; args = args->Rest())
                tmp = args->Current();
                if ((tmp->OperGet() == GT_PUTARG_REG) || (tmp->OperGet() == GT_PUTARG_STK))
                    insertionPoint = tmp;

            // If there are no args, insert before the call node
            if (insertionPoint == nullptr)
                insertionPoint = call;

    assert(insertionPoint != nullptr);
    GenTreePtr profHookNode = new (comp, GT_PROF_HOOK) GenTree(GT_PROF_HOOK, TYP_VOID);
    BlockRange().InsertBefore(insertionPoint, profHookNode);
// Lower fast tail call implemented as epilog+jmp.
// Also inserts PInvoke method epilog if required.
void Lowering::LowerFastTailCall(GenTreeCall* call)
#if FEATURE_FASTTAILCALL
    // Tail call restrictions, i.e. conditions under which the tail prefix is ignored.
    // Most of these checks are already done by the importer or fgMorphTailCall().
    // This serves as a double sanity check.
    assert((comp->info.compFlags & CORINFO_FLG_SYNCH) == 0); // tail calls from synchronized methods
    assert(!comp->opts.compNeedSecurityCheck);               // tail call from methods that need security check
    assert(!call->IsUnmanaged());                            // tail calls to unmanaged methods
    assert(!comp->compLocallocUsed);                         // tail call from methods that also do localloc
    assert(!comp->getNeedsGSSecurityCookie());               // jit64 compat: tail calls from methods that need GS check

    // We expect to see a call that meets the following conditions
    assert(call->IsFastTailCall());
    // VM cannot use return address hijacking when A() and B() tail call each
    // other in mutual recursion. Therefore, this block is reachable through
    // a GC-safe point or the whole method is marked as fully interruptible.
    //
    // optReachWithoutCall() depends on the fact that loop header blocks
    // will have a block number > fgLastBB. These loop headers get added
    // after dominator computation and get skipped by optReachWithoutCall().
    // The below condition cannot be asserted in lower because fgSimpleLowering()
    // can add a new basic block for range check failure which becomes
    // fgLastBB with block number > loop header block number.
    // assert((comp->compCurBB->bbFlags & BBF_GC_SAFE_POINT) ||
    //        !comp->optReachWithoutCall(comp->fgFirstBB, comp->compCurBB) || comp->genInterruptible);
    // If PInvokes are in-lined, we have to remember to execute the PInvoke method epilog anywhere that
    // a method returns. This is a case of the caller method having both PInvokes and tail calls.
    if (comp->info.compCallUnmanaged)
        InsertPInvokeMethodEpilog(comp->compCurBB DEBUGARG(call));

    // Args for a tail call are set up in the incoming arg area. The gc-ness of args of the
    // caller and callee (which is being tail called) may not match. Therefore, everything
    // from arg setup until the epilog needs to be non-interruptible by GC. This is
    // achieved by inserting GT_START_NONGC before the very first GT_PUTARG_STK node
    // of the call is set up. Note that once a stack arg is set up, it cannot have nested
    // calls subsequently in execution order to set up other args, because the nested
    // call could over-write the stack arg that was set up earlier.
    GenTreePtr firstPutArgStk = nullptr;
    GenTreeArgList*      args;
    ArrayStack<GenTree*> putargs(comp);

    for (args = call->gtCallArgs; args; args = args->Rest())
        GenTreePtr tmp = args->Current();
        if (tmp->OperGet() == GT_PUTARG_STK)

    for (args = call->gtCallLateArgs; args; args = args->Rest())
        GenTreePtr tmp = args->Current();
        if (tmp->OperGet() == GT_PUTARG_STK)

    if (putargs.Height() > 0)
        firstPutArgStk = putargs.Bottom();
    // If we have a putarg_stk node, also count the number of non-standard args the
    // call node has. Note that while determining whether a tail call can be fast
    // tail called, we don't count non-standard args (passed in R10 or R11) since they
    // don't contribute to outgoing arg space. These non-standard args are not
    // accounted in caller's arg count but accounted in callee's arg count after
    // fgMorphArgs(). Therefore, exclude callee's non-standard args while mapping
    // callee's stack arg num to corresponding caller's stack arg num.
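    // (E.g., with one non-standard arg added to the callee's arg table, the callee's stack
    // arg at argNum N corresponds to the caller's param N - 1; see the subtraction below.)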
    unsigned calleeNonStandardArgCount = call->GetNonStandardAddedArgCount(comp);
    // Say Caller(a, b, c, d, e) fast tail calls Callee(e, d, c, b, a)
    // i.e. passes its arguments in reverse to Callee. During call site
    // setup, after computing argument side effects, stack args are set up
    // first and reg args next. In the above example, both Caller's and
    // Callee's stack args (e and a respectively) share the same stack slot
    // and are alive at the same time. The act of setting up Callee's
    // stack arg will over-write the stack arg of Caller, and if there are
    // further uses of the Caller stack arg we have to make sure that we move
    // it to a temp before over-writing its slot and use the temp in place of
    // the corresponding Caller stack arg.
    //
    // For the above example, conceptually this is what is done:
    //       tmp = e;
    //       Stack slot of e  = a
    //       R9 = b, R8 = c, RDX = d
    //       RCX = tmp
    //
    // The below logic is meant to detect cases like this and introduce
    // temps to set up args correctly for Callee.
    for (int i = 0; i < putargs.Height(); i++)
        GenTreePtr putArgStkNode = putargs.Bottom(i);

        assert(putArgStkNode->OperGet() == GT_PUTARG_STK);

        // Get the caller arg num corresponding to this callee arg.
        // Note that these two args share the same stack slot. Therefore,
        // if there are further uses of the corresponding caller arg, we need
        // to move it to a temp and use the temp in this call tree.
        //
        // Note that Caller is guaranteed to have a param corresponding to
        // this Callee's arg since the fast tail call mechanism counts the
        // stack slots required for both Caller and Callee for passing params
        // and allows fast tail call only if stack slots required by Caller >=
        // Callee.
        fgArgTabEntryPtr argTabEntry = comp->gtArgEntryByNode(call, putArgStkNode);
        assert(argTabEntry);
        unsigned callerArgNum = argTabEntry->argNum - calleeNonStandardArgCount;
        noway_assert(callerArgNum < comp->info.compArgsCount);

        unsigned   callerArgLclNum = callerArgNum;
        LclVarDsc* callerArgDsc    = comp->lvaTable + callerArgLclNum;
        if (callerArgDsc->lvPromoted)
            callerArgLclNum =
                callerArgDsc->lvFieldLclStart; // update the callerArgNum to the promoted struct field's lclNum
            callerArgDsc = comp->lvaTable + callerArgLclNum;
        noway_assert(callerArgDsc->lvIsParam);
        // Start searching in execution order list till we encounter call node
        unsigned  tmpLclNum = BAD_VAR_NUM;
        var_types tmpType   = TYP_UNDEF;
        for (GenTreePtr treeNode = putArgStkNode->gtNext; treeNode != call; treeNode = treeNode->gtNext)
            if (treeNode->OperIsLocal() || treeNode->OperIsLocalAddr())
                // This should neither be a GT_REG_VAR nor GT_PHI_ARG.
                assert((treeNode->OperGet() != GT_REG_VAR) && (treeNode->OperGet() != GT_PHI_ARG));

                GenTreeLclVarCommon* lcl    = treeNode->AsLclVarCommon();
                LclVarDsc*           lclVar = &comp->lvaTable[lcl->gtLclNum];

                // Fast tail calling criteria permits passing of structs of size 1, 2, 4 and 8 as args.
                // It is possible that the callerArgLclNum corresponds to such a struct whose stack slot
                // is getting over-written by setting up of a stack arg and there are further uses of
                // any of its fields if such a struct is type-dependently promoted. In this case too
                // we need to introduce a temp.
                if ((lcl->gtLclNum == callerArgNum) || (lcl->gtLclNum == callerArgLclNum))
                    // Create tmp and use it in place of callerArgDsc
                    if (tmpLclNum == BAD_VAR_NUM)
                        tmpLclNum = comp->lvaGrabTemp(
                            true DEBUGARG("Fast tail call lowering is creating a new local variable"));
                        comp->lvaSortAgain                 = true;
                        tmpType                            = genActualType(callerArgDsc->lvaArgType());
                        comp->lvaTable[tmpLclNum].lvType   = tmpType;
                        comp->lvaTable[tmpLclNum].lvRefCnt = 1;

                    lcl->SetLclNum(tmpLclNum);
                    lcl->SetOper(GT_LCL_VAR);
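                    // This use of the caller's param now refers to the temp instead, so the
                    // original stack slot can safely be over-written by the callee's arg.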
        // If we have created a temp, insert an embedded assignment statement before
        // the first putArgStkNode, i.e.
        //     tmpLcl = CallerArg
        if (tmpLclNum != BAD_VAR_NUM)
            assert(tmpType != TYP_UNDEF);
            GenTreeLclVar* local =
                new (comp, GT_LCL_VAR) GenTreeLclVar(GT_LCL_VAR, tmpType, callerArgLclNum, BAD_IL_OFFSET);
            GenTree* assignExpr = comp->gtNewTempAssign(tmpLclNum, local);
            BlockRange().InsertBefore(firstPutArgStk, LIR::SeqTree(comp, assignExpr));
1650 // Insert GT_START_NONGC node before the first GT_PUTARG_STK node.
1651 // Note that if there are no args to be setup on stack, no need to
1652 // insert GT_START_NONGC node.
1653 GenTreePtr startNonGCNode = nullptr;
1654 if (firstPutArgStk != nullptr)
1656 startNonGCNode = new (comp, GT_START_NONGC) GenTree(GT_START_NONGC, TYP_VOID);
1657 BlockRange().InsertBefore(firstPutArgStk, startNonGCNode);
1659 // Gc-interruptability in the following case:
1660 // foo(a, b, c, d, e) { bar(a, b, c, d, e); }
1661 // bar(a, b, c, d, e) { foo(a, b, d, d, e); }
1663 // Since the instruction group starting from the instruction that sets up first
1664 // stack arg to the end of the tail call is marked as non-gc interruptible,
1665 // this will form a non-interruptible tight loop causing gc-starvation. To fix
1666 // this we insert GT_NO_OP as embedded stmt before GT_START_NONGC, if the method
1667 // has a single basic block and is not a GC-safe point. The presence of a single
1668 // nop outside non-gc interruptible region will prevent gc starvation.
1669 if ((comp->fgBBcount == 1) && !(comp->compCurBB->bbFlags & BBF_GC_SAFE_POINT))
1671 assert(comp->fgFirstBB == comp->compCurBB);
1672 GenTreePtr noOp = new (comp, GT_NO_OP) GenTree(GT_NO_OP, TYP_VOID);
1673 BlockRange().InsertBefore(startNonGCNode, noOp);
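// The resulting prefix of the block is then, in execution order (roughly):
//     GT_NO_OP, GT_START_NONGC, GT_PUTARG_STK ..., tail call
// so the single interruptible nop precedes the non-interruptible region.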
1677 // Insert GT_PROF_HOOK node to emit profiler tail call hook. This should be
// inserted before the args are set up, but after the side effects of the args are
1679 // computed. That is, GT_PROF_HOOK node needs to be inserted before GT_START_NONGC
1680 // node if one exists.
1681 if (comp->compIsProfilerHookNeeded())
1683 InsertProfTailCallHook(call, startNonGCNode);
1686 #else // !FEATURE_FASTTAILCALL
// The platform chose not to implement a fast tail call mechanism.
// In that case we should never reach this method, as the
// expectation is that IsTailCallViaHelper() will always be
// true on such a platform.
unreached();
#endif // FEATURE_FASTTAILCALL
}
1696 //------------------------------------------------------------------------
1697 // LowerTailCallViaHelper: lower a call via the tailcall helper. Morph
1698 // has already inserted tailcall helper special arguments. This function
1699 // inserts actual data for some placeholders.
// For AMD64, lower
//      tail.call(void* copyRoutine, void* dummyArg, ...)
// as
//      Jit_TailCall(void* copyRoutine, void* callTarget, ...)
//
// For x86, lower
//      tail.call(<function args>, int numberOfOldStackArgs, int dummyNumberOfNewStackArgs, int flags, void* dummyArg)
// as
//      JIT_TailCall(<function args>, int numberOfOldStackArgsWords, int numberOfNewStackArgsWords, int flags, void*
//      callTarget)
//
// Note that the special arguments are on the stack, whereas the function arguments follow the normal convention.
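// For illustration (hypothetical x86 call f(a, b)): morph produces
//      tail.call(a, b, numberOfOldStackArgs, dummyNumberOfNewStackArgs, flags, dummyArg)
// and the lowering below overwrites the placeholder args in place, yielding
//      JIT_TailCall(a, b, numberOfOldStackArgsWords, numberOfNewStackArgsWords, flags, callTarget)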
1713 // Also inserts PInvoke method epilog if required.
1716 // call - The call node
1717 // callTarget - The real call target. This is used to replace the dummyArg during lowering.
1720 // Returns control expression tree for making a call to helper Jit_TailCall.
1722 GenTree* Lowering::LowerTailCallViaHelper(GenTreeCall* call, GenTree* callTarget)
1724 // Tail call restrictions i.e. conditions under which tail prefix is ignored.
1725 // Most of these checks are already done by importer or fgMorphTailCall().
1726 // This serves as a double sanity check.
1727 assert((comp->info.compFlags & CORINFO_FLG_SYNCH) == 0); // tail calls from synchronized methods
1728 assert(!comp->opts.compNeedSecurityCheck); // tail call from methods that need security check
assert(!call->IsUnmanaged()); // tail calls to unmanaged methods
1730 assert(!comp->compLocallocUsed); // tail call from methods that also do localloc
1732 #ifdef _TARGET_AMD64_
1733 assert(!comp->getNeedsGSSecurityCookie()); // jit64 compat: tail calls from methods that need GS check
1734 #endif // _TARGET_AMD64_
1736 // We expect to see a call that meets the following conditions
1737 assert(call->IsTailCallViaHelper());
1738 assert(callTarget != nullptr);
// The TailCall helper call never returns to the caller and is not GC interruptible.
// Therefore the block containing the tail call should be a GC safe point to avoid
// GC starvation.
assert(comp->compCurBB->bbFlags & BBF_GC_SAFE_POINT);
1745 // If PInvokes are in-lined, we have to remember to execute PInvoke method epilog anywhere that
// a method returns. This handles the case where the caller method has both PInvokes and tail calls.
1747 if (comp->info.compCallUnmanaged)
1749 InsertPInvokeMethodEpilog(comp->compCurBB DEBUGARG(call));
1752 // Remove gtCallAddr from execution order if present.
1753 if (call->gtCallType == CT_INDIRECT)
assert(call->gtCallAddr != nullptr);

bool isClosed;
LIR::ReadOnlyRange callAddrRange = BlockRange().GetTreeRange(call->gtCallAddr, &isClosed);
assert(isClosed);

BlockRange().Remove(std::move(callAddrRange));
1764 // The callTarget tree needs to be sequenced.
1765 LIR::Range callTargetRange = LIR::SeqTree(comp, callTarget);
1767 fgArgTabEntry* argEntry;
1769 #if defined(_TARGET_AMD64_)
1771 // For AMD64, first argument is CopyRoutine and second argument is a place holder node.
1774 argEntry = comp->gtArgEntryByArgNum(call, 0);
1775 assert(argEntry != nullptr);
1776 assert(argEntry->node->gtOper == GT_PUTARG_REG);
1777 GenTree* firstArg = argEntry->node->gtOp.gtOp1;
1778 assert(firstArg->gtOper == GT_CNS_INT);
1781 // Replace second arg by callTarget.
1782 argEntry = comp->gtArgEntryByArgNum(call, 1);
1783 assert(argEntry != nullptr);
1784 assert(argEntry->node->gtOper == GT_PUTARG_REG);
1785 GenTree* secondArg = argEntry->node->gtOp.gtOp1;
1787 BlockRange().InsertAfter(secondArg, std::move(callTargetRange));
bool isClosed;
LIR::ReadOnlyRange secondArgRange = BlockRange().GetTreeRange(secondArg, &isClosed);
assert(isClosed);

BlockRange().Remove(std::move(secondArgRange));
1795 argEntry->node->gtOp.gtOp1 = callTarget;
1797 #elif defined(_TARGET_X86_)
1799 // Verify the special args are what we expect, and replace the dummy args with real values.
1800 // We need to figure out the size of the outgoing stack arguments, not including the special args.
1801 // The number of 4-byte words is passed to the helper for the incoming and outgoing argument sizes.
1802 // This number is exactly the next slot number in the call's argument info struct.
1803 unsigned nNewStkArgsWords = call->fgArgInfo->GetNextSlotNum();
1804 assert(nNewStkArgsWords >= 4); // There must be at least the four special stack args.
1805 nNewStkArgsWords -= 4;
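// For illustration (hypothetical values): if GetNextSlotNum() reports 7 slots in total,
// 4 of them are the special args, leaving nNewStkArgsWords = 3, i.e. 12 bytes of
// ordinary outgoing stack arguments.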
1807 unsigned numArgs = call->fgArgInfo->ArgCount();
1809 // arg 0 == callTarget.
1810 argEntry = comp->gtArgEntryByArgNum(call, numArgs - 1);
1811 assert(argEntry != nullptr);
1812 assert(argEntry->node->gtOper == GT_PUTARG_STK);
1813 GenTree* arg0 = argEntry->node->gtOp.gtOp1;
1815 BlockRange().InsertAfter(arg0, std::move(callTargetRange));
bool isClosed;
LIR::ReadOnlyRange secondArgRange = BlockRange().GetTreeRange(arg0, &isClosed);
assert(isClosed);
BlockRange().Remove(std::move(secondArgRange));

argEntry->node->gtOp.gtOp1 = callTarget;
// arg 1 == flags
argEntry = comp->gtArgEntryByArgNum(call, numArgs - 2);
1825 assert(argEntry != nullptr);
1826 assert(argEntry->node->gtOper == GT_PUTARG_STK);
1827 GenTree* arg1 = argEntry->node->gtOp.gtOp1;
1828 assert(arg1->gtOper == GT_CNS_INT);
1830 ssize_t tailCallHelperFlags = 1 | // always restore EDI,ESI,EBX
1831 (call->IsVirtualStub() ? 0x2 : 0x0); // Stub dispatch flag
1832 arg1->gtIntCon.gtIconVal = tailCallHelperFlags;
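// For illustration: a plain tail call passes flags == 0x1 (restore registers only),
// while a virtual stub dispatch tail call passes flags == 0x3.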
1834 // arg 2 == numberOfNewStackArgsWords
1835 argEntry = comp->gtArgEntryByArgNum(call, numArgs - 3);
1836 assert(argEntry != nullptr);
1837 assert(argEntry->node->gtOper == GT_PUTARG_STK);
1838 GenTree* arg2 = argEntry->node->gtOp.gtOp1;
1839 assert(arg2->gtOper == GT_CNS_INT);
1841 arg2->gtIntCon.gtIconVal = nNewStkArgsWords;
1844 // arg 3 == numberOfOldStackArgsWords
1845 argEntry = comp->gtArgEntryByArgNum(call, numArgs - 4);
1846 assert(argEntry != nullptr);
1847 assert(argEntry->node->gtOper == GT_PUTARG_STK);
1848 GenTree* arg3 = argEntry->node->gtOp.gtOp1;
1849 assert(arg3->gtOper == GT_CNS_INT);
#else // !(_TARGET_AMD64_ || _TARGET_X86_)
NYI("LowerTailCallViaHelper");
#endif
1856 // Transform this call node into a call to Jit tail call helper.
1857 call->gtCallType = CT_HELPER;
1858 call->gtCallMethHnd = comp->eeFindHelper(CORINFO_HELP_TAILCALL);
1859 call->gtFlags &= ~GTF_CALL_VIRT_KIND_MASK;
1861 // Lower this as if it were a pure helper call.
1862 call->gtCallMoreFlags &= ~(GTF_CALL_M_TAILCALL | GTF_CALL_M_TAILCALL_VIA_HELPER);
1863 GenTree* result = LowerDirectCall(call);
1865 // Now add back tail call flags for identifying this node as tail call dispatched via helper.
1866 call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL | GTF_CALL_M_TAILCALL_VIA_HELPER;
1868 // Insert profiler tail call hook if needed.
1869 // Since we don't know the insertion point, pass null for second param.
1870 if (comp->compIsProfilerHookNeeded())
1872 InsertProfTailCallHook(call, nullptr);
1878 //------------------------------------------------------------------------
1879 // Lowering::LowerCompare: lowers a compare node.
1881 // For 64-bit targets, this doesn't do much of anything: all comparisons
1882 // that we support can be handled in code generation on such targets.
1884 // For 32-bit targets, however, any comparison that feeds a `GT_JTRUE`
1885 // node must be lowered such that the liveness of the operands to the
1886 // comparison is properly visible to the rest of the backend. As such,
1887 // a 64-bit comparison is lowered from something like this:
1889 // ------------ BB02 [004..014) -> BB02 (cond), preds={BB02,BB01} succs={BB03,BB02}
1890 // N001 ( 1, 1) [000006] ------------ t6 = lclVar int V02 loc0 u:5 $148
1893 // N002 ( 2, 3) [000007] ---------U-- t7 = * cast long <- ulong <- uint $3c0
1895 // N003 ( 3, 10) [000009] ------------ t9 = lconst long 0x0000000000000003 $101
1899 // N004 ( 9, 17) [000010] N------N-U-- t10 = * < int $149
1902 // N005 ( 11, 19) [000011] ------------ * jmpTrue void
1904 // To something like this:
1906 // ------------ BB02 [004..014) -> BB03 (cond), preds={BB06,BB07,BB01} succs={BB06,BB03}
1907 // [000099] ------------ t99 = const int 0
1909 // [000101] ------------ t101 = const int 0
1913 // N004 ( 9, 17) [000010] N------N-U-- t10 = * > int $149
1916 // N005 ( 11, 19) [000011] ------------ * jmpTrue void
1919 // ------------ BB06 [???..???) -> BB02 (cond), preds={BB02} succs={BB07,BB02}
1920 // [000105] -------N-U-- jcc void cond=<
1923 // ------------ BB07 [???..???) -> BB02 (cond), preds={BB06} succs={BB03,BB02}
1924 // N001 ( 1, 1) [000006] ------------ t6 = lclVar int V02 loc0 u:5 $148
1926 // N003 ( 3, 10) [000009] ------------ t9 = const int 3
1930 // [000106] N------N-U-- t106 = * < int
1933 // [000107] ------------ * jmpTrue void
1935 // Which will eventually generate code similar to the following:
1937 // 33DB xor ebx, ebx
1938 // 85DB test ebx, ebx
1939 // 7707 ja SHORT G_M50523_IG04
1940 // 72E7 jb SHORT G_M50523_IG03
1941 // 83F803 cmp eax, 3
1942 // 72E2 jb SHORT G_M50523_IG03
1944 void Lowering::LowerCompare(GenTree* cmp)
1946 #ifndef _TARGET_64BIT_
if (cmp->gtGetOp1()->TypeGet() != TYP_LONG)
{
    return;
}

LIR::Use cmpUse;

if (!BlockRange().TryGetUse(cmp, &cmpUse) || cmpUse.User()->OperGet() != GT_JTRUE)
1959 GenTree* src1 = cmp->gtGetOp1();
1960 GenTree* src2 = cmp->gtGetOp2();
1961 unsigned weight = m_block->getBBWeight(comp);
1963 LIR::Use loSrc1(BlockRange(), &(src1->gtOp.gtOp1), src1);
1964 LIR::Use loSrc2(BlockRange(), &(src2->gtOp.gtOp1), src2);
1966 if (loSrc1.Def()->OperGet() != GT_CNS_INT && loSrc1.Def()->OperGet() != GT_LCL_VAR)
1968 loSrc1.ReplaceWithLclVar(comp, weight);
1971 if (loSrc2.Def()->OperGet() != GT_CNS_INT && loSrc2.Def()->OperGet() != GT_LCL_VAR)
1973 loSrc2.ReplaceWithLclVar(comp, weight);
1976 BasicBlock* jumpDest = m_block->bbJumpDest;
1977 BasicBlock* nextDest = m_block->bbNext;
1978 BasicBlock* newBlock = comp->fgSplitBlockAtEnd(m_block);
1980 cmp->gtType = TYP_INT;
1981 cmp->gtOp.gtOp1 = src1->gtOp.gtOp2;
1982 cmp->gtOp.gtOp2 = src2->gtOp.gtOp2;
1984 if (cmp->OperGet() == GT_EQ || cmp->OperGet() == GT_NE)
// 64-bit equality comparisons (no matter the polarity) require two 32-bit comparisons: one for the upper 32
// bits and one for the lower 32 bits. We update the flow graph so that m_block compares the upper halves
// and the new block compares the lower halves, roughly:
//
//     m_block:  jmpTrue(hi1 != hi2)    (GT_EQ: jump past the low compare / GT_NE: jump to the target)
//     newBlock: jmpTrue(lo1 ==/!= lo2) => original jump target
2008 BlockRange().Remove(loSrc1.Def());
2009 BlockRange().Remove(loSrc2.Def());
2010 GenTree* loCmp = comp->gtNewOperNode(cmp->OperGet(), TYP_INT, loSrc1.Def(), loSrc2.Def());
2011 loCmp->gtFlags = cmp->gtFlags;
2012 GenTree* loJtrue = comp->gtNewOperNode(GT_JTRUE, TYP_VOID, loCmp);
2013 LIR::AsRange(newBlock).InsertAfter(nullptr, loSrc1.Def(), loSrc2.Def(), loCmp, loJtrue);
2015 m_block->bbJumpKind = BBJ_COND;
2017 if (cmp->OperGet() == GT_EQ)
2019 cmp->gtOper = GT_NE;
2020 m_block->bbJumpDest = nextDest;
2021 nextDest->bbFlags |= BBF_JMP_TARGET;
    comp->fgAddRefPred(nextDest, m_block);
}
else
{
2026 m_block->bbJumpDest = jumpDest;
    comp->fgAddRefPred(jumpDest, m_block);
}
2030 assert(newBlock->bbJumpKind == BBJ_COND);
2031 assert(newBlock->bbJumpDest == jumpDest);
}
else
{
    // 64-bit ordinal comparisons are more complicated: they require two comparisons for the upper 32 bits and one
    // comparison for the lower 32 bits. We update the flowgraph as such:
    //     BB0 (m_block):   hi compare, taken when (!cond(hi) && !eq(hi)) - the result is known false
    //     BB3 (newBlock):  hi jcc, taken to the original target when (cond(hi) && !eq(hi)) - the result is known true
    //     BB4 (newBlock2): lo compare, taken to the original target when cond(lo)
2061 // Note that the actual comparisons used to implement "(!cond(hi) && !eq(hi))" and "(cond(hi) && !eq(hi))"
2062 // differ based on the original condition, and all consist of a single node. The switch statement below
2063 // performs the necessary mapping.
2066 genTreeOps hiCmpOper;
2067 genTreeOps loCmpOper;
switch (cmp->OperGet())
{
    case GT_LT:
        cmp->gtOper = GT_GT;
        hiCmpOper   = GT_LT;
        loCmpOper   = GT_LT;
        break;
    case GT_LE:
        cmp->gtOper = GT_GT;
        hiCmpOper   = GT_LT;
        loCmpOper   = GT_LE;
        break;
    case GT_GE:
        cmp->gtOper = GT_LT;
        hiCmpOper   = GT_GT;
        loCmpOper   = GT_GE;
        break;
    case GT_GT:
        cmp->gtOper = GT_LT;
        hiCmpOper   = GT_GT;
        loCmpOper   = GT_GT;
        break;
    default:
        unreached();
}
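// For example, lowering a signed x < y:
//   m_block:   hi(x) > hi(y)            -> fall-through successor (result known false)
//   newBlock:  jcc hi(x) < hi(y)        -> original jump target (result known true)
//   newBlock2: lo(x) < lo(y), unsigned  -> original jump target
// If the high words are equal, control falls through to the unsigned low-word compare.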
2095 BasicBlock* newBlock2 = comp->fgSplitBlockAtEnd(newBlock);
2097 GenTree* hiJcc = new (comp, GT_JCC) GenTreeJumpCC(hiCmpOper);
2098 hiJcc->gtFlags = cmp->gtFlags;
2099 LIR::AsRange(newBlock).InsertAfter(nullptr, hiJcc);
2101 BlockRange().Remove(loSrc1.Def());
2102 BlockRange().Remove(loSrc2.Def());
2103 GenTree* loCmp = comp->gtNewOperNode(loCmpOper, TYP_INT, loSrc1.Def(), loSrc2.Def());
2104 loCmp->gtFlags = cmp->gtFlags | GTF_UNSIGNED;
2105 GenTree* loJtrue = comp->gtNewOperNode(GT_JTRUE, TYP_VOID, loCmp);
2106 LIR::AsRange(newBlock2).InsertAfter(nullptr, loSrc1.Def(), loSrc2.Def(), loCmp, loJtrue);
2108 m_block->bbJumpKind = BBJ_COND;
2109 m_block->bbJumpDest = nextDest;
2110 nextDest->bbFlags |= BBF_JMP_TARGET;
2111 comp->fgAddRefPred(nextDest, m_block);
2113 newBlock->bbJumpKind = BBJ_COND;
2114 newBlock->bbJumpDest = jumpDest;
2115 comp->fgAddRefPred(jumpDest, newBlock);
2117 assert(newBlock2->bbJumpKind == BBJ_COND);
2118 assert(newBlock2->bbJumpDest == jumpDest);
2121 BlockRange().Remove(src1);
BlockRange().Remove(src2);
#endif // !_TARGET_64BIT_
}
2126 // Lower "jmp <method>" tail call to insert PInvoke method epilog if required.
2127 void Lowering::LowerJmpMethod(GenTree* jmp)
2129 assert(jmp->OperGet() == GT_JMP);
2131 JITDUMP("lowering GT_JMP\n");
2133 JITDUMP("============");
2135 // If PInvokes are in-lined, we have to remember to execute PInvoke method epilog anywhere that
2136 // a method returns.
2137 if (comp->info.compCallUnmanaged)
2139 InsertPInvokeMethodEpilog(comp->compCurBB DEBUGARG(jmp));
2143 // Lower GT_RETURN node to insert PInvoke method epilog if required.
2144 void Lowering::LowerRet(GenTree* ret)
2146 assert(ret->OperGet() == GT_RETURN);
2148 JITDUMP("lowering GT_RETURN\n");
2150 JITDUMP("============");
// A method doing PInvokes has exactly one return block, unless it has tail calls.
2153 if (comp->info.compCallUnmanaged && (comp->compCurBB == comp->genReturnBB))
2155 InsertPInvokeMethodEpilog(comp->compCurBB DEBUGARG(ret));
2159 GenTree* Lowering::LowerDirectCall(GenTreeCall* call)
2161 noway_assert(call->gtCallType == CT_USER_FUNC || call->gtCallType == CT_HELPER);
2163 // Don't support tail calling helper methods.
// But we might encounter tail calls dispatched via the JIT helper, which appear as tail calls to a helper.
2165 noway_assert(!call->IsTailCall() || call->IsTailCallViaHelper() || call->gtCallType == CT_USER_FUNC);
2167 // Non-virtual direct/indirect calls: Work out if the address of the
2168 // call is known at JIT time. If not it is either an indirect call
// or the address must be accessed via a single/double indirection.
2172 InfoAccessType accessType;
2173 CorInfoHelpFunc helperNum = comp->eeGetHelperNum(call->gtCallMethHnd);
2175 #ifdef FEATURE_READYTORUN_COMPILER
2176 if (call->gtEntryPoint.addr != nullptr)
2178 accessType = call->gtEntryPoint.accessType;
2179 addr = call->gtEntryPoint.addr;
2183 if (call->gtCallType == CT_HELPER)
2185 noway_assert(helperNum != CORINFO_HELP_UNDEF);
// The convention for getHelperFtn seems to be (it's not documented)
// that it either returns the address directly, or returns null and sets
// pAddr to another address, which then requires an indirection.
2191 addr = comp->info.compCompHnd->getHelperFtn(helperNum, (void**)&pAddr);
2193 if (addr != nullptr)
2195 accessType = IAT_VALUE;
2199 accessType = IAT_PVALUE;
2205 noway_assert(helperNum == CORINFO_HELP_UNDEF);
2207 CORINFO_ACCESS_FLAGS aflags = CORINFO_ACCESS_ANY;
2209 if (call->IsSameThis())
2211 aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_THIS);
2214 if (!call->NeedsNullCheck())
2216 aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_NONNULL);
2219 CORINFO_CONST_LOOKUP addrInfo;
2220 comp->info.compCompHnd->getFunctionEntryPoint(call->gtCallMethHnd, &addrInfo, aflags);
2222 accessType = addrInfo.accessType;
2223 addr = addrInfo.addr;
GenTree* result = nullptr;

switch (accessType)
{
    case IAT_VALUE:
2230 // Non-virtual direct call to known address
2231 if (!IsCallTargetInRange(addr) || call->IsTailCall())
        {
            result = AddrGen(addr);
        }
        else
        {
            // a direct call within range of hardware relative call instruction
2238 // stash the address for codegen
            call->gtDirectCallAddress = addr;
        }
        break;

    case IAT_PVALUE:
    {
        // Non-virtual direct calls to addresses accessed by
2246 // a single indirection.
2247 GenTree* cellAddr = AddrGen(addr);
2248 GenTree* indir = Ind(cellAddr);
2250 #ifdef FEATURE_READYTORUN_COMPILER
2251 #ifdef _TARGET_ARM64_
2252 // For arm64, we dispatch code same as VSD using X11 for indirection cell address,
2253 // which ZapIndirectHelperThunk expects.
2254 if (call->IsR2RRelativeIndir())
{
    cellAddr->gtRegNum = REG_R2R_INDIRECT_PARAM;
    indir->gtRegNum = REG_JUMP_THUNK_PARAM;
}
#endif // _TARGET_ARM64_
#endif // FEATURE_READYTORUN_COMPILER

        result = indir;
        break;
    }

    case IAT_PPVALUE:
2266 // Non-virtual direct calls to addresses accessed by
2267 // a double indirection.
2269 // Double-indirection. Load the address into a register
2270 // and call indirectly through the register
2271 noway_assert(helperNum == CORINFO_HELP_UNDEF);
2272 result = AddrGen(addr);
2273 result = Ind(Ind(result));
    default:
        noway_assert(!"Bad accessType");
        break;
}

return result;
}
2284 GenTree* Lowering::LowerDelegateInvoke(GenTreeCall* call)
2286 noway_assert(call->gtCallType == CT_USER_FUNC);
2288 assert((comp->info.compCompHnd->getMethodAttribs(call->gtCallMethHnd) &
2289 (CORINFO_FLG_DELEGATE_INVOKE | CORINFO_FLG_FINAL)) == (CORINFO_FLG_DELEGATE_INVOKE | CORINFO_FLG_FINAL));
2291 GenTree* thisArgNode;
2292 if (call->IsTailCallViaHelper())
2294 #ifdef _TARGET_X86_ // x86 tailcall via helper follows normal calling convention, but with extra stack args.
2295 const unsigned argNum = 0;
2296 #else // !_TARGET_X86_
2297 // In case of helper dispatched tail calls, "thisptr" will be the third arg.
2298 // The first two args are: real call target and addr of args copy routine.
2299 const unsigned argNum = 2;
2300 #endif // !_TARGET_X86_
2302 fgArgTabEntryPtr thisArgTabEntry = comp->gtArgEntryByArgNum(call, argNum);
2303 thisArgNode = thisArgTabEntry->node;
2307 thisArgNode = comp->gtGetThisArg(call);
2310 assert(thisArgNode->gtOper == GT_PUTARG_REG);
2311 GenTree* originalThisExpr = thisArgNode->gtOp.gtOp1;
// We're going to use the 'this' expression multiple times, so make a local to copy it.

unsigned lclNum;

#ifdef _TARGET_X86_
if (call->IsTailCallViaHelper() && originalThisExpr->IsLocal())
{
2320 // For ordering purposes for the special tailcall arguments on x86, we forced the
2321 // 'this' pointer in this case to a local in Compiler::fgMorphTailCall().
2322 // We could possibly use this case to remove copies for all architectures and non-tailcall
2323 // calls by creating a new lcl var or lcl field reference, as is done in the
2324 // LowerVirtualVtableCall() code.
2325 assert(originalThisExpr->OperGet() == GT_LCL_VAR);
    lclNum = originalThisExpr->AsLclVarCommon()->GetLclNum();
}
else
#endif // _TARGET_X86_
{
2331 unsigned delegateInvokeTmp = comp->lvaGrabTemp(true DEBUGARG("delegate invoke call"));
2333 LIR::Use thisExprUse(BlockRange(), &thisArgNode->gtOp.gtOp1, thisArgNode);
2334 thisExprUse.ReplaceWithLclVar(comp, m_block->getBBWeight(comp), delegateInvokeTmp);
2336 originalThisExpr = thisExprUse.Def(); // it's changed; reload it.
    lclNum = delegateInvokeTmp;
}
2340 // replace original expression feeding into thisPtr with
2341 // [originalThis + offsetOfDelegateInstance]
2343 GenTree* newThisAddr = new (comp, GT_LEA)
2344 GenTreeAddrMode(TYP_REF, originalThisExpr, nullptr, 0, comp->eeGetEEInfo()->offsetOfDelegateInstance);
2346 GenTree* newThis = comp->gtNewOperNode(GT_IND, TYP_REF, newThisAddr);
2348 BlockRange().InsertAfter(originalThisExpr, newThisAddr, newThis);
2350 thisArgNode->gtOp.gtOp1 = newThis;
2352 // the control target is
2353 // [originalThis + firstTgtOffs]
2355 GenTree* base = new (comp, GT_LCL_VAR) GenTreeLclVar(originalThisExpr->TypeGet(), lclNum, BAD_IL_OFFSET);
2357 unsigned targetOffs = comp->eeGetEEInfo()->offsetOfDelegateFirstTarget;
2358 GenTree* result = new (comp, GT_LEA) GenTreeAddrMode(TYP_REF, base, nullptr, 0, targetOffs);
2359 GenTree* callTarget = Ind(result);
// don't need to sequence and insert this tree, caller will do it

return callTarget;
}
2366 GenTree* Lowering::LowerIndirectNonvirtCall(GenTreeCall* call)
2369 if (call->gtCallCookie != nullptr)
2371 NYI_X86("Morphing indirect non-virtual call with non-standard args");
// Indirect cookie calls get transformed by fgMorphArgs into an indirect call with non-standard args.
// Hence we should never see this type of call in lower.
2378 noway_assert(call->gtCallCookie == nullptr);
2383 //------------------------------------------------------------------------
2384 // CreateReturnTrapSeq: Create a tree to perform a "return trap", used in PInvoke
2385 // epilogs to invoke a GC under a condition. The return trap checks some global
2386 // location (the runtime tells us where that is and how many indirections to make),
2387 // then, based on the result, conditionally calls a GC helper. We use a special node
2388 // for this because at this time (late in the compilation phases), introducing flow
2389 // is tedious/difficult.
2391 // This is used for PInvoke inlining.
2394 // Code tree to perform the action.
2396 GenTree* Lowering::CreateReturnTrapSeq()
2398 // The GT_RETURNTRAP node expands to this:
2399 // if (g_TrapReturningThreads)
2401 // RareDisablePreemptiveGC();
2404 // The only thing to do here is build up the expression that evaluates 'g_TrapReturningThreads'.
2406 void* pAddrOfCaptureThreadGlobal = nullptr;
2407 LONG* addrOfCaptureThreadGlobal = comp->info.compCompHnd->getAddrOfCaptureThreadGlobal(&pAddrOfCaptureThreadGlobal);
GenTree* testTree;
if (addrOfCaptureThreadGlobal != nullptr)
{
2412 testTree = Ind(AddrGen(addrOfCaptureThreadGlobal));
}
else
{
    testTree = Ind(Ind(AddrGen(pAddrOfCaptureThreadGlobal)));
}
2418 return comp->gtNewOperNode(GT_RETURNTRAP, TYP_INT, testTree);
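// For illustration: depending on which address the runtime returned, testTree is either
//   IND(addrOfCaptureThreadGlobal)         - one indirection, or
//   IND(IND(pAddrOfCaptureThreadGlobal))   - two indirections,
// and the GT_RETURNTRAP expands in codegen to "if (*g_TrapReturningThreads) call the GC helper".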
2421 //------------------------------------------------------------------------
2422 // SetGCState: Create a tree that stores the given constant (0 or 1) into the
2423 // thread's GC state field.
2425 // This is used for PInvoke inlining.
2428 // state - constant (0 or 1) to store into the thread's GC state field.
2431 // Code tree to perform the action.
2433 GenTree* Lowering::SetGCState(int state)
2435 // Thread.offsetOfGcState = 0/1
2437 assert(state == 0 || state == 1);
2439 const CORINFO_EE_INFO* pInfo = comp->eeGetEEInfo();
2441 GenTree* base = new (comp, GT_LCL_VAR) GenTreeLclVar(TYP_I_IMPL, comp->info.compLvFrameListRoot, -1);
2443 GenTree* storeGcState = new (comp, GT_STOREIND)
2444 GenTreeStoreInd(TYP_BYTE,
2445 new (comp, GT_LEA) GenTreeAddrMode(TYP_I_IMPL, base, nullptr, 1, pInfo->offsetOfGCState),
2446 new (comp, GT_CNS_INT) GenTreeIntCon(TYP_BYTE, state));
2448 return storeGcState;
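// For illustration, for state == 0 the tree built above is roughly:
//   STOREIND(TYP_BYTE, LEA(frameListRoot + offsetOfGCState), CNS_INT(TYP_BYTE, 0))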
2451 //------------------------------------------------------------------------
2452 // CreateFrameLinkUpdate: Create a tree that either links or unlinks the
2453 // locally-allocated InlinedCallFrame from the Frame list.
2455 // This is used for PInvoke inlining.
2458 // action - whether to link (push) or unlink (pop) the Frame
2461 // Code tree to perform the action.
2463 GenTree* Lowering::CreateFrameLinkUpdate(FrameLinkAction action)
2465 const CORINFO_EE_INFO* pInfo = comp->eeGetEEInfo();
2466 const CORINFO_EE_INFO::InlinedCallFrameInfo& callFrameInfo = pInfo->inlinedCallFrameInfo;
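// For illustration, the tree built below is roughly:
//   STOREIND(LEA(frameListRoot + offsetOfThreadFrame),
//            PushFrame: LCL_FLD_ADDR(inlinedCallFrame + offsetOfFrameVptr)
//            PopFrame:  LCL_FLD(inlinedCallFrame + offsetOfFrameLink))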
2468 GenTree* TCB = new (comp, GT_LCL_VAR) GenTreeLclVar(GT_LCL_VAR, TYP_I_IMPL, comp->info.compLvFrameListRoot,
2469 (IL_OFFSET)-1); // cast to resolve ambiguity.
2472 GenTree* addr = new (comp, GT_LEA) GenTreeAddrMode(TYP_I_IMPL, TCB, nullptr, 1, pInfo->offsetOfThreadFrame);
2474 GenTree* data = nullptr;
2476 if (action == PushFrame)
2478 // Thread->m_pFrame = &inlinedCallFrame;
2479 data = new (comp, GT_LCL_FLD_ADDR)
2480 GenTreeLclFld(GT_LCL_FLD_ADDR, TYP_BYREF, comp->lvaInlinedPInvokeFrameVar, callFrameInfo.offsetOfFrameVptr);
2484 assert(action == PopFrame);
2485 // Thread->m_pFrame = inlinedCallFrame.m_pNext;
2487 data = new (comp, GT_LCL_FLD) GenTreeLclFld(GT_LCL_FLD, TYP_BYREF, comp->lvaInlinedPInvokeFrameVar,
2488 pInfo->inlinedCallFrameInfo.offsetOfFrameLink);
GenTree* storeInd = new (comp, GT_STOREIND) GenTreeStoreInd(TYP_I_IMPL, addr, data);

return storeInd;
}
2494 //------------------------------------------------------------------------
2495 // InsertPInvokeMethodProlog: Create the code that runs at the start of
2496 // every method that has PInvoke calls.
2498 // Initialize the TCB local and the InlinedCallFrame object. Then link ("push")
2499 // the InlinedCallFrame object on the Frame chain. The layout of InlinedCallFrame
2500 // is defined in vm/frames.h. See also vm/jitinterface.cpp for more information.
// The offsets of these fields are returned by the VM in a call to ICorStaticInfo::getEEInfo().
2503 // The (current) layout is as follows:
2505 // 64-bit 32-bit CORINFO_EE_INFO
2506 // offset offset field name offset when set
2507 // -----------------------------------------------------------------------------------------
2508 // +00h +00h GS cookie offsetOfGSCookie
2509 // +08h +04h vptr for class InlinedCallFrame offsetOfFrameVptr method prolog
2510 // +10h +08h m_Next offsetOfFrameLink method prolog
2511 // +18h +0Ch m_Datum offsetOfCallTarget call site
2512 // +20h n/a m_StubSecretArg not set by JIT
// +28h +10h m_pCallSiteSP offsetOfCallSiteSP x86: call site, and zeroed in method prolog;
2515 // non-x86: method prolog (SP remains
2516 // constant in function, after prolog: no
2517 // localloc and PInvoke in same function)
2518 // +30h +14h m_pCallerReturnAddress offsetOfReturnAddress call site
2519 // +38h +18h m_pCalleeSavedFP offsetOfCalleeSavedFP not set by JIT
2520 // +1Ch JIT retval spill area (int) before call_gc ???
2521 // +20h JIT retval spill area (long) before call_gc ???
2522 // +24h Saved value of EBP method prolog ???
2524 // Note that in the VM, InlinedCallFrame is a C++ class whose objects have a 'this' pointer that points
2525 // to the InlinedCallFrame vptr (the 2nd field listed above), and the GS cookie is stored *before*
2526 // the object. When we link the InlinedCallFrame onto the Frame chain, we must point at this location,
2527 // and not at the beginning of the InlinedCallFrame local, which is actually the GS cookie.
2532 void Lowering::InsertPInvokeMethodProlog()
2534 noway_assert(comp->info.compCallUnmanaged);
2535 noway_assert(comp->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
if (comp->opts.ShouldUsePInvokeHelpers())
{
    return;
}
2542 JITDUMP("======= Inserting PInvoke method prolog\n");
2544 LIR::Range& firstBlockRange = LIR::AsRange(comp->fgFirstBB);
2546 const CORINFO_EE_INFO* pInfo = comp->eeGetEEInfo();
2547 const CORINFO_EE_INFO::InlinedCallFrameInfo& callFrameInfo = pInfo->inlinedCallFrameInfo;
2549 // First arg: &compiler->lvaInlinedPInvokeFrameVar + callFrameInfo.offsetOfFrameVptr
2551 GenTree* frameAddr = new (comp, GT_LCL_FLD_ADDR)
2552 GenTreeLclFld(GT_LCL_FLD_ADDR, TYP_BYREF, comp->lvaInlinedPInvokeFrameVar, callFrameInfo.offsetOfFrameVptr);
2554 // Call runtime helper to fill in our InlinedCallFrame and push it on the Frame list:
2555 // TCB = CORINFO_HELP_INIT_PINVOKE_FRAME(&symFrameStart, secretArg);
2556 // for x86, don't pass the secretArg.
2557 CLANG_FORMAT_COMMENT_ANCHOR;
#ifdef _TARGET_X86_
GenTreeArgList* argList = comp->gtNewArgList(frameAddr);
2561 #else // !_TARGET_X86_
2562 GenTreeArgList* argList = comp->gtNewArgList(frameAddr, PhysReg(REG_SECRET_STUB_PARAM));
2563 #endif // !_TARGET_X86_
2565 GenTree* call = comp->gtNewHelperCallNode(CORINFO_HELP_INIT_PINVOKE_FRAME, TYP_I_IMPL, 0, argList);
2567 // some sanity checks on the frame list root vardsc
2568 LclVarDsc* varDsc = &comp->lvaTable[comp->info.compLvFrameListRoot];
2569 noway_assert(!varDsc->lvIsParam);
2570 noway_assert(varDsc->lvType == TYP_I_IMPL);
GenTree* store =
    new (comp, GT_STORE_LCL_VAR) GenTreeLclVar(GT_STORE_LCL_VAR, TYP_I_IMPL, comp->info.compLvFrameListRoot,
                                               (IL_OFFSET)-1); // cast to resolve ambiguity.
2575 store->gtOp.gtOp1 = call;
2576 store->gtFlags |= GTF_VAR_DEF;
2578 GenTree* insertionPoint = firstBlockRange.FirstNonPhiOrCatchArgNode();
2580 comp->fgMorphTree(store);
2581 firstBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, store));
2582 DISPTREERANGE(firstBlockRange, store);
#ifndef _TARGET_X86_ // For x86, this step is done at the call site (due to the stack pointer not being static
                     // in the function).
2587 // --------------------------------------------------------
2588 // InlinedCallFrame.m_pCallSiteSP = @RSP;
2590 GenTreeLclFld* storeSP = new (comp, GT_STORE_LCL_FLD)
2591 GenTreeLclFld(GT_STORE_LCL_FLD, TYP_I_IMPL, comp->lvaInlinedPInvokeFrameVar, callFrameInfo.offsetOfCallSiteSP);
2592 storeSP->gtOp1 = PhysReg(REG_SPBASE);
2594 firstBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, storeSP));
2595 DISPTREERANGE(firstBlockRange, storeSP);
2597 #endif // !_TARGET_X86_
2599 // --------------------------------------------------------
2600 // InlinedCallFrame.m_pCalleeSavedEBP = @RBP;
2602 GenTreeLclFld* storeFP =
2603 new (comp, GT_STORE_LCL_FLD) GenTreeLclFld(GT_STORE_LCL_FLD, TYP_I_IMPL, comp->lvaInlinedPInvokeFrameVar,
2604 callFrameInfo.offsetOfCalleeSavedFP);
2605 storeFP->gtOp1 = PhysReg(REG_FPBASE);
2607 firstBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, storeFP));
2608 DISPTREERANGE(firstBlockRange, storeFP);
2610 // --------------------------------------------------------
2612 if (comp->opts.eeFlags & CORJIT_FLG_IL_STUB)
2614 // Push a frame - if we are NOT in an IL stub, this is done right before the call
// The init routine sets InlinedCallFrame's m_pNext, so we just set the thread's top-of-stack.
2616 GenTree* frameUpd = CreateFrameLinkUpdate(PushFrame);
2617 firstBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, frameUpd));
2618 DISPTREERANGE(firstBlockRange, frameUpd);
2622 //------------------------------------------------------------------------
2623 // InsertPInvokeMethodEpilog: Code that needs to be run when exiting any method
2624 // that has PInvoke inlines. This needs to be inserted any place you can exit the
2625 // function: returns, tailcalls and jmps.
2628 // returnBB - basic block from which a method can return
// lastExpr - GenTree of the last top-level statement of returnBB (debug only arg)
2632 // Code tree to perform the action.
2634 void Lowering::InsertPInvokeMethodEpilog(BasicBlock* returnBB DEBUGARG(GenTreePtr lastExpr))
2636 assert(returnBB != nullptr);
2637 assert(comp->info.compCallUnmanaged);
if (comp->opts.ShouldUsePInvokeHelpers())
{
    return;
}
2644 JITDUMP("======= Inserting PInvoke method epilog\n");
// A method doing PInvoke calls has exactly one return block, unless it has "jmp" or tail calls.
2647 assert(((returnBB == comp->genReturnBB) && (returnBB->bbJumpKind == BBJ_RETURN)) ||
2648 returnBB->endsWithTailCallOrJmp(comp));
2650 LIR::Range& returnBlockRange = LIR::AsRange(returnBB);
2652 GenTree* insertionPoint = returnBlockRange.LastNode();
2653 assert(insertionPoint == lastExpr);
2655 // Note: PInvoke Method Epilog (PME) needs to be inserted just before GT_RETURN, GT_JMP or GT_CALL node in execution
2656 // order so that it is guaranteed that there will be no further PInvokes after that point in the method.
2658 // Example1: GT_RETURN(op1) - say execution order is: Op1, GT_RETURN. After inserting PME, execution order would be
2659 // Op1, PME, GT_RETURN
2661 // Example2: GT_CALL(arg side effect computing nodes, Stk Args Setup, Reg Args setup). The execution order would be
2662 // arg side effect computing nodes, Stk Args setup, Reg Args setup, GT_CALL
2663 // After inserting PME execution order would be:
2664 // arg side effect computing nodes, Stk Args setup, Reg Args setup, PME, GT_CALL
// Example3: GT_JMP. After inserting the PME, the execution order would be: PME, GT_JMP.
// That is, after the PME, the args for the GT_JMP call will be set up.
// TODO-Cleanup: setting GCState to 1 seems to be redundant as InsertPInvokeCallProlog will set it to zero before a
// PInvoke call and InsertPInvokeCallEpilog() will set it back to 1 after the PInvoke. Though this is redundant,
// it is harmless.
// Note that liveness is artificially extending the life of the compLvFrameListRoot var if the method being compiled
// has PInvokes. Deleting the statement below would cause an assert in lsra.cpp::SetLastUses() since
// compLvFrameListRoot will be live-in to a BBJ_RETURN block without any uses. Long term we need to fix liveness for
// the x64 case to properly extend the life of the compLvFrameListRoot var.
2677 // Thread.offsetOfGcState = 0/1
2678 // That is [tcb + offsetOfGcState] = 1
2679 GenTree* storeGCState = SetGCState(1);
2680 returnBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, storeGCState));
2682 if (comp->opts.eeFlags & CORJIT_FLG_IL_STUB)
2684 // Pop the frame, in non-stubs we do this around each PInvoke call
2685 GenTree* frameUpd = CreateFrameLinkUpdate(PopFrame);
2686 returnBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, frameUpd));
2690 //------------------------------------------------------------------------
2691 // InsertPInvokeCallProlog: Emit the call-site prolog for direct calls to unmanaged code.
2692 // It does all the necessary call-site setup of the InlinedCallFrame.
2695 // call - the call for which we are inserting the PInvoke prolog.
2700 void Lowering::InsertPInvokeCallProlog(GenTreeCall* call)
2702 JITDUMP("======= Inserting PInvoke call prolog\n");
2704 GenTree* insertBefore = call;
2705 if (call->gtCallType == CT_INDIRECT)
{
    bool isClosed;
    insertBefore = BlockRange().GetTreeRange(call->gtCallAddr, &isClosed).FirstNode();
    assert(isClosed);
}
2712 const CORINFO_EE_INFO::InlinedCallFrameInfo& callFrameInfo = comp->eeGetEEInfo()->inlinedCallFrameInfo;
2714 gtCallTypes callType = (gtCallTypes)call->gtCallType;
2716 noway_assert(comp->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
2718 #if COR_JIT_EE_VERSION > 460
2719 if (comp->opts.ShouldUsePInvokeHelpers())
2721 // First argument is the address of the frame variable.
2722 GenTree* frameAddr = new (comp, GT_LCL_VAR_ADDR)
2723 GenTreeLclVar(GT_LCL_VAR_ADDR, TYP_BYREF, comp->lvaInlinedPInvokeFrameVar, BAD_IL_OFFSET);
2725 // Insert call to CORINFO_HELP_JIT_PINVOKE_BEGIN
2726 GenTree* helperCall =
2727 comp->gtNewHelperCallNode(CORINFO_HELP_JIT_PINVOKE_BEGIN, TYP_VOID, 0, comp->gtNewArgList(frameAddr));
2729 comp->fgMorphTree(helperCall);
    BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, helperCall));
    return;
}
#endif
2735 // Emit the following sequence:
2737 // InlinedCallFrame.callTarget = methodHandle // stored in m_Datum
2738 // InlinedCallFrame.m_pCallSiteSP = SP // x86 only
2739 // InlinedCallFrame.m_pCallerReturnAddress = return address
2740 // Thread.gcState = 0
2741 // (non-stub) - update top Frame on TCB
2743 // ----------------------------------------------------------------------------------
2744 // Setup InlinedCallFrame.callSiteTarget (which is how the JIT refers to it).
2745 // The actual field is InlinedCallFrame.m_Datum which has many different uses and meanings.
2747 GenTree* src = nullptr;
2749 if (callType == CT_INDIRECT)
2751 if (comp->info.compPublishStubParam)
2753 src = new (comp, GT_LCL_VAR) GenTreeLclVar(TYP_I_IMPL, comp->lvaStubArgumentVar, BAD_IL_OFFSET);
2755 // else { If we don't have secret parameter, m_Datum will be initialized by VM code }
2759 assert(callType == CT_USER_FUNC);
2761 void* pEmbedMethodHandle = nullptr;
2762 CORINFO_METHOD_HANDLE embedMethodHandle =
2763 comp->info.compCompHnd->embedMethodHandle(call->gtCallMethHnd, &pEmbedMethodHandle);
2765 noway_assert((!embedMethodHandle) != (!pEmbedMethodHandle));
2767 if (embedMethodHandle != nullptr)
2769 // InlinedCallFrame.callSiteTarget = methodHandle
2770 src = AddrGen(embedMethodHandle);
2774 // InlinedCallFrame.callSiteTarget = *pEmbedMethodHandle
2775 src = Ind(AddrGen(pEmbedMethodHandle));
if (src != nullptr)
{
    // Store into InlinedCallFrame.m_Datum, the offset of which is given by offsetOfCallTarget.
2782 GenTreeLclFld* store =
2783 new (comp, GT_STORE_LCL_FLD) GenTreeLclFld(GT_STORE_LCL_FLD, TYP_I_IMPL, comp->lvaInlinedPInvokeFrameVar,
2784 callFrameInfo.offsetOfCallTarget);
    store->gtOp1 = src;
    BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, store));
}
#ifdef _TARGET_X86_

// ----------------------------------------------------------------------------------
// InlinedCallFrame.m_pCallSiteSP = SP
2795 GenTreeLclFld* storeCallSiteSP = new (comp, GT_STORE_LCL_FLD)
2796 GenTreeLclFld(GT_STORE_LCL_FLD, TYP_I_IMPL, comp->lvaInlinedPInvokeFrameVar, callFrameInfo.offsetOfCallSiteSP);
2798 storeCallSiteSP->gtOp1 = PhysReg(REG_SPBASE);
BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, storeCallSiteSP));

#endif // _TARGET_X86_
2804 // ----------------------------------------------------------------------------------
2805 // InlinedCallFrame.m_pCallerReturnAddress = &label (the address of the instruction immediately following the call)
2807 GenTreeLclFld* storeLab =
2808 new (comp, GT_STORE_LCL_FLD) GenTreeLclFld(GT_STORE_LCL_FLD, TYP_I_IMPL, comp->lvaInlinedPInvokeFrameVar,
2809 callFrameInfo.offsetOfReturnAddress);
2811 // We don't have a real label, and inserting one is hard (even if we made a special node),
2812 // so for now we will just 'know' what this means in codegen.
2813 GenTreeLabel* labelRef = new (comp, GT_LABEL) GenTreeLabel(nullptr);
2814 labelRef->gtType = TYP_I_IMPL;
2815 storeLab->gtOp1 = labelRef;
2817 BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, storeLab));
2819 if (!(comp->opts.eeFlags & CORJIT_FLG_IL_STUB))
2821 // Set the TCB's frame to be the one we just created.
2822 // Note the init routine for the InlinedCallFrame (CORINFO_HELP_INIT_PINVOKE_FRAME)
2823 // has prepended it to the linked list to maintain the stack of Frames.
2825 // Stubs do this once per stub, not once per call.
2826 GenTree* frameUpd = CreateFrameLinkUpdate(PushFrame);
2827 BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, frameUpd));
2830 // IMPORTANT **** This instruction must come last!!! ****
2831 // It changes the thread's state to Preemptive mode
2832 // ----------------------------------------------------------------------------------
2833 // [tcb + offsetOfGcState] = 0
2835 GenTree* storeGCState = SetGCState(0);
2836 BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, storeGCState));
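// In execution order, the call-site prolog built above is therefore (roughly):
//   store InlinedCallFrame.m_Datum
//   store InlinedCallFrame.m_pCallSiteSP            (x86 only)
//   store InlinedCallFrame.m_pCallerReturnAddress
//   push the Frame onto the thread's Frame chain    (non-IL-stub only)
//   store Thread.gcState = 0                        (always last)
//   ... the unmanaged call ...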
2839 //------------------------------------------------------------------------
2840 // InsertPInvokeCallEpilog: Insert the code that goes after every inlined pinvoke call.
2843 // call - the call for which we are inserting the PInvoke epilog.
2848 void Lowering::InsertPInvokeCallEpilog(GenTreeCall* call)
2850 JITDUMP("======= Inserting PInvoke call epilog\n");
2852 #if COR_JIT_EE_VERSION > 460
2853 if (comp->opts.ShouldUsePInvokeHelpers())
2855 noway_assert(comp->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
2857 // First argument is the address of the frame variable.
2858 GenTree* frameAddr =
2859 new (comp, GT_LCL_VAR) GenTreeLclVar(GT_LCL_VAR, TYP_BYREF, comp->lvaInlinedPInvokeFrameVar, BAD_IL_OFFSET);
2860 frameAddr->gtOper = GT_LCL_VAR_ADDR;
2862 // Insert call to CORINFO_HELP_JIT_PINVOKE_END
2863 GenTree* helperCall =
2864 comp->gtNewHelperCallNode(CORINFO_HELP_JIT_PINVOKE_END, TYP_VOID, 0, comp->gtNewArgList(frameAddr));
2866 comp->fgMorphTree(helperCall);
    BlockRange().InsertAfter(call, LIR::SeqTree(comp, helperCall));
    return;
}
#endif
2873 GenTree* insertionPoint = call->gtNext;
2875 GenTree* tree = SetGCState(1);
2876 BlockRange().InsertBefore(insertionPoint, LIR::SeqTree(comp, tree));
2878 tree = CreateReturnTrapSeq();
2879 BlockRange().InsertBefore(insertionPoint, LIR::SeqTree(comp, tree));
// Pop the frame if necessary
2882 if (!(comp->opts.eeFlags & CORJIT_FLG_IL_STUB))
2884 tree = CreateFrameLinkUpdate(PopFrame);
2885 BlockRange().InsertBefore(insertionPoint, LIR::SeqTree(comp, tree));
2889 //------------------------------------------------------------------------
2890 // LowerNonvirtPinvokeCall: Lower a non-virtual / indirect PInvoke call
2893 // call - The call to lower.
2896 // The lowered call tree.
2898 GenTree* Lowering::LowerNonvirtPinvokeCall(GenTreeCall* call)
2900 // PInvoke lowering varies depending on the flags passed in by the EE. By default,
2901 // GC transitions are generated inline; if CORJIT_FLG2_USE_PINVOKE_HELPERS is specified,
2902 // GC transitions are instead performed using helper calls. Examples of each case are given
2903 // below. Note that the data structure that is used to store information about a call frame
2904 // containing any P/Invoke calls is initialized in the method prolog (see
2905 // InsertPInvokeMethod{Prolog,Epilog} for details).
2907 // Inline transitions:
2908 // InlinedCallFrame inlinedCallFrame;
2912 // // Set up frame information
2913 // inlinedCallFrame.callTarget = methodHandle; // stored in m_Datum
2914 // inlinedCallFrame.m_pCallSiteSP = SP; // x86 only
2915 // inlinedCallFrame.m_pCallerReturnAddress = &label; (the address of the instruction immediately following the
2917 // Thread.m_pFrame = &inlinedCallFrame; (non-IL-stub only)
2919 // // Switch the thread's GC mode to preemptive mode
2920 // thread->m_fPreemptiveGCDisabled = 0;
2922 // // Call the unmanaged method
2925 // // Switch the thread's GC mode back to cooperative mode
2926 // thread->m_fPreemptiveGCDisabled = 1;
2928 // // Rendezvous with a running collection if necessary
2929 // if (g_TrapReturningThreads)
2930 // RareDisablePreemptiveGC();
// Transitions using helpers:
2934 // OpaqueFrame opaqueFrame;
2938 // // Call the JIT_PINVOKE_BEGIN helper
2939 // JIT_PINVOKE_BEGIN(&opaqueFrame);
2941 // // Call the unmanaged method
2944 // // Call the JIT_PINVOKE_END helper
2945 // JIT_PINVOKE_END(&opaqueFrame);
// Note that the JIT_PINVOKE_{BEGIN,END} helpers currently use the default calling convention for the target
// platform. They may be changed in the future so that they preserve all register values.
2950 GenTree* result = nullptr;
2951 void* addr = nullptr;
2953 // assert we have seen one of these
2954 noway_assert(comp->info.compCallUnmanaged != 0);
2956 // All code generated by this function must not contain the randomly-inserted NOPs
2957 // that we insert to inhibit JIT spraying in partial trust scenarios.
2958 // The PINVOKE_PROLOG op signals this to the code generator/emitter.
2960 GenTree* prolog = new (comp, GT_NOP) GenTree(GT_PINVOKE_PROLOG, TYP_VOID);
2961 BlockRange().InsertBefore(call, prolog);
2963 InsertPInvokeCallProlog(call);
2965 if (call->gtCallType != CT_INDIRECT)
2967 noway_assert(call->gtCallType == CT_USER_FUNC);
2968 CORINFO_METHOD_HANDLE methHnd = call->gtCallMethHnd;
2970 CORINFO_CONST_LOOKUP lookup;
2971 #if COR_JIT_EE_VERSION > 460
2972 comp->info.compCompHnd->getAddressOfPInvokeTarget(methHnd, &lookup);
#else
void* pIndirection;
lookup.accessType = IAT_PVALUE;
lookup.addr       = comp->info.compCompHnd->getAddressOfPInvokeFixup(methHnd, &pIndirection);
if (lookup.addr == nullptr)
{
    lookup.accessType = IAT_PPVALUE;
    lookup.addr       = pIndirection;
}
#endif
2984 void* addr = lookup.addr;
switch (lookup.accessType)
{
    case IAT_VALUE:
2988 if (!IsCallTargetInRange(addr))
        {
            result = AddrGen(addr);
        }
        else
        {
2994 // a direct call within range of hardware relative call instruction
2995 // stash the address for codegen
2996 call->gtDirectCallAddress = addr;
2997 #ifdef FEATURE_READYTORUN_COMPILER
            call->gtEntryPoint.addr = nullptr;
#endif
        }
        break;

    case IAT_PVALUE:
        result = Ind(AddrGen(addr));
        break;

    case IAT_PPVALUE:
        result = Ind(Ind(AddrGen(addr)));
        break;
}
}
InsertPInvokeCallEpilog(call);

return result;
}
3018 // Expand the code necessary to calculate the control target.
// Returns: the expression needed to calculate the control target.
// May insert embedded statements.
3021 GenTree* Lowering::LowerVirtualVtableCall(GenTreeCall* call)
3023 noway_assert(call->gtCallType == CT_USER_FUNC);
// If this is a tail call via helper, the thisPtr will be the third argument.
unsigned  thisPtrArgNum;
regNumber thisPtrArgReg;
3029 #ifndef _TARGET_X86_ // x86 tailcall via helper follows normal calling convention, but with extra stack args.
if (call->IsTailCallViaHelper())
{
    thisPtrArgNum = 2;
    thisPtrArgReg = REG_ARG_2;
}
else
3036 #endif // !_TARGET_X86_
{
    thisPtrArgNum = 0;
    thisPtrArgReg = comp->codeGen->genGetThisArgReg(call);
}
3042 // get a reference to the thisPtr being passed
3043 fgArgTabEntry* argEntry = comp->gtArgEntryByArgNum(call, thisPtrArgNum);
3044 assert(argEntry->regNum == thisPtrArgReg);
3045 assert(argEntry->node->gtOper == GT_PUTARG_REG);
3046 GenTree* thisPtr = argEntry->node->gtOp.gtOp1;
3048 // If what we are passing as the thisptr is not already a local, make a new local to place it in
3049 // because we will be creating expressions based on it.
unsigned lclNum;

if (thisPtr->IsLocal())
{
    lclNum = thisPtr->gtLclVarCommon.gtLclNum;
}
else
{
3057 // Split off the thisPtr and store to a temporary variable.
3058 if (vtableCallTemp == BAD_VAR_NUM)
3060 vtableCallTemp = comp->lvaGrabTemp(true DEBUGARG("virtual vtable call"));
3063 LIR::Use thisPtrUse(BlockRange(), &(argEntry->node->gtOp.gtOp1), argEntry->node);
3064 thisPtrUse.ReplaceWithLclVar(comp, m_block->getBBWeight(comp), vtableCallTemp);
    lclNum = vtableCallTemp;
}
3069 // We'll introduce another use of this local so increase its ref count.
3070 comp->lvaTable[lclNum].incRefCnts(comp->compCurBB->getBBWeight(comp), comp);
// If the thisPtr is a local field, then construct a local field type node
GenTree* local;
if (thisPtr->isLclField())
{
3076 local = new (comp, GT_LCL_FLD)
3077 GenTreeLclFld(GT_LCL_FLD, thisPtr->TypeGet(), lclNum, thisPtr->AsLclFld()->gtLclOffs);
}
else
{
    local = new (comp, GT_LCL_VAR) GenTreeLclVar(GT_LCL_VAR, thisPtr->TypeGet(), lclNum, BAD_IL_OFFSET);
}
3084 // pointer to virtual table = [REG_CALL_THIS + offs]
3085 GenTree* result = Ind(Offset(local, VPTR_OFFS));
3087 // Get hold of the vtable offset (note: this might be expensive)
3088 unsigned vtabOffsOfIndirection;
3089 unsigned vtabOffsAfterIndirection;
3090 comp->info.compCompHnd->getMethodVTableOffset(call->gtCallMethHnd, &vtabOffsOfIndirection,
3091 &vtabOffsAfterIndirection);
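// Putting the three loads below together, the lowered control target is (illustrative):
//   vtable = [this + VPTR_OFFS]
//   chunk  = [vtable + vtabOffsOfIndirection]
//   target = [chunk + vtabOffsAfterIndirection]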
3093 // Get the appropriate vtable chunk
3094 // result = [REG_CALL_IND_SCRATCH + vtabOffsOfIndirection]
3095 result = Ind(Offset(result, vtabOffsOfIndirection));
3097 // Load the function address
3098 // result = [reg+vtabOffs]
result = Ind(Offset(result, vtabOffsAfterIndirection));

return result;
}
3104 // Lower stub dispatched virtual calls.
3105 GenTree* Lowering::LowerVirtualStubCall(GenTreeCall* call)
3107 assert((call->gtFlags & GTF_CALL_VIRT_KIND_MASK) == GTF_CALL_VIRT_STUB);
3109 // An x86 JIT which uses full stub dispatch must generate only
3110 // the following stub dispatch calls:
3112 // (1) isCallRelativeIndirect:
3113 // call dword ptr [rel32] ; FF 15 ---rel32----
3114 // (2) isCallRelative:
3115 // call abc ; E8 ---rel32----
3116 // (3) isCallRegisterIndirect:
3118 // call dword ptr [eax] ; FF 10
3120 // THIS IS VERY TIGHTLY TIED TO THE PREDICATES IN
3121 // vm\i386\cGenCpu.h, esp. isCallRegisterIndirect.
3123 GenTree* result = nullptr;
3125 #ifdef _TARGET_64BIT_
3126 // Non-tail calls: Jump Stubs are not taken into account by VM for mapping an AV into a NullRef
3127 // exception. Therefore, JIT needs to emit an explicit null check. Note that Jit64 too generates
3128 // an explicit null check.
// Tail calls: fgMorphTailCall() materializes the null check explicitly and hence there is no need to emit one here.
3133 // Non-64-bit: No need to null check the this pointer - the dispatch code will deal with this.
3134 // The VM considers exceptions that occur in stubs on 64-bit to be not managed exceptions and
3135 // it would be difficult to change this in a way so that it affects only the right stubs.
3137 if (!call->IsTailCallViaHelper())
3139 call->gtFlags |= GTF_CALL_NULLCHECK;
3143 // TODO-Cleanup: Disable emitting random NOPs
3145 // This is code to set up an indirect call to a stub address computed
3146 // via dictionary lookup.
3147 if (call->gtCallType == CT_INDIRECT)
3149 NYI_X86("Virtual Stub dispatched call lowering via dictionary lookup");
3151 // The importer decided we needed a stub call via a computed
3152 // stub dispatch address, i.e. an address which came from a dictionary lookup.
3153 // - The dictionary lookup produces an indirected address, suitable for call
3154 // via "call [REG_VIRTUAL_STUB_PARAM]"
3156 // This combination will only be generated for shared generic code and when
3157 // stub dispatch is active.
3159 // fgMorphArgs will have created trees to pass the address in REG_VIRTUAL_STUB_PARAM.
3160 // All we have to do here is add an indirection to generate the actual call target.
3162 GenTree* ind = Ind(call->gtCallAddr);
3163 BlockRange().InsertAfter(call->gtCallAddr, ind);
3164 call->gtCallAddr = ind;
3168 // Direct stub call.
3169 // Get stub addr. This will return NULL if virtual call stubs are not active
3170 void* stubAddr = call->gtStubCallStubAddr;
3171 noway_assert(stubAddr != nullptr);
// If not CT_INDIRECT, then it should always be a relative indirect call.
// This is ensured by the VM.
noway_assert(call->IsVirtualStubRelativeIndir());
// Direct stub call, though the stubAddr itself may still need to be
// accessed via an indirection.
3179 GenTree* addr = AddrGen(stubAddr);
#ifdef _TARGET_X86_
// On x86, for a tailcall via helper, the JIT_TailCall helper takes the stubAddr as
3183 // the target address, and we set a flag that it's a VSD call. The helper then
3184 // handles any necessary indirection.
if (call->IsTailCallViaHelper())
{
    result = addr;
}
3189 #endif // _TARGET_X86_
3191 if (result == nullptr)
3193 GenTree* indir = Ind(addr);
3195 // On x86 we generate this:
3196 // call dword ptr [rel32] ; FF 15 ---rel32----
3197 // So we don't use a register.
3198 #ifndef _TARGET_X86_
3199 // on x64 we must materialize the target using specific registers.
3200 addr->gtRegNum = REG_VIRTUAL_STUB_PARAM;
    indir->gtRegNum = REG_JUMP_THUNK_PARAM;
#endif // !_TARGET_X86_

    result = indir;
}
// TODO-Cleanup: start emitting random NOPS

return result;
}
3211 //------------------------------------------------------------------------
3212 // AddrModeCleanupHelper: Remove the nodes that are no longer used after an
3213 // addressing mode is constructed
3216 // addrMode - A pointer to a new GenTreeAddrMode
3217 // node - The node currently being considered for removal
3223 // 'addrMode' and 'node' must be contained in the current block
3225 void Lowering::AddrModeCleanupHelper(GenTreeAddrMode* addrMode, GenTree* node)
if (node == addrMode->Base() || node == addrMode->Index())
{
    return;
}
3232 // TODO-LIR: change this to use the LIR mark bit and iterate instead of recursing
3233 for (GenTree* operand : node->Operands())
3235 AddrModeCleanupHelper(addrMode, operand);
3238 BlockRange().Remove(node);
3241 // given two nodes which will be used in an addressing mode (base, index)
3242 // walk backwards from the use to those nodes to determine if they are
3243 // potentially modified in that range
3245 // returns: true if the sources given may be modified before they are used
3246 bool Lowering::AreSourcesPossiblyModified(GenTree* addr, GenTree* base, GenTree* index)
3248 assert(addr != nullptr);
for (GenTree* cursor = addr; cursor != nullptr; cursor = cursor->gtPrev)
{
    if (cursor == base)
    {
        base = nullptr;
    }

    if (cursor == index)
    {
        index = nullptr;
    }

    // Both sources were found before anything that could modify them.
    if (base == nullptr && index == nullptr)
    {
        return false;
    }

    if (base != nullptr && comp->fgNodesMayInterfere(base, cursor))
    {
        return true;
    }

    if (index != nullptr && comp->fgNodesMayInterfere(index, cursor))
    {
        return true;
    }
}
3281 //------------------------------------------------------------------------
3282 // TryCreateAddrMode: recognize trees which can be implemented using an
3283 // addressing mode and transform them to a GT_LEA
3286 // use: the use of the address we want to transform
3287 // isIndir: true if this addressing mode is the child of an indir
// The created LEA node, or the original address node if an LEA could not be constructed.
3293 GenTree* Lowering::TryCreateAddrMode(LIR::Use&& use, bool isIndir)
3295 GenTree* addr = use.Def();
3296 GenTreePtr base = nullptr;
3297 GenTreePtr index = nullptr;
unsigned scale  = 0;
unsigned offset = 0;
bool     rev    = false;
3302 // TODO-1stClassStructs: This logic is here to preserve prior behavior. Note that previously
3303 // block ops were not considered for addressing modes, but an add under it may have been.
3304 // This should be replaced with logic that more carefully determines when an addressing mode
3305 // would be beneficial for a block op.
if (isIndir)
{
    GenTree* indir = use.User();
    if (indir->TypeGet() == TYP_STRUCT)
    {
        isIndir = false;
    }
    else if (varTypeIsStruct(indir))
    {
        // We can have an indirection on the rhs of a block copy (it is the source
        // object). This is not a "regular" indirection.
        // (Note that the parent check could be costly.)
        GenTree* parent = indir->gtGetParent(nullptr);
        if ((parent != nullptr) && parent->OperIsIndir())
        {
            isIndir = false;
        }
        else
        {
            isIndir = !indir->OperIsBlk();
        }
    }
}
3330 // Find out if an addressing mode can be constructed
bool doAddrMode =
    comp->codeGen->genCreateAddrMode(addr, -1, true, 0, &rev, &base, &index, &scale, &offset, true /*nogen*/);

if (!isIndir)
{
    // this is just a reg-const add
    if (index == nullptr)
    {
        return addr;
    }

    // this is just a reg-reg add
    if (scale == 1 && offset == 0)
    {
        return addr;
    }
}
3354 // make sure there are not any side effects between def of leaves and use
if (!doAddrMode || AreSourcesPossiblyModified(addr, base, index))
{
    JITDUMP("  No addressing mode\n");
    return addr;
}
3361 GenTreePtr arrLength = nullptr;
3363 JITDUMP("Addressing mode:\n");
if (index != nullptr)
{
    JITDUMP("  + Index * %u + %u\n", scale, offset);
}
else
{
    JITDUMP("  + %u\n", offset);
}
3376 var_types addrModeType = addr->TypeGet();
3377 if (addrModeType == TYP_REF)
3379 addrModeType = TYP_BYREF;
3382 GenTreeAddrMode* addrMode = new (comp, GT_LEA) GenTreeAddrMode(addrModeType, base, index, scale, offset);
3384 addrMode->gtRsvdRegs = addr->gtRsvdRegs;
3385 addrMode->gtFlags |= (addr->gtFlags & (GTF_ALL_EFFECT | GTF_IND_FLAGS));
3387 JITDUMP("New addressing mode node:\n");
3391 // Required to prevent assert failure:
3392 // Assertion failed 'op1 && op2' in flowgraph.cpp, Line: 34431
3393 // when iterating the operands of a GT_LEA
3394 // Test Case: self_host_tests_amd64\jit\jit64\opt\cse\VolatileTest_op_mul.exe
3395 // Method: TestCSE:.cctor
3396 // The method genCreateAddrMode() above probably should be fixed
3397 // to not return rev=true, when index is returned as NULL
if (rev && index == nullptr)
{
    rev = false;
}

if (rev)
{
    addrMode->gtFlags |= GTF_REVERSE_OPS;
}
else
{
    addrMode->gtFlags &= ~(GTF_REVERSE_OPS);
}
3413 BlockRange().InsertAfter(addr, addrMode);
3415 // Now we need to remove all the nodes subsumed by the addrMode
3416 AddrModeCleanupHelper(addrMode, addr);
3418 // Replace the original address node with the addrMode.
use.ReplaceWith(comp, addrMode);

return addrMode;
}
3424 //------------------------------------------------------------------------
3425 // LowerAdd: turn this add into a GT_LEA if that would be profitable
3428 // node - the node we care about
3431 // The next node to lower.
3433 GenTree* Lowering::LowerAdd(GenTree* node)
3435 GenTree* next = node->gtNext;
#ifdef _TARGET_ARMARCH_
    // For ARM architectures we don't have the LEA instruction,
    // so we won't get much benefit from doing this.
    return next;
#else  // _TARGET_ARMARCH_
if (!varTypeIsIntegralOrI(node))
{
    return next;
}

LIR::Use use;
if (!BlockRange().TryGetUse(node, &use))
{
    return next;
}

// if this is a child of an indir, let the parent handle it.
GenTree* parent = use.User();
if (parent->OperIsIndir())
{
    return next;
}

// if there is a chain of adds, only look at the topmost one
if (parent->gtOper == GT_ADD)
{
    return next;
}

GenTree* addr = TryCreateAddrMode(std::move(use), false);
return addr->gtNext;
3468 #endif // !_TARGET_ARMARCH_
3471 //------------------------------------------------------------------------
3472 // LowerUnsignedDivOrMod: transform GT_UDIV/GT_UMOD nodes with a const power of 2
3473 // divisor into GT_RSZ/GT_AND nodes.
3476 // node - pointer to the GT_UDIV/GT_UMOD node to be lowered
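//
// For example (illustrative):
//    x UDIV 8  becomes  x RSZ 3   (unsigned right shift by log2(8))
//    x UMOD 8  becomes  x AND 7   (mask with 8 - 1)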
3478 void Lowering::LowerUnsignedDivOrMod(GenTree* node)
3480 assert((node->OperGet() == GT_UDIV) || (node->OperGet() == GT_UMOD));
3482 GenTree* divisor = node->gtGetOp2();
3483 GenTree* dividend = node->gtGetOp1();
if (divisor->IsCnsIntOrI()
#if !defined(_TARGET_64BIT_)
    && (dividend->OperGet() != GT_LONG)
#endif
        )
{
    size_t divisorValue = static_cast<size_t>(divisor->gtIntCon.IconValue());

    if (isPow2(divisorValue))
    {
        genTreeOps newOper;

        if (node->OperGet() == GT_UDIV)
        {
            newOper      = GT_RSZ;
            divisorValue = genLog2(divisorValue);
        }
        else
        {
            newOper      = GT_AND;
            divisorValue -= 1;
        }

        node->SetOper(newOper);
        divisor->gtIntCon.SetIconValue(divisorValue);
    }
}
//------------------------------------------------------------------------
// LowerSignedDivOrMod: transform integer GT_DIV/GT_MOD nodes with a power of 2
// const divisor into equivalent but faster sequences.
//
// Arguments:
//    node - pointer to node we care about
//
// Return Value:
//    The next node to lower.
//
GenTree* Lowering::LowerSignedDivOrMod(GenTreePtr node)
{
    assert((node->OperGet() == GT_DIV) || (node->OperGet() == GT_MOD));

    GenTree* next    = node->gtNext;
    GenTree* divMod  = node;
    GenTree* divisor = divMod->gtGetOp2();

    if (!divisor->IsCnsIntOrI())
    {
        return next; // no transformations to make
    }

    const var_types type = divMod->TypeGet();
    assert((type == TYP_INT) || (type == TYP_LONG));

    GenTree* dividend = divMod->gtGetOp1();

    if (dividend->IsCnsIntOrI())
    {
        // We shouldn't see a divmod with constant operands here, but if we do then it's likely
        // because optimizations are disabled or it's a case that's supposed to throw an exception.
        // Don't optimize this.
        return next;
    }

    ssize_t divisorValue = divisor->gtIntCon.IconValue();

    if (divisorValue == -1)
    {
        // x / -1 can't be optimized because INT_MIN / -1 is required to throw an exception.
        //
        // x % -1 is always 0 and the IL spec says that the rem instruction "can" throw an exception if x is
        // the minimum representable integer. However, the C# spec says that an exception "is" thrown in this
        // case, so optimizing this case would break C# code.
        //
        // A runtime check could be used to handle this case but it's probably too rare to matter.
        return next;
    }
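    // For example, in C# both (int.MinValue / -1) and (int.MinValue % -1) throw
    // System.OverflowException, so rewriting either form here would change
    // observable behavior.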
    bool isDiv = divMod->OperGet() == GT_DIV;

    if (isDiv)
    {
        if ((type == TYP_INT && divisorValue == INT_MIN) || (type == TYP_LONG && divisorValue == INT64_MIN))
        {
            // If the divisor is the minimum representable integer value then we can use a compare;
            // the result is 1 iff the dividend equals the divisor.
            divMod->SetOper(GT_EQ);
            return next;
        }
    }

    size_t absDivisorValue =
        (divisorValue == SSIZE_T_MIN) ? static_cast<size_t>(divisorValue) : static_cast<size_t>(abs(divisorValue));

    if (!isPow2(absDivisorValue))
    {
        return next;
    }
    // We're committed to the conversion now. Go find the use.
    LIR::Use use;
    if (!BlockRange().TryGetUse(node, &use))
    {
        assert(!"signed DIV/MOD node is unused");
        return next;
    }

    // We need to use the dividend node multiple times so its value needs to be
    // computed once and stored in a temp variable.

    unsigned curBBWeight = comp->compCurBB->getBBWeight(comp);

    LIR::Use opDividend(BlockRange(), &divMod->gtOp.gtOp1, divMod);
    opDividend.ReplaceWithLclVar(comp, curBBWeight);

    dividend = divMod->gtGetOp1();
    assert(dividend->OperGet() == GT_LCL_VAR);

    unsigned dividendLclNum = dividend->gtLclVar.gtLclNum;

    GenTree* adjustment = comp->gtNewOperNode(GT_RSH, type, dividend, comp->gtNewIconNode(type == TYP_INT ? 31 : 63));

    if (absDivisorValue == 2)
    {
        // If the divisor is +/-2 then we'd end up with a bitwise and between 0/-1 and 1.
        // We can get the same result by using GT_RSZ instead of GT_RSH.
        adjustment->SetOper(GT_RSZ);
    }
    else
    {
        adjustment = comp->gtNewOperNode(GT_AND, type, adjustment, comp->gtNewIconNode(absDivisorValue - 1, type));
    }

    GenTree* adjustedDividend =
        comp->gtNewOperNode(GT_ADD, type, adjustment, comp->gtNewLclvNode(dividendLclNum, type));

    comp->lvaTable[dividendLclNum].incRefCnts(curBBWeight, comp);
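    // Worked example of the adjustment (illustrative, TYP_INT dividend of -13 and
    // divisor of +8 assumed):
    //
    //     adjustment       = (-13 >> 31) & 7  =  -1 & 7  =  7
    //     adjustedDividend = -13 + 7          =  -6
    //     -6 >> 3          = -1, which matches truncating division -13 / 8
    //
    // For a non-negative dividend the adjustment is 0 and the shift alone suffices.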
    GenTree* newDivMod;

    if (isDiv)
    {
        // perform the division by right shifting the adjusted dividend
        divisor->gtIntCon.SetIconValue(genLog2(absDivisorValue));

        newDivMod = comp->gtNewOperNode(GT_RSH, type, adjustedDividend, divisor);

        if (divisorValue < 0)
        {
            // negate the result if the divisor is negative
            newDivMod = comp->gtNewOperNode(GT_NEG, type, newDivMod);
        }
    }
    else
    {
        // dividend % divisor = dividend - divisor x (dividend / divisor)
        // divisor x (dividend / divisor) translates to (dividend >> log2(divisor)) << log2(divisor)
        // which simply discards the low log2(divisor) bits; that's just dividend & ~(divisor - 1)
        divisor->gtIntCon.SetIconValue(~(absDivisorValue - 1));

        newDivMod = comp->gtNewOperNode(GT_SUB, type, comp->gtNewLclvNode(dividendLclNum, type),
                                        comp->gtNewOperNode(GT_AND, type, adjustedDividend, divisor));

        comp->lvaTable[dividendLclNum].incRefCnts(curBBWeight, comp);
    }
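    // Worked example for the modulo path (same assumed values, dividend -13 and
    // divisor 8):
    //
    //     adjustedDividend & ~7  =  -6 & ~7  =  -8   // nearest lower multiple of 8
    //     newDivMod = -13 - (-8) =  -5, which matches -13 % 8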
    // Remove the divisor and dividend nodes from the linear order,
    // since we have reused them and will resequence the tree
    BlockRange().Remove(divisor);
    BlockRange().Remove(dividend);

    // linearize and insert the new tree before the original divMod node
    BlockRange().InsertBefore(divMod, LIR::SeqTree(comp, newDivMod));
    BlockRange().Remove(divMod);

    // replace the original divmod node with the new divmod tree
    use.ReplaceWith(comp, newDivMod);

    return newDivMod->gtNext;
}
//------------------------------------------------------------------------
// LowerStoreInd: attempt to transform an indirect store to use an
//    addressing mode
//
// Arguments:
//    node - the node we care about
//
void Lowering::LowerStoreInd(GenTree* node)
{
    assert(node != nullptr);
    assert(node->OperGet() == GT_STOREIND);

    TryCreateAddrMode(LIR::Use(BlockRange(), &node->gtOp.gtOp1, node), true);

    // Mark all GT_STOREIND nodes to indicate that it is not known
    // whether it represents a RMW memory op.
    node->AsStoreInd()->SetRMWStatusDefault();
}
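// Illustrative sketch: a store such as
//
//     GT_STOREIND(addr, GT_ADD(GT_IND(addr), 1))
//
// is a read-modify-write candidate that xarch codegen may later emit as a
// single "add [mem], 1". The status is reset to "unknown" here because that
// determination is made later (see IndirsAreEquivalent further down).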
void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
{
    GenTree* src = blkNode->Data();
    // TODO-1stClassStructs: Don't require this.
    assert(blkNode->OperIsInitBlkOp() || !src->OperIsLocal());
    TryCreateAddrMode(LIR::Use(BlockRange(), &blkNode->Addr(), blkNode), false);
}
//------------------------------------------------------------------------
// LowerArrElem: Lower a GT_ARR_ELEM node
//
// Arguments:
//    node - the GT_ARR_ELEM node to lower.
//
// Return Value:
//    The next node to lower.
//
// Assumptions:
//    pTree points to a pointer to a GT_ARR_ELEM node.
//
// Notes:
//    This performs the following lowering. We start with a node of the form:
//          /--*  <arrObj>
//          +--*  <index0>
//          +--*  <index1>
//       /--*  arrMD&[,]
//
//    First, we create temps for arrObj if it is not already a lclVar, and for any of the index
//    expressions that have side-effects.
//    We then transform the tree into:
//                      <offset is null - no accumulated offset for the first index>
//                   /--*  <arrObj>
//                   +--*  <index0>
//                /--*  ArrIndex[i, ]
//                +--*  <arrObj>
//             /--|  arrOffs[i, ]
//             |  +--*  <arrObj>
//             |  +--*  <index1>
//             +--*  ArrIndex[*,j]
//             +--*  <arrObj>
//          /--|  arrOffs[*,j]
//          +--*  lclVar NewTemp
//       /--*  lea (scale = element size, offset = offset of first element)
//
//    The new stmtExpr may be omitted if the <arrObj> is a lclVar.
//    The new stmtExpr may be embedded if the <arrObj> is not the first tree in linear order for
//    the statement containing the original arrMD.
//    Note that the arrMDOffs is the INDEX of the lea, but is evaluated before the BASE (which is the second
//    reference to NewTemp), because that provides more accurate lifetimes.
//    There may be 1, 2 or 3 dimensions, with 1, 2 or 3 arrMDIdx nodes, respectively.
//
GenTree* Lowering::LowerArrElem(GenTree* node)
{
    // This will assert if we don't have an ArrElem node
    GenTreeArrElem*     arrElem     = node->AsArrElem();
    const unsigned char rank        = arrElem->gtArrElem.gtArrRank;
    const unsigned      blockWeight = m_block->getBBWeight(comp);

    JITDUMP("Lowering ArrElem\n");
    JITDUMP("============\n");
    DISPTREERANGE(BlockRange(), arrElem);
    JITDUMP("\n");

    assert(arrElem->gtArrObj->TypeGet() == TYP_REF);

    // We need to have the array object in a lclVar.
    if (!arrElem->gtArrObj->IsLocal())
    {
        LIR::Use arrObjUse(BlockRange(), &arrElem->gtArrObj, arrElem);
        arrObjUse.ReplaceWithLclVar(comp, blockWeight);
    }

    GenTree* arrObjNode = arrElem->gtArrObj;
    assert(arrObjNode->IsLocal());

    GenTree* insertionPoint = arrElem;

    // The first ArrOffs node will have 0 for the offset of the previous dimension.
    GenTree* prevArrOffs = new (comp, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, 0);
    BlockRange().InsertBefore(insertionPoint, prevArrOffs);

    for (unsigned char dim = 0; dim < rank; dim++)
    {
        GenTree* indexNode = arrElem->gtArrElem.gtArrInds[dim];

        // Use the original arrObjNode on the 0th ArrIndex node, and clone it for subsequent ones.
        GenTreePtr idxArrObjNode;
        if (dim == 0)
        {
            idxArrObjNode = arrObjNode;
        }
        else
        {
            idxArrObjNode = comp->gtClone(arrObjNode);
            BlockRange().InsertBefore(insertionPoint, idxArrObjNode);
        }

        // Next comes the GT_ARR_INDEX node.
        GenTreeArrIndex* arrMDIdx = new (comp, GT_ARR_INDEX)
            GenTreeArrIndex(TYP_INT, idxArrObjNode, indexNode, dim, rank, arrElem->gtArrElem.gtArrElemType);
        arrMDIdx->gtFlags |= ((idxArrObjNode->gtFlags | indexNode->gtFlags) & GTF_ALL_EFFECT);
        BlockRange().InsertBefore(insertionPoint, arrMDIdx);

        GenTree* offsArrObjNode = comp->gtClone(arrObjNode);
        BlockRange().InsertBefore(insertionPoint, offsArrObjNode);

        GenTreeArrOffs* arrOffs =
            new (comp, GT_ARR_OFFSET) GenTreeArrOffs(TYP_I_IMPL, prevArrOffs, arrMDIdx, offsArrObjNode, dim, rank,
                                                     arrElem->gtArrElem.gtArrElemType);
        arrOffs->gtFlags |= ((prevArrOffs->gtFlags | arrMDIdx->gtFlags | offsArrObjNode->gtFlags) & GTF_ALL_EFFECT);
        BlockRange().InsertBefore(insertionPoint, arrOffs);

        prevArrOffs = arrOffs;
    }

    // Generate the LEA and make it reverse evaluation, because we want to evaluate the index expression before the
    // base.
    unsigned scale  = arrElem->gtArrElem.gtArrElemSize;
    unsigned offset = comp->eeGetMDArrayDataOffset(arrElem->gtArrElem.gtArrElemType, arrElem->gtArrElem.gtArrRank);

    GenTreePtr leaIndexNode = prevArrOffs;
    if (!jitIsScaleIndexMul(scale))
    {
        // We do the address arithmetic in TYP_I_IMPL, though note that the lower bounds and lengths in memory are
        // TYP_INT.
        GenTreePtr scaleNode = new (comp, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, scale);
        GenTreePtr mulNode   = new (comp, GT_MUL) GenTreeOp(GT_MUL, TYP_I_IMPL, leaIndexNode, scaleNode);
        BlockRange().InsertBefore(insertionPoint, scaleNode, mulNode);
        leaIndexNode = mulNode;
        scale        = 1; // the scaling is now done explicitly by the multiply
    }

    GenTreePtr leaBase = comp->gtClone(arrObjNode);
    BlockRange().InsertBefore(insertionPoint, leaBase);

    GenTreePtr leaNode = new (comp, GT_LEA) GenTreeAddrMode(arrElem->TypeGet(), leaBase, leaIndexNode, scale, offset);
    leaNode->gtFlags |= GTF_REVERSE_OPS;

    BlockRange().InsertBefore(insertionPoint, leaNode);

    LIR::Use arrElemUse;
    if (BlockRange().TryGetUse(arrElem, &arrElemUse))
    {
        arrElemUse.ReplaceWith(comp, leaNode);
    }

    BlockRange().Remove(arrElem);

    JITDUMP("Results of lowering ArrElem:\n");
    DISPTREERANGE(BlockRange(), leaNode);
    JITDUMP("\n");

    return leaNode->gtNext;
}
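// Illustrative sketch (names and a 4-byte element size assumed): for a C#
// access "a[i, j]" into a rank-2 multi-dimensional array, the loop above
// leaves the following chain in the block, where each ArrOffset folds in the
// bounds-checked index of its dimension plus the accumulated offset of the
// previous one:
//
//     ArrIndex(a, i, dim=0)  ->  ArrOffset(0,      ArrIndex, a, dim=0)
//     ArrIndex(a, j, dim=1)  ->  ArrOffset(<prev>, ArrIndex, a, dim=1)
//     GT_LEA(base=a, index=<final offset>, scale=4, offset=<data offset>)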
void Lowering::DoPhase()
{
#if 0
    // The code in this #if can be used to debug lowering issues according to
    // method hash. To use, simply set environment variables lowerhashlo and lowerhashhi.
#ifdef DEBUG
    unsigned methHash = info.compMethodHash();
    char* lostr = getenv("lowerhashlo");
    unsigned methHashLo = 0;
    if (lostr != nullptr)
    {
        sscanf_s(lostr, "%x", &methHashLo);
    }
    char* histr = getenv("lowerhashhi");
    unsigned methHashHi = UINT32_MAX;
    if (histr != nullptr)
    {
        sscanf_s(histr, "%x", &methHashHi);
    }
    if (methHash < methHashLo || methHash > methHashHi)
        return;
    else
    {
        printf("Lowering for method %s, hash = 0x%x.\n",
               info.compFullName, info.compMethodHash());
        printf(""); // in our logic this causes a flush
    }
#endif
#endif
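// Example usage of the hash-range filter above (assumed workflow: flip the
// "#if 0" to "#if 1" in a DEBUG build, then set the environment variables
// before running, e.g. in a Windows command shell):
//
//     set lowerhashlo=0x2f1e0a60
//     set lowerhashhi=0x2f1e0a60
//
// Only methods whose hash falls within [lowerhashlo, lowerhashhi] reach the
// printf above, which makes it easy to bisect to a failing method.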
#if !defined(_TARGET_64BIT_)
    DecomposeLongs decomp(comp); // Initialize the long decomposition class.
    decomp.PrepareForDecomposition();
#endif // !defined(_TARGET_64BIT_)

    for (BasicBlock* block = comp->fgFirstBB; block; block = block->bbNext)
    {
        /* Make the block publicly available */
        comp->compCurBB = block;

#if !defined(_TARGET_64BIT_)
        decomp.DecomposeBlock(block);
#endif //!_TARGET_64BIT_

        LowerBlock(block);
    }

    // If we have any PInvoke calls, insert the one-time prolog code. We've already inserted the epilog code in the
    // appropriate spots. NOTE: there is a minor optimization opportunity here, as we still create p/invoke data
    // structures and setup/teardown even if we've eliminated all p/invoke calls due to dead code elimination.
    if (comp->info.compCallUnmanaged)
    {
        InsertPInvokeMethodProlog();
    }
#ifdef DEBUG
    JITDUMP("Lower has completed modifying nodes, proceeding to initialize LSRA TreeNodeInfo structs...\n");
    if (VERBOSE)
    {
        comp->fgDispBasicBlocks(true);
    }
#endif

    // TODO-Throughput: We re-sort local variables to get the goodness of enregistering recently
    // introduced local variables both by Rationalize and Lower; the downside is we need to
    // recompute standard local variable liveness in order to get Linear CodeGen working.
    // For now we'll take the throughput hit of recomputing local liveness but in the long term
    // we're striving to use the unified liveness computation (fgLocalVarLiveness) and stop
    // computing it separately in LSRA.
    if (comp->lvaCount != 0)
    {
        comp->lvaSortAgain = true;
    }
    comp->EndPhase(PHASE_LOWERING_DECOMP);

    comp->fgLocalVarLiveness();
    // local var liveness can delete code, which may create empty blocks
    if (!comp->opts.MinOpts() && !comp->opts.compDbgCode)
    {
        comp->optLoopsMarked = false;
        bool modified        = comp->fgUpdateFlowGraph();
        if (modified || comp->lvaSortAgain)
        {
            JITDUMP("had to run another liveness pass:\n");
            comp->fgLocalVarLiveness();
        }
    }

    JITDUMP("Liveness pass finished after lowering, IR:\n");
    JITDUMP("lvasortagain = %d\n", comp->lvaSortAgain);
#ifdef DEBUG
    if (VERBOSE)
    {
        comp->fgDispBasicBlocks(true);
    }
#endif
    // The initialization code for the TreeNodeInfo map was initially part of a single full IR
    // traversal, and it has been split because the order of traversal performed by fgWalkTreePost
    // does not necessarily lower nodes in execution order, and also because it could potentially
    // add new BasicBlocks on the fly as part of the Lowering pass, so the traversal wouldn't be complete.
    //
    // Doing a new traversal guarantees we 'see' all newly introduced trees and basic blocks, allowing us
    // to correctly initialize all the data structures LSRA requires later on.
    // This code still has issues when it has to do with initialization of recently introduced locals by
    // lowering. The effect of this is that any temporary local variable introduced by lowering won't be
    // enregistered, yielding suboptimal CQ.
    // The reason for this is that we cannot re-sort the local variables per ref-count and bump the number of
    // tracked variables just here, because then LSRA will work with mismatching BitSets (i.e. BitSets with different
    // 'epochs' that were created before and after variable resorting, which will result in a different number of
    // tracked local variables).
    //
    // The fix for this is to refactor this code to be run JUST BEFORE LSRA and not as part of lowering.
    // It's also desirable to avoid initializing this code using a non-execution order traversal.
    //
    LsraLocation currentLoc = 1;
    for (BasicBlock* block = m_lsra->startBlockSequence(); block != nullptr; block = m_lsra->moveToNextBlock())
    {
        m_block = block;

        // Increment the LsraLocation (currentLoc) at each BasicBlock.
        // This ensures that the block boundary (RefTypeBB, RefTypeExpUse and RefTypeDummyDef) RefPositions
        // are in increasing location order.
        currentLoc += 2;
        for (GenTree* node : BlockRange().NonPhiNodes())
        {
            /* We increment the number position of each tree node by 2 to
             * simplify the logic when there's the case of a tree that implicitly
             * does a dual-definition of temps (the long case). In this case
             * it is easier to already have an idle spot to handle a dual-def instead
             * of making some messy adjustments if we only increment the
             * number position by one.
             */
            node->gtSeqNum = currentLoc;

            node->gtLsraInfo.Initialize(m_lsra, node, currentLoc);
            node->gtClearReg(comp);

            // Mark the node's operands as used
            for (GenTree* operand : node->Operands())
            {
                operand->gtLIRFlags &= ~LIR::Flags::IsUnusedValue;
            }

            // If the node produces a value, mark it as unused.
            if (node->IsValue())
            {
                node->gtLIRFlags |= LIR::Flags::IsUnusedValue;
            }

            currentLoc += 2;
        }
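        // Illustrative numbering (assumed): with a step of 2, the nodes of a
        // block get locations L, L+2, L+4, ..., so a node that implicitly
        // defines two temps (the 32-bit TYP_LONG case mentioned above) can
        // claim the unused odd location between its own location and the next
        // node's without renumbering the whole block.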
        for (GenTree* node : BlockRange().NonPhiNodes())
        {
            TreeNodeInfoInit(node);

            // Only nodes that produce values should have a non-zero dstCount.
            assert((node->gtLsraInfo.dstCount == 0) || node->IsValue());

            // If the node produces an unused value, mark it as a local def-use
            if ((node->gtLIRFlags & LIR::Flags::IsUnusedValue) != 0)
            {
                node->gtLsraInfo.isLocalDefUse = true;
                node->gtLsraInfo.dstCount      = 0;
            }
#if 0
            // TODO-CQ: Enable this code after fixing the isContained() logic to not abort for these
            // top-level nodes that throw away their result.
            // If this is an interlocked operation that has a non-last-use lclVar as its op2,
            // make sure we allocate a target register for the interlocked operation; otherwise we
            // need not allocate a register.
            else if ((node->OperGet() == GT_LOCKADD || node->OperGet() == GT_XCHG || node->OperGet() == GT_XADD))
            {
                node->gtLsraInfo.dstCount = 0;
                if (node->gtGetOp2()->IsLocal() && (node->gtFlags & GTF_VAR_DEATH) == 0)
                    node->gtLsraInfo.isLocalDefUse = true;
            }
#endif // 0
        }

        assert(BlockRange().CheckLIR(comp, true));
    }

    DBEXEC(VERBOSE, DumpNodeInfoMap());
}

#ifdef DEBUG
//------------------------------------------------------------------------
// Lowering::CheckCallArg: check that a call argument is in an expected
//                         form after lowering.
//
// Arguments:
//    arg - the argument to check.
//
void Lowering::CheckCallArg(GenTree* arg)
{
    if (arg->OperIsStore() || arg->IsArgPlaceHolderNode() || arg->IsNothingNode() || arg->OperIsCopyBlkOp())
    {
        return;
    }

    switch (arg->OperGet())
    {
#if !defined(_TARGET_64BIT_)
        case GT_LONG:
            assert(arg->gtGetOp1()->OperIsPutArg());
            assert(arg->gtGetOp2()->OperIsPutArg());
            break;
#endif

        case GT_LIST:
        {
            GenTreeArgList* list = arg->AsArgList();
            assert(list->IsAggregate());

            for (; list != nullptr; list = list->Rest())
            {
                assert(list->Current()->OperIsPutArg());
            }
        }
        break;

        default:
            assert(arg->OperIsPutArg());
            break;
    }
}
//------------------------------------------------------------------------
// Lowering::CheckCall: check that a call is in an expected form after
//                      lowering. Currently this amounts to checking its
//                      arguments, but could be expanded to verify more
//                      properties in the future.
//
// Arguments:
//    call - the call to check.
//
void Lowering::CheckCall(GenTreeCall* call)
{
    if (call->gtCallObjp != nullptr)
    {
        CheckCallArg(call->gtCallObjp);
    }

    for (GenTreeArgList* args = call->gtCallArgs; args != nullptr; args = args->Rest())
    {
        CheckCallArg(args->Current());
    }

    for (GenTreeArgList* args = call->gtCallLateArgs; args != nullptr; args = args->Rest())
    {
        CheckCallArg(args->Current());
    }
}
//------------------------------------------------------------------------
// Lowering::CheckNode: check that an LIR node is in an expected form
//                      after lowering.
//
// Arguments:
//    node - the node to check.
//
void Lowering::CheckNode(GenTree* node)
{
    switch (node->OperGet())
    {
        case GT_CALL:
            CheckCall(node->AsCall());
            break;

#ifdef FEATURE_SIMD
        case GT_SIMD:
#ifdef _TARGET_64BIT_
        case GT_LCL_VAR:
        case GT_STORE_LCL_VAR:
#endif // _TARGET_64BIT_
            assert(node->TypeGet() != TYP_SIMD12);
            break;
#endif // FEATURE_SIMD

        default:
            break;
    }
}
//------------------------------------------------------------------------
// Lowering::CheckBlock: check that the contents of an LIR block are in an
//                       expected form after lowering.
//
// Arguments:
//    compiler - the compiler context.
//    block    - the block to check.
//
bool Lowering::CheckBlock(Compiler* compiler, BasicBlock* block)
{
    assert(block->isEmpty() || block->IsLIR());

    LIR::Range& blockRange = LIR::AsRange(block);
    for (GenTree* node : blockRange)
    {
        CheckNode(node);
    }

    assert(blockRange.CheckLIR(compiler));
    return true;
}
#endif // DEBUG
void Lowering::LowerBlock(BasicBlock* block)
{
    assert(block == comp->compCurBB); // compCurBB must already be set.
    assert(block->isEmpty() || block->IsLIR());

    m_block = block;

    // NOTE: some of the lowering methods insert calls before the node being
    // lowered (See e.g. InsertPInvoke{Method,Call}{Prolog,Epilog}). In
    // general, any code that is inserted before the current node should be
    // "pre-lowered" as it won't be subject to further processing.
    // Lowering::CheckBlock() runs some extra checks on call arguments in
    // order to help catch unlowered nodes.

    GenTree* node = BlockRange().FirstNode();
    while (node != nullptr)
    {
        node = LowerNode(node);
    }

    assert(CheckBlock(comp, block));
}
/** Verifies whether both of these trees represent the same indirection.
 * Used by Lower to annotate whether CodeGen can generate an instruction of the
 * form *addrMode BinOp= expr
 *
 * Preconditions: both trees are children of GT_INDs and their underlying children
 * have the same gtOper.
 *
 * This is a first iteration to actually recognize trees that can be code-generated
 * as a single read-modify-write instruction on AMD64/x86. For now
 * this method only supports the recognition of simple addressing modes (through GT_LEA)
 * or local var indirections. Local fields, array access and other more complex nodes are
 * not yet supported.
 *
 * TODO-CQ: Perform tree recognition by using the Value Numbering Package; that way we can recognize
 * arbitrarily complex trees and support many more addressing patterns.
 */
bool Lowering::IndirsAreEquivalent(GenTreePtr candidate, GenTreePtr storeInd)
{
    assert(candidate->OperGet() == GT_IND);
    assert(storeInd->OperGet() == GT_STOREIND);

    // We should check the size of the indirections. If they are
    // different, say because of a cast, then we can't call them equivalent. Doing so could cause us
    // to drop a cast.
    // Signed-ness difference is okay and expected since a store indirection must always
    // be signed based on the CIL spec, but a load could be unsigned.
    if (genTypeSize(candidate->gtType) != genTypeSize(storeInd->gtType))
    {
        return false;
    }
    GenTreePtr pTreeA = candidate->gtGetOp1();
    GenTreePtr pTreeB = storeInd->gtGetOp1();

    // This method will be called by codegen (as well as during lowering).
    // After register allocation, the sources may have been spilled and reloaded
    // to a different register, indicated by an inserted GT_RELOAD node.
    pTreeA = pTreeA->gtSkipReloadOrCopy();
    pTreeB = pTreeB->gtSkipReloadOrCopy();

    genTreeOps oper;

    if (pTreeA->OperGet() != pTreeB->OperGet())
    {
        return false;
    }

    oper = pTreeA->OperGet();
    switch (oper)
    {
        case GT_LCL_VAR:
        case GT_LCL_VAR_ADDR:
        case GT_CLS_VAR_ADDR:
        case GT_CNS_INT:
            return NodesAreEquivalentLeaves(pTreeA, pTreeB);

        case GT_LEA:
        {
            GenTreeAddrMode* gtAddr1 = pTreeA->AsAddrMode();
            GenTreeAddrMode* gtAddr2 = pTreeB->AsAddrMode();
            return NodesAreEquivalentLeaves(gtAddr1->Base(), gtAddr2->Base()) &&
                   NodesAreEquivalentLeaves(gtAddr1->Index(), gtAddr2->Index()) &&
                   gtAddr1->gtScale == gtAddr2->gtScale && gtAddr1->gtOffset == gtAddr2->gtOffset;
        }

        default:
            // We don't handle anything that is not either a constant,
            // a local var or LEA.
            return false;
    }
}
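// Illustrative sketch (registers assumed): for the pair
//
//     t1 = GT_IND(GT_LEA(base, index, 4, 8))
//     GT_STOREIND(GT_LEA(base, index, 4, 8), GT_ADD(t1, 1))
//
// IndirsAreEquivalent returns true because both LEAs have equivalent base,
// index, scale and offset, which allows xarch codegen to emit a single
// read-modify-write instruction such as "inc dword ptr [rax+rdx*4+8]" instead
// of a separate load, add and store.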
/** Test whether the two given nodes are the same leaves.
 *  Right now, only constant integers and local variables are supported.
 */
bool Lowering::NodesAreEquivalentLeaves(GenTreePtr tree1, GenTreePtr tree2)
{
    if (tree1 == nullptr && tree2 == nullptr)
    {
        return true;
    }

    // both null, they are equivalent, otherwise if either is null not equivalent
    if (tree1 == nullptr || tree2 == nullptr)
    {
        return false;
    }

    tree1 = tree1->gtSkipReloadOrCopy();
    tree2 = tree2->gtSkipReloadOrCopy();

    if (tree1->TypeGet() != tree2->TypeGet())
    {
        return false;
    }

    if (tree1->OperGet() != tree2->OperGet())
    {
        return false;
    }

    if (!tree1->OperIsLeaf() || !tree2->OperIsLeaf())
    {
        return false;
    }

    switch (tree1->OperGet())
    {
        case GT_CNS_INT:
            return tree1->gtIntCon.gtIconVal == tree2->gtIntCon.gtIconVal &&
                   tree1->IsIconHandle() == tree2->IsIconHandle();
        case GT_LCL_VAR:
        case GT_LCL_VAR_ADDR:
            return tree1->gtLclVarCommon.gtLclNum == tree2->gtLclVarCommon.gtLclNum;
        case GT_CLS_VAR_ADDR:
            return tree1->gtClsVar.gtClsVarHnd == tree2->gtClsVar.gtClsVarHnd;
        default:
            return false;
    }
}
#ifdef _TARGET_64BIT_
/**
 * Get common information required to handle a cast instruction
 *
 * Right now this only supports 64 bit targets. In order to support 32 bit targets the
 * switch statement needs work.
 */
void Lowering::getCastDescription(GenTreePtr treeNode, CastInfo* castInfo)
{
    // Initialize castInfo
    memset(castInfo, 0, sizeof(*castInfo));

    GenTreePtr castOp = treeNode->gtCast.CastOp();

    var_types dstType = treeNode->CastToType();
    var_types srcType = castOp->TypeGet();

    castInfo->unsignedDest   = varTypeIsUnsigned(dstType);
    castInfo->unsignedSource = varTypeIsUnsigned(srcType);

    // If necessary, force the srcType to unsigned when the GT_UNSIGNED flag is set.
    if (!castInfo->unsignedSource && (treeNode->gtFlags & GTF_UNSIGNED) != 0)
    {
        srcType                  = genUnsignedType(srcType);
        castInfo->unsignedSource = true;
    }

    if (treeNode->gtOverflow() &&
        (genTypeSize(srcType) >= genTypeSize(dstType) || (srcType == TYP_INT && dstType == TYP_ULONG)))
    {
        castInfo->requiresOverflowCheck = true;
    }

    if (castInfo->requiresOverflowCheck)
    {
        ssize_t typeMin       = 0;
        ssize_t typeMax       = 0;
        ssize_t typeMask      = 0;
        bool    signCheckOnly = false;

        // Do we need to compare the value, or just check masks
        switch (dstType)
        {
            default:
                assert(!"unreachable: getCastDescription");
                break;

            case TYP_BYTE:
                typeMask = ssize_t((int)0xFFFFFF80);
                typeMin  = SCHAR_MIN;
                typeMax  = SCHAR_MAX;
                break;

            case TYP_UBYTE:
                typeMask = ssize_t((int)0xFFFFFF00L);
                break;

            case TYP_SHORT:
                typeMask = ssize_t((int)0xFFFF8000);
                typeMin  = SHRT_MIN;
                typeMax  = SHRT_MAX;
                break;

            case TYP_CHAR:
                typeMask = ssize_t((int)0xFFFF0000L);
                break;

            case TYP_INT:
                if (srcType == TYP_UINT)
                {
                    signCheckOnly = true;
                }
                else
                {
                    typeMask = 0xFFFFFFFF80000000LL;
                    typeMin  = INT_MIN;
                    typeMax  = INT_MAX;
                }
                break;

            case TYP_UINT:
                if (srcType == TYP_INT)
                {
                    signCheckOnly = true;
                }
                else
                {
                    typeMask = 0xFFFFFFFF00000000LL;
                }
                break;

            case TYP_LONG:
                signCheckOnly = true;
                break;

            case TYP_ULONG:
                signCheckOnly = true;
                break;
        }

        if (signCheckOnly)
        {
            castInfo->signCheckOnly = true;
        }

        castInfo->typeMax  = typeMax;
        castInfo->typeMin  = typeMin;
        castInfo->typeMask = typeMask;
    }
}
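// Illustrative use of the descriptor (values from the switch above): for an
// overflow-checked cast from TYP_INT to TYP_UBYTE, typeMask is 0xFFFFFF00, so
// codegen can reject out-of-range values with a single test:
//
//     if ((value & 0xFFFFFF00) != 0) { /* raise an overflow exception */ }
//
// For the signCheckOnly cases (e.g. casting TYP_ULONG to TYP_LONG), only the
// sign bit of the source needs to be examined.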
#endif // _TARGET_64BIT_
#ifdef DEBUG
void Lowering::DumpNodeInfoMap()
{
    printf("-----------------------------\n");
    printf("TREE NODE INFO DUMP\n");
    printf("-----------------------------\n");

    for (BasicBlock* block = comp->fgFirstBB; block != nullptr; block = block->bbNext)
    {
        for (GenTree* node : LIR::AsRange(block).NonPhiNodes())
        {
            comp->gtDispTree(node, nullptr, nullptr, true);
            node->gtLsraInfo.dump(m_lsra);
        }
    }
}
#endif // DEBUG

#endif // !LEGACY_BACKEND