1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
12 XX Postconditions (for the nodes currently handled): XX
13 XX - All operands requiring a register are explicit in the graph XX
15 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
16 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
24 #ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator
28 #if !defined(_TARGET_64BIT_)
29 #include "decomposelongs.h"
30 #endif // !defined(_TARGET_64BIT_)
32 //------------------------------------------------------------------------
33 // MakeSrcContained: Make "childNode" a contained node
36 // parentNode - is a non-leaf node that can contain its 'childNode'
37 // childNode - is an op that will now be contained by its parent.
40 // If 'childNode' has any existing sources, they will now be sources for the parent.
42 void Lowering::MakeSrcContained(GenTreePtr parentNode, GenTreePtr childNode)
44 assert(!parentNode->OperIsLeaf());
45 assert(childNode->canBeContained());
46 childNode->SetContained();
49 //------------------------------------------------------------------------
50 // CheckImmedAndMakeContained: Checks if the 'childNode' is a containable immediate
51 // and, if so, makes it contained.
54 // parentNode - is any non-leaf node
55 childNode - is a child op of 'parentNode'
58 // true if we are able to make childNode a contained immediate
60 bool Lowering::CheckImmedAndMakeContained(GenTree* parentNode, GenTree* childNode)
62 assert(!parentNode->OperIsLeaf());
63 // If childNode is a containable immediate
64 if (IsContainableImmed(parentNode, childNode))
66 // then make it contained within the parentNode
67 MakeSrcContained(parentNode, childNode);
73 //------------------------------------------------------------------------
74 // IsSafeToContainMem: Checks for conflicts between childNode and parentNode,
75 // and returns 'true' iff memory operand childNode can be contained in parentNode.
78 // parentNode - any non-leaf node
79 // childNode - some node that is an input to `parentNode`
82 // true if it is safe to make childNode a contained memory operand.
84 bool Lowering::IsSafeToContainMem(GenTree* parentNode, GenTree* childNode)
86 m_scratchSideEffects.Clear();
87 m_scratchSideEffects.AddNode(comp, childNode);
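// Walk the nodes between childNode and parentNode in execution order; if any of them interferes
// with childNode's side effects, containing childNode would effectively move its evaluation past
// that node, so containment is not safe.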
89 for (GenTree* node = childNode->gtNext; node != parentNode; node = node->gtNext)
91 if (m_scratchSideEffects.InterferesWith(comp, node, false))
100 //------------------------------------------------------------------------
101 // IsContainableMemoryOp: Checks whether this is a memory op that can be contained.
104 // node - the node of interest.
107 // True if this will definitely be a memory reference that could be contained.
110 // This differs from the isMemoryOp() method on GenTree because it checks for
111 // the case of doNotEnregister local. This won't include locals that
112 // for some other reason do not become register candidates, nor those that get spilled.
114 // Also, because we usually call this before we redo dataflow, any new lclVars
115 // introduced after the last dataflow analysis will not yet be marked lvTracked,
116 // so we don't use that.
118 bool Lowering::IsContainableMemoryOp(GenTree* node)
120 #ifdef _TARGET_XARCH_
121 if (node->isMemoryOp())
127 if (!m_lsra->enregisterLocalVars)
131 LclVarDsc* varDsc = &comp->lvaTable[node->AsLclVar()->gtLclNum];
132 return varDsc->lvDoNotEnregister;
134 #endif // _TARGET_XARCH_
138 //------------------------------------------------------------------------
140 // This is the main entry point for Lowering.
141 GenTree* Lowering::LowerNode(GenTree* node)
143 assert(node != nullptr);
144 switch (node->gtOper)
147 TryCreateAddrMode(LIR::Use(BlockRange(), &node->gtOp.gtOp1, node), true);
148 ContainCheckIndir(node->AsIndir());
152 TryCreateAddrMode(LIR::Use(BlockRange(), &node->gtOp.gtOp1, node), true);
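// Stores that need a GC write barrier are not lowered further here; only plain store-indirects
// go through LowerStoreIndir.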
153 if (!comp->codeGen->gcInfo.gcIsWriteBarrierAsgNode(node))
155 LowerStoreIndir(node->AsIndir());
161 GenTree* afterTransform = LowerAdd(node);
162 if (afterTransform != nullptr)
164 return afterTransform;
169 #if !defined(_TARGET_64BIT_)
179 ContainCheckBinary(node->AsOp());
182 #ifdef _TARGET_XARCH_
184 // Codegen of this tree node sets ZF and SF flags.
185 if (!varTypeIsFloating(node))
187 node->gtFlags |= GTF_ZSF_SET;
190 #endif // _TARGET_XARCH_
194 #if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
197 ContainCheckMul(node->AsOp());
202 if (!LowerUnsignedDivOrMod(node->AsOp()))
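// The node was not replaced by a cheaper sequence (e.g. an unsigned divide by a constant power
// of two becomes a shift), so just check whether its operands can be contained.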
204 ContainCheckDivOrMod(node->AsOp());
210 return LowerSignedDivOrMod(node);
213 return LowerSwitch(node);
232 ContainCheckJTrue(node->AsOp());
236 LowerJmpMethod(node);
244 ContainCheckReturnTrap(node->AsOp());
251 #ifdef _TARGET_XARCH_
252 case GT_ARR_BOUNDS_CHECK:
255 #endif // FEATURE_SIMD
256 ContainCheckBoundsChk(node->AsBoundsChk());
258 #endif // _TARGET_XARCH_
260 return LowerArrElem(node);
263 ContainCheckArrOffset(node->AsArrOffs());
271 #ifndef _TARGET_64BIT_
274 ContainCheckShiftRotate(node->AsOp());
276 #endif // !_TARGET_64BIT_
281 #ifdef _TARGET_XARCH_
282 LowerShift(node->AsOp());
284 ContainCheckShiftRotate(node->AsOp());
290 case GT_STORE_DYN_BLK:
292 GenTreeBlk* blkNode = node->AsBlk();
293 TryCreateAddrMode(LIR::Use(BlockRange(), &blkNode->Addr(), blkNode), false);
294 LowerBlockStore(blkNode);
299 ContainCheckLclHeap(node->AsOp());
302 #ifdef _TARGET_XARCH_
304 ContainCheckIntrinsic(node->AsOp());
306 #endif // _TARGET_XARCH_
310 LowerSIMD(node->AsSIMD());
315 WidenSIMD12IfNecessary(node->AsLclVarCommon());
318 case GT_STORE_LCL_VAR:
319 #if defined(_TARGET_AMD64_) && defined(FEATURE_SIMD)
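// On AMD64 a TYP_SIMD8 value may be produced or consumed under a different 8-byte type
// (see the TYP_SIMD8 argument handling in LowerArg). If the store and its source disagree
// on whether the value is TYP_SIMD8, make the reinterpretation explicit with a GT_BITCAST.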
321 GenTreeLclVarCommon* const store = node->AsLclVarCommon();
322 if ((store->TypeGet() == TYP_SIMD8) != (store->gtOp1->TypeGet() == TYP_SIMD8))
324 GenTreeUnOp* bitcast =
325 new (comp, GT_BITCAST) GenTreeOp(GT_BITCAST, store->TypeGet(), store->gtOp1, nullptr);
326 store->gtOp1 = bitcast;
327 BlockRange().InsertBefore(store, bitcast);
330 #endif // _TARGET_AMD64_
331 WidenSIMD12IfNecessary(node->AsLclVarCommon());
334 case GT_STORE_LCL_FLD:
335 // TODO-1stClassStructs: Once we remove the requirement that all struct stores
336 // are block stores (GT_STORE_BLK or GT_STORE_OBJ), here is where we would put the local
337 // store under a block store if codegen will require it.
338 if ((node->TypeGet() == TYP_STRUCT) && (node->gtGetOp1()->OperGet() != GT_PHI))
340 #if FEATURE_MULTIREG_RET
341 GenTree* src = node->gtGetOp1();
342 assert((src->OperGet() == GT_CALL) && src->AsCall()->HasMultiRegRetVal());
343 #else // !FEATURE_MULTIREG_RET
344 assert(!"Unexpected struct local store in Lowering");
345 #endif // !FEATURE_MULTIREG_RET
347 LowerStoreLoc(node->AsLclVarCommon());
351 CheckImmedAndMakeContained(node, node->gtOp.gtOp2);
361 /** -- Switch Lowering --
362 * The main idea of switch lowering is to make the register requirements of this node transparent
363 * to LSRA downstream. Given that the switch instruction is inherently a control statement which in the JIT
364 * is represented as a simple tree node, at the time we actually generate code for it we end up
365 * generating instructions that actually modify the flow of execution, which imposes complicated
366 * register requirements and lifetimes.
368 * So, for the purpose of LSRA, we want to have a more detailed specification of what a switch node actually
369 * means and, more importantly, which registers we need and when, for each instruction we want to issue,
370 * so that they can be allocated correctly downstream.
372 * For this purpose, this procedure performs switch lowering in two different ways:
374 * a) Represent the switch statement as a zero-indexed jump table construct. This means that for every destination
375 * of the switch, we will store this destination in an array of addresses and the code generator will issue
376 * a data section where this array will live, and will emit code that, based on the switch index, will indirect and
377 * jump to the destination specified in the jump table.
379 * For this transformation we introduce a new GT node called GT_SWITCH_TABLE that is a specialization of the switch
380 * node for jump table based switches.
381 * The overall structure of a GT_SWITCH_TABLE is:
384 * |_________ localVar (a temporary local that holds the switch index)
385 * |_________ jumpTable (this is a special node that holds the address of the jump table array)
387 * Now, the way we morph a GT_SWITCH node into this lowered switch table node form is the following:
389 * Input: GT_SWITCH (inside a basic block whose Branch Type is BBJ_SWITCH)
390 * |_____ expr (an arbitrarily complex GT_NODE that represents the switch index)
392 * This gets transformed into the following statements inside a BBJ_COND basic block (the target would be
393 * the default case of the switch if the condition evaluates to true).
395 * ----- original block, transformed
397 * |_____ tempLocal (a new temporary local variable used to store the switch index)
398 * |_____ expr (the index expression)
403 * |___ Int_Constant (This constant is the index of the default case
404 * that happens to be the highest index in the jump table).
405 * |___ tempLocal (The local variable where we stored the index expression).
407 * ----- new basic block
410 * |_____ jumpTable (a new jump table node that now LSRA can allocate registers for explicitly
411 * and LinearCodeGen will be responsible for generating downstream).
413 * This way there are no implicit temporaries.
415 * b) For small-sized switches, we will actually morph them into a series of conditionals of the form
416 * if (case falls into the default){ goto jumpTable[size]; // last entry in the jump table is the default case }
417 * (For the default case conditional, we'll be constructing the exact same code as the jump table case one).
418 * else if (case == firstCase){ goto jumpTable[1]; }
419 * else if (case == secondCase) { goto jumpTable[2]; } and so on.
421 * This transformation is of course made in JIT-IR, not downstream at the CodeGen level, so this way we no longer
422 * require internal temporaries to maintain the index we're evaluating; in addition, we reuse existing code from
423 * LinearCodeGen to implement this instead of implementing all the control flow constructs using InstrDscs and
424 * InstrGroups downstream.
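*
* As a rough sketch of form (b), a switch with three non-default cases on index 'i' lowers to
* something like this (the unsigned guard against the default case is shared with form (a)):
*
*   if ((unsigned)i > 2) goto defaultTarget;
*   if (i == 0) goto caseTarget0;
*   if (i == 1) goto caseTarget1;
*   goto caseTarget2;  // the last case needs no compare
*
* Case targets that immediately follow the switch simply fall through and get no branch at all.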
427 GenTree* Lowering::LowerSwitch(GenTree* node)
431 BasicBlock** jumpTab;
433 assert(node->gtOper == GT_SWITCH);
435 // The first step is to build the default case conditional construct that is
436 // shared between both kinds of expansion of the switch node.
438 // To avoid confusion, we'll alias m_block to originalSwitchBB,
439 // the block containing the switch node we're morphing.
440 BasicBlock* originalSwitchBB = m_block;
441 LIR::Range& switchBBRange = LIR::AsRange(originalSwitchBB);
443 // jumpCnt is the number of elements in the jump table array.
444 // jumpTab is the actual pointer to the jump table array.
445 // targetCnt is the number of unique targets in the jump table array.
446 jumpCnt = originalSwitchBB->bbJumpSwt->bbsCount;
447 jumpTab = originalSwitchBB->bbJumpSwt->bbsDstTab;
448 targetCnt = originalSwitchBB->NumSucc(comp);
450 // GT_SWITCH must be a top-level node with no use.
454 assert(!switchBBRange.TryGetUse(node, &use));
458 JITDUMP("Lowering switch BB%02u, %d cases\n", originalSwitchBB->bbNum, jumpCnt);
460 // Handle a degenerate case: if the switch has only a default case, just convert it
461 // to an unconditional branch. This should only happen in minopts or with debuggable code.
465 JITDUMP("Lowering switch BB%02u: single target; converting to BBJ_ALWAYS\n", originalSwitchBB->bbNum);
466 noway_assert(comp->opts.MinOpts() || comp->opts.compDbgCode);
467 if (originalSwitchBB->bbNext == jumpTab[0])
469 originalSwitchBB->bbJumpKind = BBJ_NONE;
470 originalSwitchBB->bbJumpDest = nullptr;
474 originalSwitchBB->bbJumpKind = BBJ_ALWAYS;
475 originalSwitchBB->bbJumpDest = jumpTab[0];
477 // Remove extra predecessor links if there was more than one case.
478 for (unsigned i = 1; i < jumpCnt; ++i)
480 (void)comp->fgRemoveRefPred(jumpTab[i], originalSwitchBB);
483 // We have to get rid of the GT_SWITCH node but a child might have side effects so just assign
484 // the result of the child subtree to a temp.
485 GenTree* rhs = node->gtOp.gtOp1;
487 unsigned lclNum = comp->lvaGrabTemp(true DEBUGARG("Lowering is creating a new local variable"));
488 comp->lvaSortAgain = true;
489 comp->lvaTable[lclNum].lvType = rhs->TypeGet();
490 comp->lvaTable[lclNum].lvRefCnt = 1;
492 GenTreeLclVar* store =
493 new (comp, GT_STORE_LCL_VAR) GenTreeLclVar(GT_STORE_LCL_VAR, rhs->TypeGet(), lclNum, BAD_IL_OFFSET);
495 store->gtFlags = (rhs->gtFlags & GTF_COMMON_MASK);
496 store->gtFlags |= GTF_VAR_DEF;
498 switchBBRange.InsertAfter(node, store);
499 switchBBRange.Remove(node);
504 noway_assert(jumpCnt >= 2);
506 // Spill the argument to the switch node into a local so that it can be used later.
507 unsigned blockWeight = originalSwitchBB->getBBWeight(comp);
509 LIR::Use use(switchBBRange, &(node->gtOp.gtOp1), node);
510 ReplaceWithLclVar(use);
512 // GT_SWITCH(indexExpression) is now two statements:
513 // 1. a statement containing 'asg' (for temp = indexExpression)
514 // 2. and a statement with GT_SWITCH(temp)
516 assert(node->gtOper == GT_SWITCH);
517 GenTreePtr temp = node->gtOp.gtOp1;
518 assert(temp->gtOper == GT_LCL_VAR);
519 unsigned tempLclNum = temp->gtLclVarCommon.gtLclNum;
520 LclVarDsc* tempVarDsc = comp->lvaTable + tempLclNum;
521 var_types tempLclType = temp->TypeGet();
523 BasicBlock* defaultBB = jumpTab[jumpCnt - 1];
524 BasicBlock* followingBB = originalSwitchBB->bbNext;
526 /* Is the number of cases right for a test and jump switch? */
527 const bool fFirstCaseFollows = (followingBB == jumpTab[0]);
528 const bool fDefaultFollows = (followingBB == defaultBB);
530 unsigned minSwitchTabJumpCnt = 2; // table is better than just 2 cmp/jcc
532 // This means really just a single cmp/jcc (aka a simple if/else)
533 if (fFirstCaseFollows || fDefaultFollows)
535 minSwitchTabJumpCnt++;
538 #if defined(_TARGET_ARM_)
539 // On ARM for small switch tables we will
540 // generate a sequence of compare and branch instructions
541 // because the code to load the base of the switch
542 // table is huge and hideous due to the relocation... :(
543 minSwitchTabJumpCnt += 2;
544 #endif // _TARGET_ARM_
546 // Once we have the temporary variable, we construct the conditional branch for
547 // the default case. As stated above, this conditional is being shared between
548 // both GT_SWITCH lowering code paths.
549 // This condition is of the form: if (temp > jumpTableLength - 2){ goto jumpTable[jumpTableLength - 1]; }
550 GenTreePtr gtDefaultCaseCond = comp->gtNewOperNode(GT_GT, TYP_INT, comp->gtNewLclvNode(tempLclNum, tempLclType),
551 comp->gtNewIconNode(jumpCnt - 2, genActualType(tempLclType)));
553 // Make sure we perform an unsigned comparison, just in case the switch index in 'temp'
554 // is less than zero (that would also hit the default case).
555 gtDefaultCaseCond->gtFlags |= GTF_UNSIGNED;
557 /* Increment the lvRefCnt and lvRefCntWtd for temp */
558 tempVarDsc->incRefCnts(blockWeight, comp);
560 GenTreePtr gtDefaultCaseJump = comp->gtNewOperNode(GT_JTRUE, TYP_VOID, gtDefaultCaseCond);
561 gtDefaultCaseJump->gtFlags = node->gtFlags;
563 LIR::Range condRange = LIR::SeqTree(comp, gtDefaultCaseJump);
564 switchBBRange.InsertAtEnd(std::move(condRange));
566 BasicBlock* afterDefaultCondBlock = comp->fgSplitBlockAfterNode(originalSwitchBB, condRange.LastNode());
568 // afterDefaultCondBlock is now the switch, and all the switch targets have it as a predecessor.
569 // originalSwitchBB is now a BBJ_NONE, and there is a predecessor edge in afterDefaultCondBlock
570 // representing the fall-through flow from originalSwitchBB.
571 assert(originalSwitchBB->bbJumpKind == BBJ_NONE);
572 assert(originalSwitchBB->bbNext == afterDefaultCondBlock);
573 assert(afterDefaultCondBlock->bbJumpKind == BBJ_SWITCH);
574 assert(afterDefaultCondBlock->bbJumpSwt->bbsHasDefault);
575 assert(afterDefaultCondBlock->isEmpty()); // Nothing here yet.
577 // The GT_SWITCH code is still in originalSwitchBB (it will be removed later).
579 // Turn originalSwitchBB into a BBJ_COND.
580 originalSwitchBB->bbJumpKind = BBJ_COND;
581 originalSwitchBB->bbJumpDest = jumpTab[jumpCnt - 1];
583 // Fix the pred for the default case: the default block target still has originalSwitchBB
584 // as a predecessor, but fgSplitBlockAfterNode() moved all predecessors to point
585 // to afterDefaultCondBlock.
586 flowList* oldEdge = comp->fgRemoveRefPred(jumpTab[jumpCnt - 1], afterDefaultCondBlock);
587 comp->fgAddRefPred(jumpTab[jumpCnt - 1], originalSwitchBB, oldEdge);
589 // If we originally had 2 unique successors, check to see whether there is a unique
590 // non-default case, in which case we can eliminate the switch altogether.
591 // Note that the single unique successor case is handled above.
592 BasicBlock* uniqueSucc = nullptr;
595 uniqueSucc = jumpTab[0];
596 noway_assert(jumpCnt >= 2);
597 for (unsigned i = 1; i < jumpCnt - 1; i++)
599 if (jumpTab[i] != uniqueSucc)
601 uniqueSucc = nullptr;
606 if (uniqueSucc != nullptr)
608 // If the unique successor immediately follows this block, we have nothing to do -
609 // it will simply fall-through after we remove the switch, below.
610 // Otherwise, make this a BBJ_ALWAYS.
611 // Now, fixup the predecessor links to uniqueSucc. In the original jumpTab:
612 // jumpTab[jumpCnt - 1] was the default target, which we handled above,
613 // jumpTab[0] is the first target, and we'll leave that predecessor link.
614 // Remove any additional predecessor links to uniqueSucc.
615 for (unsigned i = 1; i < jumpCnt - 1; ++i)
617 assert(jumpTab[i] == uniqueSucc);
618 (void)comp->fgRemoveRefPred(uniqueSucc, afterDefaultCondBlock);
620 if (afterDefaultCondBlock->bbNext == uniqueSucc)
622 afterDefaultCondBlock->bbJumpKind = BBJ_NONE;
623 afterDefaultCondBlock->bbJumpDest = nullptr;
627 afterDefaultCondBlock->bbJumpKind = BBJ_ALWAYS;
628 afterDefaultCondBlock->bbJumpDest = uniqueSucc;
631 // If the number of possible destinations is small enough, we proceed to expand the switch
632 // into a series of conditional branches, otherwise we follow the jump table based switch transformation.
634 else if ((jumpCnt < minSwitchTabJumpCnt) || comp->compStressCompile(Compiler::STRESS_SWITCH_CMP_BR_EXPANSION, 50))
636 // Lower the switch into a series of compare and branch IR trees.
638 // In this case we will morph the node in the following way:
639 // 1. Generate a JTRUE statement to evaluate the default case. (This happens above.)
640 // 2. Start splitting the switch basic block into subsequent basic blocks, each of which will contain
641 // a statement that is responsible for performing a comparison of the table index and conditional
644 JITDUMP("Lowering switch BB%02u: using compare/branch expansion\n", originalSwitchBB->bbNum);
646 // We'll use 'afterDefaultCondBlock' for the first conditional. After that, we'll add new
647 // blocks. If we end up not needing it at all (say, if all the non-default cases just fall through),
649 bool fUsedAfterDefaultCondBlock = false;
650 BasicBlock* currentBlock = afterDefaultCondBlock;
651 LIR::Range* currentBBRange = &LIR::AsRange(currentBlock);
653 // Walk entries 0 through jumpCnt - 2 (the non-default cases). If a case target follows, ignore it and let it fall through.
654 // If no case target follows, the last one doesn't need to be a compare/branch: it can be an
655 // unconditional branch.
656 bool fAnyTargetFollows = false;
657 for (unsigned i = 0; i < jumpCnt - 1; ++i)
659 assert(currentBlock != nullptr);
661 // Remove the switch from the predecessor list of this case target's block.
662 // We'll add the proper new predecessor edge later.
663 flowList* oldEdge = comp->fgRemoveRefPred(jumpTab[i], afterDefaultCondBlock);
665 if (jumpTab[i] == followingBB)
667 // This case label follows the switch; let it fall through.
668 fAnyTargetFollows = true;
672 // We need a block in which to put the new compare and/or branch.
673 // If we haven't used the afterDefaultCondBlock yet, then use that.
674 if (fUsedAfterDefaultCondBlock)
676 BasicBlock* newBlock = comp->fgNewBBafter(BBJ_NONE, currentBlock, true);
677 comp->fgAddRefPred(newBlock, currentBlock); // The fall-through predecessor.
678 currentBlock = newBlock;
679 currentBBRange = &LIR::AsRange(currentBlock);
683 assert(currentBlock == afterDefaultCondBlock);
684 fUsedAfterDefaultCondBlock = true;
687 // We're going to have a branch, either a conditional or unconditional,
688 // to the target. Set the target.
689 currentBlock->bbJumpDest = jumpTab[i];
691 // Wire up the predecessor list for the "branch" case.
692 comp->fgAddRefPred(jumpTab[i], currentBlock, oldEdge);
694 if (!fAnyTargetFollows && (i == jumpCnt - 2))
696 // We're processing the last one, and there is no fall through from any case
697 // to the following block, so we can use an unconditional branch to the final
698 // case: there is no need to compare against the case index, since it's
699 // guaranteed to be taken (since the default case was handled first, above).
701 currentBlock->bbJumpKind = BBJ_ALWAYS;
705 // Otherwise, it's a conditional branch. Set the branch kind, then add the
706 // condition statement.
707 currentBlock->bbJumpKind = BBJ_COND;
709 // Now, build the conditional statement for the current case that is
714 // |____ (switchIndex) (The temp variable)
715 // |____ (ICon) (The actual case constant)
716 GenTreePtr gtCaseCond =
717 comp->gtNewOperNode(GT_EQ, TYP_INT, comp->gtNewLclvNode(tempLclNum, tempLclType),
718 comp->gtNewIconNode(i, TYP_INT));
719 /* Increment the lvRefCnt and lvRefCntWtd for temp */
720 tempVarDsc->incRefCnts(blockWeight, comp);
722 GenTreePtr gtCaseBranch = comp->gtNewOperNode(GT_JTRUE, TYP_VOID, gtCaseCond);
723 LIR::Range caseRange = LIR::SeqTree(comp, gtCaseBranch);
724 currentBBRange->InsertAtEnd(std::move(caseRange));
728 if (fAnyTargetFollows)
730 // There is a fall-through to the following block. In the loop
731 // above, we deleted all the predecessor edges from the switch.
732 // In this case, we need to add one back.
733 comp->fgAddRefPred(currentBlock->bbNext, currentBlock);
736 if (!fUsedAfterDefaultCondBlock)
738 // All the cases were fall-through! We don't need this block.
739 // Convert it from BBJ_SWITCH to BBJ_NONE and unset the BBF_DONT_REMOVE flag
740 // so fgRemoveBlock() doesn't complain.
741 JITDUMP("Lowering switch BB%02u: all switch cases were fall-through\n", originalSwitchBB->bbNum);
742 assert(currentBlock == afterDefaultCondBlock);
743 assert(currentBlock->bbJumpKind == BBJ_SWITCH);
744 currentBlock->bbJumpKind = BBJ_NONE;
745 currentBlock->bbFlags &= ~BBF_DONT_REMOVE;
746 comp->fgRemoveBlock(currentBlock, /* unreachable */ false); // It's an empty block.
751 // Lower the switch into an indirect branch using a jump table:
753 // 1. Create the constant for the default case
754 // 2. Generate a GT_GE condition to compare to the default case
755 // 3. Generate a GT_JTRUE to jump.
756 // 4. Load the jump table address into a local (presumably the just
757 // created constant for GT_SWITCH).
758 // 5. Create a new node for the lowered switch, this will both generate
759 // the branch table and also will be responsible for the indirect
762 JITDUMP("Lowering switch BB%02u: using jump table expansion\n", originalSwitchBB->bbNum);
764 GenTree* switchValue = comp->gtNewLclvNode(tempLclNum, tempLclType);
765 #ifdef _TARGET_64BIT_
766 if (tempLclType != TYP_I_IMPL)
768 // Note that the switch value is unsigned so the cast should be unsigned as well.
769 switchValue = comp->gtNewCastNode(TYP_I_IMPL, switchValue, TYP_U_IMPL);
772 GenTreePtr gtTableSwitch =
773 comp->gtNewOperNode(GT_SWITCH_TABLE, TYP_VOID, switchValue, comp->gtNewJmpTableNode());
774 /* Increment the lvRefCnt and lvRefCntWtd for temp */
775 tempVarDsc->incRefCnts(blockWeight, comp);
777 // this block no longer branches to the default block
778 afterDefaultCondBlock->bbJumpSwt->removeDefault();
779 comp->fgInvalidateSwitchDescMapEntry(afterDefaultCondBlock);
781 LIR::Range& afterDefaultCondBBRange = LIR::AsRange(afterDefaultCondBlock);
782 afterDefaultCondBBRange.InsertAtEnd(LIR::SeqTree(comp, gtTableSwitch));
785 GenTree* next = node->gtNext;
787 // Get rid of the GT_SWITCH(temp).
788 switchBBRange.Remove(node->gtOp.gtOp1);
789 switchBBRange.Remove(node);
794 // NOTE: this method deliberately does not update the call arg table. It must only
795 // be used by NewPutArg and LowerArg; these functions are responsible for updating
796 // the call arg table as necessary.
797 void Lowering::ReplaceArgWithPutArgOrCopy(GenTree** argSlot, GenTree* putArgOrCopy)
799 assert(argSlot != nullptr);
800 assert(*argSlot != nullptr);
801 assert(putArgOrCopy->OperIsPutArg() || putArgOrCopy->OperIs(GT_COPY));
803 GenTree* arg = *argSlot;
805 // Replace the argument with the putarg/copy
806 *argSlot = putArgOrCopy;
807 putArgOrCopy->gtOp.gtOp1 = arg;
809 // Insert the putarg/copy into the block
810 BlockRange().InsertAfter(arg, putArgOrCopy);
813 //------------------------------------------------------------------------
814 // NewPutArg: rewrites the tree to put an arg in a register or on the stack.
817 // call - the call whose arg is being rewritten.
818 // arg - the arg being rewritten.
819 // info - the fgArgTabEntry information for the argument.
820 // type - the type of the argument.
823 // The new tree that was created to put the arg in the right place
824 // or the incoming arg if the arg tree was not rewritten.
827 // call, arg, and info must be non-null.
830 // For System V systems with native struct passing (i.e. FEATURE_UNIX_AMD64_STRUCT_PASSING defined)
831 // this method allocates a single GT_PUTARG_REG for one-eightbyte structs and a GT_FIELD_LIST of two GT_PUTARG_REGs
832 // for two-eightbyte structs.
834 // For stack-passed structs the method generates a GT_PUTARG_STK tree. For System V systems with native struct passing
835 // (i.e. FEATURE_UNIX_AMD64_STRUCT_PASSING defined) this method also sets the GC pointer count and the pointer
836 // layout object, so the codegen of the GT_PUTARG_STK can use this to optimize copying to the stack by value
837 // (using block copy primitives for non-GC pointers and a single TARGET_POINTER_SIZE copy with GC info recorded).
839 GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryPtr info, var_types type)
841 assert(call != nullptr);
842 assert(arg != nullptr);
843 assert(info != nullptr);
845 GenTreePtr putArg = nullptr;
846 bool updateArgTable = true;
848 bool isOnStack = true;
849 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
850 if (varTypeIsStruct(type))
852 isOnStack = !info->structDesc.passedInRegisters;
856 isOnStack = info->regNum == REG_STK;
858 #else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
859 isOnStack = info->regNum == REG_STK;
860 #endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
862 #ifdef _TARGET_ARMARCH_
863 // Mark contained when we pass a struct
864 // GT_FIELD_LIST is always marked contained when it is generated
865 if (varTypeIsStruct(type))
868 if ((arg->OperGet() == GT_OBJ) && (arg->AsObj()->Addr()->OperGet() == GT_LCL_VAR_ADDR))
870 MakeSrcContained(arg, arg->AsObj()->Addr());
876 // A struct can be split between register(s) and the stack on ARM
879 assert(arg->OperGet() == GT_OBJ || arg->OperGet() == GT_FIELD_LIST);
880 // TODO: Need to check correctness for FastTailCall
881 if (call->IsFastTailCall())
883 NYI_ARM("lower: struct argument by fast tail call");
886 putArg = new (comp, GT_PUTARG_SPLIT)
887 GenTreePutArgSplit(arg, info->slotNum PUT_STRUCT_ARG_STK_ONLY_ARG(info->numSlots), info->numRegs,
888 call->IsFastTailCall(), call);
889 putArg->gtRegNum = info->regNum;
891 // If the struct argument is morphed to GT_FIELD_LIST node(s),
892 // the GC info is known from the type of each GT_FIELD_LIST node,
893 // so we skip setting GC pointer info.
895 GenTreePutArgSplit* argSplit = putArg->AsPutArgSplit();
896 if (arg->OperGet() == GT_OBJ)
898 BYTE* gcLayout = nullptr;
899 unsigned numRefs = 0;
900 GenTreeObj* argObj = arg->AsObj();
902 if (argObj->IsGCInfoInitialized())
904 gcLayout = argObj->gtGcPtrs;
905 numRefs = argObj->GetGcPtrCount();
909 // Set GC Pointer info
910 gcLayout = new (comp, CMK_Codegen) BYTE[info->numSlots + info->numRegs];
911 numRefs = comp->info.compCompHnd->getClassGClayout(arg->gtObj.gtClass, gcLayout);
912 argSplit->setGcPointers(numRefs, gcLayout);
915 // Set type of registers
916 for (unsigned index = 0; index < info->numRegs; index++)
918 var_types regType = comp->getJitGCType(gcLayout[index]);
919 argSplit->m_regType[index] = regType;
924 GenTreeFieldList* fieldListPtr = arg->AsFieldList();
925 for (unsigned index = 0; index < info->numRegs; fieldListPtr = fieldListPtr->Rest(), index++)
927 var_types regType = fieldListPtr->gtGetOp1()->TypeGet();
928 argSplit->m_regType[index] = regType;
930 // Clear the register assignments on the fieldList nodes, as these are contained.
931 fieldListPtr->gtRegNum = REG_NA;
936 #endif // _TARGET_ARM_
940 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
943 // The following code makes sure a register-passed struct arg is moved to
944 // the register before the call is made.
945 // There are two cases (comments added in the code below):
946 // 1. The struct is of size one eightbyte:
947 // In this case a new GT_PUTARG_REG tree is created
948 // with the original argument as its op1.
949 // 2. The struct is contained in 2 eightbytes:
950 // In this case the arg comes as a GT_FIELD_LIST of two GT_LCL_FLDs
951 // - the two eightbytes of the struct.
952 // The code creates a GT_PUTARG_REG node for each GT_LCL_FLD in the GT_FIELD_LIST
953 // and splices it into the list with the corresponding original GT_LCL_FLD tree as op1.
955 assert(info->structDesc.eightByteCount != 0);
957 if (info->structDesc.eightByteCount == 1)
960 // Case 1 above: Create a GT_PUTARG_REG node with op1 of the original tree.
962 // Here is the IR for this operation:
964 // N001(3, 2)[000017] ------ - N---- / --* &lclVar byref V00 loc0
965 // N003(6, 5)[000052] * --XG------ - / --* indir int
966 // N004(3, 2)[000046] ------ - N---- + --* &lclVar byref V02 tmp0
967 // (13, 11)[000070] -- - XG-- - R-- - arg0 in out + 00 / --* storeIndir int
968 // N009(3, 4)[000054] ------ - N----arg0 in rdi + --* lclFld int V02 tmp0[+0](last use)
969 // N011(33, 21)[000018] --CXG------ - *call void Test.Foo.test1
972 // lowering arg : (13, 11)[000070] -- - XG-- - R-- - *storeIndir int
975 // lowering arg : N009(3, 4)[000054] ------ - N---- * lclFld int V02 tmp0[+0](last use)
976 // new node is : (3, 4)[000071] ------------ * putarg_reg int RV
979 // N001(3, 2)[000017] ------ - N---- / --* &lclVar byref V00 loc0
980 // N003(6, 5)[000052] * --XG------ - / --* indir int
981 // N004(3, 2)[000046] ------ - N---- + --* &lclVar byref V02 tmp0
982 // (13, 11)[000070] -- - XG-- - R-- - arg0 in out + 00 / --* storeIndir int
983 // N009(3, 4)[000054] ------ - N---- | / --* lclFld int V02 tmp0[+0](last use)
984 // (3, 4)[000071] ------------arg0 in rdi + --* putarg_reg int RV
985 // N011(33, 21)[000018] --CXG------ - *call void Test.Foo.test1
989 putArg = comp->gtNewPutArgReg(type, arg, info->regNum);
991 else if (info->structDesc.eightByteCount == 2)
994 // Case 2 above: Convert the LCL_FLDs to PUTARG_REG
997 // N001(3, 2) [000025] ------ - N----Source / --* &lclVar byref V01 loc1
998 // N003(3, 2) [000056] ------ - N----Destination + --* &lclVar byref V03 tmp1
999 // N006(1, 1) [000058] ------------ + --* const int 16
1000 // N007(12, 12)[000059] - A--G---- - L - arg0 SETUP / --* copyBlk void
1001 // N009(3, 4) [000061] ------ - N----arg0 in rdi + --* lclFld long V03 tmp1[+0]
1002 // N010(3, 4) [000063] ------------arg0 in rsi + --* lclFld long V03 tmp1[+8](last use)
1003 // N014(40, 31)[000026] --CXG------ - *call void Test.Foo.test2
1006 // lowering arg : N007(12, 12)[000059] - A--G---- - L - *copyBlk void
1009 // lowering arg : N012(11, 13)[000065] ------------ * <list> struct
1012 // N001(3, 2)[000025] ------ - N----Source / --* &lclVar byref V01 loc1
1013 // N003(3, 2)[000056] ------ - N----Destination + --* &lclVar byref V03 tmp1
1014 // N006(1, 1)[000058] ------------ + --* const int 16
1015 // N007(12, 12)[000059] - A--G---- - L - arg0 SETUP / --* copyBlk void
1016 // N009(3, 4)[000061] ------ - N---- | / --* lclFld long V03 tmp1[+0]
1017 // (3, 4)[000072] ------------arg0 in rdi + --* putarg_reg long
1018 // N010(3, 4)[000063] ------------ | / --* lclFld long V03 tmp1[+8](last use)
1019 // (3, 4)[000073] ------------arg0 in rsi + --* putarg_reg long
1020 // N014(40, 31)[000026] --CXG------ - *call void Test.Foo.test2
1024 assert(arg->OperGet() == GT_FIELD_LIST);
1026 GenTreeFieldList* fieldListPtr = arg->AsFieldList();
1027 assert(fieldListPtr->IsFieldListHead());
1029 for (unsigned ctr = 0; fieldListPtr != nullptr; fieldListPtr = fieldListPtr->Rest(), ctr++)
1031 // Create a new GT_PUTARG_REG node with op1 the original GT_LCL_FLD.
1032 GenTreePtr newOper = comp->gtNewPutArgReg(
1033 comp->GetTypeFromClassificationAndSizes(info->structDesc.eightByteClassifications[ctr],
1034 info->structDesc.eightByteSizes[ctr]),
1035 fieldListPtr->gtOp.gtOp1, (ctr == 0) ? info->regNum : info->otherRegNum);
1037 // Splice in the new GT_PUTARG_REG node in the GT_FIELD_LIST
1038 ReplaceArgWithPutArgOrCopy(&fieldListPtr->gtOp.gtOp1, newOper);
1040 // Initialize all the gtRegNum's since the list won't be traversed in an LIR traversal.
1041 fieldListPtr->gtRegNum = REG_NA;
1044 // Just return arg. The GT_FIELD_LIST is not replaced.
1045 // Nothing more to do.
1050 assert(false && "Illegal count of eightbytes for the CLR type system"); // No more than 2 eightbytes
1055 #else // not defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
1056 #if FEATURE_MULTIREG_ARGS
1057 if ((info->numRegs > 1) && (arg->OperGet() == GT_FIELD_LIST))
1059 assert(arg->OperGet() == GT_FIELD_LIST);
1061 GenTreeFieldList* fieldListPtr = arg->AsFieldList();
1062 assert(fieldListPtr->IsFieldListHead());
1064 // There could be up to 2-4 PUTARG_REGs in the list (3 or 4 can only occur for HFAs)
1065 regNumber argReg = info->regNum;
1066 for (unsigned ctr = 0; fieldListPtr != nullptr; fieldListPtr = fieldListPtr->Rest(), ctr++)
1068 GenTreePtr curOp = fieldListPtr->gtOp.gtOp1;
1069 var_types curTyp = curOp->TypeGet();
1071 // Create a new GT_PUTARG_REG node with op1
1072 GenTreePtr newOper = comp->gtNewPutArgReg(curTyp, curOp, argReg);
1074 // Splice in the new GT_PUTARG_REG node in the GT_FIELD_LIST
1075 ReplaceArgWithPutArgOrCopy(&fieldListPtr->gtOp.gtOp1, newOper);
1077 // Update argReg for the next putarg_reg (if any)
1078 argReg = genRegArgNext(argReg);
1080 #if defined(_TARGET_ARM_)
1081 // A double register is modelled as an even-numbered single one
1082 if (fieldListPtr->Current()->TypeGet() == TYP_DOUBLE)
1084 argReg = genRegArgNext(argReg);
1086 #endif // _TARGET_ARM_
1087 // Initialize all the gtRegNum's since the list won't be traversed in an LIR traversal.
1088 fieldListPtr->gtRegNum = REG_NA;
1091 // Just return arg. The GT_FIELD_LIST is not replaced.
1092 // Nothing more to do.
1096 #endif // FEATURE_MULTIREG_ARGS
1097 #endif // not defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
1099 putArg = comp->gtNewPutArgReg(type, arg, info->regNum);
1104 // Mark this one as a tail call arg if it is a fast tail call.
1105 // This provides the info to put this argument in the incoming arg area slot
1106 // instead of in the outgoing arg area slot.
1108 PUT_STRUCT_ARG_STK_ONLY(assert(info->isStruct == varTypeIsStruct(type))); // Make sure state is correct
1110 putArg = new (comp, GT_PUTARG_STK)
1111 GenTreePutArgStk(GT_PUTARG_STK, type, arg, info->slotNum PUT_STRUCT_ARG_STK_ONLY_ARG(info->numSlots),
1112 call->IsFastTailCall(), call);
1114 #ifdef FEATURE_PUT_STRUCT_ARG_STK
1115 // If the ArgTabEntry indicates that this arg is a struct,
1116 // get and store the number of slots that are references.
1117 // This is later used in the codegen of the PUT_ARG_STK implementation
1118 // for structs to decide whether, and for how many slots, single eight-byte copies
1119 // should be done (only for reference slots), so that gcinfo is emitted.
1120 // For non-reference slots faster/smaller instructions are used -
1121 // pair copying using XMM registers or rep mov instructions.
1124 // We use GT_OBJ for non-SIMD struct arguments. However, for
1125 // SIMD arguments the GT_OBJ has already been transformed.
1126 if (arg->gtOper != GT_OBJ)
1128 assert(varTypeIsSIMD(arg));
1132 unsigned numRefs = 0;
1133 BYTE* gcLayout = new (comp, CMK_Codegen) BYTE[info->numSlots];
1134 assert(!varTypeIsSIMD(arg));
1135 numRefs = comp->info.compCompHnd->getClassGClayout(arg->gtObj.gtClass, gcLayout);
1136 putArg->AsPutArgStk()->setGcPointers(numRefs, gcLayout);
1139 // On x86 the VM lies about the type of a struct containing a pointer-sized
1140 // integer field by returning the type of its field as the type of the struct.
1141 // Such a struct can be passed in a register depending on its position in the
1142 // parameter list. The VM does this unwrapping only one level deep and therefore
1143 // a type like Struct Foo { Struct Bar { int f}} always needs to be
1144 // passed on the stack. Also, the VM doesn't lie about the type of such a struct
1145 // when it is a field of another struct. That is, the VM doesn't lie about
1146 // the type of Foo.Bar.
1148 // We now support the promotion of fields that are of type struct.
1149 // However we only support a limited case where the struct field has a
1150 // single field and that single field must be a scalar type. Say the Foo.Bar
1151 // field is getting passed as a parameter to a call. Since it is a TYP_STRUCT,
1152 // per the x86 ABI it should always be passed on the stack. Therefore the GenTree
1153 // node under a PUTARG_STK could be GT_OBJ(GT_LCL_VAR_ADDR(v1)), where
1154 // local v1 could be a promoted field standing for Foo.Bar. Note that
1155 // the type of v1 will be the type of the field Foo.Bar.f when Foo is
1156 // promoted. That is, v1 will be a scalar type. In this case we need to
1157 // pass v1 on the stack instead of in a register.
1159 // TODO-PERF: replace GT_OBJ(GT_LCL_VAR_ADDR(v1)) with v1 if v1 is
1160 // a scalar type and the width of GT_OBJ matches the type size of v1.
1161 // Note that this cannot be done until call node arguments are morphed
1162 // because we should not lose the fact that the type of the argument is
1163 // a struct, so that the arg gets correctly marked to be passed on the stack.
1164 GenTree* objOp1 = arg->gtGetOp1();
1165 if (objOp1->OperGet() == GT_LCL_VAR_ADDR)
1167 unsigned lclNum = objOp1->AsLclVarCommon()->GetLclNum();
1168 if (comp->lvaTable[lclNum].lvType != TYP_STRUCT)
1170 comp->lvaSetVarDoNotEnregister(lclNum DEBUGARG(Compiler::DNER_VMNeedsStackAddr));
1173 #endif // _TARGET_X86_
1176 #endif // FEATURE_PUT_STRUCT_ARG_STK
1180 JITDUMP("new node is : ");
1184 if (arg->gtFlags & GTF_LATE_ARG)
1186 putArg->gtFlags |= GTF_LATE_ARG;
1188 else if (updateArgTable)
1190 info->node = putArg;
1195 //------------------------------------------------------------------------
1196 // LowerArg: Lower one argument of a call. This entails splicing a "putarg" node between
1197 // the argument evaluation and the call. This is the point at which the source is
1198 // consumed and the value transitions from control of the register allocator to the calling convention.
1202 // call - The call node
1203 // ppArg - Pointer to the call argument pointer. We might replace the call argument by
1209 void Lowering::LowerArg(GenTreeCall* call, GenTreePtr* ppArg)
1211 GenTreePtr arg = *ppArg;
1213 JITDUMP("lowering arg : ");
1216 // No assignments should remain by Lowering.
1217 assert(!arg->OperIsAssignment());
1218 assert(!arg->OperIsPutArgStk());
1220 // Assignments/stores at this level are not really placing an argument.
1221 // They are setting up temporary locals that will later be placed into
1222 // outgoing regs or stack.
1223 if (arg->OperIsStore() || arg->IsArgPlaceHolderNode() || arg->IsNothingNode() || arg->OperIsCopyBlkOp())
1228 fgArgTabEntryPtr info = comp->gtArgEntryByNode(call, arg);
1229 assert(info->node == arg);
1230 bool isReg = (info->regNum != REG_STK);
1231 var_types type = arg->TypeGet();
1233 if (varTypeIsSmall(type))
1235 // Normalize 'type', it represents the item that we will be storing in the Outgoing Args
1239 #if defined(FEATURE_SIMD)
1240 #if defined(_TARGET_X86_)
1241 // Non-param TYP_SIMD12 local var nodes are massaged in Lower to TYP_SIMD16 to match their
1242 // allocated size (see lvSize()). However, when passing the variables as arguments, and
1243 // storing the variables to the outgoing argument area on the stack, we must use their
1244 // actual TYP_SIMD12 type, so exactly 12 bytes is allocated and written.
1245 if (type == TYP_SIMD16)
1247 if ((arg->OperGet() == GT_LCL_VAR) || (arg->OperGet() == GT_STORE_LCL_VAR))
1249 unsigned varNum = arg->AsLclVarCommon()->GetLclNum();
1250 LclVarDsc* varDsc = &comp->lvaTable[varNum];
1251 type = varDsc->lvType;
1253 else if (arg->OperGet() == GT_SIMD)
1255 assert((arg->AsSIMD()->gtSIMDSize == 16) || (arg->AsSIMD()->gtSIMDSize == 12));
1257 if (arg->AsSIMD()->gtSIMDSize == 12)
1263 #elif defined(_TARGET_AMD64_)
1264 // TYP_SIMD8 parameters that are passed as longs
1265 if (type == TYP_SIMD8 && genIsValidIntReg(info->regNum))
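// Reinterpret the TYP_SIMD8 value as a TYP_LONG via GT_BITCAST so that it can be placed
// in the integer argument register.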
1267 GenTreeUnOp* bitcast = new (comp, GT_BITCAST) GenTreeOp(GT_BITCAST, TYP_LONG, arg, nullptr);
1268 BlockRange().InsertAfter(arg, bitcast);
1270 info->node = *ppArg = arg = bitcast;
1273 #endif // defined(_TARGET_X86_)
1274 #endif // defined(FEATURE_SIMD)
1278 // If we hit this we are probably double-lowering.
1279 assert(!arg->OperIsPutArg());
1281 #if !defined(_TARGET_64BIT_)
1282 if (varTypeIsLong(type))
1286 noway_assert(arg->OperGet() == GT_LONG);
1287 assert(info->numRegs == 2);
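// A long passed in registers on a 32-bit target occupies two registers: wrap its lo/hi halves
// in a GT_FIELD_LIST and let NewPutArg set up the register placement for each half.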
1289 GenTreePtr argLo = arg->gtGetOp1();
1290 GenTreePtr argHi = arg->gtGetOp2();
1292 GenTreeFieldList* fieldList = new (comp, GT_FIELD_LIST) GenTreeFieldList(argLo, 0, TYP_INT, nullptr);
1293 (void)new (comp, GT_FIELD_LIST) GenTreeFieldList(argHi, 4, TYP_INT, fieldList);
1295 putArg = NewPutArg(call, fieldList, info, TYP_VOID);
1296 putArg->gtRegNum = info->regNum;
1298 BlockRange().InsertBefore(arg, putArg);
1299 BlockRange().Remove(arg);
1301 info->node = fieldList;
1305 // For longs, we will replace the GT_LONG with a GT_FIELD_LIST, and put that under a PUTARG_STK.
1306 // Although the hi argument needs to be pushed first, that will be handled by the general case,
1307 // in which the fields will be reversed.
1308 noway_assert(arg->OperGet() == GT_LONG);
1309 assert(info->numSlots == 2);
1310 GenTreePtr argLo = arg->gtGetOp1();
1311 GenTreePtr argHi = arg->gtGetOp2();
1312 GenTreeFieldList* fieldList = new (comp, GT_FIELD_LIST) GenTreeFieldList(argLo, 0, TYP_INT, nullptr);
1313 // Only the first fieldList node (GTF_FIELD_LIST_HEAD) is in the instruction sequence.
1314 (void)new (comp, GT_FIELD_LIST) GenTreeFieldList(argHi, 4, TYP_INT, fieldList);
1315 putArg = NewPutArg(call, fieldList, info, TYP_VOID);
1316 putArg->gtRegNum = info->regNum;
1318 // We can't call ReplaceArgWithPutArgOrCopy here because it presumes that we are keeping the original arg.
1319 BlockRange().InsertBefore(arg, fieldList, putArg);
1320 BlockRange().Remove(arg);
1325 #endif // !defined(_TARGET_64BIT_)
1328 #ifdef _TARGET_ARMARCH_
1329 // For a vararg call, or on armel, reg args should all be integer.
1330 // Insert a copy to move the float value to an integer register.
1331 if ((call->IsVarargs() || comp->opts.compUseSoftFP) && varTypeIsFloating(type))
1333 var_types intType = (type == TYP_DOUBLE) ? TYP_LONG : TYP_INT;
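// Wrap the floating-point value in a GT_COPY to the same-sized integer type so that it gets
// moved into the integer register(s) used for argument passing.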
1335 GenTreePtr intArg = new (comp, GT_COPY) GenTreeCopyOrReload(GT_COPY, intType, arg);
1337 if (comp->opts.compUseSoftFP)
1339 intArg->gtFlags |= GTF_VAR_DEATH;
1342 info->node = intArg;
1343 ReplaceArgWithPutArgOrCopy(ppArg, intArg);
1345 // Update arg/type with new ones.
1351 putArg = NewPutArg(call, arg, info, type);
1353 // In the case of a register-passable struct (in one or two registers),
1354 // NewPutArg returns a new node (GT_PUTARG_REG or a GT_FIELD_LIST with two GT_PUTARG_REGs).
1355 // If an extra node is returned, splice it into the right place in the tree.
1358 ReplaceArgWithPutArgOrCopy(ppArg, putArg);
1363 // do lowering steps for each arg of a call
1364 void Lowering::LowerArgsForCall(GenTreeCall* call)
1366 JITDUMP("objp:\n======\n");
1367 if (call->gtCallObjp)
1369 LowerArg(call, &call->gtCallObjp);
1372 GenTreeArgList* args = call->gtCallArgs;
1374 JITDUMP("\nargs:\n======\n");
1375 for (; args; args = args->Rest())
1377 LowerArg(call, &args->Current());
1380 JITDUMP("\nlate:\n======\n");
1381 for (args = call->gtCallLateArgs; args; args = args->Rest())
1383 LowerArg(call, &args->Current());
1387 // helper that creates a node representing a relocatable physical address computation
1388 // (optionally specifying the register to place it in)
1389 GenTree* Lowering::AddrGen(ssize_t addr, regNumber reg)
1391 // this should end up in codegen as : instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg, addr)
1392 GenTree* result = comp->gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR);
1394 result->gtRegNum = reg;
1399 // variant that takes a void*
1400 GenTree* Lowering::AddrGen(void* addr, regNumber reg)
1402 return AddrGen((ssize_t)addr, reg);
1405 // do lowering steps for a call
1407 // - adding the placement nodes (either stack or register variety) for arguments
1408 // - lowering the expression that calculates the target address
1409 // - adding nodes for other operations that occur after the call sequence starts and before
1410 // control transfer occurs (profiling and tail call helpers, pinvoke incantations)
1412 void Lowering::LowerCall(GenTree* node)
1414 GenTreeCall* call = node->AsCall();
1416 JITDUMP("lowering call (before):\n");
1417 DISPTREERANGE(BlockRange(), call);
1420 call->ClearOtherRegs();
1421 LowerArgsForCall(call);
1423 // note that everything generated from this point on runs AFTER the outgoing args are placed
1424 GenTree* result = nullptr;
1426 // for x86, this is where we record ESP for checking later to make sure stack is balanced
1428 // Check for Delegate.Invoke(). If so, we inline it. We get the
1429 // target-object and target-function from the delegate-object, and do
1430 // an indirect call.
1431 if (call->IsDelegateInvoke())
1433 result = LowerDelegateInvoke(call);
1437 // Virtual and interface calls
1438 switch (call->gtFlags & GTF_CALL_VIRT_KIND_MASK)
1440 case GTF_CALL_VIRT_STUB:
1441 result = LowerVirtualStubCall(call);
1444 case GTF_CALL_VIRT_VTABLE:
1445 // stub dispatching is off or this is not a virtual call (could be a tailcall)
1446 result = LowerVirtualVtableCall(call);
1449 case GTF_CALL_NONVIRT:
1450 if (call->IsUnmanaged())
1452 result = LowerNonvirtPinvokeCall(call);
1454 else if (call->gtCallType == CT_INDIRECT)
1456 result = LowerIndirectNonvirtCall(call);
1460 result = LowerDirectCall(call);
1465 noway_assert(!"strange call type");
1470 if (call->IsTailCallViaHelper())
1472 // Either controlExpr or gtCallAddr must contain the real call target.
1473 if (result == nullptr)
1475 assert(call->gtCallType == CT_INDIRECT);
1476 assert(call->gtCallAddr != nullptr);
1477 result = call->gtCallAddr;
1480 result = LowerTailCallViaHelper(call, result);
1482 else if (call->IsFastTailCall())
1484 LowerFastTailCall(call);
1487 if (result != nullptr)
1489 LIR::Range resultRange = LIR::SeqTree(comp, result);
1491 JITDUMP("results of lowering call:\n");
1492 DISPRANGE(resultRange);
1494 GenTree* insertionPoint = call;
1495 if (!call->IsTailCallViaHelper())
1497 // The controlExpr should go before the gtCallCookie and the gtCallAddr, if they exist
1499 // TODO-LIR: find out what's really required here, as this is currently a tree order dependency.
1501 if (call->gtCallType == CT_INDIRECT)
1503 bool isClosed = false;
1504 if (call->gtCallCookie != nullptr)
1507 GenTree* firstCallAddrNode = BlockRange().GetTreeRange(call->gtCallAddr, &isClosed).FirstNode();
1509 assert(call->gtCallCookie->Precedes(firstCallAddrNode));
1512 insertionPoint = BlockRange().GetTreeRange(call->gtCallCookie, &isClosed).FirstNode();
1515 else if (call->gtCallAddr != nullptr)
1517 insertionPoint = BlockRange().GetTreeRange(call->gtCallAddr, &isClosed).FirstNode();
1523 ContainCheckRange(resultRange);
1524 BlockRange().InsertBefore(insertionPoint, std::move(resultRange));
1526 call->gtControlExpr = result;
1529 if (comp->opts.IsJit64Compat())
1531 CheckVSQuirkStackPaddingNeeded(call);
1534 ContainCheckCallOperands(call);
1535 JITDUMP("lowering call (after):\n");
1536 DISPTREERANGE(BlockRange(), call);
1540 // Though the issue described below gets fixed in the IntelliTrace dll of VS2015 (a.k.a. Dev14),
1541 // we still need this quirk for desktop so that older versions of VS (e.g. VS2010/2012)
1542 // continue to work.
1543 // This quirk is excluded from other targets that have no back-compat burden.
1545 // Quirk for VS debug-launch scenario to work:
1546 // See if this is a PInvoke call with exactly one param that is the address of a struct local.
1547 // In such a case, indicate to the frame-layout logic to add 16 bytes of padding
1548 // between the save-reg area and locals. This is to protect against the buffer
1549 // overrun bug in microsoft.intellitrace.11.0.0.dll!ProfilerInterop.InitInterop().
1551 // A work-around to this bug is to disable IntelliTrace debugging
1552 // (VS->Tools->Options->IntelliTrace->Enable IntelliTrace - uncheck this option).
1553 // The reason why this works on Jit64 is that at the point of AV the call stack is
1555 // GetSystemInfo() Native call
1556 // IL_Stub generated for PInvoke declaration.
1557 // ProfilerInterface::InitInterop()
1558 // ProfilerInterface.Cctor()
1561 // The cctor body has just the call to InitInterop(). The VM asm worker is holding
1562 // something in rbx that is used immediately after the Cctor call. The Jit64-generated
1563 // InitInterop() method pushes the registers in the following order
1573 // Due to the buffer overrun, rbx doesn't get impacted, whereas the RyuJIT-jitted code of
1574 // the same method pushes regs in the following order
1582 // Therefore as a fix, we add padding between save-reg area and locals to
1583 // make this scenario work against JB.
1585 // Note: If this quirk gets broken due to other JIT optimizations, we should consider
1586 // a more tolerant fix. One such fix is to pad the struct.
1587 void Lowering::CheckVSQuirkStackPaddingNeeded(GenTreeCall* call)
1589 assert(comp->opts.IsJit64Compat());
1591 #ifdef _TARGET_AMD64_
1592 // Confine this to IL stub calls which aren't marked as unmanaged.
1593 if (call->IsPInvoke() && !call->IsUnmanaged())
1595 bool paddingNeeded = false;
1596 GenTreePtr firstPutArgReg = nullptr;
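// Padding is only needed when the first (and only) register argument is the address of a
// struct local; any other argument shape detected below leaves paddingNeeded == false.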
1597 for (GenTreeArgList* args = call->gtCallLateArgs; args; args = args->Rest())
1599 GenTreePtr tmp = args->Current();
1600 if (tmp->OperGet() == GT_PUTARG_REG)
1602 if (firstPutArgReg == nullptr)
1604 firstPutArgReg = tmp;
1605 GenTreePtr op1 = firstPutArgReg->gtOp.gtOp1;
1607 if (op1->OperGet() == GT_LCL_VAR_ADDR)
1609 unsigned lclNum = op1->AsLclVarCommon()->GetLclNum();
1610 // TODO-1stClassStructs: This is here to duplicate previous behavior,
1611 // but is not needed because the scenario being quirked did not involve
1612 // a SIMD or enregisterable struct.
1613 // if(comp->lvaTable[lclNum].TypeGet() == TYP_STRUCT)
1614 if (varTypeIsStruct(comp->lvaTable[lclNum].TypeGet()))
1616 // First arg is addr of a struct local.
1617 paddingNeeded = true;
1621 // Not a struct local.
1622 assert(paddingNeeded == false);
1628 // First arg is not a local var addr.
1629 assert(paddingNeeded == false);
1635 // Has more than one arg.
1636 paddingNeeded = false;
1644 comp->compVSQuirkStackPaddingNeeded = VSQUIRK_STACK_PAD;
1647 #endif // _TARGET_AMD64_
1650 // Inserts a profiler hook, GT_PROF_HOOK, for a tail call node.
1653 // We need to insert this after all nested calls, but before all the arguments to this call have been set up.
1654 // To do this, we look for the first GT_PUTARG_STK or GT_PUTARG_REG, and insert the hook immediately before
1655 // that. If there are no args, then it should be inserted before the call node.
1658 // * stmtExpr void (top level) (IL 0x000...0x010)
1659 // arg0 SETUP | /--* argPlace ref REG NA $c5
1660 // this in rcx | | /--* argPlace ref REG NA $c1
1661 // | | | /--* call ref System.Globalization.CultureInfo.get_InvariantCulture $c2
1662 // arg1 SETUP | | +--* st.lclVar ref V02 tmp1 REG NA $c2
1663 // | | | /--* lclVar ref V02 tmp1 u : 2 (last use) REG NA $c2
1664 // arg1 in rdx | | +--* putarg_reg ref REG NA
1665 // | | | /--* lclVar ref V00 arg0 u : 2 (last use) REG NA $80
1666 // this in rcx | | +--* putarg_reg ref REG NA
1667 // | | /--* call nullcheck ref System.String.ToLower $c5
1668 // | | { * stmtExpr void (embedded)(IL 0x000... ? ? ? )
1669 // | | { \--* prof_hook void REG NA
1670 // arg0 in rcx | +--* putarg_reg ref REG NA
1671 // control expr | +--* const(h) long 0x7ffe8e910e98 ftn REG NA
1672 // \--* call void System.Runtime.Remoting.Identity.RemoveAppNameOrAppGuidIfNecessary $VN.Void
1674 // In this case, the GT_PUTARG_REG src is a nested call. We need to put the instructions after that call
1675 // (as shown). We assume that of all the GT_PUTARG_*, only the first one can have a nested call.
1678 // Insert the profiler hook immediately before the call. The profiler hook will preserve
1679 // all argument registers (ECX, EDX), but nothing else.
1682 // callNode - tail call node
1683 // insertionPoint - if non-null, insert the profiler hook before this point.
1684 // If null, insert the profiler hook before args are setup
1685 // but after all arg side effects are computed.
1687 void Lowering::InsertProfTailCallHook(GenTreeCall* call, GenTree* insertionPoint)
1689 assert(call->IsTailCall());
1690 assert(comp->compIsProfilerHookNeeded());
1692 #if defined(_TARGET_X86_)
1694 if (insertionPoint == nullptr)
1696 insertionPoint = call;
1699 #else // !defined(_TARGET_X86_)
1701 if (insertionPoint == nullptr)
1703 GenTreePtr tmp = nullptr;
1704 for (GenTreeArgList* args = call->gtCallArgs; args; args = args->Rest())
1706 tmp = args->Current();
1707 assert(tmp->OperGet() != GT_PUTARG_REG); // We don't expect to see these in gtCallArgs
1708 if (tmp->OperGet() == GT_PUTARG_STK)
1711 insertionPoint = tmp;
1716 if (insertionPoint == nullptr)
1718 for (GenTreeArgList* args = call->gtCallLateArgs; args; args = args->Rest())
1720 tmp = args->Current();
1721 if ((tmp->OperGet() == GT_PUTARG_REG) || (tmp->OperGet() == GT_PUTARG_STK))
1724 insertionPoint = tmp;
1729 // If there are no args, insert before the call node
1730 if (insertionPoint == nullptr)
1732 insertionPoint = call;
1737 #endif // !defined(_TARGET_X86_)
1739 assert(insertionPoint != nullptr);
1740 GenTreePtr profHookNode = new (comp, GT_PROF_HOOK) GenTree(GT_PROF_HOOK, TYP_VOID);
1741 BlockRange().InsertBefore(insertionPoint, profHookNode);
1744 // Lower a fast tail call implemented as epilog+jmp.
1745 // Also inserts a PInvoke method epilog if required.
1746 void Lowering::LowerFastTailCall(GenTreeCall* call)
1748 #if FEATURE_FASTTAILCALL
1749 // Tail call restrictions, i.e. conditions under which the tail prefix is ignored.
1750 // Most of these checks are already done by the importer or fgMorphTailCall().
1751 // This serves as a double sanity check.
1752 assert((comp->info.compFlags & CORINFO_FLG_SYNCH) == 0); // tail calls from synchronized methods
1753 assert(!comp->opts.compNeedSecurityCheck); // tail call from methods that need security check
1754 assert(!call->IsUnmanaged()); // tail calls to unmanaged methods
1755 assert(!comp->compLocallocUsed); // tail call from methods that also do localloc
1756 assert(!comp->getNeedsGSSecurityCookie()); // jit64 compat: tail calls from methods that need GS check
1758 // We expect to see a call that meets the following conditions
1759 assert(call->IsFastTailCall());
1761 // VM cannot use return address hijacking when A() and B() tail call each
1762 // other in mutual recursion. Therefore, this block is reachable through
1763 // a GC-safe point or the whole method is marked as fully interruptible.
1766 // optReachWithoutCall() depends on the fact that loop header blocks
1767 // will have a block number > fgLastBB. These loop headers get added
1768 // after dominator computation and get skipped by optReachWithoutCall().
1769 // The below condition cannot be asserted in lower because fgSimpleLowering()
1770 // can add a new basic block for range check failure which becomes
1771 // fgLastBB with block number > loop header block number.
1772 // assert((comp->compCurBB->bbFlags & BBF_GC_SAFE_POINT) ||
1773 // !comp->optReachWithoutCall(comp->fgFirstBB, comp->compCurBB) || comp->genInterruptible);
1775     // If PInvokes are in-lined, we have to remember to execute the PInvoke method epilog anywhere that
1776     // the method returns. This covers the case where the caller method has both PInvokes and tail calls.
1777 if (comp->info.compCallUnmanaged)
1779 InsertPInvokeMethodEpilog(comp->compCurBB DEBUGARG(call));
1782     // Args for a tail call are set up in the incoming arg area. The gc-ness of the args of the
1783     // caller and the callee (which is being tail called) may not match. Therefore, everything
1784     // from arg setup until the epilog needs to be non-interruptible by GC. This is
1785     // achieved by inserting GT_START_NONGC before the very first GT_PUTARG_STK node
1786     // of the call is set up. Note that once a stack arg is set up, there cannot be nested
1787     // calls later in execution order that set up other args, because such a nested
1788     // call could overwrite the stack arg that was set up earlier.
1789 GenTreePtr firstPutArgStk = nullptr;
1790 GenTreeArgList* args;
1791 ArrayStack<GenTree*> putargs(comp);
1793 for (args = call->gtCallArgs; args; args = args->Rest())
1795 GenTreePtr tmp = args->Current();
1796 if (tmp->OperGet() == GT_PUTARG_STK)
1802 for (args = call->gtCallLateArgs; args; args = args->Rest())
1804 GenTreePtr tmp = args->Current();
1805 if (tmp->OperGet() == GT_PUTARG_STK)
1811 if (putargs.Height() > 0)
1813 firstPutArgStk = putargs.Bottom();
1816 // If we have a putarg_stk node, also count the number of non-standard args the
1817 // call node has. Note that while determining whether a tail call can be fast
1818 // tail called, we don't count non-standard args (passed in R10 or R11) since they
1819     // don't contribute to the outgoing arg space. These non-standard args are not
1820     // accounted for in the caller's arg count but are accounted for in the callee's arg count after
1821     // fgMorphArgs(). Therefore, exclude the callee's non-standard args while mapping the
1822     // callee's stack arg num to the corresponding caller's stack arg num.
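    // For illustration (hypothetical numbers, not taken from a real dump): if fgMorphArgs() added one
    // non-standard arg to the callee (say, in R11), then a callee stack arg whose argTabEntry->argNum
    // is 3 maps to the caller's parameter number 3 - 1 = 2; that is the incoming slot which the
    // corresponding GT_PUTARG_STK below may overwrite.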
1823 unsigned calleeNonStandardArgCount = call->GetNonStandardAddedArgCount(comp);
1825 // Say Caller(a, b, c, d, e) fast tail calls Callee(e, d, c, b, a)
1826 // i.e. passes its arguments in reverse to Callee. During call site
1827 // setup, after computing argument side effects, stack args are setup
1828     // first and reg args next. In the above example, both Caller's and
1829     // Callee's stack args (e and a respectively) share the same stack slot
1830     // and are alive at the same time. The act of setting up Callee's
1831     // stack arg will overwrite the stack arg of Caller, and if there are
1832     // further uses of Caller's stack arg we have to make sure that we move
1833     // it to a temp before overwriting its slot, and then use the temp in place of
1834     // the corresponding Caller stack arg.
1836 // For the above example, conceptually this is what is done
1838 // Stack slot of e = a
1839     //  R9 = b, R8 = c, RDX = d, RCX = e
1842 // The below logic is meant to detect cases like this and introduce
1843 // temps to set up args correctly for Callee.
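    // Continuing the example above, and assuming Caller's 'a' is still used after its incoming slot
    // is reused for Callee's 'e', the loop below conceptually rewrites the call tree as (a sketch
    // only, not literal IR):
    //
    //   tmp = a                 // assignment inserted before the first GT_PUTARG_STK
    //   Stack slot of e = ...   // may now safely overwrite a's incoming slot
    //   ... tmp ...             // remaining uses of 'a' within the call tree are redirected to tmp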
1845 for (int i = 0; i < putargs.Height(); i++)
1847 GenTreePtr putArgStkNode = putargs.Bottom(i);
1849 assert(putArgStkNode->OperGet() == GT_PUTARG_STK);
1851 // Get the caller arg num corresponding to this callee arg.
1852 // Note that these two args share the same stack slot. Therefore,
1853 // if there are further uses of corresponding caller arg, we need
1854 // to move it to a temp and use the temp in this call tree.
1856 // Note that Caller is guaranteed to have a param corresponding to
1857 // this Callee's arg since fast tail call mechanism counts the
1858 // stack slots required for both Caller and Callee for passing params
1859         // and allows a fast tail call only if the stack slots required by Caller >= those required by Callee.
1861 fgArgTabEntryPtr argTabEntry = comp->gtArgEntryByNode(call, putArgStkNode);
1862 assert(argTabEntry);
1863 unsigned callerArgNum = argTabEntry->argNum - calleeNonStandardArgCount;
1864 noway_assert(callerArgNum < comp->info.compArgsCount);
1866 unsigned callerArgLclNum = callerArgNum;
1867 LclVarDsc* callerArgDsc = comp->lvaTable + callerArgLclNum;
1868 if (callerArgDsc->lvPromoted)
1871             callerArgLclNum = callerArgDsc->lvFieldLclStart; // update callerArgLclNum to the promoted struct field's lclNum
1872 callerArgDsc = comp->lvaTable + callerArgLclNum;
1874 noway_assert(callerArgDsc->lvIsParam);
1876 // Start searching in execution order list till we encounter call node
1877 unsigned tmpLclNum = BAD_VAR_NUM;
1878 var_types tmpType = TYP_UNDEF;
1879 for (GenTreePtr treeNode = putArgStkNode->gtNext; treeNode != call; treeNode = treeNode->gtNext)
1881 if (treeNode->OperIsLocal() || treeNode->OperIsLocalAddr())
1883 // This should neither be a GT_REG_VAR nor GT_PHI_ARG.
1884 assert((treeNode->OperGet() != GT_REG_VAR) && (treeNode->OperGet() != GT_PHI_ARG));
1886 GenTreeLclVarCommon* lcl = treeNode->AsLclVarCommon();
1887 LclVarDsc* lclVar = &comp->lvaTable[lcl->gtLclNum];
1889                 // The fast tail calling criteria permit passing structs of size 1, 2, 4 and 8 as args.
1890 // It is possible that the callerArgLclNum corresponds to such a struct whose stack slot
1891 // is getting over-written by setting up of a stack arg and there are further uses of
1892 // any of its fields if such a struct is type-dependently promoted. In this case too
1893 // we need to introduce a temp.
1894 if ((lcl->gtLclNum == callerArgNum) || (lcl->gtLclNum == callerArgLclNum))
1896 // Create tmp and use it in place of callerArgDsc
1897 if (tmpLclNum == BAD_VAR_NUM)
1899 tmpLclNum = comp->lvaGrabTemp(
1900 true DEBUGARG("Fast tail call lowering is creating a new local variable"));
1901 comp->lvaSortAgain = true;
1902 tmpType = genActualType(callerArgDsc->lvaArgType());
1903 comp->lvaTable[tmpLclNum].lvType = tmpType;
1904 comp->lvaTable[tmpLclNum].lvRefCnt = 1;
1905 comp->lvaTable[tmpLclNum].lvDoNotEnregister = comp->lvaTable[lcl->gtLclNum].lvDoNotEnregister;
1908 lcl->SetLclNum(tmpLclNum);
1913         // If we have created a temp, insert an assignment of the caller arg to the temp before
1914         // the first putArgStk node, i.e.
1915 // tmpLcl = CallerArg
1916 if (tmpLclNum != BAD_VAR_NUM)
1918 assert(tmpType != TYP_UNDEF);
1919 GenTreeLclVar* local =
1920 new (comp, GT_LCL_VAR) GenTreeLclVar(GT_LCL_VAR, tmpType, callerArgLclNum, BAD_IL_OFFSET);
1921 GenTree* assignExpr = comp->gtNewTempAssign(tmpLclNum, local);
1922 ContainCheckRange(local, assignExpr);
1923 BlockRange().InsertBefore(firstPutArgStk, LIR::SeqTree(comp, assignExpr));
1927     // Insert a GT_START_NONGC node before the first GT_PUTARG_STK node.
1928     // Note that if there are no args to be set up on the stack, there is no need to
1929     // insert a GT_START_NONGC node.
1930 GenTreePtr startNonGCNode = nullptr;
1931 if (firstPutArgStk != nullptr)
1933 startNonGCNode = new (comp, GT_START_NONGC) GenTree(GT_START_NONGC, TYP_VOID);
1934 BlockRange().InsertBefore(firstPutArgStk, startNonGCNode);
1936         // GC interruptibility in the following case:
1937         //      foo(a, b, c, d, e) { bar(a, b, c, d, e); }
1938         //      bar(a, b, c, d, e) { foo(a, b, c, d, e); }
1940         // Since the instruction group starting from the instruction that sets up the first
1941         // stack arg to the end of the tail call is marked as non-GC-interruptible,
1942         // this would form a non-interruptible tight loop causing GC starvation. To fix
1943         // this we insert a GT_NO_OP before GT_START_NONGC, if the method
1944         // has a single basic block and is not a GC-safe point. The presence of a single
1945         // nop outside the non-GC-interruptible region will prevent GC starvation.
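        // With the nop in place, the execution order for such a single-block method is roughly
        // (sketch): <arg side effects>, GT_NO_OP, GT_START_NONGC, GT_PUTARG_STK ..., fast tail GT_CALL,
        // so at least one interruptible instruction remains outside the non-GC region on every iteration.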
1946 if ((comp->fgBBcount == 1) && !(comp->compCurBB->bbFlags & BBF_GC_SAFE_POINT))
1948 assert(comp->fgFirstBB == comp->compCurBB);
1949 GenTreePtr noOp = new (comp, GT_NO_OP) GenTree(GT_NO_OP, TYP_VOID);
1950 BlockRange().InsertBefore(startNonGCNode, noOp);
1954 // Insert GT_PROF_HOOK node to emit profiler tail call hook. This should be
1955 // inserted before the args are setup but after the side effects of args are
1956 // computed. That is, GT_PROF_HOOK node needs to be inserted before GT_START_NONGC
1957 // node if one exists.
1958 if (comp->compIsProfilerHookNeeded())
1960 InsertProfTailCallHook(call, startNonGCNode);
1963 #else // !FEATURE_FASTTAILCALL
1965     // The platform chose not to implement the fast tail call mechanism.
1966     // In that case we should never reach this method, as
1967     // the expectation is that IsTailCallViaHelper() will always
1968     // be true on such a platform.
1973 //------------------------------------------------------------------------
1974 // LowerTailCallViaHelper: lower a call via the tailcall helper. Morph
1975 // has already inserted tailcall helper special arguments. This function
1976 // inserts actual data for some placeholders.
1979 // tail.call(void* copyRoutine, void* dummyArg, ...)
1981 // Jit_TailCall(void* copyRoutine, void* callTarget, ...)
1984 // tail.call(<function args>, int numberOfOldStackArgs, int dummyNumberOfNewStackArgs, int flags, void* dummyArg)
1986 //         JIT_TailCall(<function args>, int numberOfOldStackArgsWords, int numberOfNewStackArgsWords, int flags, void* callTarget)
1988 // Note that the special arguments are on the stack, whereas the function arguments follow the normal convention.
1990 // Also inserts PInvoke method epilog if required.
1993 // call - The call node
1994 // callTarget - The real call target. This is used to replace the dummyArg during lowering.
1997 // Returns control expression tree for making a call to helper Jit_TailCall.
1999 GenTree* Lowering::LowerTailCallViaHelper(GenTreeCall* call, GenTree* callTarget)
2001     // Tail call restrictions, i.e., conditions under which the tail prefix is ignored.
2002 // Most of these checks are already done by importer or fgMorphTailCall().
2003 // This serves as a double sanity check.
2004 assert((comp->info.compFlags & CORINFO_FLG_SYNCH) == 0); // tail calls from synchronized methods
2005 assert(!comp->opts.compNeedSecurityCheck); // tail call from methods that need security check
2006     assert(!call->IsUnmanaged());                          // tail calls to unmanaged methods
2007 assert(!comp->compLocallocUsed); // tail call from methods that also do localloc
2009 #ifdef _TARGET_AMD64_
2010 assert(!comp->getNeedsGSSecurityCookie()); // jit64 compat: tail calls from methods that need GS check
2011 #endif // _TARGET_AMD64_
2013 // We expect to see a call that meets the following conditions
2014 assert(call->IsTailCallViaHelper());
2015 assert(callTarget != nullptr);
2017 // The TailCall helper call never returns to the caller and is not GC interruptible.
2018 // Therefore the block containing the tail call should be a GC safe point to avoid
2019 // GC starvation. It is legal for the block to be unmarked iff the entry block is a
2020 // GC safe point, as the entry block trivially dominates every reachable block.
2021 assert((comp->compCurBB->bbFlags & BBF_GC_SAFE_POINT) || (comp->fgFirstBB->bbFlags & BBF_GC_SAFE_POINT));
2023     // If PInvokes are in-lined, we have to remember to execute the PInvoke method epilog anywhere that
2024     // the method returns. This covers the case where the caller method has both PInvokes and tail calls.
2025 if (comp->info.compCallUnmanaged)
2027 InsertPInvokeMethodEpilog(comp->compCurBB DEBUGARG(call));
2030 // Remove gtCallAddr from execution order if present.
2031 if (call->gtCallType == CT_INDIRECT)
2033 assert(call->gtCallAddr != nullptr);
2036 LIR::ReadOnlyRange callAddrRange = BlockRange().GetTreeRange(call->gtCallAddr, &isClosed);
2039 BlockRange().Remove(std::move(callAddrRange));
2042 // The callTarget tree needs to be sequenced.
2043 LIR::Range callTargetRange = LIR::SeqTree(comp, callTarget);
2045 fgArgTabEntry* argEntry;
2047 #if defined(_TARGET_AMD64_)
2049     // For AMD64, the first argument is the CopyRoutine and the second argument is a placeholder node.
2052 argEntry = comp->gtArgEntryByArgNum(call, 0);
2053 assert(argEntry != nullptr);
2054 assert(argEntry->node->gtOper == GT_PUTARG_REG);
2055 GenTree* firstArg = argEntry->node->gtOp.gtOp1;
2056 assert(firstArg->gtOper == GT_CNS_INT);
2059     // Replace the second arg with callTarget.
2060 argEntry = comp->gtArgEntryByArgNum(call, 1);
2061 assert(argEntry != nullptr);
2062 assert(argEntry->node->gtOper == GT_PUTARG_REG);
2063 GenTree* secondArg = argEntry->node->gtOp.gtOp1;
2065 ContainCheckRange(callTargetRange);
2066 BlockRange().InsertAfter(secondArg, std::move(callTargetRange));
2069 LIR::ReadOnlyRange secondArgRange = BlockRange().GetTreeRange(secondArg, &isClosed);
2072 BlockRange().Remove(std::move(secondArgRange));
2074 argEntry->node->gtOp.gtOp1 = callTarget;
2076 #elif defined(_TARGET_X86_)
2078 // Verify the special args are what we expect, and replace the dummy args with real values.
2079 // We need to figure out the size of the outgoing stack arguments, not including the special args.
2080 // The number of 4-byte words is passed to the helper for the incoming and outgoing argument sizes.
2081 // This number is exactly the next slot number in the call's argument info struct.
2082 unsigned nNewStkArgsWords = call->fgArgInfo->GetNextSlotNum();
2083 assert(nNewStkArgsWords >= 4); // There must be at least the four special stack args.
2084 nNewStkArgsWords -= 4;
2086 unsigned numArgs = call->fgArgInfo->ArgCount();
2088 // arg 0 == callTarget.
2089 argEntry = comp->gtArgEntryByArgNum(call, numArgs - 1);
2090 assert(argEntry != nullptr);
2091 assert(argEntry->node->gtOper == GT_PUTARG_STK);
2092 GenTree* arg0 = argEntry->node->gtOp.gtOp1;
2094 ContainCheckRange(callTargetRange);
2095 BlockRange().InsertAfter(arg0, std::move(callTargetRange));
2098 LIR::ReadOnlyRange secondArgRange = BlockRange().GetTreeRange(arg0, &isClosed);
2100 BlockRange().Remove(std::move(secondArgRange));
2102 argEntry->node->gtOp.gtOp1 = callTarget;
2105 argEntry = comp->gtArgEntryByArgNum(call, numArgs - 2);
2106 assert(argEntry != nullptr);
2107 assert(argEntry->node->gtOper == GT_PUTARG_STK);
2108 GenTree* arg1 = argEntry->node->gtOp.gtOp1;
2109 assert(arg1->gtOper == GT_CNS_INT);
2111 ssize_t tailCallHelperFlags = 1 | // always restore EDI,ESI,EBX
2112 (call->IsVirtualStub() ? 0x2 : 0x0); // Stub dispatch flag
2113 arg1->gtIntCon.gtIconVal = tailCallHelperFlags;
2115 // arg 2 == numberOfNewStackArgsWords
2116 argEntry = comp->gtArgEntryByArgNum(call, numArgs - 3);
2117 assert(argEntry != nullptr);
2118 assert(argEntry->node->gtOper == GT_PUTARG_STK);
2119 GenTree* arg2 = argEntry->node->gtOp.gtOp1;
2120 assert(arg2->gtOper == GT_CNS_INT);
2122 arg2->gtIntCon.gtIconVal = nNewStkArgsWords;
2125 // arg 3 == numberOfOldStackArgsWords
2126 argEntry = comp->gtArgEntryByArgNum(call, numArgs - 4);
2127 assert(argEntry != nullptr);
2128 assert(argEntry->node->gtOper == GT_PUTARG_STK);
2129 GenTree* arg3 = argEntry->node->gtOp.gtOp1;
2130 assert(arg3->gtOper == GT_CNS_INT);
2134 NYI("LowerTailCallViaHelper");
2137     // Transform this call node into a call to the JIT tail call helper.
2138 call->gtCallType = CT_HELPER;
2139 call->gtCallMethHnd = comp->eeFindHelper(CORINFO_HELP_TAILCALL);
2140 call->gtFlags &= ~GTF_CALL_VIRT_KIND_MASK;
2142 // Lower this as if it were a pure helper call.
2143 call->gtCallMoreFlags &= ~(GTF_CALL_M_TAILCALL | GTF_CALL_M_TAILCALL_VIA_HELPER);
2144 GenTree* result = LowerDirectCall(call);
2146 // Now add back tail call flags for identifying this node as tail call dispatched via helper.
2147 call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL | GTF_CALL_M_TAILCALL_VIA_HELPER;
2149 #ifdef PROFILING_SUPPORTED
2150 // Insert profiler tail call hook if needed.
2151 // Since we don't know the insertion point, pass null for second param.
2152 if (comp->compIsProfilerHookNeeded())
2154 InsertProfTailCallHook(call, nullptr);
2156 #endif // PROFILING_SUPPORTED
2158 assert(call->IsTailCallViaHelper());
2163 //------------------------------------------------------------------------
2164 // Lowering::LowerCompare: Lowers a compare node.
2167 // cmp - the compare node
2170 // - Decomposes long comparisons that feed a GT_JTRUE (32 bit specific).
2171 // - Decomposes long comparisons that produce a value (X86 specific).
2172 // - Ensures that we don't have a mix of int/long operands (XARCH specific).
2173 //    - Narrows operands to enable memory operand containment (XARCH specific).
2174 //    - Transforms cmp(and(x, y), 0) into test(x, y) (XARCH specific but could
2175 // be used for ARM as well if support for GT_TEST_EQ/GT_TEST_NE is added).
2177 void Lowering::LowerCompare(GenTree* cmp)
2179 #ifndef _TARGET_64BIT_
2180 if (cmp->gtGetOp1()->TypeGet() == TYP_LONG)
2182 GenTree* src1 = cmp->gtGetOp1();
2183 GenTree* src2 = cmp->gtGetOp2();
2184 assert(src1->OperIs(GT_LONG));
2185 assert(src2->OperIs(GT_LONG));
2186 GenTree* loSrc1 = src1->gtGetOp1();
2187 GenTree* hiSrc1 = src1->gtGetOp2();
2188 GenTree* loSrc2 = src2->gtGetOp1();
2189 GenTree* hiSrc2 = src2->gtGetOp2();
2190 BlockRange().Remove(src1);
2191 BlockRange().Remove(src2);
2193 genTreeOps condition = cmp->OperGet();
2197 if (cmp->OperIs(GT_EQ, GT_NE))
2200 // Transform (x EQ|NE y) into (((x.lo XOR y.lo) OR (x.hi XOR y.hi)) EQ|NE 0). If y is 0 then this can
2201 // be reduced to just ((x.lo OR x.hi) EQ|NE 0). The OR is expected to set the condition flags so we
2202 // don't need to generate a redundant compare against 0, we only generate a SETCC|JCC instruction.
2204 // XOR is used rather than SUB because it is commutative and thus allows swapping the operands when
2205 // the first happens to be a constant. Usually only the second compare operand is a constant but it's
2206 // still possible to have a constant on the left side. For example, when src1 is a uint->ulong cast
2207 // then hiSrc1 would be 0.
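        // As a sketch (not literal IR), when the comparison feeds a GT_JTRUE the decomposition
        // below produces roughly:
        //
        //   t1 = XOR(x.lo, y.lo)   // omitted when y.lo is 0
        //   t2 = XOR(x.hi, y.hi)   // omitted when y.hi is 0
        //   t3 = OR(t1, t2)        // sets the condition flags
        //   JCC EQ|NE              // the user GT_JTRUE is changed to GT_JCC further below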
2210 if (loSrc1->OperIs(GT_CNS_INT))
2212 std::swap(loSrc1, loSrc2);
2215 if (loSrc2->IsIntegralConst(0))
2217 BlockRange().Remove(loSrc2);
2222 loCmp = comp->gtNewOperNode(GT_XOR, TYP_INT, loSrc1, loSrc2);
2223 BlockRange().InsertBefore(cmp, loCmp);
2224 ContainCheckBinary(loCmp->AsOp());
2227 if (hiSrc1->OperIs(GT_CNS_INT))
2229 std::swap(hiSrc1, hiSrc2);
2232 if (hiSrc2->IsIntegralConst(0))
2234 BlockRange().Remove(hiSrc2);
2239 hiCmp = comp->gtNewOperNode(GT_XOR, TYP_INT, hiSrc1, hiSrc2);
2240 BlockRange().InsertBefore(cmp, hiCmp);
2241 ContainCheckBinary(hiCmp->AsOp());
2244 hiCmp = comp->gtNewOperNode(GT_OR, TYP_INT, loCmp, hiCmp);
2245 BlockRange().InsertBefore(cmp, hiCmp);
2246 ContainCheckBinary(hiCmp->AsOp());
2250 assert(cmp->OperIs(GT_LT, GT_LE, GT_GE, GT_GT));
2253 // If the compare is signed then (x LT|GE y) can be transformed into ((x SUB y) LT|GE 0).
2254 // If the compare is unsigned we can still use SUB but we need to check the Carry flag,
2255         // not the actual result. In both cases we can simply check the appropriate condition flags
2256 // and ignore the actual result:
2257 // SUB_LO loSrc1, loSrc2
2258 // SUB_HI hiSrc1, hiSrc2
2259 // SETCC|JCC (signed|unsigned LT|GE)
2260 // If loSrc2 happens to be 0 then the first SUB can be eliminated and the second one can
2261 // be turned into a CMP because the first SUB would have set carry to 0. This effectively
2262 // transforms a long compare against 0 into an int compare of the high part against 0.
2264         // (x LE|GT y) can be transformed into ((x SUB y) LE|GT 0) but checking that a long value
2265 // is greater than 0 is not so easy. We need to turn this into a positive/negative check
2266 // like the one we get for LT|GE compares, this can be achieved by swapping the compare:
2267 // (x LE|GT y) becomes (y GE|LT x)
2269 // Having to swap operands is problematic when the second operand is a constant. The constant
2270 // moves to the first operand where it cannot be contained and thus needs a register. This can
2271 // be avoided by changing the constant such that LE|GT becomes LT|GE:
2272 // (x LE|GT 41) becomes (x LT|GE 42)
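        // Note that this constant adjustment is only legal when the constant is not already the
        // maximum value for the compare's signedness (adding 1 would wrap around); in that case the
        // code below falls back to swapping the operands.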
2275 if (cmp->OperIs(GT_LE, GT_GT))
2277 bool mustSwap = true;
2279 if (loSrc2->OperIs(GT_CNS_INT) && hiSrc2->OperIs(GT_CNS_INT))
2281 uint32_t loValue = static_cast<uint32_t>(loSrc2->AsIntCon()->IconValue());
2282 uint32_t hiValue = static_cast<uint32_t>(hiSrc2->AsIntCon()->IconValue());
2283 uint64_t value = static_cast<uint64_t>(loValue) | (static_cast<uint64_t>(hiValue) << 32);
2284 uint64_t maxValue = cmp->IsUnsigned() ? UINT64_MAX : INT64_MAX;
2286 if (value != maxValue)
2289 loValue = value & UINT32_MAX;
2290 hiValue = (value >> 32) & UINT32_MAX;
2291 loSrc2->AsIntCon()->SetIconValue(loValue);
2292 hiSrc2->AsIntCon()->SetIconValue(hiValue);
2294 condition = cmp->OperIs(GT_LE) ? GT_LT : GT_GE;
2301 std::swap(loSrc1, loSrc2);
2302 std::swap(hiSrc1, hiSrc2);
2303 condition = GenTree::SwapRelop(condition);
2307 assert((condition == GT_LT) || (condition == GT_GE));
2309 if (loSrc2->IsIntegralConst(0))
2311 BlockRange().Remove(loSrc2);
2313 // Very conservative dead code removal... but it helps.
2315 if (loSrc1->OperIs(GT_CNS_INT, GT_LCL_VAR, GT_LCL_FLD))
2317 BlockRange().Remove(loSrc1);
2319 if (loSrc1->OperIs(GT_LCL_VAR, GT_LCL_FLD))
2321 comp->lvaDecRefCnts(m_block, loSrc1);
2326 loSrc1->SetUnusedValue();
2329 hiCmp = comp->gtNewOperNode(GT_CMP, TYP_VOID, hiSrc1, hiSrc2);
2330 BlockRange().InsertBefore(cmp, hiCmp);
2331 ContainCheckCompare(hiCmp->AsOp());
2335 loCmp = comp->gtNewOperNode(GT_CMP, TYP_VOID, loSrc1, loSrc2);
2336 hiCmp = comp->gtNewOperNode(GT_SUB_HI, TYP_INT, hiSrc1, hiSrc2);
2337 BlockRange().InsertBefore(cmp, loCmp, hiCmp);
2338 ContainCheckCompare(loCmp->AsOp());
2339 ContainCheckBinary(hiCmp->AsOp());
2342             // Try to move the first SUB_HI operand right in front of it; this allows using
2343             // a single temporary register instead of 2 (one for CMP and one for SUB_HI). Do
2344             // this only for locals as they won't change condition flags. Note that we could
2345             // move constants (except 0, which generates XOR reg, reg) but it's extremely rare
2346             // to have a constant as the first operand.
2349 if (hiSrc1->OperIs(GT_LCL_VAR, GT_LCL_FLD))
2351 BlockRange().Remove(hiSrc1);
2352 BlockRange().InsertBefore(hiCmp, hiSrc1);
2357 hiCmp->gtFlags |= GTF_SET_FLAGS;
2358 if (hiCmp->IsValue())
2360 hiCmp->SetUnusedValue();
2364 if (BlockRange().TryGetUse(cmp, &cmpUse) && cmpUse.User()->OperIs(GT_JTRUE))
2366 BlockRange().Remove(cmp);
2368 GenTree* jcc = cmpUse.User();
2369 jcc->gtOp.gtOp1 = nullptr;
2370 jcc->ChangeOper(GT_JCC);
2371 jcc->gtFlags |= (cmp->gtFlags & GTF_UNSIGNED) | GTF_USE_FLAGS;
2372 jcc->AsCC()->gtCondition = condition;
2376 cmp->gtOp.gtOp1 = nullptr;
2377 cmp->gtOp.gtOp2 = nullptr;
2378 cmp->ChangeOper(GT_SETCC);
2379 cmp->gtFlags |= GTF_USE_FLAGS;
2380 cmp->AsCC()->gtCondition = condition;
2387 #ifdef _TARGET_XARCH_
2388 #ifdef _TARGET_AMD64_
2389 if (cmp->gtGetOp1()->TypeGet() != cmp->gtGetOp2()->TypeGet())
2391 bool op1Is64Bit = (genTypeSize(cmp->gtGetOp1()->TypeGet()) == 8);
2392 bool op2Is64Bit = (genTypeSize(cmp->gtGetOp2()->TypeGet()) == 8);
2394 if (op1Is64Bit != op2Is64Bit)
2397 // Normally this should not happen. IL allows comparing int32 to native int but the importer
2398 // automatically inserts a cast from int32 to long on 64 bit architectures. However, the JIT
2399 // accidentally generates int/long comparisons internally:
2400 // - loop cloning compares int (and even small int) index limits against long constants
2402 // TODO-Cleanup: The above mentioned issues should be fixed and then the code below may be
2403 // replaced with an assert or at least simplified. The special casing of constants in code
2404 // below is only necessary to prevent worse code generation for switches and loop cloning.
2407 GenTree* longOp = op1Is64Bit ? cmp->gtOp.gtOp1 : cmp->gtOp.gtOp2;
2408 GenTree** smallerOpUse = op2Is64Bit ? &cmp->gtOp.gtOp1 : &cmp->gtOp.gtOp2;
2409 var_types smallerType = (*smallerOpUse)->TypeGet();
2411 assert(genTypeSize(smallerType) < 8);
2413 if (longOp->IsCnsIntOrI() && genTypeCanRepresentValue(smallerType, longOp->AsIntCon()->IconValue()))
2415 longOp->gtType = smallerType;
2417 else if ((*smallerOpUse)->IsCnsIntOrI())
2419 (*smallerOpUse)->gtType = TYP_LONG;
2423 GenTree* cast = comp->gtNewCastNode(TYP_LONG, *smallerOpUse, TYP_LONG);
2424 *smallerOpUse = cast;
2425 BlockRange().InsertAfter(cast->gtGetOp1(), cast);
2426 ContainCheckCast(cast->AsCast());
2430 #endif // _TARGET_AMD64_
2432 if (cmp->gtGetOp2()->IsIntegralConst())
2434 GenTree* op1 = cmp->gtGetOp1();
2435 var_types op1Type = op1->TypeGet();
2436 GenTreeIntCon* op2 = cmp->gtGetOp2()->AsIntCon();
2437 ssize_t op2Value = op2->IconValue();
2439 if (IsContainableMemoryOp(op1) && varTypeIsSmall(op1Type) && genTypeCanRepresentValue(op1Type, op2Value))
2442 // If op1's type is small then try to narrow op2 so it has the same type as op1.
2443 // Small types are usually used by memory loads and if both compare operands have
2444 // the same type then the memory load can be contained. In certain situations
2445 // (e.g "cmp ubyte, 200") we also get a smaller instruction encoding.
2448 op2->gtType = op1Type;
2450 else if (op1->OperIs(GT_CAST) && !op1->gtOverflow())
2452 GenTreeCast* cast = op1->AsCast();
2453 var_types castToType = cast->CastToType();
2454 GenTree* castOp = cast->gtGetOp1();
2456 if (((castToType == TYP_BOOL) || (castToType == TYP_UBYTE)) && FitsIn<UINT8>(op2Value))
2459                 // Since we're going to remove the cast we need to be able to narrow the cast operand
2460                 // to the cast type. This can be done safely only for certain opers (e.g. AND, OR, XOR).
2461                 // Some opers just can't be narrowed (e.g. DIV, MUL) while others could be narrowed but
2462                 // doing so would produce incorrect results (e.g. RSZ, RSH).
2464                 // The below list of handled opers is conservative but enough to handle the most common
2465                 // situations. In particular this includes CALL; sometimes the JIT unnecessarily widens
2466                 // the result of bool-returning calls.
2469 if (castOp->OperIs(GT_CALL, GT_LCL_VAR) || castOp->OperIsLogical() || IsContainableMemoryOp(castOp))
2471 assert(!castOp->gtOverflowEx()); // Must not be an overflow checking operation
2473 castOp->gtType = castToType;
2474 // If we have any contained memory ops on castOp, they must now not be contained.
2475 if (castOp->OperIsLogical())
2477 GenTree* op1 = castOp->gtGetOp1();
2478 if ((op1 != nullptr) && !op1->IsCnsIntOrI())
2480 op1->ClearContained();
2482 GenTree* op2 = castOp->gtGetOp2();
2483 if ((op2 != nullptr) && !op2->IsCnsIntOrI())
2485 op2->ClearContained();
2488 cmp->gtOp.gtOp1 = castOp;
2489 op2->gtType = castToType;
2491 BlockRange().Remove(cast);
2495 else if (op1->OperIs(GT_AND) && cmp->OperIs(GT_EQ, GT_NE))
2498 // Transform ((x AND y) EQ|NE 0) into (x TEST_EQ|TEST_NE y) when possible.
2501 GenTree* andOp1 = op1->gtGetOp1();
2502 GenTree* andOp2 = op1->gtGetOp2();
2507 // If we don't have a 0 compare we can get one by transforming ((x AND mask) EQ|NE mask)
2508 // into ((x AND mask) NE|EQ 0) when mask is a single bit.
2511 if (isPow2(static_cast<size_t>(op2Value)) && andOp2->IsIntegralConst(op2Value))
2514 op2->SetIconValue(0);
2515 cmp->SetOperRaw(GenTree::ReverseRelop(cmp->OperGet()));
2521 BlockRange().Remove(op1);
2522 BlockRange().Remove(op2);
2524 cmp->SetOperRaw(cmp->OperIs(GT_EQ) ? GT_TEST_EQ : GT_TEST_NE);
2525 cmp->gtOp.gtOp1 = andOp1;
2526 cmp->gtOp.gtOp2 = andOp2;
2527 // We will re-evaluate containment below
2528 andOp1->ClearContained();
2529 andOp2->ClearContained();
2531 if (IsContainableMemoryOp(andOp1) && andOp2->IsIntegralConst())
2534 // For "test" we only care about the bits that are set in the second operand (mask).
2535 // If the mask fits in a small type then we can narrow both operands to generate a "test"
2536 // instruction with a smaller encoding ("test" does not have a r/m32, imm8 form) and avoid
2537 // a widening load in some cases.
2539 // For 16 bit operands we narrow only if the memory operand is already 16 bit. This matches
2540 // the behavior of a previous implementation and avoids adding more cases where we generate
2541 // 16 bit instructions that require a length changing prefix (0x66). These suffer from
2542 // significant decoder stalls on Intel CPUs.
2544 // We could also do this for 64 bit masks that fit into 32 bit but it doesn't help.
2545 // In such cases morph narrows down the existing GT_AND by inserting a cast between it and
2546 // the memory operand so we'd need to add more code to recognize and eliminate that cast.
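                // For example (illustrative only; the exact encoding depends on the operands): with a
                // byte-sized memory operand and mask 0x20, narrowing both operands lets codegen emit
                //   test byte ptr [mem], 0x20
                // instead of a widening load followed by a "test r32, imm32".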
2549 size_t mask = static_cast<size_t>(andOp2->AsIntCon()->IconValue());
2551 if (FitsIn<UINT8>(mask))
2553 andOp1->gtType = TYP_UBYTE;
2554 andOp2->gtType = TYP_UBYTE;
2556 else if (FitsIn<UINT16>(mask) && genTypeSize(andOp1) == 2)
2558 andOp1->gtType = TYP_CHAR;
2559 andOp2->gtType = TYP_CHAR;
2566 if (cmp->gtGetOp1()->TypeGet() == cmp->gtGetOp2()->TypeGet())
2568 if (varTypeIsSmall(cmp->gtGetOp1()->TypeGet()) && varTypeIsUnsigned(cmp->gtGetOp1()->TypeGet()))
2571 // If both operands have the same type then codegen will use the common operand type to
2572 // determine the instruction type. For small types this would result in performing a
2573 // signed comparison of two small unsigned values without zero extending them to TYP_INT
2574 // which is incorrect. Note that making the comparison unsigned doesn't imply that codegen
2575 // has to generate a small comparison, it can still correctly generate a TYP_INT comparison.
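            // For example (sketch): comparing two TYP_UBYTE values 0x80 and 0x01 with a signed byte
            // compare would treat 0x80 as -128 and report "less than", whereas the correct unsigned
            // result is "greater than"; marking the compare unsigned avoids that.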
2578 cmp->gtFlags |= GTF_UNSIGNED;
2581 #endif // _TARGET_XARCH_
2582 ContainCheckCompare(cmp->AsOp());
2585 // Lower "jmp <method>" tail call to insert PInvoke method epilog if required.
2586 void Lowering::LowerJmpMethod(GenTree* jmp)
2588 assert(jmp->OperGet() == GT_JMP);
2590 JITDUMP("lowering GT_JMP\n");
2592 JITDUMP("============");
2594 // If PInvokes are in-lined, we have to remember to execute PInvoke method epilog anywhere that
2595 // a method returns.
2596 if (comp->info.compCallUnmanaged)
2598 InsertPInvokeMethodEpilog(comp->compCurBB DEBUGARG(jmp));
2602 // Lower GT_RETURN node to insert PInvoke method epilog if required.
2603 void Lowering::LowerRet(GenTree* ret)
2605 assert(ret->OperGet() == GT_RETURN);
2607 JITDUMP("lowering GT_RETURN\n");
2609 JITDUMP("============");
2611 #if defined(_TARGET_AMD64_) && defined(FEATURE_SIMD)
2612 GenTreeUnOp* const unOp = ret->AsUnOp();
2613 if ((unOp->TypeGet() == TYP_LONG) && (unOp->gtOp1->TypeGet() == TYP_SIMD8))
2615 GenTreeUnOp* bitcast = new (comp, GT_BITCAST) GenTreeOp(GT_BITCAST, TYP_LONG, unOp->gtOp1, nullptr);
2616 unOp->gtOp1 = bitcast;
2617 BlockRange().InsertBefore(unOp, bitcast);
2619 #endif // _TARGET_AMD64_
2621     // A method doing PInvokes has exactly one return block unless it has tail calls.
2622 if (comp->info.compCallUnmanaged && (comp->compCurBB == comp->genReturnBB))
2624 InsertPInvokeMethodEpilog(comp->compCurBB DEBUGARG(ret));
2626 ContainCheckRet(ret->AsOp());
2629 GenTree* Lowering::LowerDirectCall(GenTreeCall* call)
2631 noway_assert(call->gtCallType == CT_USER_FUNC || call->gtCallType == CT_HELPER);
2633     // We don't support tail calling helper methods directly.
2634     // However, tail calls dispatched via the JIT helper appear as a tail call to a helper.
2635 noway_assert(!call->IsTailCall() || call->IsTailCallViaHelper() || call->gtCallType == CT_USER_FUNC);
2637 // Non-virtual direct/indirect calls: Work out if the address of the
2638     // call is known at JIT time. If not, it is either an indirect call
2639     // or the address must be accessed via a single/double indirection.
2642 InfoAccessType accessType;
2643 CorInfoHelpFunc helperNum = comp->eeGetHelperNum(call->gtCallMethHnd);
2645 #ifdef FEATURE_READYTORUN_COMPILER
2646 if (call->gtEntryPoint.addr != nullptr)
2648 accessType = call->gtEntryPoint.accessType;
2649 addr = call->gtEntryPoint.addr;
2653 if (call->gtCallType == CT_HELPER)
2655 noway_assert(helperNum != CORINFO_HELP_UNDEF);
2657         // The convention on getHelperFtn seems to be (it's not documented)
2658         // that it either returns an address or, if it returns null, sets pAddr to
2659         // another address, which requires an indirection.
2661 addr = comp->info.compCompHnd->getHelperFtn(helperNum, (void**)&pAddr);
2663 if (addr != nullptr)
2665 accessType = IAT_VALUE;
2669 accessType = IAT_PVALUE;
2675 noway_assert(helperNum == CORINFO_HELP_UNDEF);
2677 CORINFO_ACCESS_FLAGS aflags = CORINFO_ACCESS_ANY;
2679 if (call->IsSameThis())
2681 aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_THIS);
2684 if (!call->NeedsNullCheck())
2686 aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_NONNULL);
2689 CORINFO_CONST_LOOKUP addrInfo;
2690 comp->info.compCompHnd->getFunctionEntryPoint(call->gtCallMethHnd, &addrInfo, aflags);
2692 accessType = addrInfo.accessType;
2693 addr = addrInfo.addr;
2696 GenTree* result = nullptr;
2700 // Non-virtual direct call to known address
2701 if (!IsCallTargetInRange(addr) || call->IsTailCall())
2703 result = AddrGen(addr);
2707             // A direct call within range of the hardware relative call instruction;
2708             // stash the address for codegen.
2709 call->gtDirectCallAddress = addr;
2715 // Non-virtual direct calls to addresses accessed by
2716 // a single indirection.
2717 GenTree* cellAddr = AddrGen(addr);
2718 GenTree* indir = Ind(cellAddr);
2720 #ifdef FEATURE_READYTORUN_COMPILER
2721 #if defined(_TARGET_ARMARCH_)
2722             // For arm64, we dispatch the same way as VSD, using X11 for the indirection cell address,
2723 // which ZapIndirectHelperThunk expects.
2724 if (call->IsR2RRelativeIndir())
2726 cellAddr->gtRegNum = REG_R2R_INDIRECT_PARAM;
2727 indir->gtRegNum = REG_JUMP_THUNK_PARAM;
2736 // Non-virtual direct calls to addresses accessed by
2737 // a double indirection.
2739 // Double-indirection. Load the address into a register
2740 // and call indirectly through the register
2741 noway_assert(helperNum == CORINFO_HELP_UNDEF);
2742 result = AddrGen(addr);
2743 result = Ind(Ind(result));
2747 noway_assert(!"Bad accessType");
2754 GenTree* Lowering::LowerDelegateInvoke(GenTreeCall* call)
2756 noway_assert(call->gtCallType == CT_USER_FUNC);
2758 assert((comp->info.compCompHnd->getMethodAttribs(call->gtCallMethHnd) &
2759 (CORINFO_FLG_DELEGATE_INVOKE | CORINFO_FLG_FINAL)) == (CORINFO_FLG_DELEGATE_INVOKE | CORINFO_FLG_FINAL));
2761 GenTree* thisArgNode;
2762 if (call->IsTailCallViaHelper())
2764 #ifdef _TARGET_X86_ // x86 tailcall via helper follows normal calling convention, but with extra stack args.
2765 const unsigned argNum = 0;
2766 #else // !_TARGET_X86_
2767 // In case of helper dispatched tail calls, "thisptr" will be the third arg.
2768 // The first two args are: real call target and addr of args copy routine.
2769 const unsigned argNum = 2;
2770 #endif // !_TARGET_X86_
2772 fgArgTabEntryPtr thisArgTabEntry = comp->gtArgEntryByArgNum(call, argNum);
2773 thisArgNode = thisArgTabEntry->node;
2777 thisArgNode = comp->gtGetThisArg(call);
2780 assert(thisArgNode->gtOper == GT_PUTARG_REG);
2781 GenTree* originalThisExpr = thisArgNode->gtOp.gtOp1;
2782 GenTree* thisExpr = originalThisExpr;
2784 // We're going to use the 'this' expression multiple times, so make a local to copy it.
2789 if (call->IsTailCallViaHelper() && originalThisExpr->IsLocal())
2791 // For ordering purposes for the special tailcall arguments on x86, we forced the
2792 // 'this' pointer in this case to a local in Compiler::fgMorphTailCall().
2793 // We could possibly use this case to remove copies for all architectures and non-tailcall
2794 // calls by creating a new lcl var or lcl field reference, as is done in the
2795 // LowerVirtualVtableCall() code.
2796 assert(originalThisExpr->OperGet() == GT_LCL_VAR);
2797 lclNum = originalThisExpr->AsLclVarCommon()->GetLclNum();
2800 #endif // _TARGET_X86_
2802 unsigned delegateInvokeTmp = comp->lvaGrabTemp(true DEBUGARG("delegate invoke call"));
2804 LIR::Use thisExprUse(BlockRange(), &thisArgNode->gtOp.gtOp1, thisArgNode);
2805 ReplaceWithLclVar(thisExprUse, delegateInvokeTmp);
2807 thisExpr = thisExprUse.Def(); // it's changed; reload it.
2808 lclNum = delegateInvokeTmp;
2811 // replace original expression feeding into thisPtr with
2812 // [originalThis + offsetOfDelegateInstance]
2814 GenTree* newThisAddr = new (comp, GT_LEA)
2815 GenTreeAddrMode(TYP_BYREF, thisExpr, nullptr, 0, comp->eeGetEEInfo()->offsetOfDelegateInstance);
2817 GenTree* newThis = comp->gtNewOperNode(GT_IND, TYP_REF, newThisAddr);
2819 BlockRange().InsertAfter(thisExpr, newThisAddr, newThis);
2821 thisArgNode->gtOp.gtOp1 = newThis;
2823 // the control target is
2824 // [originalThis + firstTgtOffs]
2826 GenTree* base = new (comp, GT_LCL_VAR) GenTreeLclVar(originalThisExpr->TypeGet(), lclNum, BAD_IL_OFFSET);
2828 unsigned targetOffs = comp->eeGetEEInfo()->offsetOfDelegateFirstTarget;
2829 GenTree* result = new (comp, GT_LEA) GenTreeAddrMode(TYP_REF, base, nullptr, 0, targetOffs);
2830 GenTree* callTarget = Ind(result);
2832 // don't need to sequence and insert this tree, caller will do it
2837 GenTree* Lowering::LowerIndirectNonvirtCall(GenTreeCall* call)
2840 if (call->gtCallCookie != nullptr)
2842 NYI_X86("Morphing indirect non-virtual call with non-standard args");
2846     // Indirect cookie calls get transformed by fgMorphArgs into an indirect call with non-standard args.
2847     // Hence we should never see this type of call in lowering.
2849 noway_assert(call->gtCallCookie == nullptr);
2854 //------------------------------------------------------------------------
2855 // CreateReturnTrapSeq: Create a tree to perform a "return trap", used in PInvoke
2856 // epilogs to invoke a GC under a condition. The return trap checks some global
2857 // location (the runtime tells us where that is and how many indirections to make),
2858 // then, based on the result, conditionally calls a GC helper. We use a special node
2859 // for this because at this time (late in the compilation phases), introducing flow
2860 // is tedious/difficult.
2862 // This is used for PInvoke inlining.
2865 // Code tree to perform the action.
2867 GenTree* Lowering::CreateReturnTrapSeq()
2869 // The GT_RETURNTRAP node expands to this:
2870 // if (g_TrapReturningThreads)
2872 // RareDisablePreemptiveGC();
2875 // The only thing to do here is build up the expression that evaluates 'g_TrapReturningThreads'.
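    // As a sketch, the tree built below is one of the following, depending on which address the
    // runtime hands back:
    //     GT_RETURNTRAP(IND(<icon addrOfCaptureThreadGlobal>))
    //     GT_RETURNTRAP(IND(IND(<icon pAddrOfCaptureThreadGlobal>)))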
2877 void* pAddrOfCaptureThreadGlobal = nullptr;
2878 LONG* addrOfCaptureThreadGlobal = comp->info.compCompHnd->getAddrOfCaptureThreadGlobal(&pAddrOfCaptureThreadGlobal);
2881 if (addrOfCaptureThreadGlobal != nullptr)
2883 testTree = Ind(AddrGen(addrOfCaptureThreadGlobal));
2887 testTree = Ind(Ind(AddrGen(pAddrOfCaptureThreadGlobal)));
2889 return comp->gtNewOperNode(GT_RETURNTRAP, TYP_INT, testTree);
2892 //------------------------------------------------------------------------
2893 // SetGCState: Create a tree that stores the given constant (0 or 1) into the
2894 // thread's GC state field.
2896 // This is used for PInvoke inlining.
2899 // state - constant (0 or 1) to store into the thread's GC state field.
2902 // Code tree to perform the action.
2904 GenTree* Lowering::SetGCState(int state)
2906 // Thread.offsetOfGcState = 0/1
2908 assert(state == 0 || state == 1);
2910 const CORINFO_EE_INFO* pInfo = comp->eeGetEEInfo();
2912 GenTree* base = new (comp, GT_LCL_VAR) GenTreeLclVar(TYP_I_IMPL, comp->info.compLvFrameListRoot, -1);
2914 GenTree* stateNode = new (comp, GT_CNS_INT) GenTreeIntCon(TYP_BYTE, state);
2915 GenTree* addr = new (comp, GT_LEA) GenTreeAddrMode(TYP_I_IMPL, base, nullptr, 1, pInfo->offsetOfGCState);
2916 GenTree* storeGcState = new (comp, GT_STOREIND) GenTreeStoreInd(TYP_BYTE, addr, stateNode);
2917 return storeGcState;
2920 //------------------------------------------------------------------------
2921 // CreateFrameLinkUpdate: Create a tree that either links or unlinks the
2922 // locally-allocated InlinedCallFrame from the Frame list.
2924 // This is used for PInvoke inlining.
2927 // action - whether to link (push) or unlink (pop) the Frame
2930 // Code tree to perform the action.
2932 GenTree* Lowering::CreateFrameLinkUpdate(FrameLinkAction action)
2934 const CORINFO_EE_INFO* pInfo = comp->eeGetEEInfo();
2935 const CORINFO_EE_INFO::InlinedCallFrameInfo& callFrameInfo = pInfo->inlinedCallFrameInfo;
2937 GenTree* TCB = new (comp, GT_LCL_VAR) GenTreeLclVar(GT_LCL_VAR, TYP_I_IMPL, comp->info.compLvFrameListRoot,
2938 (IL_OFFSET)-1); // cast to resolve ambiguity.
2941 GenTree* addr = new (comp, GT_LEA) GenTreeAddrMode(TYP_I_IMPL, TCB, nullptr, 1, pInfo->offsetOfThreadFrame);
2943 GenTree* data = nullptr;
2945 if (action == PushFrame)
2947 // Thread->m_pFrame = &inlinedCallFrame;
2948 data = new (comp, GT_LCL_FLD_ADDR)
2949 GenTreeLclFld(GT_LCL_FLD_ADDR, TYP_BYREF, comp->lvaInlinedPInvokeFrameVar, callFrameInfo.offsetOfFrameVptr);
2953 assert(action == PopFrame);
2954 // Thread->m_pFrame = inlinedCallFrame.m_pNext;
2956 data = new (comp, GT_LCL_FLD) GenTreeLclFld(GT_LCL_FLD, TYP_BYREF, comp->lvaInlinedPInvokeFrameVar,
2957 pInfo->inlinedCallFrameInfo.offsetOfFrameLink);
2959 GenTree* storeInd = new (comp, GT_STOREIND) GenTreeStoreInd(TYP_I_IMPL, addr, data);
2963 //------------------------------------------------------------------------
2964 // InsertPInvokeMethodProlog: Create the code that runs at the start of
2965 // every method that has PInvoke calls.
2967 // Initialize the TCB local and the InlinedCallFrame object. Then link ("push")
2968 // the InlinedCallFrame object on the Frame chain. The layout of InlinedCallFrame
2969 // is defined in vm/frames.h. See also vm/jitinterface.cpp for more information.
2970 // The offsets of these fields are returned by the VM in a call to ICorStaticInfo::getEEInfo().
2972 // The (current) layout is as follows:
2974 // 64-bit 32-bit CORINFO_EE_INFO
2975 // offset offset field name offset when set
2976 // -----------------------------------------------------------------------------------------
2977 // +00h +00h GS cookie offsetOfGSCookie
2978 // +08h +04h vptr for class InlinedCallFrame offsetOfFrameVptr method prolog
2979 // +10h +08h m_Next offsetOfFrameLink method prolog
2980 // +18h +0Ch m_Datum offsetOfCallTarget call site
2981 // +20h n/a m_StubSecretArg not set by JIT
2982 // +28h +10h m_pCallSiteSP offsetOfCallSiteSP x86: call site, and zeroed in method prolog;
2984 // non-x86: method prolog (SP remains
2985 // constant in function, after prolog: no
2986 // localloc and PInvoke in same function)
2987 // +30h +14h m_pCallerReturnAddress offsetOfReturnAddress call site
2988 // +38h +18h m_pCalleeSavedFP offsetOfCalleeSavedFP not set by JIT
2989 // +1Ch JIT retval spill area (int) before call_gc ???
2990 // +20h JIT retval spill area (long) before call_gc ???
2991 // +24h Saved value of EBP method prolog ???
2993 // Note that in the VM, InlinedCallFrame is a C++ class whose objects have a 'this' pointer that points
2994 // to the InlinedCallFrame vptr (the 2nd field listed above), and the GS cookie is stored *before*
2995 // the object. When we link the InlinedCallFrame onto the Frame chain, we must point at this location,
2996 // and not at the beginning of the InlinedCallFrame local, which is actually the GS cookie.
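// In pseudo-code, the prolog built below is roughly (a sketch; which of these stores are emitted
// depends on the target, see the #ifdefs in the code):
//
//     TCB = CORINFO_HELP_INIT_PINVOKE_FRAME(&inlinedCallFrame.vptr [, secretArg]);
//     inlinedCallFrame.m_pCallSiteSP    = SP;   // not on x86/arm32
//     inlinedCallFrame.m_pCalleeSavedFP = FP;   // not on arm32
//     Thread->m_pFrame = &inlinedCallFrame;     // 64-bit IL stubs only (push the frame)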
3001 void Lowering::InsertPInvokeMethodProlog()
3003 noway_assert(comp->info.compCallUnmanaged);
3004 noway_assert(comp->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
3006 if (comp->opts.ShouldUsePInvokeHelpers())
3011 JITDUMP("======= Inserting PInvoke method prolog\n");
3013 // The first BB must be a scratch BB in order for us to be able to safely insert the P/Invoke prolog.
3014 assert(comp->fgFirstBBisScratch());
3016 LIR::Range& firstBlockRange = LIR::AsRange(comp->fgFirstBB);
3018 const CORINFO_EE_INFO* pInfo = comp->eeGetEEInfo();
3019 const CORINFO_EE_INFO::InlinedCallFrameInfo& callFrameInfo = pInfo->inlinedCallFrameInfo;
3021 // First arg: &compiler->lvaInlinedPInvokeFrameVar + callFrameInfo.offsetOfFrameVptr
3023 GenTree* frameAddr = new (comp, GT_LCL_FLD_ADDR)
3024 GenTreeLclFld(GT_LCL_FLD_ADDR, TYP_BYREF, comp->lvaInlinedPInvokeFrameVar, callFrameInfo.offsetOfFrameVptr);
3026 // Call runtime helper to fill in our InlinedCallFrame and push it on the Frame list:
3027 // TCB = CORINFO_HELP_INIT_PINVOKE_FRAME(&symFrameStart, secretArg);
3028     // for x86/arm32, don't pass the secretArg.
3029 CLANG_FORMAT_COMMENT_ANCHOR;
3031 #if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
3032 GenTreeArgList* argList = comp->gtNewArgList(frameAddr);
3034 GenTreeArgList* argList = comp->gtNewArgList(frameAddr, PhysReg(REG_SECRET_STUB_PARAM));
3037 GenTree* call = comp->gtNewHelperCallNode(CORINFO_HELP_INIT_PINVOKE_FRAME, TYP_I_IMPL, 0, argList);
3039 // some sanity checks on the frame list root vardsc
3040 LclVarDsc* varDsc = &comp->lvaTable[comp->info.compLvFrameListRoot];
3041 noway_assert(!varDsc->lvIsParam);
3042 noway_assert(varDsc->lvType == TYP_I_IMPL);
3045     GenTreeLclVar* store = new (comp, GT_STORE_LCL_VAR) GenTreeLclVar(GT_STORE_LCL_VAR, TYP_I_IMPL, comp->info.compLvFrameListRoot,
3046 (IL_OFFSET)-1); // cast to resolve ambiguity.
3047 store->gtOp.gtOp1 = call;
3048 store->gtFlags |= GTF_VAR_DEF;
3050 GenTree* const insertionPoint = firstBlockRange.FirstNonPhiOrCatchArgNode();
3052 comp->fgMorphTree(store);
3053 firstBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, store));
3054 DISPTREERANGE(firstBlockRange, store);
3056 #if !defined(_TARGET_X86_) && !defined(_TARGET_ARM_)
3057 // For x86, this step is done at the call site (due to stack pointer not being static in the function).
3058 // For arm32, CallSiteSP is set up by the call to CORINFO_HELP_INIT_PINVOKE_FRAME.
3060 // --------------------------------------------------------
3061 // InlinedCallFrame.m_pCallSiteSP = @RSP;
3063 GenTreeLclFld* storeSP = new (comp, GT_STORE_LCL_FLD)
3064 GenTreeLclFld(GT_STORE_LCL_FLD, TYP_I_IMPL, comp->lvaInlinedPInvokeFrameVar, callFrameInfo.offsetOfCallSiteSP);
3065 storeSP->gtOp1 = PhysReg(REG_SPBASE);
3066 storeSP->gtFlags |= GTF_VAR_DEF;
3068 firstBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, storeSP));
3069 DISPTREERANGE(firstBlockRange, storeSP);
3071 #endif // !defined(_TARGET_X86_) && !defined(_TARGET_ARM_)
3073 #if !defined(_TARGET_ARM_)
3074 // For arm32, CalleeSavedFP is set up by the call to CORINFO_HELP_INIT_PINVOKE_FRAME.
3076 // --------------------------------------------------------
3077 // InlinedCallFrame.m_pCalleeSavedEBP = @RBP;
3079 GenTreeLclFld* storeFP =
3080 new (comp, GT_STORE_LCL_FLD) GenTreeLclFld(GT_STORE_LCL_FLD, TYP_I_IMPL, comp->lvaInlinedPInvokeFrameVar,
3081 callFrameInfo.offsetOfCalleeSavedFP);
3082 storeFP->gtOp1 = PhysReg(REG_FPBASE);
3083 storeFP->gtFlags |= GTF_VAR_DEF;
3085 firstBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, storeFP));
3086 DISPTREERANGE(firstBlockRange, storeFP);
3087 #endif // !defined(_TARGET_ARM_)
3089 // --------------------------------------------------------
3090 // On 32-bit targets, CORINFO_HELP_INIT_PINVOKE_FRAME initializes the PInvoke frame and then pushes it onto
3091 // the current thread's Frame stack. On 64-bit targets, it only initializes the PInvoke frame.
3092 CLANG_FORMAT_COMMENT_ANCHOR;
3094 #ifdef _TARGET_64BIT_
3095 if (comp->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_IL_STUB))
3097         // Push a frame. (If we are NOT in an IL stub, this is instead done right before the call.)
3098         // The init routine sets InlinedCallFrame's m_pNext, so we just set the thread's top-of-stack.
3099 GenTree* frameUpd = CreateFrameLinkUpdate(PushFrame);
3100 firstBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, frameUpd));
3101 ContainCheckStoreIndir(frameUpd->AsIndir());
3102 DISPTREERANGE(firstBlockRange, frameUpd);
3104 #endif // _TARGET_64BIT_
3107 //------------------------------------------------------------------------
3108 // InsertPInvokeMethodEpilog: Code that needs to be run when exiting any method
3109 // that has PInvoke inlines. This needs to be inserted any place you can exit the
3110 // function: returns, tailcalls and jmps.
3113 // returnBB - basic block from which a method can return
3114 //    lastExpr  - GenTree of the last top level statement of returnBB (debug only arg)
3117 // Code tree to perform the action.
3119 void Lowering::InsertPInvokeMethodEpilog(BasicBlock* returnBB DEBUGARG(GenTreePtr lastExpr))
3121 assert(returnBB != nullptr);
3122 assert(comp->info.compCallUnmanaged);
3124 if (comp->opts.ShouldUsePInvokeHelpers())
3129 JITDUMP("======= Inserting PInvoke method epilog\n");
3131     // A method doing PInvoke calls has exactly one return block unless it has "jmp" or tail calls.
3132 assert(((returnBB == comp->genReturnBB) && (returnBB->bbJumpKind == BBJ_RETURN)) ||
3133 returnBB->endsWithTailCallOrJmp(comp));
3135 LIR::Range& returnBlockRange = LIR::AsRange(returnBB);
3137 GenTree* insertionPoint = returnBlockRange.LastNode();
3138 assert(insertionPoint == lastExpr);
3140 // Note: PInvoke Method Epilog (PME) needs to be inserted just before GT_RETURN, GT_JMP or GT_CALL node in execution
3141 // order so that it is guaranteed that there will be no further PInvokes after that point in the method.
3143 // Example1: GT_RETURN(op1) - say execution order is: Op1, GT_RETURN. After inserting PME, execution order would be
3144 // Op1, PME, GT_RETURN
3146 // Example2: GT_CALL(arg side effect computing nodes, Stk Args Setup, Reg Args setup). The execution order would be
3147 // arg side effect computing nodes, Stk Args setup, Reg Args setup, GT_CALL
3148 // After inserting PME execution order would be:
3149 // arg side effect computing nodes, Stk Args setup, Reg Args setup, PME, GT_CALL
3151 // Example3: GT_JMP. After inserting PME execution order would be: PME, GT_JMP
3152 // That is after PME, args for GT_JMP call will be setup.
3154     // TODO-Cleanup: setting GCState to 1 seems to be redundant, as InsertPInvokeCallProlog will set it to zero before a
3155     // PInvoke call and InsertPInvokeCallEpilog() will set it back to 1 after the PInvoke. Though this is redundant, it is harmless.
3157     // Note that liveness artificially extends the life of the compLvFrameListRoot var if the method being compiled has
3158     // PInvokes. Deleting the statement below would cause an assert in lsra.cpp::SetLastUses() since compLvFrameListRoot
3159     // would be live-in to a BBJ_RETURN block without any uses. Long term we need to fix liveness for the x64 case to
3160     // properly extend the life of the compLvFrameListRoot var.
3162 // Thread.offsetOfGcState = 0/1
3163 // That is [tcb + offsetOfGcState] = 1
3164 GenTree* storeGCState = SetGCState(1);
3165 returnBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, storeGCState));
3166 ContainCheckStoreIndir(storeGCState->AsIndir());
3168 // Pop the frame if necessary. This always happens in the epilog on 32-bit targets. For 64-bit targets, we only do
3169 // this in the epilog for IL stubs; for non-IL stubs the frame is popped after every PInvoke call.
3170 CLANG_FORMAT_COMMENT_ANCHOR;
3172 #ifdef _TARGET_64BIT_
3173 if (comp->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_IL_STUB))
3174 #endif // _TARGET_64BIT_
3176 GenTree* frameUpd = CreateFrameLinkUpdate(PopFrame);
3177 returnBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, frameUpd));
3178 ContainCheckStoreIndir(frameUpd->AsIndir());
3182 //------------------------------------------------------------------------
3183 // InsertPInvokeCallProlog: Emit the call-site prolog for direct calls to unmanaged code.
3184 // It does all the necessary call-site setup of the InlinedCallFrame.
3187 // call - the call for which we are inserting the PInvoke prolog.
3192 void Lowering::InsertPInvokeCallProlog(GenTreeCall* call)
3194 JITDUMP("======= Inserting PInvoke call prolog\n");
3196 GenTree* insertBefore = call;
3197 if (call->gtCallType == CT_INDIRECT)
3200 insertBefore = BlockRange().GetTreeRange(call->gtCallAddr, &isClosed).FirstNode();
3204 const CORINFO_EE_INFO::InlinedCallFrameInfo& callFrameInfo = comp->eeGetEEInfo()->inlinedCallFrameInfo;
3206 gtCallTypes callType = (gtCallTypes)call->gtCallType;
3208 noway_assert(comp->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
3210 if (comp->opts.ShouldUsePInvokeHelpers())
3212 // First argument is the address of the frame variable.
3213 GenTree* frameAddr = new (comp, GT_LCL_VAR_ADDR)
3214 GenTreeLclVar(GT_LCL_VAR_ADDR, TYP_BYREF, comp->lvaInlinedPInvokeFrameVar, BAD_IL_OFFSET);
3216 // Insert call to CORINFO_HELP_JIT_PINVOKE_BEGIN
3217 GenTree* helperCall =
3218 comp->gtNewHelperCallNode(CORINFO_HELP_JIT_PINVOKE_BEGIN, TYP_VOID, 0, comp->gtNewArgList(frameAddr));
3220 comp->fgMorphTree(helperCall);
3221 BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, helperCall));
3222 LowerNode(helperCall); // helper call is inserted before current node and should be lowered here.
3226 // Emit the following sequence:
3228 // InlinedCallFrame.callTarget = methodHandle // stored in m_Datum
3229 // InlinedCallFrame.m_pCallSiteSP = SP // x86 only
3230 // InlinedCallFrame.m_pCallerReturnAddress = return address
3231 // Thread.gcState = 0
3232 // (non-stub) - update top Frame on TCB // 64-bit targets only
3234 // ----------------------------------------------------------------------------------
3235     // Set up InlinedCallFrame.callSiteTarget (which is how the JIT refers to it).
3236 // The actual field is InlinedCallFrame.m_Datum which has many different uses and meanings.
3238 GenTree* src = nullptr;
3240 if (callType == CT_INDIRECT)
3242 #if !defined(_TARGET_64BIT_)
3243 // On 32-bit targets, indirect calls need the size of the stack args in InlinedCallFrame.m_Datum.
3244 const unsigned numStkArgBytes = call->fgArgInfo->GetNextSlotNum() * TARGET_POINTER_SIZE;
3246 src = comp->gtNewIconNode(numStkArgBytes, TYP_INT);
3248 // On 64-bit targets, indirect calls may need the stub parameter value in InlinedCallFrame.m_Datum.
3249 // If the stub parameter value is not needed, m_Datum will be initialized by the VM.
3250 if (comp->info.compPublishStubParam)
3252 src = comp->gtNewLclvNode(comp->lvaStubArgumentVar, TYP_I_IMPL);
3254 #endif // !defined(_TARGET_64BIT_)
3258 assert(callType == CT_USER_FUNC);
3260 void* pEmbedMethodHandle = nullptr;
3261 CORINFO_METHOD_HANDLE embedMethodHandle =
3262 comp->info.compCompHnd->embedMethodHandle(call->gtCallMethHnd, &pEmbedMethodHandle);
3264 noway_assert((!embedMethodHandle) != (!pEmbedMethodHandle));
3266 if (embedMethodHandle != nullptr)
3268 // InlinedCallFrame.callSiteTarget = methodHandle
3269 src = AddrGen(embedMethodHandle);
3273 // InlinedCallFrame.callSiteTarget = *pEmbedMethodHandle
3274 src = Ind(AddrGen(pEmbedMethodHandle));
3280 // Store into InlinedCallFrame.m_Datum, the offset of which is given by offsetOfCallTarget.
3281 GenTreeLclFld* store =
3282 new (comp, GT_STORE_LCL_FLD) GenTreeLclFld(GT_STORE_LCL_FLD, TYP_I_IMPL, comp->lvaInlinedPInvokeFrameVar,
3283 callFrameInfo.offsetOfCallTarget);
3285 store->gtFlags |= GTF_VAR_DEF;
3287 InsertTreeBeforeAndContainCheck(insertBefore, store);
3292 // ----------------------------------------------------------------------------------
3293 // InlinedCallFrame.m_pCallSiteSP = SP
3295 GenTreeLclFld* storeCallSiteSP = new (comp, GT_STORE_LCL_FLD)
3296 GenTreeLclFld(GT_STORE_LCL_FLD, TYP_I_IMPL, comp->lvaInlinedPInvokeFrameVar, callFrameInfo.offsetOfCallSiteSP);
3298 storeCallSiteSP->gtOp1 = PhysReg(REG_SPBASE);
3299 storeCallSiteSP->gtFlags |= GTF_VAR_DEF;
3301 InsertTreeBeforeAndContainCheck(insertBefore, storeCallSiteSP);
3305 // ----------------------------------------------------------------------------------
3306 // InlinedCallFrame.m_pCallerReturnAddress = &label (the address of the instruction immediately following the call)
3308 GenTreeLclFld* storeLab =
3309 new (comp, GT_STORE_LCL_FLD) GenTreeLclFld(GT_STORE_LCL_FLD, TYP_I_IMPL, comp->lvaInlinedPInvokeFrameVar,
3310 callFrameInfo.offsetOfReturnAddress);
3312 // We don't have a real label, and inserting one is hard (even if we made a special node),
3313 // so for now we will just 'know' what this means in codegen.
3314 GenTreeLabel* labelRef = new (comp, GT_LABEL) GenTreeLabel(nullptr);
3315 labelRef->gtType = TYP_I_IMPL;
3316 storeLab->gtOp1 = labelRef;
3317 storeLab->gtFlags |= GTF_VAR_DEF;
3319 InsertTreeBeforeAndContainCheck(insertBefore, storeLab);
3321 // Push the PInvoke frame if necessary. On 32-bit targets this only happens in the method prolog if a method
3322 // contains PInvokes; on 64-bit targets this is necessary in non-stubs.
3323 CLANG_FORMAT_COMMENT_ANCHOR;
3325 #ifdef _TARGET_64BIT_
3326 if (!comp->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_IL_STUB))
3328 // Set the TCB's frame to be the one we just created.
3329 // Note the init routine for the InlinedCallFrame (CORINFO_HELP_INIT_PINVOKE_FRAME)
3330 // has prepended it to the linked list to maintain the stack of Frames.
3332 // Stubs do this once per stub, not once per call.
3333 GenTree* frameUpd = CreateFrameLinkUpdate(PushFrame);
3334 BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, frameUpd));
3335 ContainCheckStoreIndir(frameUpd->AsIndir());
3337 #endif // _TARGET_64BIT_
3339 // IMPORTANT **** This instruction must come last!!! ****
3340 // It changes the thread's state to Preemptive mode
3341 // ----------------------------------------------------------------------------------
3342 // [tcb + offsetOfGcState] = 0
3344 GenTree* storeGCState = SetGCState(0);
3345 BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, storeGCState));
3346 ContainCheckStoreIndir(storeGCState->AsIndir());
3349 //------------------------------------------------------------------------
3350 // InsertPInvokeCallEpilog: Insert the code that goes after every inlined pinvoke call.
3353 // call - the call for which we are inserting the PInvoke epilog.
3358 void Lowering::InsertPInvokeCallEpilog(GenTreeCall* call)
3360 JITDUMP("======= Inserting PInvoke call epilog\n");
3362 if (comp->opts.ShouldUsePInvokeHelpers())
3364 noway_assert(comp->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
3366 // First argument is the address of the frame variable.
3367 GenTree* frameAddr =
3368 new (comp, GT_LCL_VAR) GenTreeLclVar(GT_LCL_VAR, TYP_BYREF, comp->lvaInlinedPInvokeFrameVar, BAD_IL_OFFSET);
3369 frameAddr->SetOperRaw(GT_LCL_VAR_ADDR);
3371 // Insert call to CORINFO_HELP_JIT_PINVOKE_END
3372 GenTreeCall* helperCall =
3373 comp->gtNewHelperCallNode(CORINFO_HELP_JIT_PINVOKE_END, TYP_VOID, 0, comp->gtNewArgList(frameAddr));
3375 comp->fgMorphTree(helperCall);
3376 BlockRange().InsertAfter(call, LIR::SeqTree(comp, helperCall));
3377 ContainCheckCallOperands(helperCall);
3382 GenTree* insertionPoint = call->gtNext;
3384 GenTree* tree = SetGCState(1);
3385 BlockRange().InsertBefore(insertionPoint, LIR::SeqTree(comp, tree));
3386 ContainCheckStoreIndir(tree->AsIndir());
3388 tree = CreateReturnTrapSeq();
3389 BlockRange().InsertBefore(insertionPoint, LIR::SeqTree(comp, tree));
3390 ContainCheckReturnTrap(tree->AsOp());
3392 // Pop the frame if necessary. On 32-bit targets this only happens in the method epilog; on 64-bit targets this
3393 // happens after every PInvoke call in non-stubs. 32-bit targets instead mark the frame as inactive.
3394 CLANG_FORMAT_COMMENT_ANCHOR;
3396 #ifdef _TARGET_64BIT_
3397 if (!comp->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_IL_STUB))
3399 tree = CreateFrameLinkUpdate(PopFrame);
3400 BlockRange().InsertBefore(insertionPoint, LIR::SeqTree(comp, tree));
3401 ContainCheckStoreIndir(tree->AsIndir());
3404 const CORINFO_EE_INFO::InlinedCallFrameInfo& callFrameInfo = comp->eeGetEEInfo()->inlinedCallFrameInfo;
3406 // ----------------------------------------------------------------------------------
3407 // InlinedCallFrame.m_pCallerReturnAddress = nullptr
3409 GenTreeLclFld* const storeCallSiteTracker =
3410 new (comp, GT_STORE_LCL_FLD) GenTreeLclFld(GT_STORE_LCL_FLD, TYP_I_IMPL, comp->lvaInlinedPInvokeFrameVar,
3411 callFrameInfo.offsetOfReturnAddress);
3413 GenTreeIntCon* const constantZero = new (comp, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, 0);
3415 storeCallSiteTracker->gtOp1 = constantZero;
3416 storeCallSiteTracker->gtFlags |= GTF_VAR_DEF;
3418 BlockRange().InsertBefore(insertionPoint, constantZero, storeCallSiteTracker);
3419 ContainCheckStoreLoc(storeCallSiteTracker);
3420 #endif // _TARGET_64BIT_
3423 //------------------------------------------------------------------------
3424 // LowerNonvirtPinvokeCall: Lower a non-virtual / indirect PInvoke call
3427 // call - The call to lower.
3430 // The lowered call tree.
3432 GenTree* Lowering::LowerNonvirtPinvokeCall(GenTreeCall* call)
3434 // PInvoke lowering varies depending on the flags passed in by the EE. By default,
3435 // GC transitions are generated inline; if CORJIT_FLAG_USE_PINVOKE_HELPERS is specified,
3436 // GC transitions are instead performed using helper calls. Examples of each case are given
3437 // below. Note that the data structure that is used to store information about a call frame
3438 // containing any P/Invoke calls is initialized in the method prolog (see
3439 // InsertPInvokeMethod{Prolog,Epilog} for details).
3441 // Inline transitions:
3442 // InlinedCallFrame inlinedCallFrame;
3446 // // Set up frame information
3447 // inlinedCallFrame.callTarget = methodHandle; // stored in m_Datum
3448 // inlinedCallFrame.m_pCallSiteSP = SP; // x86 only
3449 // inlinedCallFrame.m_pCallerReturnAddress = &label; (the address of the instruction immediately following the
3451 // Thread.m_pFrame = &inlinedCallFrame; (non-IL-stub only)
3453 // // Switch the thread's GC mode to preemptive mode
3454 // thread->m_fPreemptiveGCDisabled = 0;
3456 // // Call the unmanaged method
3459 // // Switch the thread's GC mode back to cooperative mode
3460 // thread->m_fPreemptiveGCDisabled = 1;
3462 // // Rendezvous with a running collection if necessary
3463 // if (g_TrapReturningThreads)
3464 // RareDisablePreemptiveGC();
3466 // Transitions using helpers:
3468 // OpaqueFrame opaqueFrame;
3472 // // Call the JIT_PINVOKE_BEGIN helper
3473 // JIT_PINVOKE_BEGIN(&opaqueFrame);
3475 // // Call the unmanaged method
3478 // // Call the JIT_PINVOKE_END helper
3479 // JIT_PINVOKE_END(&opaqueFrame);
3481 // Note that the JIT_PINVOKE_{BEGIN,END} helpers currently use the default calling convention for the target
3482 // platform. They may be changed in the future such that they preserve all register values.
3484 GenTree* result = nullptr;
3485 void* addr = nullptr;
3487 // assert we have seen one of these
3488 noway_assert(comp->info.compCallUnmanaged != 0);
3490 // All code generated by this function must not contain the randomly-inserted NOPs
3491 // that we insert to inhibit JIT spraying in partial trust scenarios.
3492 // The PINVOKE_PROLOG op signals this to the code generator/emitter.
3494 GenTree* prolog = new (comp, GT_NOP) GenTree(GT_PINVOKE_PROLOG, TYP_VOID);
3495 BlockRange().InsertBefore(call, prolog);
3497 InsertPInvokeCallProlog(call);
3499 if (call->gtCallType != CT_INDIRECT)
3501 noway_assert(call->gtCallType == CT_USER_FUNC);
3502 CORINFO_METHOD_HANDLE methHnd = call->gtCallMethHnd;
3504 CORINFO_CONST_LOOKUP lookup;
3505 comp->info.compCompHnd->getAddressOfPInvokeTarget(methHnd, &lookup);
3507 void* addr = lookup.addr;
3508 switch (lookup.accessType)
3511 if (!IsCallTargetInRange(addr))
3513 result = AddrGen(addr);
3517 // a direct call within range of hardware relative call instruction
3518 // stash the address for codegen
3519 call->gtDirectCallAddress = addr;
3520 #ifdef FEATURE_READYTORUN_COMPILER
3521 call->gtEntryPoint.addr = nullptr;
3527 result = Ind(AddrGen(addr));
3531 result = Ind(Ind(AddrGen(addr)));
3536 InsertPInvokeCallEpilog(call);
3541 // Expand the code necessary to calculate the control target.
3542 // Returns: the expression needed to calculate the control target
3543 // May insert embedded statements
3544 GenTree* Lowering::LowerVirtualVtableCall(GenTreeCall* call)
3546 noway_assert(call->gtCallType == CT_USER_FUNC);
3548 // If this is a tail call via helper, thisPtr will be the third argument.
3550 regNumber thisPtrArgReg;
3552 #ifndef _TARGET_X86_ // x86 tailcall via helper follows normal calling convention, but with extra stack args.
3553 if (call->IsTailCallViaHelper())
3556 thisPtrArgReg = REG_ARG_2;
3559 #endif // !_TARGET_X86_
3562 thisPtrArgReg = comp->codeGen->genGetThisArgReg(call);
3565 // get a reference to the thisPtr being passed
3566 fgArgTabEntry* argEntry = comp->gtArgEntryByArgNum(call, thisPtrArgNum);
3567 assert(argEntry->regNum == thisPtrArgReg);
3568 assert(argEntry->node->gtOper == GT_PUTARG_REG);
3569 GenTree* thisPtr = argEntry->node->gtOp.gtOp1;
3571 // If what we are passing as the thisptr is not already a local, make a new local to place it in
3572 // because we will be creating expressions based on it.
3574 if (thisPtr->IsLocal())
3576 lclNum = thisPtr->gtLclVarCommon.gtLclNum;
3580 // Split off the thisPtr and store to a temporary variable.
3581 if (vtableCallTemp == BAD_VAR_NUM)
3583 vtableCallTemp = comp->lvaGrabTemp(true DEBUGARG("virtual vtable call"));
3586 LIR::Use thisPtrUse(BlockRange(), &(argEntry->node->gtOp.gtOp1), argEntry->node);
3587 ReplaceWithLclVar(thisPtrUse, vtableCallTemp);
3589 lclNum = vtableCallTemp;
3592 // We'll introduce another use of this local so increase its ref count.
3593 comp->lvaTable[lclNum].incRefCnts(comp->compCurBB->getBBWeight(comp), comp);
3595 // Get hold of the vtable offset (note: this might be expensive)
3596 unsigned vtabOffsOfIndirection;
3597 unsigned vtabOffsAfterIndirection;
3599 comp->info.compCompHnd->getMethodVTableOffset(call->gtCallMethHnd, &vtabOffsOfIndirection,
3600 &vtabOffsAfterIndirection, &isRelative);
3602 // If the thisPtr is a local field, then construct a local field type node
3604 if (thisPtr->isLclField())
3606 local = new (comp, GT_LCL_FLD)
3607 GenTreeLclFld(GT_LCL_FLD, thisPtr->TypeGet(), lclNum, thisPtr->AsLclFld()->gtLclOffs);
3611 local = new (comp, GT_LCL_VAR) GenTreeLclVar(GT_LCL_VAR, thisPtr->TypeGet(), lclNum, BAD_IL_OFFSET);
3614 // pointer to virtual table = [REG_CALL_THIS + offs]
3615 GenTree* result = Ind(Offset(local, VPTR_OFFS));
3617 // Get the appropriate vtable chunk
3618 if (vtabOffsOfIndirection != CORINFO_VIRTUALCALL_NO_CHUNK)
3622 // MethodTable offset is a relative pointer.
3624 // An additional temporary variable is used to store the virtual table pointer.
3625 // The address of the method is obtained by the following computations:
3627 // Save relative offset to tmp (vtab is virtual table pointer, vtabOffsOfIndirection is offset of
3628 // vtable-1st-level-indirection):
3629 // tmp = [vtab + vtabOffsOfIndirection]
3631 // Save address of method to result (vtabOffsAfterIndirection is offset of vtable-2nd-level-indirection):
3632 // result = [vtab + vtabOffsOfIndirection + vtabOffsAfterIndirection + tmp]
3633 unsigned lclNumTmp = comp->lvaGrabTemp(true DEBUGARG("lclNumTmp"));
3635 comp->lvaTable[lclNumTmp].incRefCnts(comp->compCurBB->getBBWeight(comp), comp);
3636 GenTree* lclvNodeStore = comp->gtNewTempAssign(lclNumTmp, result);
3638 LIR::Range range = LIR::SeqTree(comp, lclvNodeStore);
3639 JITDUMP("result of obtaining pointer to virtual table:\n");
3641 BlockRange().InsertBefore(call, std::move(range));
3643 GenTree* tmpTree = comp->gtNewLclvNode(lclNumTmp, result->TypeGet());
3644 tmpTree = Offset(tmpTree, vtabOffsOfIndirection);
3646 tmpTree = comp->gtNewOperNode(GT_IND, TYP_I_IMPL, tmpTree, false);
3647 GenTree* offs = comp->gtNewIconNode(vtabOffsOfIndirection + vtabOffsAfterIndirection, TYP_INT);
3648 result = comp->gtNewOperNode(GT_ADD, TYP_I_IMPL, comp->gtNewLclvNode(lclNumTmp, result->TypeGet()), offs);
3650 result = Ind(OffsetByIndex(result, tmpTree));
3654 // result = [REG_CALL_IND_SCRATCH + vtabOffsOfIndirection]
3655 result = Ind(Offset(result, vtabOffsOfIndirection));
3660 assert(!isRelative);
3663 // Load the function address
3664 // result = [reg+vtabOffs]
3667 result = Ind(Offset(result, vtabOffsAfterIndirection));
3673 // Lower stub dispatched virtual calls.
3674 GenTree* Lowering::LowerVirtualStubCall(GenTreeCall* call)
3676 assert((call->gtFlags & GTF_CALL_VIRT_KIND_MASK) == GTF_CALL_VIRT_STUB);
3678 // An x86 JIT which uses full stub dispatch must generate only
3679 // the following stub dispatch calls:
3681 // (1) isCallRelativeIndirect:
3682 // call dword ptr [rel32] ; FF 15 ---rel32----
3683 // (2) isCallRelative:
3684 // call abc ; E8 ---rel32----
3685 // (3) isCallRegisterIndirect:
3687 // call dword ptr [eax] ; FF 10
3689 // THIS IS VERY TIGHTLY TIED TO THE PREDICATES IN
3690 // vm\i386\cGenCpu.h, esp. isCallRegisterIndirect.
3692 GenTree* result = nullptr;
3694 #ifdef _TARGET_64BIT_
3695 // Non-tail calls: Jump Stubs are not taken into account by VM for mapping an AV into a NullRef
3696 // exception. Therefore, JIT needs to emit an explicit null check. Note that Jit64 too generates
3697 // an explicit null check.
3699 // Tail calls: fgMorphTailCall() materializes null check explicitly and hence no need to emit
3702 // Non-64-bit: No need to null check the this pointer - the dispatch code will deal with this.
3703 // The VM considers exceptions that occur in stubs on 64-bit to be not managed exceptions and
3704 // it would be difficult to change this in a way so that it affects only the right stubs.
3706 if (!call->IsTailCallViaHelper())
3708 call->gtFlags |= GTF_CALL_NULLCHECK;
3712 // This is code to set up an indirect call to a stub address computed
3713 // via dictionary lookup.
3714 if (call->gtCallType == CT_INDIRECT)
3716 // The importer decided we needed a stub call via a computed
3717 // stub dispatch address, i.e. an address which came from a dictionary lookup.
3718 // - The dictionary lookup produces an indirected address, suitable for call
3719 // via "call [VirtualStubParam.reg]"
3721 // This combination will only be generated for shared generic code and when
3722 // stub dispatch is active.
3724 // fgMorphArgs will have created trees to pass the address in VirtualStubParam.reg.
3725 // All we have to do here is add an indirection to generate the actual call target.
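// Illustrative (assumed): if the dictionary lookup produced an indirection cell address `cell`,
// the lowered call is effectively `call [cell]`, with `cell` kept in the virtual-stub-param
// register as enforced by the GTF_IND_REQ_ADDR_IN_REG flag set below.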
3727 GenTree* ind = Ind(call->gtCallAddr);
3728 ind->gtFlags |= GTF_IND_REQ_ADDR_IN_REG;
3730 BlockRange().InsertAfter(call->gtCallAddr, ind);
3731 ContainCheckIndir(ind->AsIndir());
3732 call->gtCallAddr = ind;
3736 // Direct stub call.
3737 // Get stub addr. This will return NULL if virtual call stubs are not active
3738 void* stubAddr = call->gtStubCallStubAddr;
3739 noway_assert(stubAddr != nullptr);
3741 // If not CT_INDIRECT, then it should always be a relative indirect call.
3742 // This is ensured by VM.
3743 noway_assert(call->IsVirtualStubRelativeIndir());
3745 // Direct stub calls, though the stubAddr itself may still need to be
3746 // accessed via an indirection.
3747 GenTree* addr = AddrGen(stubAddr);
3750 // On x86, for tailcall via helper, the JIT_TailCall helper takes the stubAddr as
3751 // the target address, and we set a flag that it's a VSD call. The helper then
3752 // handles any necessary indirection.
3753 if (call->IsTailCallViaHelper())
3757 #endif // _TARGET_X86_
3759 if (result == nullptr)
3761 GenTree* indir = Ind(addr);
3763 // On x86 we generate this:
3764 // call dword ptr [rel32] ; FF 15 ---rel32----
3765 // So we don't use a register.
3766 #ifndef _TARGET_X86_
3767 // on x64 we must materialize the target using specific registers.
3768 addr->gtRegNum = comp->virtualStubParamInfo->GetReg();
3770 indir->gtRegNum = REG_JUMP_THUNK_PARAM;
3771 indir->gtFlags |= GTF_IND_REQ_ADDR_IN_REG;
3777 // TODO-Cleanup: start emitting random NOPS
3781 //------------------------------------------------------------------------
3782 // AddrModeCleanupHelper: Remove the nodes that are no longer used after an
3783 // addressing mode is constructed
3786 // addrMode - A pointer to a new GenTreeAddrMode
3787 // node - The node currently being considered for removal
3793 // 'addrMode' and 'node' must be contained in the current block
3795 void Lowering::AddrModeCleanupHelper(GenTreeAddrMode* addrMode, GenTree* node)
3797 if (node == addrMode->Base() || node == addrMode->Index())
3802 // TODO-LIR: change this to use the LIR mark bit and iterate instead of recursing
3803 node->VisitOperands([this, addrMode](GenTree* operand) -> GenTree::VisitResult {
3804 AddrModeCleanupHelper(addrMode, operand);
3805 return GenTree::VisitResult::Continue;
3808 BlockRange().Remove(node);
3811 //------------------------------------------------------------------------
3812 // Lowering::AreSourcesPossibleModifiedLocals:
3813 // Given two nodes which will be used in an addressing mode (base,
3814 // index), check to see if they are lclVar reads, and if so, walk
3815 // backwards from the use until both reads have been visited to
3816 // determine if they are potentially modified in that range.
3819 // addr - the node that uses the base and index nodes
3820 // base - the base node
3821 // index - the index node
3823 // Returns: true if either the base or index may be modified between the
3826 bool Lowering::AreSourcesPossiblyModifiedLocals(GenTree* addr, GenTree* base, GenTree* index)
3828 assert(addr != nullptr);
3830 unsigned markCount = 0;
3832 SideEffectSet baseSideEffects;
3833 if (base != nullptr)
3835 if (base->OperIsLocalRead())
3837 baseSideEffects.AddNode(comp, base);
3845 SideEffectSet indexSideEffects;
3846 if (index != nullptr)
3848 if (index->OperIsLocalRead())
3850 indexSideEffects.AddNode(comp, index);
3858 for (GenTree* cursor = addr;; cursor = cursor->gtPrev)
3860 assert(cursor != nullptr);
3867 if (cursor == index)
3872 if ((base == nullptr) && (index == nullptr))
3877 m_scratchSideEffects.Clear();
3878 m_scratchSideEffects.AddNode(comp, cursor);
3879 if ((base != nullptr) && m_scratchSideEffects.InterferesWith(baseSideEffects, false))
3884 if ((index != nullptr) && m_scratchSideEffects.InterferesWith(indexSideEffects, false))
3891 //------------------------------------------------------------------------
3892 // TryCreateAddrMode: recognize trees which can be implemented using an
3893 // addressing mode and transform them to a GT_LEA
3896 // use: the use of the address we want to transform
3897 // isIndir: true if this addressing mode is the child of an indir
3900 // The created LEA node or the original address node if an LEA could
3903 GenTree* Lowering::TryCreateAddrMode(LIR::Use&& use, bool isIndir)
3905 GenTree* addr = use.Def();
3906 GenTreePtr base = nullptr;
3907 GenTreePtr index = nullptr;
3909 unsigned offset = 0;
3912 // TODO-1stClassStructs: This logic is here to preserve prior behavior. Note that previously
3913 // block ops were not considered for addressing modes, but an add under it may have been.
3914 // This should be replaced with logic that more carefully determines when an addressing mode
3915 // would be beneficial for a block op.
3918 GenTree* indir = use.User();
3919 if (indir->TypeGet() == TYP_STRUCT)
3923 else if (varTypeIsStruct(indir))
3925 // We can have an indirection on the rhs of a block copy (it is the source
3926 // object). This is not a "regular" indirection.
3927 // (Note that the user check could be costly.)
3929 if (BlockRange().TryGetUse(indir, &indirUse) && indirUse.User()->OperIsIndir())
3935 isIndir = !indir->OperIsBlk();
3940 // Find out if an addressing mode can be constructed
3942 comp->codeGen->genCreateAddrMode(addr, -1, true, 0, &rev, &base, &index, &scale, &offset, true /*nogen*/);
3951 // this is just a reg-const add
3952 if (index == nullptr)
3957 // this is just a reg-reg add
3958 if (scale == 1 && offset == 0)
3964 // make sure there are no side effects between the defs of the leaves and the use
3965 if (!doAddrMode || AreSourcesPossiblyModifiedLocals(addr, base, index))
3967 JITDUMP("No addressing mode:\n ");
3972 GenTreePtr arrLength = nullptr;
3974 JITDUMP("Addressing mode:\n");
3975 JITDUMP(" Base\n ");
3977 if (index != nullptr)
3979 JITDUMP(" + Index * %u + %u\n ", scale, offset);
3984 JITDUMP(" + %u\n", offset);
3987 var_types addrModeType = addr->TypeGet();
3988 if (addrModeType == TYP_REF)
3990 addrModeType = TYP_BYREF;
3993 GenTreeAddrMode* addrMode = new (comp, GT_LEA) GenTreeAddrMode(addrModeType, base, index, scale, offset);
3995 // Neither the base nor the index should now be contained.
3996 if (base != nullptr)
3998 base->ClearContained();
4000 if (index != nullptr)
4002 index->ClearContained();
4004 addrMode->gtRsvdRegs = addr->gtRsvdRegs;
4005 addrMode->gtFlags |= (addr->gtFlags & GTF_IND_FLAGS);
4006 addrMode->gtFlags &= ~GTF_ALL_EFFECT; // LEAs are side-effect-free.
4008 JITDUMP("New addressing mode node:\n");
4012 BlockRange().InsertAfter(addr, addrMode);
4014 // Now we need to remove all the nodes subsumed by the addrMode
4015 AddrModeCleanupHelper(addrMode, addr);
4017 // Replace the original address node with the addrMode.
4018 use.ReplaceWith(comp, addrMode);
4023 //------------------------------------------------------------------------
4024 // LowerAdd: turn this add into a GT_LEA if that would be profitable
4027 // node - the node we care about
4030 // The next node to lower if we have transformed the ADD; nullptr otherwise.
4032 GenTree* Lowering::LowerAdd(GenTree* node)
4034 GenTree* next = node->gtNext;
4036 #ifndef _TARGET_ARMARCH_
4037 if (varTypeIsIntegralOrI(node))
4040 if (BlockRange().TryGetUse(node, &use))
4042 // If this is a child of an indir, let the parent handle it.
4043 // If there is a chain of adds, only look at the topmost one.
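// Illustrative (assumed): for IND(ADD(ADD(base, index), 16)) the indir's own lowering invokes
// TryCreateAddrMode, so the inner ADDs are skipped here and folded into a single
// GT_LEA(base, index, 1, 16) there.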
4044 GenTree* parent = use.User();
4045 if (!parent->OperIsIndir() && (parent->gtOper != GT_ADD))
4047 GenTree* addr = TryCreateAddrMode(std::move(use), false);
4050 return addr->gtNext;
4055 #endif // !_TARGET_ARMARCH_
4060 //------------------------------------------------------------------------
4061 // LowerUnsignedDivOrMod: Lowers a GT_UDIV/GT_UMOD node.
4064 // divMod - pointer to the GT_UDIV/GT_UMOD node to be lowered
4067 // Returns a boolean indicating whether the node was transformed.
4070 // - Transform UDIV/UMOD by power of 2 into RSZ/AND
4071 // - Transform UDIV by constant >= 2^(N-1) into GE
4072 // - Transform UDIV/UMOD by constant >= 3 into "magic division"
4075 bool Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod)
4077 assert(divMod->OperIs(GT_UDIV, GT_UMOD));
4079 GenTree* next = divMod->gtNext;
4080 GenTree* dividend = divMod->gtGetOp1();
4081 GenTree* divisor = divMod->gtGetOp2();
4083 #if !defined(_TARGET_64BIT_)
4084 if (dividend->OperIs(GT_LONG))
4090 if (!divisor->IsCnsIntOrI())
4095 if (dividend->IsCnsIntOrI())
4097 // We shouldn't see a divmod with constant operands here but if we do then it's likely
4098 // because optimizations are disabled or it's a case that's supposed to throw an exception.
4099 // Don't optimize this.
4103 const var_types type = divMod->TypeGet();
4104 assert((type == TYP_INT) || (type == TYP_I_IMPL));
4106 size_t divisorValue = static_cast<size_t>(divisor->AsIntCon()->IconValue());
4108 if (type == TYP_INT)
4110 // Clear the upper 32 bits of the value; they may be set to 1 because constants
4111 // are treated as signed and stored in ssize_t, which is 64 bits wide on 64-bit targets.
4112 divisorValue &= UINT32_MAX;
4115 if (divisorValue == 0)
4120 const bool isDiv = divMod->OperIs(GT_UDIV);
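// Illustrative (assumed from the transform below): for a power-of-2 divisor such as 8,
// x UDIV 8 lowers to x RSZ 3 and x UMOD 8 lowers to x AND 7.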
4122 if (isPow2(divisorValue))
4129 divisorValue = genLog2(divisorValue);
4137 divMod->SetOper(newOper);
4138 divisor->gtIntCon.SetIconValue(divisorValue);
4139 ContainCheckNode(divMod);
4144 // If the divisor is greater than or equal to 2^(N - 1) then the result is 1
4145 // iff the dividend is greater than or equal to the divisor.
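// Illustrative (assumed): for a 32-bit UDIV by 0xC0000000, any dividend below the divisor yields 0
// and any dividend at or above it yields 1, so the division reduces to an unsigned GE compare.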
4146 if (((type == TYP_INT) && (divisorValue > (UINT32_MAX / 2))) ||
4147 ((type == TYP_LONG) && (divisorValue > (UINT64_MAX / 2))))
4149 divMod->SetOper(GT_GE);
4150 divMod->gtFlags |= GTF_UNSIGNED;
4151 ContainCheckNode(divMod);
4156 // TODO-ARM-CQ: Currently there's no GT_MULHI for ARM32/64
4157 #ifdef _TARGET_XARCH_
4158 if (!comp->opts.MinOpts() && (divisorValue >= 3))
4164 if (type == TYP_INT)
4166 magic = MagicDivide::GetUnsigned32Magic(static_cast<uint32_t>(divisorValue), &add, &shift);
4170 #ifdef _TARGET_64BIT_
4171 magic = MagicDivide::GetUnsigned64Magic(static_cast<uint64_t>(divisorValue), &add, &shift);
4177 // Depending on the "add" flag returned by GetUnsigned32Magic/GetUnsigned64Magic we need to generate:
4178 // add == false (when divisor == 3 for example):
4179 // div = (dividend MULHI magic) RSZ shift
4180 // add == true (when divisor == 7 for example):
4181 // mulhi = dividend MULHI magic
4182 // div = (((dividend SUB mulhi) RSZ 1) ADD mulhi) RSZ (shift - 1)
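// Illustrative example (assumed, not from the original comment): for a 32-bit unsigned divide by 3
// the magic constant is 0xAAAAAAAB with shift == 1 and add == false, so the lowered sequence computes
//     div = MULHI(dividend, 0xAAAAAAAB) RSZ 1
// i.e. the high 32 bits of the 64-bit product, shifted right once.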
4183 const bool requiresAdjustment = add;
4184 const bool requiresDividendMultiuse = requiresAdjustment || !isDiv;
4185 const unsigned curBBWeight = m_block->getBBWeight(comp);
4186 unsigned dividendLclNum = BAD_VAR_NUM;
4188 if (requiresDividendMultiuse)
4190 LIR::Use dividendUse(BlockRange(), &divMod->gtOp1, divMod);
4191 dividendLclNum = ReplaceWithLclVar(dividendUse);
4192 dividend = divMod->gtGetOp1();
4195 // Insert a new GT_MULHI node before the existing GT_UDIV/GT_UMOD node.
4196 // The existing node will later be transformed into a GT_RSZ/GT_SUB that
4197 // computes the final result. This way we don't need to find and change the use
4198 // of the existing node.
4199 GenTree* mulhi = comp->gtNewOperNode(GT_MULHI, type, dividend, divisor);
4200 mulhi->gtFlags |= GTF_UNSIGNED;
4201 divisor->AsIntCon()->SetIconValue(magic);
4202 BlockRange().InsertBefore(divMod, mulhi);
4203 GenTree* firstNode = mulhi;
4205 if (requiresAdjustment)
4207 GenTree* dividend = comp->gtNewLclvNode(dividendLclNum, type);
4208 GenTree* sub = comp->gtNewOperNode(GT_SUB, type, dividend, mulhi);
4209 BlockRange().InsertBefore(divMod, dividend, sub);
4210 comp->lvaTable[dividendLclNum].incRefCnts(curBBWeight, comp);
4212 GenTree* one = comp->gtNewIconNode(1, TYP_INT);
4213 GenTree* rsz = comp->gtNewOperNode(GT_RSZ, type, sub, one);
4214 BlockRange().InsertBefore(divMod, one, rsz);
4216 LIR::Use mulhiUse(BlockRange(), &sub->gtOp.gtOp2, sub);
4217 unsigned mulhiLclNum = ReplaceWithLclVar(mulhiUse);
4219 GenTree* mulhiCopy = comp->gtNewLclvNode(mulhiLclNum, type);
4220 GenTree* add = comp->gtNewOperNode(GT_ADD, type, rsz, mulhiCopy);
4221 BlockRange().InsertBefore(divMod, mulhiCopy, add);
4222 comp->lvaTable[mulhiLclNum].incRefCnts(curBBWeight, comp);
4228 GenTree* shiftBy = comp->gtNewIconNode(shift, TYP_INT);
4229 BlockRange().InsertBefore(divMod, shiftBy);
4233 divMod->SetOper(GT_RSZ);
4234 divMod->gtOp1 = mulhi;
4235 divMod->gtOp2 = shiftBy;
4239 GenTree* div = comp->gtNewOperNode(GT_RSZ, type, mulhi, shiftBy);
4241 // dividend UMOD divisor = dividend SUB (div MUL divisor)
4242 GenTree* divisor = comp->gtNewIconNode(divisorValue, type);
4243 GenTree* mul = comp->gtNewOperNode(GT_MUL, type, div, divisor);
4244 GenTree* dividend = comp->gtNewLclvNode(dividendLclNum, type);
4246 divMod->SetOper(GT_SUB);
4247 divMod->gtOp1 = dividend;
4248 divMod->gtOp2 = mul;
4250 BlockRange().InsertBefore(divMod, div, divisor, mul, dividend);
4251 comp->lvaTable[dividendLclNum].incRefCnts(curBBWeight, comp);
4253 ContainCheckRange(firstNode, divMod);
4261 // LowerConstIntDivOrMod: Transform integer GT_DIV/GT_MOD nodes with a power of 2
4262 // const divisor into equivalent but faster sequences.
4265 // node - pointer to the DIV or MOD node
4268 // The next node to lower.
4270 GenTree* Lowering::LowerConstIntDivOrMod(GenTree* node)
4272 assert((node->OperGet() == GT_DIV) || (node->OperGet() == GT_MOD));
4273 GenTree* next = node->gtNext;
4274 GenTree* divMod = node;
4275 GenTree* dividend = divMod->gtGetOp1();
4276 GenTree* divisor = divMod->gtGetOp2();
4278 if (!divisor->IsCnsIntOrI())
4280 return next; // no transformations to make
4283 const var_types type = divMod->TypeGet();
4284 assert((type == TYP_INT) || (type == TYP_LONG));
4286 if (dividend->IsCnsIntOrI())
4288 // We shouldn't see a divmod with constant operands here but if we do then it's likely
4289 // because optimizations are disabled or it's a case that's supposed to throw an exception.
4290 // Don't optimize this.
4294 ssize_t divisorValue = divisor->gtIntCon.IconValue();
4296 if (divisorValue == -1 || divisorValue == 0)
4298 // x / 0 and x % 0 can't be optimized because they are required to throw an exception.
4300 // x / -1 can't be optimized because INT_MIN / -1 is required to throw an exception.
4302 // x % -1 is always 0 and the IL spec says that the rem instruction "can" throw an exception if x is
4303 // the minimum representable integer. However, the C# spec says that an exception "is" thrown in this
4304 // case so optimizing this case would break C# code.
4306 // A runtime check could be used to handle this case but it's probably too rare to matter.
4310 bool isDiv = divMod->OperGet() == GT_DIV;
4314 if ((type == TYP_INT && divisorValue == INT_MIN) || (type == TYP_LONG && divisorValue == INT64_MIN))
4316 // If the divisor is the minimum representable integer value then we can use a compare,
4317 // the result is 1 iff the dividend equals divisor.
4318 divMod->SetOper(GT_EQ);
4319 ContainCheckCompare(divMod->AsOp());
4324 size_t absDivisorValue =
4325 (divisorValue == SSIZE_T_MIN) ? static_cast<size_t>(divisorValue) : static_cast<size_t>(abs(divisorValue));
4327 if (!isPow2(absDivisorValue))
4329 if (comp->opts.MinOpts())
4334 #ifdef _TARGET_XARCH_
4338 if (type == TYP_INT)
4340 magic = MagicDivide::GetSigned32Magic(static_cast<int32_t>(divisorValue), &shift);
4344 #ifdef _TARGET_64BIT_
4345 magic = MagicDivide::GetSigned64Magic(static_cast<int64_t>(divisorValue), &shift);
4351 divisor->gtIntConCommon.SetIconValue(magic);
4353 // Insert a new GT_MULHI node in front of the existing GT_DIV/GT_MOD node.
4354 // The existing node will later be transformed into a GT_ADD/GT_SUB that
4355 // computes the final result. This way we don't need to find and change the
4356 // use of the existing node.
4357 GenTree* mulhi = comp->gtNewOperNode(GT_MULHI, type, divisor, dividend);
4358 BlockRange().InsertBefore(divMod, mulhi);
4360 // mulhi was the easy part. Now we need to generate different code depending
4361 // on the divisor value:
4363 // div = signbit(mulhi) + mulhi
4365 // div = signbit(mulhi) + sar(mulhi, 1) ; requires shift adjust
4367 // mulhi += dividend ; requires add adjust
4368 // div = signbit(mulhi) + sar(mulhi, 2) ; requires shift adjust
4370 // mulhi -= dividend ; requires sub adjust
4371 // div = signbit(mulhi) + sar(mulhi, 1) ; requires shift adjust
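// Illustrative example (assumed, not from the original comment): for a signed 32-bit divide by 7
// a typical magic constant is 0x92492493 with shift == 2; viewed as a signed 32-bit value the
// constant is negative while the divisor is positive, so the add adjust is required:
//     mulhi = MULHI(dividend, 0x92492493) + dividend
//     div   = signbit(mulhi) + sar(mulhi, 2)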
4372 bool requiresAddSubAdjust = signum(divisorValue) != signum(magic);
4373 bool requiresShiftAdjust = shift != 0;
4374 bool requiresDividendMultiuse = requiresAddSubAdjust || !isDiv;
4375 unsigned curBBWeight = comp->compCurBB->getBBWeight(comp);
4376 unsigned dividendLclNum = BAD_VAR_NUM;
4378 if (requiresDividendMultiuse)
4380 LIR::Use dividendUse(BlockRange(), &mulhi->gtOp.gtOp2, mulhi);
4381 dividendLclNum = ReplaceWithLclVar(dividendUse);
4386 if (requiresAddSubAdjust)
4388 dividend = comp->gtNewLclvNode(dividendLclNum, type);
4389 comp->lvaTable[dividendLclNum].incRefCnts(curBBWeight, comp);
4391 adjusted = comp->gtNewOperNode(divisorValue > 0 ? GT_ADD : GT_SUB, type, mulhi, dividend);
4392 BlockRange().InsertBefore(divMod, dividend, adjusted);
4399 GenTree* shiftBy = comp->gtNewIconNode(genTypeSize(type) * 8 - 1, type);
4400 GenTree* signBit = comp->gtNewOperNode(GT_RSZ, type, adjusted, shiftBy);
4401 BlockRange().InsertBefore(divMod, shiftBy, signBit);
4403 LIR::Use adjustedUse(BlockRange(), &signBit->gtOp.gtOp1, signBit);
4404 unsigned adjustedLclNum = ReplaceWithLclVar(adjustedUse);
4405 adjusted = comp->gtNewLclvNode(adjustedLclNum, type);
4406 comp->lvaTable[adjustedLclNum].incRefCnts(curBBWeight, comp);
4407 BlockRange().InsertBefore(divMod, adjusted);
4409 if (requiresShiftAdjust)
4411 shiftBy = comp->gtNewIconNode(shift, TYP_INT);
4412 adjusted = comp->gtNewOperNode(GT_RSH, type, adjusted, shiftBy);
4413 BlockRange().InsertBefore(divMod, shiftBy, adjusted);
4418 divMod->SetOperRaw(GT_ADD);
4419 divMod->gtOp.gtOp1 = adjusted;
4420 divMod->gtOp.gtOp2 = signBit;
4424 GenTree* div = comp->gtNewOperNode(GT_ADD, type, adjusted, signBit);
4426 dividend = comp->gtNewLclvNode(dividendLclNum, type);
4427 comp->lvaTable[dividendLclNum].incRefCnts(curBBWeight, comp);
4429 // dividend % divisor = dividend - divisor x div
4430 GenTree* divisor = comp->gtNewIconNode(divisorValue, type);
4431 GenTree* mul = comp->gtNewOperNode(GT_MUL, type, div, divisor);
4432 BlockRange().InsertBefore(divMod, dividend, div, divisor, mul);
4434 divMod->SetOperRaw(GT_SUB);
4435 divMod->gtOp.gtOp1 = dividend;
4436 divMod->gtOp.gtOp2 = mul;
4441 // Currently there's no GT_MULHI for ARM32/64
4446 // We're committed to the conversion now. Go find the use if any.
4448 if (!BlockRange().TryGetUse(node, &use))
4453 // We need to use the dividend node multiple times so its value needs to be
4454 // computed once and stored in a temp variable.
4456 unsigned curBBWeight = comp->compCurBB->getBBWeight(comp);
4458 LIR::Use opDividend(BlockRange(), &divMod->gtOp.gtOp1, divMod);
4459 ReplaceWithLclVar(opDividend);
4461 dividend = divMod->gtGetOp1();
4462 assert(dividend->OperGet() == GT_LCL_VAR);
4464 unsigned dividendLclNum = dividend->gtLclVar.gtLclNum;
4466 GenTree* adjustment = comp->gtNewOperNode(GT_RSH, type, dividend, comp->gtNewIconNode(type == TYP_INT ? 31 : 63));
4468 if (absDivisorValue == 2)
4470 // If the divisor is +/-2 then we'd end up with a bitwise and between 0/-1 and 1.
4471 // We can get the same result by using GT_RSZ instead of GT_RSH.
4472 adjustment->SetOper(GT_RSZ);
4476 adjustment = comp->gtNewOperNode(GT_AND, type, adjustment, comp->gtNewIconNode(absDivisorValue - 1, type));
4479 GenTree* adjustedDividend =
4480 comp->gtNewOperNode(GT_ADD, type, adjustment, comp->gtNewLclvNode(dividendLclNum, type));
4482 comp->lvaTable[dividendLclNum].incRefCnts(curBBWeight, comp);
4488 // perform the division by right shifting the adjusted dividend
4489 divisor->gtIntCon.SetIconValue(genLog2(absDivisorValue));
4491 newDivMod = comp->gtNewOperNode(GT_RSH, type, adjustedDividend, divisor);
4492 ContainCheckShiftRotate(newDivMod->AsOp());
4494 if (divisorValue < 0)
4496 // negate the result if the divisor is negative
4497 newDivMod = comp->gtNewOperNode(GT_NEG, type, newDivMod);
4498 ContainCheckNode(newDivMod);
4503 // dividend % divisor = dividend - divisor x (dividend / divisor)
4504 // divisor x (dividend / divisor) translates to (dividend >> log2(divisor)) << log2(divisor)
4505 // which simply discards the low log2(divisor) bits, that's just dividend & ~(divisor - 1)
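// Illustrative (assumed): for x MOD 8 the biased dividend AND-ed with ~7 yields x rounded toward
// zero to a multiple of 8, and the remainder is the original dividend minus that rounded value.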
4506 divisor->gtIntCon.SetIconValue(~(absDivisorValue - 1));
4508 newDivMod = comp->gtNewOperNode(GT_SUB, type, comp->gtNewLclvNode(dividendLclNum, type),
4509 comp->gtNewOperNode(GT_AND, type, adjustedDividend, divisor));
4510 ContainCheckBinary(newDivMod->AsOp());
4512 comp->lvaTable[dividendLclNum].incRefCnts(curBBWeight, comp);
4515 // Remove the divisor and dividend nodes from the linear order,
4516 // since we have reused them and will resequence the tree
4517 BlockRange().Remove(divisor);
4518 BlockRange().Remove(dividend);
4520 // linearize and insert the new tree before the original divMod node
4521 InsertTreeBeforeAndContainCheck(divMod, newDivMod);
4522 BlockRange().Remove(divMod);
4524 // replace the original divmod node with the new divmod tree
4525 use.ReplaceWith(comp, newDivMod);
4527 return newDivMod->gtNext;
4529 //------------------------------------------------------------------------
4530 // LowerSignedDivOrMod: transform integer GT_DIV/GT_MOD nodes with a power of 2
4531 // const divisor into equivalent but faster sequences.
4534 // node - the DIV or MOD node
4537 // The next node to lower.
4539 GenTree* Lowering::LowerSignedDivOrMod(GenTreePtr node)
4541 assert((node->OperGet() == GT_DIV) || (node->OperGet() == GT_MOD));
4542 GenTree* next = node->gtNext;
4543 GenTree* divMod = node;
4544 GenTree* dividend = divMod->gtGetOp1();
4545 GenTree* divisor = divMod->gtGetOp2();
4547 #ifdef _TARGET_XARCH_
4548 if (!varTypeIsFloating(node->TypeGet()))
4549 #endif // _TARGET_XARCH_
4551 next = LowerConstIntDivOrMod(node);
4554 if ((node->OperGet() == GT_DIV) || (node->OperGet() == GT_MOD))
4556 ContainCheckDivOrMod(node->AsOp());
4562 void Lowering::WidenSIMD12IfNecessary(GenTreeLclVarCommon* node)
4565 if (node->TypeGet() == TYP_SIMD12)
4568 // RyuJit backend depends on the assumption that on 64-Bit targets Vector3 size is rounded off
4569 // to TARGET_POINTER_SIZE and hence Vector3 locals on stack can be treated as TYP_SIMD16 for
4570 // reading and writing purposes.
4573 // The RyuJIT backend also makes the implicit assumption that when Vector3 type args are passed in
4574 // registers or on the stack, the uppermost 4 bytes will be zero.
4576 // For P/Invoke return and Reverse P/Invoke argument passing, the native compiler doesn't guarantee
4577 // that the upper 4 bytes of a Vector3 type struct are zero initialized and hence assumption 2 is
4580 // RyuJIT x64 Windows: arguments are treated as passed by ref and hence read/written just 12
4581 // bytes. In case of Vector3 returns, Caller allocates a zero initialized Vector3 local and
4582 // passes it as the retBuf arg and the callee method writes only 12 bytes to retBuf. For this reason,
4583 // there is no need to clear upper 4-bytes of Vector3 type args.
4585 // RyuJIT x64 Unix: arguments are treated as passed by value and read/written as if TYP_SIMD16.
4586 // Vector3 return values are returned in two return registers and the caller assembles them into a
4587 // single xmm reg. Hence RyuJIT explicitly generates code to clear the upper 4 bytes of Vector3
4588 // type args in the prolog and of the Vector3 type return value of a call.
4590 // RyuJIT x86 Windows: all non-param Vector3 local vars are allocated as 16 bytes. Vector3 arguments
4591 // are pushed as 12 bytes. For return values, a 16-byte local is allocated and the address passed
4592 // as a return buffer pointer. The callee doesn't write the high 4 bytes, and we don't need to clear
4595 unsigned varNum = node->AsLclVarCommon()->GetLclNum();
4596 LclVarDsc* varDsc = &comp->lvaTable[varNum];
4598 if (comp->lvaMapSimd12ToSimd16(varDsc))
4600 JITDUMP("Mapping TYP_SIMD12 lclvar node to TYP_SIMD16:\n");
4602 JITDUMP("============");
4604 node->gtType = TYP_SIMD16;
4607 #endif // FEATURE_SIMD
4610 //------------------------------------------------------------------------
4611 // LowerArrElem: Lower a GT_ARR_ELEM node
4614 // node - the GT_ARR_ELEM node to lower.
4617 // The next node to lower.
4620 // pTree points to a pointer to a GT_ARR_ELEM node.
4623 // This performs the following lowering. We start with a node of the form:
4629 // First, we create temps for arrObj if it is not already a lclVar, and for any of the index
4630 // expressions that have side-effects.
4631 // We then transform the tree into:
4632 // <offset is null - no accumulated offset for the first index>
4635 // /--* ArrIndex[i, ]
4637 // /--| arrOffs[i, ]
4640 // +--* ArrIndex[*,j]
4642 // /--| arrOffs[*,j]
4643 // +--* lclVar NewTemp
4644 // /--* lea (scale = element size, offset = offset of first element)
4646 // The new stmtExpr may be omitted if the <arrObj> is a lclVar.
4647 // The new stmtExpr may be embedded if the <arrObj> is not the first tree in linear order for
4648 // the statement containing the original arrMD.
4649 // Note that the arrMDOffs is the INDEX of the lea, but is evaluated before the BASE (which is the second
4650 // reference to NewTemp), because that provides more accurate lifetimes.
4651 // There may be 1, 2 or 3 dimensions, with 1, 2 or 3 arrMDIdx nodes, respectively.
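// Illustrative (assumed): for a rank-2 access a[i, j] this produces an ArrIndex/ArrOffset pair for
// i and then for j, each performing the bounds check and accumulating the flattened offset, followed
// by a GT_LEA whose base is the array object, whose index is the accumulated offset scaled by the
// element size, and whose constant offset is the offset of the first element of the MD array data.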
4653 GenTree* Lowering::LowerArrElem(GenTree* node)
4655 // This will assert if we don't have an ArrElem node
4656 GenTreeArrElem* arrElem = node->AsArrElem();
4657 const unsigned char rank = arrElem->gtArrElem.gtArrRank;
4658 const unsigned blockWeight = m_block->getBBWeight(comp);
4660 JITDUMP("Lowering ArrElem\n");
4661 JITDUMP("============\n");
4662 DISPTREERANGE(BlockRange(), arrElem);
4665 assert(arrElem->gtArrObj->TypeGet() == TYP_REF);
4667 // We need to have the array object in a lclVar.
4668 if (!arrElem->gtArrObj->IsLocal())
4670 LIR::Use arrObjUse(BlockRange(), &arrElem->gtArrObj, arrElem);
4671 ReplaceWithLclVar(arrObjUse);
4674 GenTree* arrObjNode = arrElem->gtArrObj;
4675 assert(arrObjNode->IsLocal());
4677 LclVarDsc* const varDsc = &comp->lvaTable[arrElem->gtArrObj->AsLclVarCommon()->gtLclNum];
4679 GenTree* insertionPoint = arrElem;
4681 // The first ArrOffs node will have 0 for the offset of the previous dimension.
4682 GenTree* prevArrOffs = new (comp, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, 0);
4683 BlockRange().InsertBefore(insertionPoint, prevArrOffs);
4684 GenTree* nextToLower = prevArrOffs;
4686 for (unsigned char dim = 0; dim < rank; dim++)
4688 GenTree* indexNode = arrElem->gtArrElem.gtArrInds[dim];
4690 // Use the original arrObjNode on the 0th ArrIndex node, and clone it for subsequent ones.
4691 GenTreePtr idxArrObjNode;
4694 idxArrObjNode = arrObjNode;
4698 idxArrObjNode = comp->gtClone(arrObjNode);
4699 varDsc->incRefCnts(blockWeight, comp);
4700 BlockRange().InsertBefore(insertionPoint, idxArrObjNode);
4703 // Next comes the GT_ARR_INDEX node.
4704 GenTreeArrIndex* arrMDIdx = new (comp, GT_ARR_INDEX)
4705 GenTreeArrIndex(TYP_INT, idxArrObjNode, indexNode, dim, rank, arrElem->gtArrElem.gtArrElemType);
4706 arrMDIdx->gtFlags |= ((idxArrObjNode->gtFlags | indexNode->gtFlags) & GTF_ALL_EFFECT);
4707 BlockRange().InsertBefore(insertionPoint, arrMDIdx);
4709 GenTree* offsArrObjNode = comp->gtClone(arrObjNode);
4710 varDsc->incRefCnts(blockWeight, comp);
4711 BlockRange().InsertBefore(insertionPoint, offsArrObjNode);
4713 GenTreeArrOffs* arrOffs =
4714 new (comp, GT_ARR_OFFSET) GenTreeArrOffs(TYP_I_IMPL, prevArrOffs, arrMDIdx, offsArrObjNode, dim, rank,
4715 arrElem->gtArrElem.gtArrElemType);
4716 arrOffs->gtFlags |= ((prevArrOffs->gtFlags | arrMDIdx->gtFlags | offsArrObjNode->gtFlags) & GTF_ALL_EFFECT);
4717 BlockRange().InsertBefore(insertionPoint, arrOffs);
4719 prevArrOffs = arrOffs;
4722 // Generate the LEA and make it reverse evaluation, because we want to evaluate the index expression before the
4724 unsigned scale = arrElem->gtArrElem.gtArrElemSize;
4725 unsigned offset = comp->eeGetMDArrayDataOffset(arrElem->gtArrElem.gtArrElemType, arrElem->gtArrElem.gtArrRank);
4727 GenTreePtr leaIndexNode = prevArrOffs;
4728 if (!jitIsScaleIndexMul(scale))
4730 // We do the address arithmetic in TYP_I_IMPL, though note that the lower bounds and lengths in memory are
4732 GenTreePtr scaleNode = new (comp, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, scale);
4733 GenTreePtr mulNode = new (comp, GT_MUL) GenTreeOp(GT_MUL, TYP_I_IMPL, leaIndexNode, scaleNode);
4734 BlockRange().InsertBefore(insertionPoint, scaleNode, mulNode);
4735 leaIndexNode = mulNode;
4739 GenTreePtr leaBase = comp->gtClone(arrObjNode);
4740 varDsc->incRefCnts(blockWeight, comp);
4741 BlockRange().InsertBefore(insertionPoint, leaBase);
4743 GenTreePtr leaNode = new (comp, GT_LEA) GenTreeAddrMode(arrElem->TypeGet(), leaBase, leaIndexNode, scale, offset);
4745 BlockRange().InsertBefore(insertionPoint, leaNode);
4747 LIR::Use arrElemUse;
4748 if (BlockRange().TryGetUse(arrElem, &arrElemUse))
4750 arrElemUse.ReplaceWith(comp, leaNode);
4754 leaNode->SetUnusedValue();
4757 BlockRange().Remove(arrElem);
4759 JITDUMP("Results of lowering ArrElem:\n");
4760 DISPTREERANGE(BlockRange(), leaNode);
4766 void Lowering::DoPhase()
4768 // If we have any PInvoke calls, insert the one-time prolog code. We'll insert the epilog code in the
4769 // appropriate spots later. NOTE: there is a minor optimization opportunity here, as we still create p/invoke
4770 // data structures and setup/teardown even if we've eliminated all p/invoke calls due to dead code elimination.
4771 if (comp->info.compCallUnmanaged)
4773 InsertPInvokeMethodProlog();
4776 #if !defined(_TARGET_64BIT_)
4777 DecomposeLongs decomp(comp); // Initialize the long decomposition class.
4778 if (comp->compLongUsed)
4780 decomp.PrepareForDecomposition();
4782 #endif // !defined(_TARGET_64BIT_)
4784 for (BasicBlock* block = comp->fgFirstBB; block; block = block->bbNext)
4786 /* Make the block publicly available */
4787 comp->compCurBB = block;
4789 #if !defined(_TARGET_64BIT_)
4790 if (comp->compLongUsed)
4792 decomp.DecomposeBlock(block);
4794 #endif //!_TARGET_64BIT_
4800 JITDUMP("Lower has completed modifying nodes, proceeding to initialize LSRA TreeNodeInfo structs...\n");
4803 comp->fgDispBasicBlocks(true);
4807 // TODO-Throughput: We re-sort local variables to get the goodness of enregistering recently
4808 // introduced local variables both by Rationalize and Lower; downside is we need to
4809 // recompute standard local variable liveness in order to get Linear CodeGen working.
4810 // For now we'll take the throughput hit of recomputing local liveness but in the long term
4811 // we're striving to use the unified liveness computation (fgLocalVarLiveness) and stop
4812 // computing it separately in LSRA.
4813 if ((comp->lvaCount != 0) && comp->backendRequiresLocalVarLifetimes())
4815 comp->lvaSortAgain = true;
4817 comp->EndPhase(PHASE_LOWERING_DECOMP);
4819 comp->fgLocalVarLiveness();
4820 // local var liveness can delete code, which may create empty blocks
4821 if (!comp->opts.MinOpts() && !comp->opts.compDbgCode)
4823 comp->optLoopsMarked = false;
4824 bool modified = comp->fgUpdateFlowGraph();
4825 if (modified || comp->lvaSortAgain)
4827 JITDUMP("had to run another liveness pass:\n");
4828 comp->fgLocalVarLiveness();
4833 JITDUMP("Liveness pass finished after lowering, IR:\n");
4834 JITDUMP("lvasortagain = %d\n", comp->lvaSortAgain);
4837 comp->fgDispBasicBlocks(true);
4840 for (BasicBlock* block = comp->fgFirstBB; block; block = block->bbNext)
4842 assert(LIR::AsRange(block).CheckLIR(comp, true));
4846 // The initialization code for the TreeNodeInfo map was initially part of a single full IR
4847 // traversal and it has been split because the order of traversal performed by fgWalkTreePost
4848 // does not necessarily lower nodes in execution order and also, it could potentially
4849 // add new BasicBlocks on the fly as part of the Lowering pass so the traversal won't be complete.
4851 // Doing a new traversal guarantees we 'see' all new introduced trees and basic blocks allowing us
4852 // to correctly initialize all the data structures LSRA requires later on.
4853 // This code still has issues with the initialization of locals recently introduced by
4854 // lowering. The effect of this is that any temporary local variable introduced by lowering won't be
4855 // enregistered yielding suboptimal CQ.
4856 // The reason for this is that we cannot re-sort the local variables per ref-count and bump the number of
4857 // tracked variables just here because then LSRA will work with mismatching BitSets (i.e. BitSets with different
4858 // 'epochs' that were created before and after variable resorting, that will result in different number of tracked
4859 // local variables).
4861 // The fix for this is to refactor this code to be run JUST BEFORE LSRA and not as part of lowering.
4862 // It's also desirable to avoid initializing this code using a non-execution order traversal.
4864 LsraLocation currentLoc = 1;
4865 for (BasicBlock* block = m_lsra->startBlockSequence(); block != nullptr; block = m_lsra->moveToNextBlock())
4869 // Increment the LsraLocation (currentLoc) at each BasicBlock.
4870 // This ensures that the block boundary (RefTypeBB, RefTypeExpUse and RefTypeDummyDef) RefPositions
4871 // are in increasing location order.
4875 for (GenTree* node : BlockRange().NonPhiNodes())
4877 // We increment the number position of each tree node by 2 to simplify the logic when there's the case of
4878 // a tree that implicitly does a dual-definition of temps (the long case). In this case it is easier to
4879 // already have an idle spot to handle a dual-def instead of making some messy adjustments if we only
4880 // increment the number position by one.
4881 CLANG_FORMAT_COMMENT_ANCHOR;
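// Illustrative (assumed): leaving every other location unused means a node that defines two temps
// (e.g. a decomposed long) can claim the adjacent location without renumbering the whole block.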
4884 node->gtSeqNum = currentLoc;
4885 // In DEBUG, we want to set the gtRegTag to GT_REGTAG_REG, so that subsequent dumps will show the register
4887 // Although this looks like a no-op it sets the tag.
4888 node->gtRegNum = node->gtRegNum;
4891 node->gtLsraInfo.Initialize(m_lsra, node, currentLoc);
4895 TreeNodeInfoInit(node);
4897 // Only nodes that produce values should have a non-zero dstCount.
4898 assert((node->gtLsraInfo.dstCount == 0) || node->IsValue());
4900 // If the node produces an unused value, mark it as a local def-use
4901 if (node->IsValue() && node->IsUnusedValue())
4903 node->gtLsraInfo.isLocalDefUse = true;
4904 node->gtLsraInfo.dstCount = 0;
4908 // TODO-CQ: Enable this code after fixing the isContained() logic to not abort for these
4909 // top-level nodes that throw away their result.
4910 // If this is an interlocked operation that has a non-last-use lclVar as its op2,
4911 // make sure we allocate a target register for the interlocked operation; otherwise we need
4912 // not allocate a register
4913 else if ((node->OperGet() == GT_LOCKADD || node->OperGet() == GT_XCHG || node->OperGet() == GT_XADD))
4915 node->gtLsraInfo.dstCount = 0;
4916 if (node->gtGetOp2()->IsLocal() && (node->gtFlags & GTF_VAR_DEATH) == 0)
4917 node->gtLsraInfo.isLocalDefUse = true;
4922 assert(BlockRange().CheckLIR(comp, true));
4924 DBEXEC(VERBOSE, DumpNodeInfoMap());
4929 //------------------------------------------------------------------------
4930 // Lowering::CheckCallArg: check that a call argument is in an expected
4931 // form after lowering.
4934 // arg - the argument to check.
4936 void Lowering::CheckCallArg(GenTree* arg)
4938 if (arg->OperIsStore() || arg->IsArgPlaceHolderNode() || arg->IsNothingNode() || arg->OperIsCopyBlkOp())
4943 switch (arg->OperGet())
4947 GenTreeFieldList* list = arg->AsFieldList();
4948 assert(list->isContained());
4949 assert(list->IsFieldListHead());
4951 for (; list != nullptr; list = list->Rest())
4953 assert(list->Current()->OperIsPutArg());
4959 assert(arg->OperIsPutArg());
4964 //------------------------------------------------------------------------
4965 // Lowering::CheckCall: check that a call is in an expected form after
4966 // lowering. Currently this amounts to checking its
4967 // arguments, but could be expanded to verify more
4968 // properties in the future.
4971 // call - the call to check.
4973 void Lowering::CheckCall(GenTreeCall* call)
4975 if (call->gtCallObjp != nullptr)
4977 CheckCallArg(call->gtCallObjp);
4980 for (GenTreeArgList* args = call->gtCallArgs; args != nullptr; args = args->Rest())
4982 CheckCallArg(args->Current());
4985 for (GenTreeArgList* args = call->gtCallLateArgs; args != nullptr; args = args->Rest())
4987 CheckCallArg(args->Current());
4991 //------------------------------------------------------------------------
4992 // Lowering::CheckNode: check that an LIR node is in an expected form
4996 // compiler - the compiler context.
4997 // node - the node to check.
4999 void Lowering::CheckNode(Compiler* compiler, GenTree* node)
5001 switch (node->OperGet())
5004 CheckCall(node->AsCall());
5009 assert(node->TypeGet() != TYP_SIMD12);
5011 #ifdef _TARGET_64BIT_
5013 case GT_STORE_LCL_VAR:
5015 unsigned lclNum = node->AsLclVarCommon()->GetLclNum();
5016 LclVarDsc* lclVar = &compiler->lvaTable[lclNum];
5017 assert(node->TypeGet() != TYP_SIMD12 || compiler->lvaIsFieldOfDependentlyPromotedStruct(lclVar));
5020 #endif // _TARGET_64BIT_
5028 //------------------------------------------------------------------------
5029 // Lowering::CheckBlock: check that the contents of an LIR block are in an
5030 // expected form after lowering.
5033 // compiler - the compiler context.
5034 // block - the block to check.
5036 bool Lowering::CheckBlock(Compiler* compiler, BasicBlock* block)
5038 assert(block->isEmpty() || block->IsLIR());
5040 LIR::Range& blockRange = LIR::AsRange(block);
5041 for (GenTree* node : blockRange)
5043 CheckNode(compiler, node);
5046 assert(blockRange.CheckLIR(compiler, true));
5051 void Lowering::LowerBlock(BasicBlock* block)
5053 assert(block == comp->compCurBB); // compCurBB must already be set.
5054 assert(block->isEmpty() || block->IsLIR());
5058 // NOTE: some of the lowering methods insert calls before the node being
5059 // lowered (See e.g. InsertPInvoke{Method,Call}{Prolog,Epilog}). In
5060 // general, any code that is inserted before the current node should be
5061 // "pre-lowered" as they won't be subject to further processing.
5062 // Lowering::CheckBlock() runs some extra checks on call arguments in
5063 // order to help catch unlowered nodes.
5065 GenTree* node = BlockRange().FirstNode();
5066 while (node != nullptr)
5068 node = LowerNode(node);
5071 assert(CheckBlock(comp, block));
5074 /** Verifies if both of these trees represent the same indirection.
5075 * Used by Lower to annotate if CodeGen can generate an instruction of the
5076 * form *addrMode BinOp= expr
5078 * Preconditions: both trees are children of GT_INDs and their underlying children
5079 * have the same gtOper.
5081 * This is a first iteration to actually recognize trees that can be code-generated
5082 * as a single read-modify-write instruction on AMD64/x86. For now
5083 * this method only supports the recognition of simple addressing modes (through GT_LEA)
5084 * or local var indirections. Local fields, array access and other more complex nodes are
5085 * not yet supported.
5087 * TODO-CQ: Perform tree recognition by using the Value Numbering Package, that way we can recognize
5088 * arbitrary complex trees and support much more addressing patterns.
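* Illustrative example (assumed, not from the original comment): for a store of the form
*     STOREIND(LEA(base, 8), ADD(IND(LEA(base, 8)), value))
* the two indirections are equivalent, which lets codegen emit a single read-modify-write
* instruction such as `add [base+8], value` on xarch.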
5090 bool Lowering::IndirsAreEquivalent(GenTreePtr candidate, GenTreePtr storeInd)
5092 assert(candidate->OperGet() == GT_IND);
5093 assert(storeInd->OperGet() == GT_STOREIND);
5095 // We should check the size of the indirections. If they are
5096 // different, say because of a cast, then we can't call them equivalent. Doing so could cause us
5098 // Signed-ness difference is okay and expected since a store indirection must always
5099 // be signed based on the CIL spec, but a load could be unsigned.
5100 if (genTypeSize(candidate->gtType) != genTypeSize(storeInd->gtType))
5105 GenTreePtr pTreeA = candidate->gtGetOp1();
5106 GenTreePtr pTreeB = storeInd->gtGetOp1();
5108 // This method will be called by codegen (as well as during lowering).
5109 // After register allocation, the sources may have been spilled and reloaded
5110 // to a different register, indicated by an inserted GT_RELOAD node.
5111 pTreeA = pTreeA->gtSkipReloadOrCopy();
5112 pTreeB = pTreeB->gtSkipReloadOrCopy();
5117 if (pTreeA->OperGet() != pTreeB->OperGet())
5122 oper = pTreeA->OperGet();
5126 case GT_LCL_VAR_ADDR:
5127 case GT_CLS_VAR_ADDR:
5129 return NodesAreEquivalentLeaves(pTreeA, pTreeB);
5133 GenTreeAddrMode* gtAddr1 = pTreeA->AsAddrMode();
5134 GenTreeAddrMode* gtAddr2 = pTreeB->AsAddrMode();
5135 return NodesAreEquivalentLeaves(gtAddr1->Base(), gtAddr2->Base()) &&
5136 NodesAreEquivalentLeaves(gtAddr1->Index(), gtAddr2->Index()) &&
5137 gtAddr1->gtScale == gtAddr2->gtScale && gtAddr1->gtOffset == gtAddr2->gtOffset;
5140 // We don't handle anything that is not either a constant,
5141 // a local var or LEA.
5146 /** Test whether the two given nodes are the same leaves.
5147 * Right now, only constant integers and local variables are supported
5149 bool Lowering::NodesAreEquivalentLeaves(GenTreePtr tree1, GenTreePtr tree2)
5151 if (tree1 == nullptr && tree2 == nullptr)
5156 // both null, they are equivalent, otherwise if either is null not equivalent
5157 if (tree1 == nullptr || tree2 == nullptr)
5162 tree1 = tree1->gtSkipReloadOrCopy();
5163 tree2 = tree2->gtSkipReloadOrCopy();
5165 if (tree1->TypeGet() != tree2->TypeGet())
5170 if (tree1->OperGet() != tree2->OperGet())
5175 if (!tree1->OperIsLeaf() || !tree2->OperIsLeaf())
5180 switch (tree1->OperGet())
5183 return tree1->gtIntCon.gtIconVal == tree2->gtIntCon.gtIconVal &&
5184 tree1->IsIconHandle() == tree2->IsIconHandle();
5186 case GT_LCL_VAR_ADDR:
5187 return tree1->gtLclVarCommon.gtLclNum == tree2->gtLclVarCommon.gtLclNum;
5188 case GT_CLS_VAR_ADDR:
5189 return tree1->gtClsVar.gtClsVarHnd == tree2->gtClsVar.gtClsVarHnd;
5196 * Get common information required to handle a cast instruction
5198 void Lowering::getCastDescription(GenTreePtr treeNode, CastInfo* castInfo)
5200 // Initialize castInfo
5201 memset(castInfo, 0, sizeof(*castInfo));
5203 GenTreePtr castOp = treeNode->gtCast.CastOp();
5205 var_types dstType = treeNode->CastToType();
5206 var_types srcType = castOp->TypeGet();
5208 castInfo->unsignedDest = varTypeIsUnsigned(dstType);
5209 castInfo->unsignedSource = varTypeIsUnsigned(srcType);
5211 // If necessary, force the srcType to unsigned when the GT_UNSIGNED flag is set.
5212 if (!castInfo->unsignedSource && (treeNode->gtFlags & GTF_UNSIGNED) != 0)
5214 srcType = genUnsignedType(srcType);
5215 castInfo->unsignedSource = true;
5218 if (treeNode->gtOverflow() &&
5219 (genTypeSize(srcType) >= genTypeSize(dstType) || (srcType == TYP_INT && dstType == TYP_ULONG)))
5221 castInfo->requiresOverflowCheck = true;
5224 if (castInfo->requiresOverflowCheck)
5226 ssize_t typeMin = 0;
5227 ssize_t typeMax = 0;
5228 ssize_t typeMask = 0;
5229 bool signCheckOnly = false;
5231 // Do we need to compare the value, or just check masks
5235 assert(!"unreachable: getCastDescription");
5239 typeMask = ssize_t((int)0xFFFFFF80);
5240 typeMin = SCHAR_MIN;
5241 typeMax = SCHAR_MAX;
5245 typeMask = ssize_t((int)0xFFFFFF00L);
5249 typeMask = ssize_t((int)0xFFFF8000);
5255 typeMask = ssize_t((int)0xFFFF0000L);
5259 if (srcType == TYP_UINT)
5261 signCheckOnly = true;
5265 #ifdef _TARGET_64BIT_
5266 typeMask = 0xFFFFFFFF80000000LL;
5268 typeMask = 0x80000000;
5276 if (srcType == TYP_INT)
5278 signCheckOnly = true;
5282 #ifdef _TARGET_64BIT_
5283 typeMask = 0xFFFFFFFF00000000LL;
5285 typeMask = 0x00000000;
5291 signCheckOnly = true;
5295 signCheckOnly = true;
5301 castInfo->signCheckOnly = true;
5304 castInfo->typeMax = typeMax;
5305 castInfo->typeMin = typeMin;
5306 castInfo->typeMask = typeMask;
5310 //------------------------------------------------------------------------
5311 // Containment Analysis
5312 //------------------------------------------------------------------------
5313 void Lowering::ContainCheckNode(GenTree* node)
5315 switch (node->gtOper)
5317 case GT_STORE_LCL_VAR:
5318 case GT_STORE_LCL_FLD:
5319 ContainCheckStoreLoc(node->AsLclVarCommon());
5331 ContainCheckCompare(node->AsOp());
5335 ContainCheckJTrue(node->AsOp());
5340 #if !defined(_TARGET_64BIT_)
5349 ContainCheckBinary(node->AsOp());
5352 #ifdef _TARGET_XARCH_
5354 // Codegen of this tree node sets ZF and SF flags.
5355 if (!varTypeIsFloating(node))
5357 node->gtFlags |= GTF_ZSF_SET;
5360 #endif // _TARGET_XARCH_
5362 #if defined(_TARGET_X86_)
5367 ContainCheckMul(node->AsOp());
5373 ContainCheckDivOrMod(node->AsOp());
5380 #ifndef _TARGET_64BIT_
5384 ContainCheckShiftRotate(node->AsOp());
5387 ContainCheckArrOffset(node->AsArrOffs());
5390 ContainCheckLclHeap(node->AsOp());
5393 ContainCheckRet(node->AsOp());
5396 ContainCheckReturnTrap(node->AsOp());
5399 ContainCheckStoreIndir(node->AsIndir());
5401 ContainCheckIndir(node->AsIndir());
5406 case GT_PUTARG_SPLIT:
5408 // The regNum must have been set by the lowering of the call.
5409 assert(node->gtRegNum != REG_NA);
5411 #ifdef _TARGET_XARCH_
5413 ContainCheckIntrinsic(node->AsOp());
5415 #endif // _TARGET_XARCH_
5418 ContainCheckSIMD(node->AsSIMD());
5420 #endif // FEATURE_SIMD
5426 //------------------------------------------------------------------------
5427 // GetIndirSourceCount: Get the number of source registers used by an indirection that might be contained.
5430 // indirTree - The indirection node of interest
5433 // The number of source registers used by the *parent* of this node.
5435 int Lowering::GetIndirSourceCount(GenTreeIndir* indirTree)
5437 GenTree* const addr = indirTree->gtOp1;
5438 if (!addr->isContained())
5442 if (!addr->OperIs(GT_LEA))
5447 GenTreeAddrMode* const addrMode = addr->AsAddrMode();
5449 unsigned srcCount = 0;
5450 if ((addrMode->Base() != nullptr) && !addrMode->Base()->isContained())
5454 if (addrMode->Index() != nullptr)
5456 // We never have a contained index.
5457 assert(!addrMode->Index()->isContained());
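// For illustration: with a contained address mode such as GT_LEA(baseVar, indexVar, 2, 16) under
// a GT_IND, both the base and the index are register sources, so this method reports 2; an
// indirection whose address is not contained contributes just one source (the address itself).
// The variable names in this sketch are hypothetical.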
5463 //------------------------------------------------------------------------
5464 // ContainCheckDivOrMod: determine which operands of a div/mod should be contained.
5467 // node - pointer to the GT_DIV/GT_MOD/GT_UDIV/GT_UMOD node
5469 void Lowering::ContainCheckDivOrMod(GenTreeOp* node)
5471 assert(node->OperIs(GT_DIV, GT_MOD, GT_UDIV, GT_UMOD));
5473 #ifdef _TARGET_XARCH_
5474 GenTree* dividend = node->gtGetOp1();
5475 GenTree* divisor = node->gtGetOp2();
5477 if (varTypeIsFloating(node->TypeGet()))
5479 // No implicit conversions at this stage as the expectation is that
5480 // everything is made explicit by adding casts.
5481 assert(dividend->TypeGet() == divisor->TypeGet());
5483 if (IsContainableMemoryOp(divisor) || divisor->IsCnsNonZeroFltOrDbl())
5485 MakeSrcContained(node, divisor);
5489 // If there are no containable operands, we can make an operand reg optional.
5490 // SSE2 allows only divisor to be a memory-op.
5491 SetRegOptional(divisor);
5495 bool divisorCanBeRegOptional = true;
5497 if (dividend->OperGet() == GT_LONG)
5499 divisorCanBeRegOptional = false;
5500 MakeSrcContained(node, dividend);
5504 // divisor can be an r/m, but the memory indirection must be of the same size as the divide
5505 if (IsContainableMemoryOp(divisor) && (divisor->TypeGet() == node->TypeGet()))
5507 MakeSrcContained(node, divisor);
5509 else if (divisorCanBeRegOptional)
5511 // If there are no containable operands, we can make an operand reg optional.
5512 // Div instruction allows only divisor to be a memory op.
5513 SetRegOptional(divisor);
5515 #endif // _TARGET_XARCH_
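// Illustrative note: for an integer GT_DIV whose divisor is an indirection of the same size as
// the divide, the containment above lets codegen emit the divide with a memory operand directly
// (roughly "idiv dword ptr [mem]" on xarch); when nothing can be contained, marking the divisor
// reg-optional still allows the register allocator to leave it in memory if that is cheaper.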
5518 //------------------------------------------------------------------------
5519 // ContainCheckReturnTrap: determine whether the source of a RETURNTRAP should be contained.
5522 // node - pointer to the GT_RETURNTRAP node
5524 void Lowering::ContainCheckReturnTrap(GenTreeOp* node)
5526 #ifdef _TARGET_XARCH_
5527 assert(node->OperIs(GT_RETURNTRAP));
5528 // This just turns into a compare of its child with an int + a conditional call
5529 if (node->gtOp1->isIndir())
5531 MakeSrcContained(node, node->gtOp1);
5533 #endif // _TARGET_XARCH_
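// For illustration: a GT_RETURNTRAP typically wraps an indirection of a "trap" flag, and
// containing that indirection lets codegen emit something along the lines of
//     cmp dword ptr [flagAddr], 0
//     jne <stop-for-GC helper path>
// rather than first loading the flag into a register. "flagAddr" is a placeholder name.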
5536 //------------------------------------------------------------------------
5537 // ContainCheckArrOffset: determine whether the source of an ARR_OFFSET should be contained.
5540 // node - pointer to the GT_ARR_OFFSET node
5542 void Lowering::ContainCheckArrOffset(GenTreeArrOffs* node)
5544 assert(node->OperIs(GT_ARR_OFFSET));
5545 // An offset of zero requires no code; contain it so none is generated.
5546 if (node->gtOffset->IsIntegralConst(0))
5548 MakeSrcContained(node, node->gtArrOffs.gtOffset);
5552 //------------------------------------------------------------------------
5553 // ContainCheckLclHeap: determine whether the source of a GT_LCLHEAP node should be contained.
5556 // node - pointer to the node
5558 void Lowering::ContainCheckLclHeap(GenTreeOp* node)
5560 assert(node->OperIs(GT_LCLHEAP));
5561 GenTreePtr size = node->gtOp.gtOp1;
5562 if (size->IsCnsIntOrI())
5564 MakeSrcContained(node, size);
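// For example, GT_LCLHEAP(GT_CNS_INT 64) keeps the constant size contained, so no register is
// needed for it and codegen can fold the amount straight into the stack adjustment
// (an illustrative shape, not taken from a specific test).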
5568 //------------------------------------------------------------------------
5569 // ContainCheckRet: determine whether the source of a GT_RETURN node should be contained.
5572 // ret - pointer to the GT_RETURN node
5574 void Lowering::ContainCheckRet(GenTreeOp* ret)
5576 assert(ret->OperIs(GT_RETURN));
5578 #if !defined(_TARGET_64BIT_)
5579 if (ret->TypeGet() == TYP_LONG)
5581 GenTree* op1 = ret->gtGetOp1();
5582 noway_assert(op1->OperGet() == GT_LONG);
5583 MakeSrcContained(ret, op1);
5585 #endif // !defined(_TARGET_64BIT_)
5586 #if FEATURE_MULTIREG_RET
5587 if (varTypeIsStruct(ret))
5589 GenTree* op1 = ret->gtGetOp1();
5590 // op1 must be either a lclvar or a multi-reg returning call
5591 if (op1->OperGet() == GT_LCL_VAR)
5593 GenTreeLclVarCommon* lclVarCommon = op1->AsLclVarCommon();
5594 LclVarDsc* varDsc = &(comp->lvaTable[lclVarCommon->gtLclNum]);
5595 assert(varDsc->lvIsMultiRegRet);
5597 // Mark var as contained if not enregistrable.
5598 if (!varTypeIsEnregisterableStruct(op1))
5600 MakeSrcContained(ret, op1);
5604 #endif // FEATURE_MULTIREG_RET
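// Illustrative shapes for the two cases above (hypothetical IR, not from a specific test): on a
// 32-bit target, GT_RETURN(TYP_LONG) of GT_LONG(loVal, hiVal) contains the GT_LONG so that codegen
// consumes loVal/hiVal directly into the return register pair, and a multi-reg struct return of a
// non-enregisterable local is contained so the return registers are loaded from its stack home.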
5607 //------------------------------------------------------------------------
5608 // ContainCheckJTrue: determine whether the source of a JTRUE should be contained.
5611 // node - pointer to the GT_JTRUE node
5613 void Lowering::ContainCheckJTrue(GenTreeOp* node)
5615 // The compare does not need to be generated into a register.
5616 GenTree* cmp = node->gtGetOp1();
5617 cmp->gtLsraInfo.isNoRegCompare = true;
5620 assert(node->OperIs(GT_JTRUE));
5622 // Say we have the following IR
5623 // simdCompareResult = GT_SIMD((In)Equality, v1, v2)
5624 // integerCompareResult = GT_EQ/NE(simdCompareResult, true/false)
5625 // GT_JTRUE(integerCompareResult)
5627 // In this case we don't need to generate code for GT_EQ/GT_NE, since the SIMD (In)Equality
5628 // intrinsic will set or clear the Zero flag.
5629 genTreeOps cmpOper = cmp->OperGet();
5630 if (cmpOper == GT_EQ || cmpOper == GT_NE)
5632 GenTree* cmpOp1 = cmp->gtGetOp1();
5633 GenTree* cmpOp2 = cmp->gtGetOp2();
5635 if (cmpOp1->IsSIMDEqualityOrInequality() && (cmpOp2->IsIntegralConst(0) || cmpOp2->IsIntegralConst(1)))
5637 // We always generate code for a SIMD equality comparison, though it produces no value.
5638 // Neither the GT_JTRUE nor the immediate need to be evaluated.
5639 MakeSrcContained(cmp, cmpOp2);
5640 cmpOp1->gtLsraInfo.isNoRegCompare = true;
5641 // We have to reverse compare oper in the following cases:
5642 // 1) SIMD Equality: Sets Zero flag on equal otherwise clears it.
5643 // Therefore, if compare oper is == or != against false(0), we will
5644 // be checking opposite of what is required.
5646 // 2) SIMD inEquality: Clears Zero flag on true otherwise sets it.
5647 // Therefore, if compare oper is == or != against true(1), we will
5648 // be checking opposite of what is required.
5649 GenTreeSIMD* simdNode = cmpOp1->AsSIMD();
5650 if (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpEquality)
5652 if (cmpOp2->IsIntegralConst(0))
5654 cmp->SetOper(GenTree::ReverseRelop(cmpOper));
5659 assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpInEquality);
5660 if (cmpOp2->IsIntegralConst(1))
5662 cmp->SetOper(GenTree::ReverseRelop(cmpOper));
5667 #endif // FEATURE_SIMD
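// A concrete instance of the reversal described above (hypothetical IR):
//     GT_JTRUE(GT_EQ(GT_SIMD(SIMDIntrinsicOpEquality, v1, v2), 0))
// asks "jump when the vectors are NOT equal", but the SIMD equality sets the Zero flag when they
// ARE equal, so the relop is flipped to GT_NE and the branch is emitted off the flags that the
// SIMD comparison itself produced.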
5671 void Lowering::DumpNodeInfoMap()
5673 printf("-----------------------------\n");
5674 printf("TREE NODE INFO DUMP\n");
5675 printf("-----------------------------\n");
5677 for (BasicBlock* block = comp->fgFirstBB; block != nullptr; block = block->bbNext)
5679 for (GenTree* node : LIR::AsRange(block).NonPhiNodes())
5681 comp->gtDispTree(node, nullptr, nullptr, true);
5683 node->gtLsraInfo.dump(m_lsra);
5689 #endif // !LEGACY_BACKEND