1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
12 XX Postconditions (for the nodes currently handled): XX
13 XX - All operands requiring a register are explicit in the graph XX
15 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
16 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
26 #if !defined(_TARGET_64BIT_)
27 #include "decomposelongs.h"
28 #endif // !defined(_TARGET_64BIT_)
30 //------------------------------------------------------------------------
31 // MakeSrcContained: Make "childNode" a contained node
34 // parentNode - is a non-leaf node that can contain its 'childNode'
35 // childNode - is an op that will now be contained by its parent.
38 // If 'childNode' has any existing sources, they will now be sources for the parent.
40 void Lowering::MakeSrcContained(GenTree* parentNode, GenTree* childNode)
// Preconditions: the parent is not a leaf and the child reports that it can be contained.
42 assert(!parentNode->OperIsLeaf());
43 assert(childNode->canBeContained());
44 childNode->SetContained();
// Postcondition: the state set by SetContained() must be observable through isContained().
45 assert(childNode->isContained());
48 //------------------------------------------------------------------------
49 // CheckImmedAndMakeContained: Checks if the 'childNode' is a containable immediate
50 // and, if so, makes it contained.
53 // parentNode - is any non-leaf node
54 // childNode - is a child op of 'parentNode'
57 // true if we are able to make childNode a contained immediate
59 bool Lowering::CheckImmedAndMakeContained(GenTree* parentNode, GenTree* childNode)
// The parent must be a non-leaf node; MakeSrcContained asserts the same condition again.
61 assert(!parentNode->OperIsLeaf());
62 // If childNode is a containable immediate
63 if (IsContainableImmed(parentNode, childNode))
65 // then make it contained within the parentNode
66 MakeSrcContained(parentNode, childNode);
72 //------------------------------------------------------------------------
73 // IsSafeToContainMem: Checks for conflicts between childNode and parentNode,
74 // and returns 'true' iff memory operand childNode can be contained in parentNode.
77 // parentNode - any non-leaf node
78 // childNode - some node that is an input to `parentNode`
81 // true if it is safe to make childNode a contained memory operand.
83 bool Lowering::IsSafeToContainMem(GenTree* parentNode, GenTree* childNode)
// Start from an empty scratch set and record childNode in it.
85 m_scratchSideEffects.Clear();
86 m_scratchSideEffects.AddNode(comp, childNode);
// Visit every node linked through gtNext strictly between childNode and parentNode and test
// it against the recorded set. This assumes parentNode is reachable from childNode by
// following gtNext; the loop has no other exit condition in its header.
88 for (GenTree* node = childNode->gtNext; node != parentNode; node = node->gtNext)
// NOTE(review): the meaning of the trailing 'false' argument is defined by InterferesWith - confirm.
90 if (m_scratchSideEffects.InterferesWith(comp, node, false))
99 //------------------------------------------------------------------------
// LowerNode: Lowers a single node by dispatching on node->gtOper to the oper-specific
// lowering and/or containment-check routine.
//
101 // This is the main entry point for Lowering.
//
// Several cases return the result of an oper-specific helper directly (LowerSignedDivOrMod,
// LowerSwitch, LowerCompare, LowerJTrue, LowerArrElem); the result of LowerAdd is returned
// only when it is non-null.
102 GenTree* Lowering::LowerNode(GenTree* node)
104 assert(node != nullptr);
105 switch (node->gtOper)
// Attempt address-mode formation on the address operand (gtOp1), then run the
// indirection containment check.
108 TryCreateAddrMode(LIR::Use(BlockRange(), &node->gtOp.gtOp1, node), true);
109 ContainCheckIndir(node->AsIndir());
113 TryCreateAddrMode(LIR::Use(BlockRange(), &node->gtOp.gtOp1, node), true);
// Stores that gcIsWriteBarrierStoreIndNode identifies as write-barrier stores skip LowerStoreIndir.
114 if (!comp->codeGen->gcInfo.gcIsWriteBarrierStoreIndNode(node))
116 LowerStoreIndir(node->AsIndir());
// A non-null result from LowerAdd is handed straight back to the caller.
122 GenTree* afterTransform = LowerAdd(node);
123 if (afterTransform != nullptr)
125 return afterTransform;
130 #if !defined(_TARGET_64BIT_)
140 ContainCheckBinary(node->AsOp());
145 #if defined(_TARGET_X86_)
148 ContainCheckMul(node->AsOp());
// The div/mod containment check runs only when LowerUnsignedDivOrMod returns false.
153 if (!LowerUnsignedDivOrMod(node->AsOp()))
155 ContainCheckDivOrMod(node->AsOp());
161 return LowerSignedDivOrMod(node);
164 return LowerSwitch(node);
179 return LowerCompare(node);
182 return LowerJTrue(node->AsOp());
185 LowerJmpMethod(node);
193 ContainCheckReturnTrap(node->AsOp());
200 #if defined(_TARGET_XARCH_) || defined(_TARGET_ARM64_)
201 case GT_ARR_BOUNDS_CHECK:
204 #endif // FEATURE_SIMD
205 #ifdef FEATURE_HW_INTRINSICS
206 case GT_HW_INTRINSIC_CHK:
207 #endif // FEATURE_HW_INTRINSICS
208 ContainCheckBoundsChk(node->AsBoundsChk());
210 #endif // _TARGET_XARCH_ || _TARGET_ARM64_
212 return LowerArrElem(node);
215 ContainCheckArrOffset(node->AsArrOffs());
223 #ifndef _TARGET_64BIT_
226 ContainCheckShiftRotate(node->AsOp());
228 #endif // !_TARGET_64BIT_
233 #if defined(_TARGET_XARCH_) || defined(_TARGET_ARM64_)
234 LowerShift(node->AsOp());
236 ContainCheckShiftRotate(node->AsOp());
242 case GT_STORE_DYN_BLK:
// Attempt address-mode formation on the block's address operand, then lower the block store.
244 GenTreeBlk* blkNode = node->AsBlk();
245 TryCreateAddrMode(LIR::Use(BlockRange(), &blkNode->Addr(), blkNode), false);
246 LowerBlockStore(blkNode);
251 ContainCheckLclHeap(node->AsOp());
254 #ifdef _TARGET_XARCH_
256 ContainCheckIntrinsic(node->AsOp());
258 #endif // _TARGET_XARCH_
262 LowerSIMD(node->AsSIMD());
264 #endif // FEATURE_SIMD
266 #ifdef FEATURE_HW_INTRINSICS
268 LowerHWIntrinsic(node->AsHWIntrinsic());
270 #endif // FEATURE_HW_INTRINSICS
274 // We should only encounter this for lclVars that are lvDoNotEnregister.
275 verifyLclFldDoNotEnregister(node->AsLclVarCommon()->gtLclNum);
280 WidenSIMD12IfNecessary(node->AsLclVarCommon());
283 case GT_STORE_LCL_VAR:
284 WidenSIMD12IfNecessary(node->AsLclVarCommon());
287 case GT_STORE_LCL_FLD:
289 #if defined(_TARGET_AMD64_) && defined(FEATURE_SIMD)
// When exactly one of the store and its source (gtOp1) is TYP_SIMD8, wrap the source in a
// GT_BITCAST of the store's type and insert it immediately before the store.
290 GenTreeLclVarCommon* const store = node->AsLclVarCommon();
291 if ((store->TypeGet() == TYP_SIMD8) != (store->gtOp1->TypeGet() == TYP_SIMD8))
293 GenTreeUnOp* bitcast =
294 new (comp, GT_BITCAST) GenTreeOp(GT_BITCAST, store->TypeGet(), store->gtOp1, nullptr);
295 store->gtOp1 = bitcast;
296 BlockRange().InsertBefore(store, bitcast);
298 #endif // _TARGET_AMD64_ && FEATURE_SIMD
299 // TODO-1stClassStructs: Once we remove the requirement that all struct stores
300 // are block stores (GT_STORE_BLK or GT_STORE_OBJ), here is where we would put the local
301 // store under a block store if codegen will require it.
302 if ((node->TypeGet() == TYP_STRUCT) && (node->gtGetOp1()->OperGet() != GT_PHI))
304 #if FEATURE_MULTIREG_RET
// With multi-reg returns enabled, the only struct-typed source expected here is a call
// that has a multi-reg return value.
305 GenTree* src = node->gtGetOp1();
306 assert((src->OperGet() == GT_CALL) && src->AsCall()->HasMultiRegRetVal());
307 #else // !FEATURE_MULTIREG_RET
308 assert(!"Unexpected struct local store in Lowering");
309 #endif // !FEATURE_MULTIREG_RET
311 LowerStoreLoc(node->AsLclVarCommon());
315 #if defined(_TARGET_ARM64_)
317 CheckImmedAndMakeContained(node, node->AsCmpXchg()->gtOpComparand);
321 CheckImmedAndMakeContained(node, node->gtOp.gtOp2);
323 #elif defined(_TARGET_XARCH_)
// When the node's value is unused, it is rewritten in place as a void GT_LOCKADD.
325 if (node->IsUnusedValue())
327 node->ClearUnusedValue();
328 // Make sure the types are identical, since the node type is changed to VOID
329 // CodeGen relies on op2's type to determine the instruction size.
330 // Note that the node type cannot be a small int but the data operand can.
331 assert(genActualType(node->gtGetOp2()->TypeGet()) == node->TypeGet());
332 node->SetOper(GT_LOCKADD);
333 node->gtType = TYP_VOID;
334 CheckImmedAndMakeContained(node, node->gtGetOp2());
340 // TODO-ARM-CQ: We should contain this as long as the offset fits.
// A local-address operand of the object node is marked contained.
342 if (node->AsObj()->Addr()->OperIsLocalAddr())
344 node->AsObj()->Addr()->SetContained();
347 #endif // !_TARGET_ARM_
356 /** -- Switch Lowering --
357 * The main idea of switch lowering is to keep transparency of the register requirements of this node
358 * downstream in LSRA. Given that the switch instruction is inherently a control statement which in the JIT
359 * is represented as a simple tree node, at the time we actually generate code for it we end up
360 * generating instructions that actually modify the flow of execution that imposes complicated
361 * register requirement and lifetimes.
363 * So, for the purpose of LSRA, we want to have a more detailed specification of what a switch node actually
364 * means and more importantly, which and when do we need a register for each instruction we want to issue
365 * to correctly allocate them downstream.
367 * For this purpose, this procedure performs switch lowering in two different ways:
369 * a) Represent the switch statement as a zero-index jump table construct. This means that for every destination
370 * of the switch, we will store this destination in an array of addresses and the code generator will issue
371 * a data section where this array will live and will emit code that based on the switch index, will indirect and
372 * jump to the destination specified in the jump table.
374 * For this transformation we introduce a new GT node called GT_SWITCH_TABLE that is a specialization of the switch
375 * node for jump table based switches.
376 * The overall structure of a GT_SWITCH_TABLE is:
379 * |_________ localVar (a temporary local that holds the switch index)
380 * |_________ jumpTable (this is a special node that holds the address of the jump table array)
382 * Now, the way we morph a GT_SWITCH node into this lowered switch table node form is the following:
384 * Input: GT_SWITCH (inside a basic block whose Branch Type is BBJ_SWITCH)
385 * |_____ expr (an arbitrarily complex GT_NODE that represents the switch index)
387 * This gets transformed into the following statements inside a BBJ_COND basic block (the target would be
388 * the default case of the switch in case the conditional is evaluated to true).
390 * ----- original block, transformed
391 * GT_STORE_LCL_VAR tempLocal (a new temporary local variable used to store the switch index)
392 * |_____ expr (the index expression)
397 * |___ Int_Constant (This constant is the index of the default case
398 * that happens to be the highest index in the jump table).
399 * |___ tempLocal (The local variable where we stored the index expression).
401 * ----- new basic block
404 * |_____ jumpTable (a new jump table node that now LSRA can allocate registers for explicitly
405 * and LinearCodeGen will be responsible to generate downstream).
407 * This way there are no implicit temporaries.
409 * b) For small-sized switches, we will actually morph them into a series of conditionals of the form
410 * if (case falls into the default){ goto jumpTable[size - 1]; // last entry in the jump table is the default case }
411 * (For the default case conditional, we'll be constructing the exact same code as the jump table case one).
412 * else if (case == 0){ goto jumpTable[0]; }
413 * else if (case == 1) { goto jumpTable[1]; } and so on.
415 * This transformation is of course made in JIT-IR, not downstream to CodeGen level, so this way we no longer
416 * require internal temporaries to maintain the index we're evaluating plus we're using existing code from
417 * LinearCodeGen to implement this instead of implement all the control flow constructs using InstrDscs and
418 * InstrGroups downstream.
// LowerSwitch: Lowers a GT_SWITCH node (see the "Switch Lowering" comment above).
//
// node - the GT_SWITCH node; it must be the top-level node of m_block and have no use.
//
// A switch with a single target is turned into BBJ_NONE/BBJ_ALWAYS and its operand is stored
// to a new temp. Otherwise the default case is peeled into a JTRUE at the end of the original
// block and the remaining cases are handled, in this order of preference, by: a single unique
// non-default successor, a compare/branch sequence, a bit test, or a jump table.
421 GenTree* Lowering::LowerSwitch(GenTree* node)
425 BasicBlock** jumpTab;
427 assert(node->gtOper == GT_SWITCH);
429 // The first step is to build the default case conditional construct that is
430 // shared between both kinds of expansion of the switch node.
432 // To avoid confusion, we'll alias m_block to originalSwitchBB
433 // that represents the node we're morphing.
434 BasicBlock* originalSwitchBB = m_block;
435 LIR::Range& switchBBRange = LIR::AsRange(originalSwitchBB);
437 // jumpCnt is the number of elements in the jump table array.
438 // jumpTab is the actual pointer to the jump table array.
439 // targetCnt is the number of unique targets in the jump table array.
440 jumpCnt = originalSwitchBB->bbJumpSwt->bbsCount;
441 jumpTab = originalSwitchBB->bbJumpSwt->bbsDstTab;
442 targetCnt = originalSwitchBB->NumSucc(comp);
444 // GT_SWITCH must be a top-level node with no use.
448 assert(!switchBBRange.TryGetUse(node, &use));
452 JITDUMP("Lowering switch " FMT_BB ", %d cases\n", originalSwitchBB->bbNum, jumpCnt);
454 // Handle a degenerate case: if the switch has only a default case, just convert it
455 // to an unconditional branch. This should only happen in minopts or with debuggable
459 JITDUMP("Lowering switch " FMT_BB ": single target; converting to BBJ_ALWAYS\n", originalSwitchBB->bbNum);
460 noway_assert(comp->opts.MinOpts() || comp->opts.compDbgCode);
// If the single target is the next block, fall through; otherwise branch to it.
461 if (originalSwitchBB->bbNext == jumpTab[0])
463 originalSwitchBB->bbJumpKind = BBJ_NONE;
464 originalSwitchBB->bbJumpDest = nullptr;
468 originalSwitchBB->bbJumpKind = BBJ_ALWAYS;
469 originalSwitchBB->bbJumpDest = jumpTab[0];
471 // Remove extra predecessor links if there was more than one case.
472 for (unsigned i = 1; i < jumpCnt; ++i)
474 (void)comp->fgRemoveRefPred(jumpTab[i], originalSwitchBB);
477 // We have to get rid of the GT_SWITCH node but a child might have side effects so just assign
478 // the result of the child subtree to a temp.
479 GenTree* rhs = node->gtOp.gtOp1;
481 unsigned lclNum = comp->lvaGrabTemp(true DEBUGARG("Lowering is creating a new local variable"));
482 comp->lvaTable[lclNum].lvType = rhs->TypeGet();
484 GenTreeLclVar* store =
485 new (comp, GT_STORE_LCL_VAR) GenTreeLclVar(GT_STORE_LCL_VAR, rhs->TypeGet(), lclNum, BAD_IL_OFFSET);
// The store inherits the common flags of the value it stores and is marked as a definition.
487 store->gtFlags = (rhs->gtFlags & GTF_COMMON_MASK);
488 store->gtFlags |= GTF_VAR_DEF;
// The store takes the GT_SWITCH node's position in the block; the switch node is then unlinked.
490 switchBBRange.InsertAfter(node, store);
491 switchBBRange.Remove(node);
496 noway_assert(jumpCnt >= 2);
498 // Spill the argument to the switch node into a local so that it can be used later.
499 unsigned blockWeight = originalSwitchBB->getBBWeight(comp);
501 LIR::Use use(switchBBRange, &(node->gtOp.gtOp1), node);
502 ReplaceWithLclVar(use);
504 // GT_SWITCH(indexExpression) is now two statements:
505 // 1. a statement containing 'asg' (for temp = indexExpression)
506 // 2. and a statement with GT_SWITCH(temp)
508 assert(node->gtOper == GT_SWITCH);
509 GenTree* temp = node->gtOp.gtOp1;
510 assert(temp->gtOper == GT_LCL_VAR);
511 unsigned tempLclNum = temp->gtLclVarCommon.gtLclNum;
512 LclVarDsc* tempVarDsc = comp->lvaTable + tempLclNum;
513 var_types tempLclType = temp->TypeGet();
// The default target is the last entry of the jump table.
515 BasicBlock* defaultBB = jumpTab[jumpCnt - 1];
516 BasicBlock* followingBB = originalSwitchBB->bbNext;
518 /* Is the number of cases right for a test and jump switch? */
519 const bool fFirstCaseFollows = (followingBB == jumpTab[0]);
520 const bool fDefaultFollows = (followingBB == defaultBB);
522 unsigned minSwitchTabJumpCnt = 2; // table is better than just 2 cmp/jcc
524 // This means really just a single cmp/jcc (aka a simple if/else)
525 if (fFirstCaseFollows || fDefaultFollows)
527 minSwitchTabJumpCnt++;
530 #if defined(_TARGET_ARM_)
531 // On ARM for small switch tables we will
532 // generate a sequence of compare and branch instructions
533 // because the code to load the base of the switch
534 // table is huge and hideous due to the relocation... :(
535 minSwitchTabJumpCnt += 2;
536 #endif // _TARGET_ARM_
538 // Once we have the temporary variable, we construct the conditional branch for
539 // the default case. As stated above, this conditional is being shared between
540 // both GT_SWITCH lowering code paths.
541 // This condition is of the form: if (temp > jumpTableLength - 2){ goto jumpTable[jumpTableLength - 1]; }
542 GenTree* gtDefaultCaseCond = comp->gtNewOperNode(GT_GT, TYP_INT, comp->gtNewLclvNode(tempLclNum, tempLclType),
543 comp->gtNewIconNode(jumpCnt - 2, genActualType(tempLclType)));
545 // Make sure we perform an unsigned comparison, just in case the switch index in 'temp'
546 // is now less than zero (that would also hit the default case).
547 gtDefaultCaseCond->gtFlags |= GTF_UNSIGNED;
549 GenTree* gtDefaultCaseJump = comp->gtNewOperNode(GT_JTRUE, TYP_VOID, gtDefaultCaseCond);
550 gtDefaultCaseJump->gtFlags = node->gtFlags;
552 LIR::Range condRange = LIR::SeqTree(comp, gtDefaultCaseJump);
553 switchBBRange.InsertAtEnd(std::move(condRange));
// NOTE(review): condRange is read below after having been passed through std::move() above.
// This relies on InsertAtEnd leaving the source range's LastNode() intact - confirm, or capture
// the last node before the insertion.
555 BasicBlock* afterDefaultCondBlock = comp->fgSplitBlockAfterNode(originalSwitchBB, condRange.LastNode());
557 // afterDefaultCondBlock is now the switch, and all the switch targets have it as a predecessor.
558 // originalSwitchBB is now a BBJ_NONE, and there is a predecessor edge in afterDefaultCondBlock
559 // representing the fall-through flow from originalSwitchBB.
560 assert(originalSwitchBB->bbJumpKind == BBJ_NONE);
561 assert(originalSwitchBB->bbNext == afterDefaultCondBlock);
562 assert(afterDefaultCondBlock->bbJumpKind == BBJ_SWITCH);
563 assert(afterDefaultCondBlock->bbJumpSwt->bbsHasDefault);
564 assert(afterDefaultCondBlock->isEmpty()); // Nothing here yet.
566 // The GT_SWITCH code is still in originalSwitchBB (it will be removed later).
568 // Turn originalSwitchBB into a BBJ_COND.
569 originalSwitchBB->bbJumpKind = BBJ_COND;
570 originalSwitchBB->bbJumpDest = jumpTab[jumpCnt - 1];
572 // Fix the pred for the default case: the default block target still has originalSwitchBB
573 // as a predecessor, but the fgSplitBlockAfterNode() moved all predecessors to point
574 // to afterDefaultCondBlock.
575 flowList* oldEdge = comp->fgRemoveRefPred(jumpTab[jumpCnt - 1], afterDefaultCondBlock);
576 comp->fgAddRefPred(jumpTab[jumpCnt - 1], originalSwitchBB, oldEdge);
// Small tables (fewer entries than the threshold computed above) use compare/branch expansion.
578 bool useJumpSequence = jumpCnt < minSwitchTabJumpCnt;
580 #if defined(_TARGET_UNIX_) && defined(_TARGET_ARM_)
581 // Force using an inlined jumping instead switch table generation.
582 // Switch jump table is generated with incorrect values in CoreRT case,
583 // so any large switch will crash after loading to PC any such value.
584 // I think this is due to the fact that we use absolute addressing
585 // instead of relative. But in CoreRT is used as a rule relative
586 // addressing when we generate an executable.
587 // See also https://github.com/dotnet/coreclr/issues/13194
588 // Also https://github.com/dotnet/coreclr/pull/13197
589 useJumpSequence = useJumpSequence || comp->IsTargetAbi(CORINFO_CORERT_ABI);
590 #endif // defined(_TARGET_UNIX_) && defined(_TARGET_ARM_)
592 // If we originally had 2 unique successors, check to see whether there is a unique
593 // non-default case, in which case we can eliminate the switch altogether.
594 // Note that the single unique successor case is handled above.
595 BasicBlock* uniqueSucc = nullptr;
// Candidate: the first entry. Any differing non-default entry resets the candidate to null.
598 uniqueSucc = jumpTab[0];
599 noway_assert(jumpCnt >= 2);
600 for (unsigned i = 1; i < jumpCnt - 1; i++)
602 if (jumpTab[i] != uniqueSucc)
604 uniqueSucc = nullptr;
609 if (uniqueSucc != nullptr)
611 // If the unique successor immediately follows this block, we have nothing to do -
612 // it will simply fall-through after we remove the switch, below.
613 // Otherwise, make this a BBJ_ALWAYS.
614 // Now, fixup the predecessor links to uniqueSucc. In the original jumpTab:
615 // jumpTab[jumpCnt - 1] was the default target, which we handled above,
616 // jumpTab[0] is the first target, and we'll leave that predecessor link.
617 // Remove any additional predecessor links to uniqueSucc.
618 for (unsigned i = 1; i < jumpCnt - 1; ++i)
620 assert(jumpTab[i] == uniqueSucc);
621 (void)comp->fgRemoveRefPred(uniqueSucc, afterDefaultCondBlock);
623 if (afterDefaultCondBlock->bbNext == uniqueSucc)
625 afterDefaultCondBlock->bbJumpKind = BBJ_NONE;
626 afterDefaultCondBlock->bbJumpDest = nullptr;
630 afterDefaultCondBlock->bbJumpKind = BBJ_ALWAYS;
631 afterDefaultCondBlock->bbJumpDest = uniqueSucc;
634 // If the number of possible destinations is small enough, we proceed to expand the switch
635 // into a series of conditional branches, otherwise we follow the jump table based switch
637 else if (useJumpSequence || comp->compStressCompile(Compiler::STRESS_SWITCH_CMP_BR_EXPANSION, 50))
639 // Lower the switch into a series of compare and branch IR trees.
641 // In this case we will morph the node in the following way:
642 // 1. Generate a JTRUE statement to evaluate the default case. (This happens above.)
643 // 2. Start splitting the switch basic block into subsequent basic blocks, each of which will contain
644 // a statement that is responsible for performing a comparison of the table index and conditional
647 JITDUMP("Lowering switch " FMT_BB ": using compare/branch expansion\n", originalSwitchBB->bbNum);
649 // We'll use 'afterDefaultCondBlock' for the first conditional. After that, we'll add new
650 // blocks. If we end up not needing it at all (say, if all the non-default cases just fall through),
652 bool fUsedAfterDefaultCondBlock = false;
653 BasicBlock* currentBlock = afterDefaultCondBlock;
654 LIR::Range* currentBBRange = &LIR::AsRange(currentBlock);
656 // Walk entries 0 to jumpCnt - 2 (entry jumpCnt - 1 is the default case, handled above).
// If a case target follows, ignore it and let it fall through.
657 // If no case target follows, the last one doesn't need to be a compare/branch: it can be an
658 // unconditional branch.
659 bool fAnyTargetFollows = false;
660 for (unsigned i = 0; i < jumpCnt - 1; ++i)
662 assert(currentBlock != nullptr);
664 // Remove the switch from the predecessor list of this case target's block.
665 // We'll add the proper new predecessor edge later.
666 flowList* oldEdge = comp->fgRemoveRefPred(jumpTab[i], afterDefaultCondBlock);
668 if (jumpTab[i] == followingBB)
670 // This case label follows the switch; let it fall through.
671 fAnyTargetFollows = true;
675 // We need a block to put in the new compare and/or branch.
676 // If we haven't used the afterDefaultCondBlock yet, then use that.
677 if (fUsedAfterDefaultCondBlock)
679 BasicBlock* newBlock = comp->fgNewBBafter(BBJ_NONE, currentBlock, true);
680 comp->fgAddRefPred(newBlock, currentBlock); // The fall-through predecessor.
681 currentBlock = newBlock;
682 currentBBRange = &LIR::AsRange(currentBlock);
686 assert(currentBlock == afterDefaultCondBlock);
687 fUsedAfterDefaultCondBlock = true;
690 // We're going to have a branch, either a conditional or unconditional,
691 // to the target. Set the target.
692 currentBlock->bbJumpDest = jumpTab[i];
694 // Wire up the predecessor list for the "branch" case.
695 comp->fgAddRefPred(jumpTab[i], currentBlock, oldEdge);
697 if (!fAnyTargetFollows && (i == jumpCnt - 2))
699 // We're processing the last one, and there is no fall through from any case
700 // to the following block, so we can use an unconditional branch to the final
701 // case: there is no need to compare against the case index, since it's
702 // guaranteed to be taken (since the default case was handled first, above).
704 currentBlock->bbJumpKind = BBJ_ALWAYS;
708 // Otherwise, it's a conditional branch. Set the branch kind, then add the
709 // condition statement.
710 currentBlock->bbJumpKind = BBJ_COND;
712 // Now, build the conditional statement for the current case that is
717 // |____ (switchIndex) (The temp variable)
718 // |____ (ICon) (The actual case constant)
// NOTE(review): the case constant below is typed tempLclType, whereas the default-case
// constant above is typed genActualType(tempLclType) - confirm the difference is intended.
719 GenTree* gtCaseCond = comp->gtNewOperNode(GT_EQ, TYP_INT, comp->gtNewLclvNode(tempLclNum, tempLclType),
720 comp->gtNewIconNode(i, tempLclType));
721 GenTree* gtCaseBranch = comp->gtNewOperNode(GT_JTRUE, TYP_VOID, gtCaseCond);
722 LIR::Range caseRange = LIR::SeqTree(comp, gtCaseBranch);
723 currentBBRange->InsertAtEnd(std::move(caseRange));
727 if (fAnyTargetFollows)
729 // There is a fall-through to the following block. In the loop
730 // above, we deleted all the predecessor edges from the switch.
731 // In this case, we need to add one back.
732 comp->fgAddRefPred(currentBlock->bbNext, currentBlock);
735 if (!fUsedAfterDefaultCondBlock)
737 // All the cases were fall-through! We don't need this block.
738 // Convert it from BBJ_SWITCH to BBJ_NONE and unset the BBF_DONT_REMOVE flag
739 // so fgRemoveBlock() doesn't complain.
740 JITDUMP("Lowering switch " FMT_BB ": all switch cases were fall-through\n", originalSwitchBB->bbNum);
741 assert(currentBlock == afterDefaultCondBlock);
742 assert(currentBlock->bbJumpKind == BBJ_SWITCH);
743 currentBlock->bbJumpKind = BBJ_NONE;
744 currentBlock->bbFlags &= ~BBF_DONT_REMOVE;
745 comp->fgRemoveBlock(currentBlock, /* unreachable */ false); // It's an empty block.
750 // At this point the default case has already been handled and we need to generate a jump
751 // table based switch or a bit test based switch at the end of afterDefaultCondBlock. Both
752 // switch variants need the switch value so create the necessary LclVar node here.
753 GenTree* switchValue = comp->gtNewLclvNode(tempLclNum, tempLclType);
754 LIR::Range& switchBlockRange = LIR::AsRange(afterDefaultCondBlock);
755 switchBlockRange.InsertAtEnd(switchValue);
757 // Try generating a bit test based switch first,
758 // if that's not possible a jump table based switch will be generated.
759 if (!TryLowerSwitchToBitTest(jumpTab, jumpCnt, targetCnt, afterDefaultCondBlock, switchValue))
761 JITDUMP("Lowering switch " FMT_BB ": using jump table expansion\n", originalSwitchBB->bbNum);
763 #ifdef _TARGET_64BIT_
764 if (tempLclType != TYP_I_IMPL)
766 // SWITCH_TABLE expects the switch value (the index into the jump table) to be TYP_I_IMPL.
767 // Note that the switch value is unsigned so the cast should be unsigned as well.
768 switchValue = comp->gtNewCastNode(TYP_I_IMPL, switchValue, true, TYP_U_IMPL);
769 switchBlockRange.InsertAtEnd(switchValue);
// Append the jump table node and the GT_SWITCH_TABLE node that consumes the (possibly cast)
// switch value and the table.
773 GenTree* switchTable = comp->gtNewJmpTableNode();
774 GenTree* switchJump = comp->gtNewOperNode(GT_SWITCH_TABLE, TYP_VOID, switchValue, switchTable);
775 switchBlockRange.InsertAfter(switchValue, switchTable, switchJump);
777 // this block no longer branches to the default block
778 afterDefaultCondBlock->bbJumpSwt->removeDefault();
781 comp->fgInvalidateSwitchDescMapEntry(afterDefaultCondBlock);
// Capture the node that follows GT_SWITCH before the switch and its operand are unlinked.
784 GenTree* next = node->gtNext;
786 // Get rid of the GT_SWITCH(temp).
787 switchBBRange.Remove(node->gtOp.gtOp1);
788 switchBBRange.Remove(node);
793 //------------------------------------------------------------------------
794 // TryLowerSwitchToBitTest: Attempts to transform a jump table switch into a bit test.
797 // jumpTable - The jump table
798 // jumpCount - The number of blocks in the jump table
799 // targetCount - The number of distinct blocks in the jump table
800 // bbSwitch - The switch block
801 // switchValue - A LclVar node that provides the switch value
804 // true if the switch has been lowered to a bit test
807 // If the jump table contains less than 32 (64 on 64 bit targets) entries and there
808 // are at most 2 distinct jump targets then the jump table can be converted to a word
809 // of bits where a 0 bit corresponds to one jump target and a 1 bit corresponds to the
810 // other jump target. Instead of the indirect jump a BT-JCC sequence is used to jump
811 // to the appropriate target:
812 // mov eax, 245 ; jump table converted to a "bit table"
813 // bt eax, ebx ; ebx is supposed to contain the switch value
818 // Such code is both shorter and faster (in part due to the removal of a memory load)
819 // than the traditional jump table base code. And of course, it also avoids the need
820 // to emit the jump table itself that can reach up to 256 bytes (for 64 entries).
822 bool Lowering::TryLowerSwitchToBitTest(
823 BasicBlock* jumpTable[], unsigned jumpCount, unsigned targetCount, BasicBlock* bbSwitch, GenTree* switchValue)
825 #ifndef _TARGET_XARCH_
826 // Other architectures may use this if they substitute GT_BT with equivalent code.
829 assert(jumpCount >= 2);
830 assert(targetCount >= 2);
831 assert(bbSwitch->bbJumpKind == BBJ_SWITCH);
832 assert(switchValue->OperIs(GT_LCL_VAR));
835 // Quick check to see if it's worth going through the jump table. The bit test switch supports
836 // up to 2 targets but targetCount also includes the default block so we need to allow 3 targets.
837 // We'll ensure that there are only 2 targets when building the bit table.
846 // The number of bits in the bit table is the same as the number of jump table entries. But the
847 // jump table also includes the default target (at the end) so we need to ignore it. The default
848 // has already been handled by a JTRUE(GT(switchValue, jumpCount - 2)) that LowerSwitch generates.
851 const unsigned bitCount = jumpCount - 1;
// The bit table must fit in a single value of type TYP_I_IMPL (genTypeSize(TYP_I_IMPL) * 8 bits).
853 if (bitCount > (genTypeSize(TYP_I_IMPL) * 8))
859 // Build a bit table where a bit set to 0 corresponds to bbCase0 and a bit set to 1 corresponds to
860 // bbCase1. Simply use the first block in the jump table as bbCase1, later we can invert the bit
861 // table and/or swap the blocks if it's beneficial.
864 BasicBlock* bbCase0 = nullptr;
865 BasicBlock* bbCase1 = jumpTable[0];
// Entry 0 is bbCase1 by construction, so the scan starts at index 1. Presumably bitTable is
// initialized with bit 0 already set; its declaration is not visible here - confirm.
868 for (unsigned bitIndex = 1; bitIndex < bitCount; bitIndex++)
870 if (jumpTable[bitIndex] == bbCase1)
872 bitTable |= (size_t(1) << bitIndex);
// The first entry that differs from bbCase1 becomes bbCase0.
874 else if (bbCase0 == nullptr)
876 bbCase0 = jumpTable[bitIndex];
878 else if (jumpTable[bitIndex] != bbCase0)
880 // If it's neither bbCase0 nor bbCase1 then it means we have 3 targets. There can't be more
881 // than 3 because of the check at the start of the function.
882 assert(targetCount == 3);
888 // One of the case blocks has to follow the switch block. This requirement could be avoided
889 // by adding a BBJ_ALWAYS block after the switch block but doing that sometimes negatively
890 // impacts register allocation.
893 if ((bbSwitch->bbNext != bbCase0) && (bbSwitch->bbNext != bbCase1))
898 #ifdef _TARGET_64BIT_
900 // See if we can avoid a 8 byte immediate on 64 bit targets. If all upper 32 bits are 1
901 // then inverting the bit table will make them 0 so that the table now fits in 32 bits.
902 // Note that this does not change the number of bits in the bit table, it just takes
903 // advantage of the fact that loading a 32 bit immediate into a 64 bit register zero
904 // extends the immediate value to 64 bit.
907 if (~bitTable <= UINT32_MAX)
// Inverting the table swaps the meaning of 0 and 1 bits, so the two case blocks swap as well.
909 bitTable = ~bitTable;
910 std::swap(bbCase0, bbCase1);
915 // Rewire the blocks as needed and figure out the condition to use for JCC.
918 genTreeOps bbSwitchCondition = GT_NONE;
919 bbSwitch->bbJumpKind = BBJ_COND;
// Drop every existing edge from the switch block to the two case blocks; a single edge to
// each is added back in the branches below.
921 comp->fgRemoveAllRefPreds(bbCase1, bbSwitch);
922 comp->fgRemoveAllRefPreds(bbCase0, bbSwitch);
924 if (bbSwitch->bbNext == bbCase0)
926 // GT_LT + GTF_UNSIGNED generates JC so we jump to bbCase1 when the bit is set
927 bbSwitchCondition = GT_LT;
928 bbSwitch->bbJumpDest = bbCase1;
930 comp->fgAddRefPred(bbCase0, bbSwitch);
931 comp->fgAddRefPred(bbCase1, bbSwitch);
935 assert(bbSwitch->bbNext == bbCase1);
937 // GT_GE + GTF_UNSIGNED generates JNC so we jump to bbCase0 when the bit is not set
938 bbSwitchCondition = GT_GE;
939 bbSwitch->bbJumpDest = bbCase0;
941 comp->fgAddRefPred(bbCase0, bbSwitch);
942 comp->fgAddRefPred(bbCase1, bbSwitch);
946 // Append BT(bitTable, switchValue) and JCC(condition) to the switch block.
// The constant is TYP_INT when the bit count fits in an int's width, otherwise TYP_LONG.
949 var_types bitTableType = (bitCount <= (genTypeSize(TYP_INT) * 8)) ? TYP_INT : TYP_LONG;
950 GenTree* bitTableIcon = comp->gtNewIconNode(bitTable, bitTableType);
951 GenTree* bitTest = comp->gtNewOperNode(GT_BT, TYP_VOID, bitTableIcon, switchValue);
// BT is flagged as setting flags and JCC as using them.
952 bitTest->gtFlags |= GTF_SET_FLAGS;
953 GenTreeCC* jcc = new (comp, GT_JCC) GenTreeCC(GT_JCC, bbSwitchCondition);
954 jcc->gtFlags |= GTF_UNSIGNED | GTF_USE_FLAGS;
956 LIR::AsRange(bbSwitch).InsertAfter(switchValue, bitTableIcon, bitTest, jcc);
959 #endif // _TARGET_XARCH_
// ReplaceArgWithPutArgOrBitcast: Makes 'putArgOrBitcast' the new occupant of '*argSlot', with
// the previous occupant as its operand, and inserts it into the block after that operand.
//
// argSlot - the slot holding the argument node; neither it nor its content may be null.
// putArgOrBitcast - a putarg node or a GT_BITCAST node.
//
962 // NOTE: this method deliberately does not update the call arg table. It must only
963 // be used by NewPutArg and LowerArg; these functions are responsible for updating
964 // the call arg table as necessary.
965 void Lowering::ReplaceArgWithPutArgOrBitcast(GenTree** argSlot, GenTree* putArgOrBitcast)
967 assert(argSlot != nullptr);
968 assert(*argSlot != nullptr);
969 assert(putArgOrBitcast->OperIsPutArg() || putArgOrBitcast->OperIs(GT_BITCAST));
971 GenTree* arg = *argSlot;
973 // Replace the argument with the putarg/bitcast
974 *argSlot = putArgOrBitcast;
975 putArgOrBitcast->gtOp.gtOp1 = arg;
977 // Insert the putarg/bitcast into the block
978 BlockRange().InsertAfter(arg, putArgOrBitcast);
981 //------------------------------------------------------------------------
982 // NewPutArg: rewrites the tree to put an arg in a register or on the stack.
985 // call - the call whose arg is being rewritten.
986 // arg - the arg being rewritten.
987 // info - the fgArgTabEntry information for the argument.
988 // type - the type of the argument.
991 // The new tree that was created to put the arg in the right place
992 // or the incoming arg if the arg tree was not rewritten.
995 // call, arg, and info must be non-null.
998 // For System V systems with native struct passing (i.e. UNIX_AMD64_ABI defined)
999 // this method allocates a single GT_PUTARG_REG for 1 eightbyte structs and a GT_FIELD_LIST of two GT_PUTARG_REGs
1000 // for two eightbyte structs.
1002 // For STK passed structs the method generates GT_PUTARG_STK tree. For System V systems with native struct passing
1003 // (i.e. UNIX_AMD64_ABI defined) this method also sets the GC pointers count and the pointers
1004 // layout object, so the codegen of the GT_PUTARG_STK could use this for optimizing copying to the stack by value.
1005 // (using block copy primitives for non GC pointers and a single TARGET_POINTER_SIZE copy with recording GC info.)
1007 GenTree* Lowering::NewPutArg(GenTreeCall* call, GenTree* arg, fgArgTabEntry* info, var_types type)
1009 assert(call != nullptr);
1010 assert(arg != nullptr);
1011 assert(info != nullptr);
1013 GenTree* putArg = nullptr;
1014 bool updateArgTable = true;
// The argument is passed on the stack exactly when its assigned register is REG_STK.
1016 bool isOnStack = true;
1017 isOnStack = info->regNum == REG_STK;
1019 #ifdef _TARGET_ARMARCH_
1020 // Mark contained when we pass struct
1021 // GT_FIELD_LIST is always marked contained when it is generated
1022 if (type == TYP_STRUCT)
1024 arg->SetContained();
// A GT_OBJ whose address is a GT_LCL_VAR_ADDR also gets that address node contained.
1025 if ((arg->OperGet() == GT_OBJ) && (arg->AsObj()->Addr()->OperGet() == GT_LCL_VAR_ADDR))
1027 MakeSrcContained(arg, arg->AsObj()->Addr());
1032 #if FEATURE_ARG_SPLIT
1033 // Struct can be split into register(s) and stack on ARM
1036 assert(arg->OperGet() == GT_OBJ || arg->OperGet() == GT_FIELD_LIST);
1037 // TODO: Need to check correctness for FastTailCall
1038 if (call->IsFastTailCall())
1041 NYI_ARM("lower: struct argument by fast tail call");
1042 #endif // _TARGET_ARM_
1045 putArg = new (comp, GT_PUTARG_SPLIT)
1046 GenTreePutArgSplit(arg, info->slotNum PUT_STRUCT_ARG_STK_ONLY_ARG(info->numSlots), info->numRegs,
1047 call->IsFastTailCall(), call);
1049 // If struct argument is morphed to GT_FIELD_LIST node(s),
1050 // we can know GC info by type of each GT_FIELD_LIST node.
1051 // So we skip setting GC Pointer info.
1053 GenTreePutArgSplit* argSplit = putArg->AsPutArgSplit();
// Copy each register assigned to the argument from the arg table entry onto the split node.
1054 for (unsigned regIndex = 0; regIndex < info->numRegs; regIndex++)
1056 argSplit->SetRegNumByIdx(info->getRegNum(regIndex), regIndex);
1059 if (arg->OperGet() == GT_OBJ)
1061 BYTE* gcLayout = nullptr;
1062 unsigned numRefs = 0;
1063 GenTreeObj* argObj = arg->AsObj();
// Reuse the GC layout already recorded on the GT_OBJ when it has been initialized.
1065 if (argObj->IsGCInfoInitialized())
1067 gcLayout = argObj->gtGcPtrs;
1068 numRefs = argObj->GetGcPtrCount();
1072 // Set GC Pointer info
// The layout buffer has one byte per stack slot plus one byte per register.
1073 gcLayout = new (comp, CMK_Codegen) BYTE[info->numSlots + info->numRegs];
1074 numRefs = comp->info.compCompHnd->getClassGClayout(arg->gtObj.gtClass, gcLayout);
1075 argSplit->setGcPointers(numRefs, gcLayout);
1078 // Set type of registers
1079 for (unsigned index = 0; index < info->numRegs; index++)
1081 var_types regType = comp->getJitGCType(gcLayout[index]);
1082 argSplit->m_regType[index] = regType;
// For a field list, each register takes the type of the corresponding field's operand.
1087 GenTreeFieldList* fieldListPtr = arg->AsFieldList();
1088 for (unsigned index = 0; index < info->numRegs; fieldListPtr = fieldListPtr->Rest(), index++)
1090 var_types regType = fieldListPtr->gtGetOp1()->TypeGet();
1091 argSplit->m_regType[index] = regType;
1093 // Clear the register assignments on the fieldList nodes, as these are contained.
1094 fieldListPtr->gtRegNum = REG_NA;
1099 #endif // FEATURE_ARG_SPLIT
1103 #if FEATURE_MULTIREG_ARGS
1104 if ((info->numRegs > 1) && (arg->OperGet() == GT_FIELD_LIST))
1106 assert(arg->OperGet() == GT_FIELD_LIST);
1108 assert(arg->AsFieldList()->IsFieldListHead());
1109 unsigned int regIndex = 0;
// Walk the field list and give each field its own GT_PUTARG_REG.
1110 for (GenTreeFieldList* fieldListPtr = arg->AsFieldList(); fieldListPtr != nullptr;
1111 fieldListPtr = fieldListPtr->Rest())
1113 regNumber argReg = info->getRegNum(regIndex);
1114 GenTree* curOp = fieldListPtr->gtOp.gtOp1;
1115 var_types curTyp = curOp->TypeGet();
1117 // Create a new GT_PUTARG_REG node with op1
1118 GenTree* newOper = comp->gtNewPutArgReg(curTyp, curOp, argReg);
1120 // Splice in the new GT_PUTARG_REG node in the GT_FIELD_LIST
1121 ReplaceArgWithPutArgOrBitcast(&fieldListPtr->gtOp.gtOp1, newOper);
1124 // Initialize all the gtRegNum's since the list won't be traversed in an LIR traversal.
1125 fieldListPtr->gtRegNum = REG_NA;
1128 // Just return arg. The GT_FIELD_LIST is not replaced.
1129 // Nothing more to do.
1133 #endif // FEATURE_MULTIREG_ARGS
// Wrap the argument in a GT_PUTARG_REG that targets info->regNum.
1135 putArg = comp->gtNewPutArgReg(type, arg, info->regNum);
1140 // Mark this one as tail call arg if it is a fast tail call.
1141 // This provides the info to put this argument in in-coming arg area slot
1142 // instead of in out-going arg area slot.
1144 // Make sure state is correct. The PUTARG_STK has TYP_VOID, as it doesn't produce
1145 // a result. So the type of its operand must be the correct type to push on the stack.
1146 // For a FIELD_LIST, this will be the type of the field (not the type of the arg),
1147 // but otherwise it is generally the type of the operand.
1148 info->checkIsStruct();
1149 if ((arg->OperGet() != GT_FIELD_LIST))
1151 #if defined(FEATURE_SIMD) && defined(FEATURE_PUT_STRUCT_ARG_STK)
1152 if (type == TYP_SIMD12)
1154 assert(info->numSlots == 3);
1157 #endif // defined(FEATURE_SIMD) && defined(FEATURE_PUT_STRUCT_ARG_STK)
1159 assert(genActualType(arg->TypeGet()) == type);
// Build the TYP_VOID GT_PUTARG_STK node for the argument's stack slot(s).
1164 new (comp, GT_PUTARG_STK) GenTreePutArgStk(GT_PUTARG_STK, TYP_VOID, arg,
1165 info->slotNum PUT_STRUCT_ARG_STK_ONLY_ARG(info->numSlots),
1166 call->IsFastTailCall(), call);
1168 #ifdef FEATURE_PUT_STRUCT_ARG_STK
1169 // If the ArgTabEntry indicates that this arg is a struct
1170 // get and store the number of slots that are references.
1171 // This is later used in the codegen for PUT_ARG_STK implementation
1172 // for struct to decide whether and how many single eight-byte copies
1173 // to be done (only for reference slots), so gcinfo is emitted.
1174 // For non-reference slots faster/smaller size instructions are used -
1175 // pair copying using XMM registers or rep mov instructions.
1178 // We use GT_OBJ only for non-lclVar, non-SIMD, non-FIELD_LIST struct arguments.
1179 if (arg->OperIsLocal())
1181 // This must have a type with a known size (SIMD or has been morphed to a primitive type).
1182 assert(arg->TypeGet() != TYP_STRUCT);
1184 else if (arg->OperIs(GT_OBJ))
1186 unsigned numRefs = 0;
// One layout byte per stack slot of the argument.
1187 BYTE* gcLayout = new (comp, CMK_Codegen) BYTE[info->numSlots];
1188 assert(!varTypeIsSIMD(arg));
1189 numRefs = comp->info.compCompHnd->getClassGClayout(arg->gtObj.gtClass, gcLayout);
1190 putArg->AsPutArgStk()->setGcPointers(numRefs, gcLayout);
1193 // On x86 VM lies about the type of a struct containing a pointer sized
1194 // integer field by returning the type of its field as the type of struct.
1195 // Such struct can be passed in a register depending on its position in
1196 // parameter list. VM does this unwrapping only one level and therefore
1197 // a type like Struct Foo { Struct Bar { int f}} always needs to be
1198 // passed on stack. Also, VM doesn't lie about type of such a struct
1199 // when it is a field of another struct. That is VM doesn't lie about
1200 // the type of Foo.Bar
1202 // We now support the promotion of fields that are of type struct.
1203 // However we only support a limited case where the struct field has a
1204 // single field and that single field must be a scalar type. Say Foo.Bar
1205 // field is getting passed as a parameter to a call, Since it is a TYP_STRUCT,
1206 // as per x86 ABI it should always be passed on stack. Therefore GenTree
1207 // node under a PUTARG_STK could be GT_OBJ(GT_LCL_VAR_ADDR(v1)), where
1208 // local v1 could be a promoted field standing for Foo.Bar. Note that
1209 // the type of v1 will be the type of field of Foo.Bar.f when Foo is
1210 // promoted. That is v1 will be a scalar type. In this case we need to
1211 // pass v1 on stack instead of in a register.
1213 // TODO-PERF: replace GT_OBJ(GT_LCL_VAR_ADDR(v1)) with v1 if v1 is
1214 // a scalar type and the width of GT_OBJ matches the type size of v1.
1215 // Note that this cannot be done till call node arguments are morphed
1216 // because we should not lose the fact that the type of argument is
1217 // a struct so that the arg gets correctly marked to be passed on stack.
1218 GenTree* objOp1 = arg->gtGetOp1();
1219 if (objOp1->OperGet() == GT_LCL_VAR_ADDR)
1221 unsigned lclNum = objOp1->AsLclVarCommon()->GetLclNum();
// A non-struct local whose address is taken here is marked do-not-enregister.
1222 if (comp->lvaTable[lclNum].lvType != TYP_STRUCT)
1224 comp->lvaSetVarDoNotEnregister(lclNum DEBUGARG(Compiler::DNER_VMNeedsStackAddr));
1227 #endif // _TARGET_X86_
1229 else if (!arg->OperIs(GT_FIELD_LIST))
1231 assert(varTypeIsSIMD(arg) || (info->numSlots == 1));
1234 #endif // FEATURE_PUT_STRUCT_ARG_STK
1238 JITDUMP("new node is : ");
// A late arg propagates GTF_LATE_ARG to the new node; otherwise, when updateArgTable
// is set, the arg table entry is pointed at the new node.
1242 if (arg->gtFlags & GTF_LATE_ARG)
1244 putArg->gtFlags |= GTF_LATE_ARG;
1246 else if (updateArgTable)
1248 info->node = putArg;
1253 //------------------------------------------------------------------------
1254 // LowerArg: Lower one argument of a call. This entails splicing a "putarg" node between
1255 // the argument evaluation and the call. This is the point at which the source is
1256 // consumed and the value transitions from control of the register allocator to the calling
1260 // call - The call node
1261 // ppArg - Pointer to the call argument pointer. We might replace the call argument by
1267 void Lowering::LowerArg(GenTreeCall* call, GenTree** ppArg)
1269 GenTree* arg = *ppArg;
1271 JITDUMP("lowering arg : ");
1274 // No assignments should remain by Lowering.
1275 assert(!arg->OperIs(GT_ASG));
1276 assert(!arg->OperIsPutArgStk());
1278 // Assignments/stores at this level are not really placing an argument.
1279 // They are setting up temporary locals that will later be placed into
1280 // outgoing regs or stack.
1281 // Note that atomic ops may be stores and still produce a value.
1282 if (!arg->IsValue())
1284 assert((arg->OperIsStore() && !arg->IsValue()) || arg->IsArgPlaceHolderNode() || arg->IsNothingNode() ||
1285 arg->OperIsCopyBlkOp());
// Fetch the arg table entry for this argument; it must still refer to 'arg'.
1289 fgArgTabEntry* info = comp->gtArgEntryByNode(call, arg);
1290 assert(info->node == arg);
1291 var_types type = arg->TypeGet();
1293 if (varTypeIsSmall(type))
1295 // Normalize 'type', it represents the item that we will be storing in the Outgoing Args
1299 #if defined(FEATURE_SIMD)
1300 #if defined(_TARGET_X86_)
1301 // Non-param TYP_SIMD12 local var nodes are massaged in Lower to TYP_SIMD16 to match their
1302 // allocated size (see lvSize()). However, when passing the variables as arguments, and
1303 // storing the variables to the outgoing argument area on the stack, we must use their
1304 // actual TYP_SIMD12 type, so exactly 12 bytes is allocated and written.
1305 if (type == TYP_SIMD16)
1307 if ((arg->OperGet() == GT_LCL_VAR) || (arg->OperGet() == GT_STORE_LCL_VAR))
// For a local, take the type recorded in the local variable table.
1309 unsigned varNum = arg->AsLclVarCommon()->GetLclNum();
1310 LclVarDsc* varDsc = &comp->lvaTable[varNum];
1311 type = varDsc->lvType;
1313 else if (arg->OperGet() == GT_SIMD)
1315 assert((arg->AsSIMD()->gtSIMDSize == 16) || (arg->AsSIMD()->gtSIMDSize == 12));
1317 if (arg->AsSIMD()->gtSIMDSize == 12)
1323 #elif defined(_TARGET_AMD64_)
1324 // TYP_SIMD8 parameters that are passed as longs
1325 if (type == TYP_SIMD8 && genIsValidIntReg(info->regNum))
// Reinterpret the SIMD8 value as TYP_LONG with a bitcast inserted right after the arg,
// then make the bitcast the argument in the arg slot and in the arg table entry.
1327 GenTreeUnOp* bitcast = new (comp, GT_BITCAST) GenTreeOp(GT_BITCAST, TYP_LONG, arg, nullptr);
1328 BlockRange().InsertAfter(arg, bitcast);
1330 info->node = *ppArg = arg = bitcast;
1333 #endif // defined(_TARGET_X86_)
1334 #endif // defined(FEATURE_SIMD)
1336 // If we hit this we are probably double-lowering.
1337 assert(!arg->OperIsPutArg());
1339 #if !defined(_TARGET_64BIT_)
1340 if (varTypeIsLong(type))
1342 bool isReg = (info->regNum != REG_STK);
1345 noway_assert(arg->OperGet() == GT_LONG);
1346 assert(info->numRegs == 2);
1348 GenTree* argLo = arg->gtGetOp1();
1349 GenTree* argHi = arg->gtGetOp2();
// Build a two-entry field list: the low half at offset 0 and the high half at offset 4,
// both typed TYP_INT.
1351 GenTreeFieldList* fieldList = new (comp, GT_FIELD_LIST) GenTreeFieldList(argLo, 0, TYP_INT, nullptr);
1352 // Only the first fieldList node (GTF_FIELD_LIST_HEAD) is in the instruction sequence.
1353 (void)new (comp, GT_FIELD_LIST) GenTreeFieldList(argHi, 4, TYP_INT, fieldList);
1354 GenTree* putArg = NewPutArg(call, fieldList, info, type);
// The GT_LONG node is replaced in the block by the node NewPutArg returned, and the
// arg table entry is pointed at the field list.
1356 BlockRange().InsertBefore(arg, putArg);
1357 BlockRange().Remove(arg);
1359 info->node = fieldList;
1363 assert(arg->OperGet() == GT_LONG);
1364 // For longs, we will replace the GT_LONG with a GT_FIELD_LIST, and put that under a PUTARG_STK.
1365 // Although the hi argument needs to be pushed first, that will be handled by the general case,
1366 // in which the fields will be reversed.
1367 assert(info->numSlots == 2);
1368 GenTree* argLo = arg->gtGetOp1();
1369 GenTree* argHi = arg->gtGetOp2();
1370 GenTreeFieldList* fieldList = new (comp, GT_FIELD_LIST) GenTreeFieldList(argLo, 0, TYP_INT, nullptr);
1371 // Only the first fieldList node (GTF_FIELD_LIST_HEAD) is in the instruction sequence.
1372 (void)new (comp, GT_FIELD_LIST) GenTreeFieldList(argHi, 4, TYP_INT, fieldList);
1373 GenTree* putArg = NewPutArg(call, fieldList, info, type);
1374 putArg->gtRegNum = info->regNum;
1376 // We can't call ReplaceArgWithPutArgOrBitcast here because it presumes that we are keeping the original
// Both the field list and the putarg are inserted before the GT_LONG, which is then removed.
1378 BlockRange().InsertBefore(arg, fieldList, putArg);
1379 BlockRange().Remove(arg);
1384 #endif // !defined(_TARGET_64BIT_)
1387 #ifdef _TARGET_ARMARCH_
1388 if (call->IsVarargs() || comp->opts.compUseSoftFP)
1390 // For vararg call or on armel, reg args should be all integer.
1391 // Insert copies as needed to move float value to integer register.
1392 GenTree* newNode = LowerFloatArg(ppArg, info);
// A non-null result means the argument was transformed; continue with the type of the
// node that LowerFloatArg returned.
1393 if (newNode != nullptr)
1395 type = newNode->TypeGet();
1398 #endif // _TARGET_ARMARCH_
1400 GenTree* putArg = NewPutArg(call, arg, info, type);
1402 // In the case of register passable struct (in one or two registers)
1403 // the NewPutArg returns a new node (GT_PUTARG_REG or a GT_FIELD_LIST with two GT_PUTARG_REGs.)
1404 // If an extra node is returned, splice it in the right place in the tree.
1407 ReplaceArgWithPutArgOrBitcast(ppArg, putArg);
1412 #ifdef _TARGET_ARMARCH_
1413 //------------------------------------------------------------------------
1414 // LowerFloatArg: Lower float call arguments on the arm platform.
1417 // pArg - Pointer to the slot holding the arg node
1418 // info - call argument info
1421 // Return nullptr, if no transformation was done;
1422 // return arg if there was in place transformation;
1423 // return a new tree if the root was changed.
1426 // This must handle scalar float arguments as well as GT_FIELD_LISTs
1427 // with floating point fields.
1429 GenTree* Lowering::LowerFloatArg(GenTree** pArg, fgArgTabEntry* info)
1431 GenTree* arg = *pArg;
// Only arguments that are assigned a register (regNum != REG_STK) are considered.
1432 if (info->regNum != REG_STK)
1434 if (arg->OperIsFieldList())
1436 GenTreeFieldList* currListNode = arg->AsFieldList();
1437 regNumber currRegNumber = info->regNum;
1439 // Transform fields that are passed as registers in place.
1440 unsigned fieldRegCount;
1441 for (unsigned i = 0; i < info->numRegs; i += fieldRegCount)
1443 assert(currListNode != nullptr);
1444 GenTree* node = currListNode->Current();
1445 if (varTypeIsFloating(node))
// Bitcast the floating point field to an integer type and splice the bitcast into
// the list node, which also takes on the bitcast's type.
1447 GenTree* intNode = LowerFloatArgReg(node, currRegNumber);
1448 assert(intNode != nullptr);
1450 ReplaceArgWithPutArgOrBitcast(currListNode->pCurrent(), intNode);
1451 currListNode->ChangeType(intNode->TypeGet());
// A TYP_DOUBLE field advances the current register by two; the other visible path
// advances it by one.
1454 if (node->TypeGet() == TYP_DOUBLE)
1456 currRegNumber = REG_NEXT(REG_NEXT(currRegNumber));
1461 currRegNumber = REG_NEXT(currRegNumber);
1464 currListNode = currListNode->Rest();
1466 // List fields were replaced in place.
1469 else if (varTypeIsFloating(arg))
// Scalar floating point argument: bitcast it and splice the bitcast into the arg slot.
1471 GenTree* intNode = LowerFloatArgReg(arg, info->regNum);
1472 assert(intNode != nullptr);
1473 ReplaceArgWithPutArgOrBitcast(pArg, intNode);
1480 //------------------------------------------------------------------------
1481 // LowerFloatArgReg: Lower the float call argument node that is passed via register.
1484 // arg - The arg node
1485 // regNum - register number
1488 // Return new bitcast node, that moves float to int register.
1490 GenTree* Lowering::LowerFloatArgReg(GenTree* arg, regNumber regNum)
1492 var_types floatType = arg->TypeGet();
1493 assert(varTypeIsFloating(floatType));
// A TYP_DOUBLE is bitcast to TYP_LONG; any other floating type is bitcast to TYP_INT.
1494 var_types intType = (floatType == TYP_DOUBLE) ? TYP_LONG : TYP_INT;
1495 GenTree* intArg = comp->gtNewBitCastNode(intType, arg);
1496 intArg->gtRegNum = regNum;
// For a double, the register following 'regNum' is recorded as the node's other register.
1498 if (floatType == TYP_DOUBLE)
1500 regNumber nextReg = REG_NEXT(regNum);
1501 intArg->AsMultiRegOp()->gtOtherReg = nextReg;
//------------------------------------------------------------------------
// LowerArgsForCall: Lower every argument of a call with LowerArg.
//
// Arguments:
//    call - the call whose arguments are lowered
//
// Notes:
//    The arguments are visited in this order: the 'this' object (gtCallObjp),
//    if present, then the gtCallArgs list, then the gtCallLateArgs list.
//
1508 // do lowering steps for each arg of a call
1509 void Lowering::LowerArgsForCall(GenTreeCall* call)
1511 JITDUMP("objp:\n======\n");
1512 if (call->gtCallObjp)
1514 LowerArg(call, &call->gtCallObjp);
1517 GenTreeArgList* args = call->gtCallArgs;
1519 JITDUMP("\nargs:\n======\n");
1520 for (; args; args = args->Rest())
1522 LowerArg(call, &args->Current());
1525 JITDUMP("\nlate:\n======\n");
// 'args' is reused to walk the late argument list.
1526 for (args = call->gtCallLateArgs; args; args = args->Rest())
1528 LowerArg(call, &args->Current());
//------------------------------------------------------------------------
// AddrGen: Create a handle constant node for an address.
//
// Arguments:
//    addr - the address value, passed as an ssize_t
//
// Notes:
//    The node is created with gtNewIconHandleNode and the GTF_ICON_FTN_ADDR flag.
//
1532 // helper that create a node representing a relocatable physical address computation
1533 GenTree* Lowering::AddrGen(ssize_t addr)
1535 // this should end up in codegen as : instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg, addr)
1536 GenTree* result = comp->gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR);
//------------------------------------------------------------------------
// AddrGen: Overload of AddrGen that accepts the address as a void*.
//
// Arguments:
//    addr - the address; it is cast to ssize_t and forwarded to AddrGen(ssize_t)
//
// Return Value:
//    Whatever AddrGen(ssize_t) returns for the converted address.
//
1540 // variant that takes a void*
1541 GenTree* Lowering::AddrGen(void* addr)
1543 return AddrGen((ssize_t)addr);
1546 // do lowering steps for a call
1548 // - adding the placement nodes (either stack or register variety) for arguments
1549 // - lowering the expression that calculates the target address
1550 // - adding nodes for other operations that occur after the call sequence starts and before
1551 // control transfer occurs (profiling and tail call helpers, pinvoke incantations)
1553 void Lowering::LowerCall(GenTree* node)
1555 GenTreeCall* call = node->AsCall();
1557 JITDUMP("lowering call (before):\n");
1558 DISPTREERANGE(BlockRange(), call);
// The arguments are lowered first; the call target is computed afterwards.
1561 call->ClearOtherRegs();
1562 LowerArgsForCall(call);
1564 // note that everything generated from this point on runs AFTER the outgoing args are placed
1565 GenTree* result = nullptr;
1567 // for x86, this is where we record ESP for checking later to make sure stack is balanced
1569 // Check for Delegate.Invoke(). If so, we inline it. We get the
1570 // target-object and target-function from the delegate-object, and do
1571 // an indirect call.
1572 if (call->IsDelegateInvoke())
1574 result = LowerDelegateInvoke(call);
1578 // Virtual and interface calls
1579 switch (call->gtFlags & GTF_CALL_VIRT_KIND_MASK)
1581 case GTF_CALL_VIRT_STUB:
1582 result = LowerVirtualStubCall(call);
1585 case GTF_CALL_VIRT_VTABLE:
1586 // stub dispatching is off or this is not a virtual call (could be a tailcall)
1587 result = LowerVirtualVtableCall(call);
1590 case GTF_CALL_NONVIRT:
// Non-virtual calls are dispatched on whether the call is unmanaged, indirect, or direct.
1591 if (call->IsUnmanaged())
1593 result = LowerNonvirtPinvokeCall(call);
1595 else if (call->gtCallType == CT_INDIRECT)
1597 result = LowerIndirectNonvirtCall(call);
1601 result = LowerDirectCall(call);
1606 noway_assert(!"strange call type");
1611 if (call->IsTailCallViaHelper())
1613 // Either controlExpr or gtCallAddr must contain real call target.
1614 if (result == nullptr)
1616 assert(call->gtCallType == CT_INDIRECT);
1617 assert(call->gtCallAddr != nullptr);
1618 result = call->gtCallAddr;
1621 result = LowerTailCallViaHelper(call, result);
1623 else if (call->IsFastTailCall())
1625 LowerFastTailCall(call);
// When a control expression was produced, sequence it into its own LIR range and insert
// that range before the chosen insertion point (the call itself by default).
1628 if (result != nullptr)
1630 LIR::Range resultRange = LIR::SeqTree(comp, result);
1632 JITDUMP("results of lowering call:\n");
1633 DISPRANGE(resultRange);
1635 GenTree* insertionPoint = call;
1636 if (!call->IsTailCallViaHelper())
1638 // The controlExpr should go before the gtCallCookie and the gtCallAddr, if they exist
1640 // TODO-LIR: find out what's really required here, as this is currently a tree order
1642 if (call->gtCallType == CT_INDIRECT)
1644 bool isClosed = false;
1645 if (call->gtCallCookie != nullptr)
1648 GenTree* firstCallAddrNode = BlockRange().GetTreeRange(call->gtCallAddr, &isClosed).FirstNode();
1650 assert(call->gtCallCookie->Precedes(firstCallAddrNode));
// With a call cookie present, insert before the first node of the cookie's tree range.
1653 insertionPoint = BlockRange().GetTreeRange(call->gtCallCookie, &isClosed).FirstNode();
1656 else if (call->gtCallAddr != nullptr)
// Without a cookie, insert before the first node of the call address tree range.
1658 insertionPoint = BlockRange().GetTreeRange(call->gtCallAddr, &isClosed).FirstNode();
1664 ContainCheckRange(resultRange);
1665 BlockRange().InsertBefore(insertionPoint, std::move(resultRange));
// Record the lowered target as the call's control expression.
1667 call->gtControlExpr = result;
1670 if (comp->opts.IsJit64Compat())
1672 CheckVSQuirkStackPaddingNeeded(call);
1675 ContainCheckCallOperands(call);
1676 JITDUMP("lowering call (after):\n");
1677 DISPTREERANGE(BlockRange(), call);
1681 // Though the below described issue gets fixed in intellitrace dll of VS2015 (a.k.a Dev14),
1682 // we still need this quirk for desktop so that older version of VS (e.g. VS2010/2012)
1683 // continues to work.
1684 // This quirk is excluded from other targets that have no back compat burden.
1686 // Quirk for VS debug-launch scenario to work:
1687 // See if this is a PInvoke call with exactly one param that is the address of a struct local.
1688 // In such a case indicate to frame-layout logic to add 16-bytes of padding
1689 // between save-reg area and locals. This is to protect against the buffer
1690 // overrun bug in microsoft.intellitrace.11.0.0.dll!ProfilerInterop.InitInterop().
1692 // A work-around to this bug is to disable IntelliTrace debugging
1693 // (VS->Tools->Options->IntelliTrace->Enable IntelliTrace - uncheck this option).
1694 // The reason why this works on Jit64 is that at the point of AV the call stack is
1696 // GetSystemInfo() Native call
1697 // IL_Stub generated for PInvoke declaration.
1698 // ProfilerInterface::InitInterop()
1699 // ProfilerInterface.Cctor()
1702 // The cctor body has just the call to InitInterop(). VM asm worker is holding
1703 // something in rbx that is used immediately after the Cctor call. Jit64 generated
1704 // InitInterop() method is pushing the registers in the following order
1714 // Due to buffer overrun, rbx doesn't get impacted. Whereas RyuJIT jitted code of
1715 // the same method is pushing regs in the following order
1723 // Therefore as a fix, we add padding between save-reg area and locals to
1724 // make this scenario work against JB.
1726 // Note: If this quirk gets broken due to other JIT optimizations, we should consider
1727 // more tolerant fix. One such fix is to pad the struct.
//
// Arguments:
//    call - the call to examine; this method asserts that Jit64 compatibility mode is on.
1728 void Lowering::CheckVSQuirkStackPaddingNeeded(GenTreeCall* call)
1730 assert(comp->opts.IsJit64Compat());
1732 #ifdef _TARGET_AMD64_
1733 // Confine this to IL stub calls which aren't marked as unmanaged.
1734 if (call->IsPInvoke() && !call->IsUnmanaged())
1736 bool paddingNeeded = false;
1737 GenTree* firstPutArgReg = nullptr;
// Scan the late args. Padding is flagged when the first GT_PUTARG_REG wraps the address
// of a struct-typed local, and is cleared again if another GT_PUTARG_REG is seen.
1738 for (GenTreeArgList* args = call->gtCallLateArgs; args; args = args->Rest())
1740 GenTree* tmp = args->Current();
1741 if (tmp->OperGet() == GT_PUTARG_REG)
1743 if (firstPutArgReg == nullptr)
1745 firstPutArgReg = tmp;
1746 GenTree* op1 = firstPutArgReg->gtOp.gtOp1;
1748 if (op1->OperGet() == GT_LCL_VAR_ADDR)
1750 unsigned lclNum = op1->AsLclVarCommon()->GetLclNum();
1751 // TODO-1stClassStructs: This is here to duplicate previous behavior,
1752 // but is not needed because the scenario being quirked did not involve
1753 // a SIMD or enregisterable struct.
1754 // if(comp->lvaTable[lclNum].TypeGet() == TYP_STRUCT)
1755 if (varTypeIsStruct(comp->lvaTable[lclNum].TypeGet()))
1757 // First arg is addr of a struct local.
1758 paddingNeeded = true;
1762 // Not a struct local.
1763 assert(paddingNeeded == false);
1769 // First arg is not a local var addr.
1770 assert(paddingNeeded == false);
1776 // Has more than one arg.
1777 paddingNeeded = false;
// Record the padding request on the compiler instance.
1785 comp->compVSQuirkStackPaddingNeeded = VSQUIRK_STACK_PAD;
1788 #endif // _TARGET_AMD64_
1791 // Inserts profiler hook, GT_PROF_HOOK for a tail call node.
1794 // We need to insert this after all nested calls, but before all the arguments to this call have been set up.
1795 // To do this, we look for the first GT_PUTARG_STK or GT_PUTARG_REG, and insert the hook immediately before
1796 // that. If there are no args, then it should be inserted before the call node.
1799 // * stmtExpr void (top level) (IL 0x000...0x010)
1800 // arg0 SETUP | /--* argPlace ref REG NA $c5
1801 // this in rcx | | /--* argPlace ref REG NA $c1
1802 // | | | /--* call ref System.Globalization.CultureInfo.get_InvariantCulture $c2
1803 // arg1 SETUP | | +--* st.lclVar ref V02 tmp1 REG NA $c2
1804 // | | | /--* lclVar ref V02 tmp1 u : 2 (last use) REG NA $c2
1805 // arg1 in rdx | | +--* putarg_reg ref REG NA
1806 // | | | /--* lclVar ref V00 arg0 u : 2 (last use) REG NA $80
1807 // this in rcx | | +--* putarg_reg ref REG NA
1808 // | | /--* call nullcheck ref System.String.ToLower $c5
1809 // | | { * stmtExpr void (embedded)(IL 0x000... ? ? ? )
1810 // | | { \--* prof_hook void REG NA
1811 // arg0 in rcx | +--* putarg_reg ref REG NA
1812 // control expr | +--* const(h) long 0x7ffe8e910e98 ftn REG NA
1813 // \--* call void System.Runtime.Remoting.Identity.RemoveAppNameOrAppGuidIfNecessary $VN.Void
1815 // In this case, the GT_PUTARG_REG src is a nested call. We need to put the instructions after that call
1816 // (as shown). We assume that of all the GT_PUTARG_*, only the first one can have a nested call.
1819 // Insert the profiler hook immediately before the call. The profiler hook will preserve
1820 // all argument registers (ECX, EDX), but nothing else.
1823 // callNode - tail call node
1824 // insertionPoint - if non-null, insert the profiler hook before this point.
1825 // If null, insert the profiler hook before args are setup
1826 // but after all arg side effects are computed.
1828 void Lowering::InsertProfTailCallHook(GenTreeCall* call, GenTree* insertionPoint)
1830 assert(call->IsTailCall());
1831 assert(comp->compIsProfilerHookNeeded());
1833 #if defined(_TARGET_X86_)
// On x86 a null insertion point defaults to the call node itself.
1835 if (insertionPoint == nullptr)
1837 insertionPoint = call;
1840 #else // !defined(_TARGET_X86_)
1842 if (insertionPoint == nullptr)
1844 GenTree* tmp = nullptr;
// Look for a GT_PUTARG_STK among the regular args.
1845 for (GenTreeArgList* args = call->gtCallArgs; args; args = args->Rest())
1847 tmp = args->Current();
1848 assert(tmp->OperGet() != GT_PUTARG_REG); // We don't expect to see these in gtCallArgs
1849 if (tmp->OperGet() == GT_PUTARG_STK)
1852 insertionPoint = tmp;
// If none was found, look among the late args for a GT_PUTARG_REG or GT_PUTARG_STK.
1857 if (insertionPoint == nullptr)
1859 for (GenTreeArgList* args = call->gtCallLateArgs; args; args = args->Rest())
1861 tmp = args->Current();
1862 if ((tmp->OperGet() == GT_PUTARG_REG) || (tmp->OperGet() == GT_PUTARG_STK))
1865 insertionPoint = tmp;
1870 // If there are no args, insert before the call node
1871 if (insertionPoint == nullptr)
1873 insertionPoint = call;
1878 #endif // !defined(_TARGET_X86_)
1880 assert(insertionPoint != nullptr);
// Create the TYP_VOID GT_PROF_HOOK node and place it immediately before the insertion point.
1881 GenTree* profHookNode = new (comp, GT_PROF_HOOK) GenTree(GT_PROF_HOOK, TYP_VOID);
1882 BlockRange().InsertBefore(insertionPoint, profHookNode);
1885 // Lower fast tail call implemented as epilog+jmp.
1886 // Also inserts PInvoke method epilog if required.
1887 void Lowering::LowerFastTailCall(GenTreeCall* call)
1889 #if FEATURE_FASTTAILCALL
1890 // Tail call restrictions i.e. conditions under which tail prefix is ignored.
1891 // Most of these checks are already done by importer or fgMorphTailCall().
1892 // This serves as a double sanity check.
1893 assert((comp->info.compFlags & CORINFO_FLG_SYNCH) == 0); // tail calls from synchronized methods
1894 assert(!comp->opts.compNeedSecurityCheck); // tail call from methods that need security check
1895 assert(!call->IsUnmanaged()); // tail calls to unamanaged methods
1896 assert(!comp->compLocallocUsed); // tail call from methods that also do localloc
1898 #ifdef _TARGET_AMD64_
1899 assert(!comp->getNeedsGSSecurityCookie()); // jit64 compat: tail calls from methods that need GS check
1900 #endif // _TARGET_AMD64_
1902 // We expect to see a call that meets the following conditions
1903 assert(call->IsFastTailCall());
1905 // VM cannot use return address hijacking when A() and B() tail call each
1906 // other in mutual recursion. Therefore, this block is reachable through
1907 // a GC-safe point or the whole method is marked as fully interruptible.
1910 // optReachWithoutCall() depends on the fact that loop headers blocks
1911 // will have a block number > fgLastBB. These loop headers gets added
1912 // after dominator computation and get skipped by OptReachWithoutCall().
1913 // The below condition cannot be asserted in lower because fgSimpleLowering()
1914 // can add a new basic block for range check failure which becomes
1915 // fgLastBB with block number > loop header block number.
1916 // assert((comp->compCurBB->bbFlags & BBF_GC_SAFE_POINT) ||
1917 // !comp->optReachWithoutCall(comp->fgFirstBB, comp->compCurBB) || comp->genInterruptible);
1919 // If PInvokes are in-lined, we have to remember to execute PInvoke method epilog anywhere that
1920 // a method returns. This is a case of caller method has both PInvokes and tail calls.
1921 if (comp->info.compCallUnmanaged)
1923 InsertPInvokeMethodEpilog(comp->compCurBB DEBUGARG(call));
1926 // Args for tail call are setup in incoming arg area. The gc-ness of args of
1927 // caller and callee (which being tail called) may not match. Therefore, everything
1928 // from arg setup until the epilog need to be non-interruptible by GC. This is
1929 // achieved by inserting GT_START_NONGC before the very first GT_PUTARG_STK node
1930 // of call is setup. Note that once a stack arg is setup, it cannot have nested
1931 // calls subsequently in execution order to setup other args, because the nested
1932 // call could over-write the stack arg that is setup earlier.
1933 GenTree* firstPutArgStk = nullptr;
1934 GenTreeArgList* args;
1935 ArrayStack<GenTree*> putargs(comp->getAllocator(CMK_ArrayStack));
1937 for (args = call->gtCallArgs; args; args = args->Rest())
1939 GenTree* tmp = args->Current();
1940 if (tmp->OperGet() == GT_PUTARG_STK)
1946 for (args = call->gtCallLateArgs; args; args = args->Rest())
1948 GenTree* tmp = args->Current();
1949 if (tmp->OperGet() == GT_PUTARG_STK)
1955 if (putargs.Height() > 0)
1957 firstPutArgStk = putargs.Bottom();
1960 // If we have a putarg_stk node, also count the number of non-standard args the
1961 // call node has. Note that while determining whether a tail call can be fast
1962 // tail called, we don't count non-standard args (passed in R10 or R11) since they
1963 // don't contribute to outgoing arg space. These non-standard args are not
1964 // accounted in caller's arg count but accounted in callee's arg count after
1965 // fgMorphArgs(). Therefore, exclude callee's non-standard args while mapping
1966 // callee's stack arg num to corresponding caller's stack arg num.
1967 unsigned calleeNonStandardArgCount = call->GetNonStandardAddedArgCount(comp);
1969 // Say Caller(a, b, c, d, e) fast tail calls Callee(e, d, c, b, a)
1970 // i.e. passes its arguments in reverse to Callee. During call site
1971 // setup, after computing argument side effects, stack args are setup
1972 // first and reg args next. In the above example, both Callers and
1973 // Callee stack args (e and a respectively) share the same stack slot
1974 // and are alive at the same time. The act of setting up Callee's
1975 // stack arg will over-write the stack arg of Caller and if there are
1976 // further uses of Caller stack arg we have to make sure that we move
1977 // it to a temp before over-writing its slot and use temp in place of
1978 // the corresponding Caller stack arg.
1980 // For the above example, conceptually this is what is done
1982 // Stack slot of e = a
1983 // R9 = b, R8 = c, RDx = d
1986 // The below logic is meant to detect cases like this and introduce
1987 // temps to set up args correctly for Callee.
1989 for (int i = 0; i < putargs.Height(); i++)
1991 GenTree* putArgStkNode = putargs.Bottom(i);
1993 assert(putArgStkNode->OperGet() == GT_PUTARG_STK);
1995 // Get the caller arg num corresponding to this callee arg.
1996 // Note that these two args share the same stack slot. Therefore,
1997 // if there are further uses of corresponding caller arg, we need
1998 // to move it to a temp and use the temp in this call tree.
2000 // Note that Caller is guaranteed to have a param corresponding to
2001 // this Callee's arg since fast tail call mechanism counts the
2002 // stack slots required for both Caller and Callee for passing params
2003 // and allow fast tail call only if stack slots required by Caller >=
2005 fgArgTabEntry* argTabEntry = comp->gtArgEntryByNode(call, putArgStkNode);
2006 assert(argTabEntry);
2007 unsigned callerArgNum = argTabEntry->argNum - calleeNonStandardArgCount;
2008 noway_assert(callerArgNum < comp->info.compArgsCount);
2010 unsigned callerArgLclNum = callerArgNum;
2011 LclVarDsc* callerArgDsc = comp->lvaTable + callerArgLclNum;
2012 if (callerArgDsc->lvPromoted)
2015 callerArgDsc->lvFieldLclStart; // update the callerArgNum to the promoted struct field's lclNum
2016 callerArgDsc = comp->lvaTable + callerArgLclNum;
2018 noway_assert(callerArgDsc->lvIsParam);
2020 // Start searching in execution order list till we encounter call node
2021 unsigned tmpLclNum = BAD_VAR_NUM;
2022 var_types tmpType = TYP_UNDEF;
2023 for (GenTree* treeNode = putArgStkNode->gtNext; treeNode != call; treeNode = treeNode->gtNext)
2025 if (treeNode->OperIsLocal() || treeNode->OperIsLocalAddr())
2027 // This should neither be a GT_REG_VAR nor GT_PHI_ARG.
2028 assert((treeNode->OperGet() != GT_REG_VAR) && (treeNode->OperGet() != GT_PHI_ARG));
2030 GenTreeLclVarCommon* lcl = treeNode->AsLclVarCommon();
2031 LclVarDsc* lclVar = &comp->lvaTable[lcl->gtLclNum];
2033 // Fast tail calling criteria permits passing of structs of size 1, 2, 4 and 8 as args.
2034 // It is possible that the callerArgLclNum corresponds to such a struct whose stack slot
2035 // is getting over-written by setting up of a stack arg and there are further uses of
2036 // any of its fields if such a struct is type-dependently promoted. In this case too
2037 // we need to introduce a temp.
2038 if ((lcl->gtLclNum == callerArgNum) || (lcl->gtLclNum == callerArgLclNum))
2040 // Create tmp and use it in place of callerArgDsc
2041 if (tmpLclNum == BAD_VAR_NUM)
2043 // Set tmpType first before calling lvaGrabTemp, as that call invalidates callerArgDsc
2044 tmpType = genActualType(callerArgDsc->lvaArgType());
2045 tmpLclNum = comp->lvaGrabTemp(
2046 true DEBUGARG("Fast tail call lowering is creating a new local variable"));
2048 comp->lvaTable[tmpLclNum].lvType = tmpType;
2049 comp->lvaTable[tmpLclNum].lvDoNotEnregister = comp->lvaTable[lcl->gtLclNum].lvDoNotEnregister;
2052 lcl->SetLclNum(tmpLclNum);
2057 // If we have created a temp, insert an embedded assignment statement before
2058 // the first putargStkNode i.e.
2059 // tmpLcl = CallerArg
2060 if (tmpLclNum != BAD_VAR_NUM)
2062 assert(tmpType != TYP_UNDEF);
2063 GenTreeLclVar* local =
2064 new (comp, GT_LCL_VAR) GenTreeLclVar(GT_LCL_VAR, tmpType, callerArgLclNum, BAD_IL_OFFSET);
2065 GenTree* assignExpr = comp->gtNewTempAssign(tmpLclNum, local);
2066 ContainCheckRange(local, assignExpr);
2067 BlockRange().InsertBefore(firstPutArgStk, LIR::SeqTree(comp, assignExpr));
2071 // Insert GT_START_NONGC node before the first GT_PUTARG_STK node.
2072 // Note that if there are no args to be setup on stack, no need to
2073 // insert GT_START_NONGC node.
2074 GenTree* startNonGCNode = nullptr;
2075 if (firstPutArgStk != nullptr)
2077 startNonGCNode = new (comp, GT_START_NONGC) GenTree(GT_START_NONGC, TYP_VOID);
2078 BlockRange().InsertBefore(firstPutArgStk, startNonGCNode);
2080 // Gc-interruptability in the following case:
2081 // foo(a, b, c, d, e) { bar(a, b, c, d, e); }
2082 // bar(a, b, c, d, e) { foo(a, b, d, d, e); }
2084 // Since the instruction group starting from the instruction that sets up first
2085 // stack arg to the end of the tail call is marked as non-gc interruptible,
2086 // this will form a non-interruptible tight loop causing gc-starvation. To fix
2087 // this we insert GT_NO_OP as embedded stmt before GT_START_NONGC, if the method
2088 // has a single basic block and is not a GC-safe point. The presence of a single
2089 // nop outside non-gc interruptible region will prevent gc starvation.
2090 if ((comp->fgBBcount == 1) && !(comp->compCurBB->bbFlags & BBF_GC_SAFE_POINT))
2092 assert(comp->fgFirstBB == comp->compCurBB);
2093 GenTree* noOp = new (comp, GT_NO_OP) GenTree(GT_NO_OP, TYP_VOID);
2094 BlockRange().InsertBefore(startNonGCNode, noOp);
2098 // Insert GT_PROF_HOOK node to emit profiler tail call hook. This should be
2099 // inserted before the args are setup but after the side effects of args are
2100 // computed. That is, GT_PROF_HOOK node needs to be inserted before GT_START_NONGC
2101 // node if one exists.
2102 if (comp->compIsProfilerHookNeeded())
2104 InsertProfTailCallHook(call, startNonGCNode);
2107 #else // !FEATURE_FASTTAILCALL
2109 // The platform chose not to implement the fast tail call mechanism.
2110 // In such a case we should never be reaching this method as
2111 // the expectation is that IsTailCallViaHelper() will always
2112 // be true on such a platform.
2117 //------------------------------------------------------------------------
2118 // LowerTailCallViaHelper: lower a call via the tailcall helper. Morph
2119 // has already inserted tailcall helper special arguments. This function
2120 // inserts actual data for some placeholders.
//
2122 // For ARM32, AMD64, lower
2123 // tail.call(void* copyRoutine, void* dummyArg, ...)
// as
2125 // Jit_TailCall(void* copyRoutine, void* callTarget, ...)
//
// For x86 (the _TARGET_X86_ path below), lower
2128 // tail.call(<function args>, int numberOfOldStackArgs, int dummyNumberOfNewStackArgs, int flags, void* dummyArg)
// as
2130 // JIT_TailCall(<function args>, int numberOfOldStackArgsWords, int numberOfNewStackArgsWords, int flags, void* callTarget)
2132 // Note that the special arguments are on the stack, whereas the function arguments follow the normal convention.
//
2134 // Also inserts PInvoke method epilog if required.
//
// Arguments:
2137 // call - The call node
2138 // callTarget - The real call target. This is used to replace the dummyArg during lowering.
//
// Return Value:
2141 // Returns control expression tree for making a call to helper Jit_TailCall.
//
2143 GenTree* Lowering::LowerTailCallViaHelper(GenTreeCall* call, GenTree* callTarget)
2145 // Tail call restrictions i.e. conditions under which tail prefix is ignored.
2146 // Most of these checks are already done by importer or fgMorphTailCall().
2147 // This serves as a double sanity check.
2148 assert((comp->info.compFlags & CORINFO_FLG_SYNCH) == 0); // tail calls from synchronized methods
2149 assert(!comp->opts.compNeedSecurityCheck); // tail call from methods that need security check
2150 assert(!call->IsUnmanaged()); // tail calls to unmanaged methods
2151 assert(!comp->compLocallocUsed); // tail call from methods that also do localloc
2153 #ifdef _TARGET_AMD64_
2154 assert(!comp->getNeedsGSSecurityCookie()); // jit64 compat: tail calls from methods that need GS check
2155 #endif // _TARGET_AMD64_
2157 // We expect to see a call that meets the following conditions
2158 assert(call->IsTailCallViaHelper());
2159 assert(callTarget != nullptr);
2161 // The TailCall helper call never returns to the caller and is not GC interruptible.
2162 // Therefore the block containing the tail call should be a GC safe point to avoid
2163 // GC starvation. It is legal for the block to be unmarked iff the entry block is a
2164 // GC safe point, as the entry block trivially dominates every reachable block.
2165 assert((comp->compCurBB->bbFlags & BBF_GC_SAFE_POINT) || (comp->fgFirstBB->bbFlags & BBF_GC_SAFE_POINT));
2167 // If PInvokes are in-lined, we have to remember to execute PInvoke method epilog anywhere that
2168 // a method returns. This is a case of caller method has both PInvokes and tail calls.
2169 if (comp->info.compCallUnmanaged)
2171 InsertPInvokeMethodEpilog(comp->compCurBB DEBUGARG(call));
2174 // Remove gtCallAddr from execution order if present.
2175 if (call->gtCallType == CT_INDIRECT)
2177 assert(call->gtCallAddr != nullptr);
// The whole tree rooted at gtCallAddr is taken out of the block's range, not just the root node.
2180 LIR::ReadOnlyRange callAddrRange = BlockRange().GetTreeRange(call->gtCallAddr, &isClosed);
2183 BlockRange().Remove(std::move(callAddrRange));
2186 // The callTarget tree needs to be sequenced.
2187 LIR::Range callTargetRange = LIR::SeqTree(comp, callTarget);
2189 #if defined(_TARGET_AMD64_) || defined(_TARGET_ARM_)
2191 // For ARM32 and AMD64, first argument is CopyRoutine and second argument is a place holder node.
2192 fgArgTabEntry* argEntry;
// The first argument is only validated here: its PUTARG_REG operand must be an integer constant.
2195 argEntry = comp->gtArgEntryByArgNum(call, 0);
2196 assert(argEntry != nullptr);
2197 assert(argEntry->node->gtOper == GT_PUTARG_REG);
2198 GenTree* firstArg = argEntry->node->gtOp.gtOp1;
2199 assert(firstArg->gtOper == GT_CNS_INT);
2202 // Replace second arg by callTarget.
2203 argEntry = comp->gtArgEntryByArgNum(call, 1);
2204 assert(argEntry != nullptr);
2205 assert(argEntry->node->gtOper == GT_PUTARG_REG);
2206 GenTree* secondArg = argEntry->node->gtOp.gtOp1;
// Place the sequenced callTarget nodes right after the placeholder, then remove the
// placeholder's tree and make the PUTARG_REG node consume callTarget instead.
2208 ContainCheckRange(callTargetRange);
2209 BlockRange().InsertAfter(secondArg, std::move(callTargetRange));
2212 LIR::ReadOnlyRange secondArgRange = BlockRange().GetTreeRange(secondArg, &isClosed);
2215 BlockRange().Remove(std::move(secondArgRange));
2217 argEntry->node->gtOp.gtOp1 = callTarget;
2219 #elif defined(_TARGET_X86_)
2221 // Verify the special args are what we expect, and replace the dummy args with real values.
2222 // We need to figure out the size of the outgoing stack arguments, not including the special args.
2223 // The number of 4-byte words is passed to the helper for the incoming and outgoing argument sizes.
2224 // This number is exactly the next slot number in the call's argument info struct.
2225 unsigned nNewStkArgsWords = call->fgArgInfo->GetNextSlotNum();
2226 assert(nNewStkArgsWords >= 4); // There must be at least the four special stack args.
2227 nNewStkArgsWords -= 4;
// The special args are looked up from the end of the argument list: numArgs - 1 .. numArgs - 4.
2229 unsigned numArgs = call->fgArgInfo->ArgCount();
2231 fgArgTabEntry* argEntry;
2233 // arg 0 == callTarget.
2234 argEntry = comp->gtArgEntryByArgNum(call, numArgs - 1);
2235 assert(argEntry != nullptr);
2236 assert(argEntry->node->gtOper == GT_PUTARG_STK);
2237 GenTree* arg0 = argEntry->node->gtOp.gtOp1;
// Same replacement sequence as on the register-argument path: insert callTarget after the
// placeholder, remove the placeholder's tree, and retarget the PUTARG_STK operand.
2239 ContainCheckRange(callTargetRange);
2240 BlockRange().InsertAfter(arg0, std::move(callTargetRange));
2243 LIR::ReadOnlyRange secondArgRange = BlockRange().GetTreeRange(arg0, &isClosed);
2245 BlockRange().Remove(std::move(secondArgRange));
2247 argEntry->node->gtOp.gtOp1 = callTarget;
// arg 1 == flags
2250 argEntry = comp->gtArgEntryByArgNum(call, numArgs - 2);
2251 assert(argEntry != nullptr);
2252 assert(argEntry->node->gtOper == GT_PUTARG_STK);
2253 GenTree* arg1 = argEntry->node->gtOp.gtOp1;
2254 assert(arg1->gtOper == GT_CNS_INT);
2256 ssize_t tailCallHelperFlags = 1 | // always restore EDI,ESI,EBX
2257 (call->IsVirtualStub() ? 0x2 : 0x0); // Stub dispatch flag
2258 arg1->gtIntCon.gtIconVal = tailCallHelperFlags;
2260 // arg 2 == numberOfNewStackArgsWords
2261 argEntry = comp->gtArgEntryByArgNum(call, numArgs - 3);
2262 assert(argEntry != nullptr);
2263 assert(argEntry->node->gtOper == GT_PUTARG_STK);
2264 GenTree* arg2 = argEntry->node->gtOp.gtOp1;
2265 assert(arg2->gtOper == GT_CNS_INT);
2267 arg2->gtIntCon.gtIconVal = nNewStkArgsWords;
2270 // arg 3 == numberOfOldStackArgsWords
2271 argEntry = comp->gtArgEntryByArgNum(call, numArgs - 4);
2272 assert(argEntry != nullptr);
2273 assert(argEntry->node->gtOper == GT_PUTARG_STK);
2274 GenTree* arg3 = argEntry->node->gtOp.gtOp1;
2275 assert(arg3->gtOper == GT_CNS_INT);
2279 NYI("LowerTailCallViaHelper");
2282 // Transform this call node into a call to Jit tail call helper.
2283 call->gtCallType = CT_HELPER;
2284 call->gtCallMethHnd = comp->eeFindHelper(CORINFO_HELP_TAILCALL);
// Clear the bits selected by GTF_CALL_VIRT_KIND_MASK now that the node is a helper call.
2285 call->gtFlags &= ~GTF_CALL_VIRT_KIND_MASK;
2287 // Lower this as if it were a pure helper call.
// The tail call flags are cleared only for the duration of LowerDirectCall and restored right after.
2288 call->gtCallMoreFlags &= ~(GTF_CALL_M_TAILCALL | GTF_CALL_M_TAILCALL_VIA_HELPER);
2289 GenTree* result = LowerDirectCall(call);
2291 // Now add back tail call flags for identifying this node as tail call dispatched via helper.
2292 call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL | GTF_CALL_M_TAILCALL_VIA_HELPER;
2294 #ifdef PROFILING_SUPPORTED
2295 // Insert profiler tail call hook if needed.
2296 // Since we don't know the insertion point, pass null for second param.
2297 if (comp->compIsProfilerHookNeeded())
2299 InsertProfTailCallHook(call, nullptr);
2301 #endif // PROFILING_SUPPORTED
2303 assert(call->IsTailCallViaHelper());
2308 #ifndef _TARGET_64BIT_
2309 //------------------------------------------------------------------------
2310 // Lowering::DecomposeLongCompare: Decomposes a TYP_LONG compare node.
//
// Arguments:
2313 // cmp - the compare node
//
// Return Value:
2316 // The next node to lower.
//
// Notes:
2319 // This is done during lowering because DecomposeLongs handles only nodes
2320 // that produce TYP_LONG values. Compare nodes may consume TYP_LONG values
2321 // but produce TYP_INT values.
//
2323 GenTree* Lowering::DecomposeLongCompare(GenTree* cmp)
2325 assert(cmp->gtGetOp1()->TypeGet() == TYP_LONG);
// Both operands must be GT_LONG nodes. Pick up their lo/hi operands, then remove the
// GT_LONG nodes themselves from the block's range.
2327 GenTree* src1 = cmp->gtGetOp1();
2328 GenTree* src2 = cmp->gtGetOp2();
2329 assert(src1->OperIs(GT_LONG));
2330 assert(src2->OperIs(GT_LONG));
2331 GenTree* loSrc1 = src1->gtGetOp1();
2332 GenTree* hiSrc1 = src1->gtGetOp2();
2333 GenTree* loSrc2 = src2->gtGetOp1();
2334 GenTree* hiSrc2 = src2->gtGetOp2();
2335 BlockRange().Remove(src1);
2336 BlockRange().Remove(src2);
// 'condition' starts as the original relop and may be rewritten below (LE|GT handling).
2338 genTreeOps condition = cmp->OperGet();
2342 if (cmp->OperIs(GT_EQ, GT_NE))
2345 // Transform (x EQ|NE y) into (((x.lo XOR y.lo) OR (x.hi XOR y.hi)) EQ|NE 0). If y is 0 then this can
2346 // be reduced to just ((x.lo OR x.hi) EQ|NE 0). The OR is expected to set the condition flags so we
2347 // don't need to generate a redundant compare against 0, we only generate a SETCC|JCC instruction.
2349 // XOR is used rather than SUB because it is commutative and thus allows swapping the operands when
2350 // the first happens to be a constant. Usually only the second compare operand is a constant but it's
2351 // still possible to have a constant on the left side. For example, when src1 is a uint->ulong cast
2352 // then hiSrc1 would be 0.
2355 if (loSrc1->OperIs(GT_CNS_INT))
2357 std::swap(loSrc1, loSrc2);
2360 if (loSrc2->IsIntegralConst(0))
2362 BlockRange().Remove(loSrc2);
2367 loCmp = comp->gtNewOperNode(GT_XOR, TYP_INT, loSrc1, loSrc2);
2368 BlockRange().InsertBefore(cmp, loCmp);
2369 ContainCheckBinary(loCmp->AsOp());
// Same treatment for the high halves.
2372 if (hiSrc1->OperIs(GT_CNS_INT))
2374 std::swap(hiSrc1, hiSrc2);
2377 if (hiSrc2->IsIntegralConst(0))
2379 BlockRange().Remove(hiSrc2);
2384 hiCmp = comp->gtNewOperNode(GT_XOR, TYP_INT, hiSrc1, hiSrc2);
2385 BlockRange().InsertBefore(cmp, hiCmp);
2386 ContainCheckBinary(hiCmp->AsOp());
// Combine the low and high results with an OR; hiCmp now names the combined node.
2389 hiCmp = comp->gtNewOperNode(GT_OR, TYP_INT, loCmp, hiCmp);
2390 BlockRange().InsertBefore(cmp, hiCmp);
2391 ContainCheckBinary(hiCmp->AsOp());
2395 assert(cmp->OperIs(GT_LT, GT_LE, GT_GE, GT_GT));
2398 // If the compare is signed then (x LT|GE y) can be transformed into ((x SUB y) LT|GE 0).
2399 // If the compare is unsigned we can still use SUB but we need to check the Carry flag,
2400 // not the actual result. In both cases we can simply check the appropriate condition flags
2401 // and ignore the actual result:
2402 // SUB_LO loSrc1, loSrc2
2403 // SUB_HI hiSrc1, hiSrc2
2404 // SETCC|JCC (signed|unsigned LT|GE)
2405 // If loSrc2 happens to be 0 then the first SUB can be eliminated and the second one can
2406 // be turned into a CMP because the first SUB would have set carry to 0. This effectively
2407 // transforms a long compare against 0 into an int compare of the high part against 0.
2409 // (x LE|GT y) can be transformed into ((x SUB y) LE|GT 0) but checking that a long value
2410 // is greater than 0 is not so easy. We need to turn this into a positive/negative check
2411 // like the one we get for LT|GE compares, this can be achieved by swapping the compare:
2412 // (x LE|GT y) becomes (y GE|LT x)
2414 // Having to swap operands is problematic when the second operand is a constant. The constant
2415 // moves to the first operand where it cannot be contained and thus needs a register. This can
2416 // be avoided by changing the constant such that LE|GT becomes LT|GE:
2417 // (x LE|GT 41) becomes (x LT|GE 42)
2420 if (cmp->OperIs(GT_LE, GT_GT))
2422 bool mustSwap = true;
2424 if (loSrc2->OperIs(GT_CNS_INT) && hiSrc2->OperIs(GT_CNS_INT))
// Reassemble the 64-bit constant from its two 32-bit halves.
2426 uint32_t loValue = static_cast<uint32_t>(loSrc2->AsIntCon()->IconValue());
2427 uint32_t hiValue = static_cast<uint32_t>(hiSrc2->AsIntCon()->IconValue());
2428 uint64_t value = static_cast<uint64_t>(loValue) | (static_cast<uint64_t>(hiValue) << 32);
// The constant is only rewritten when it is not already the largest value for the
// compare's signedness.
2429 uint64_t maxValue = cmp->IsUnsigned() ? UINT64_MAX : INT64_MAX;
2431 if (value != maxValue)
// Split the 64-bit value back into halves and store them in the two constant nodes.
2434 loValue = value & UINT32_MAX;
2435 hiValue = (value >> 32) & UINT32_MAX;
2436 loSrc2->AsIntCon()->SetIconValue(loValue);
2437 hiSrc2->AsIntCon()->SetIconValue(hiValue);
2439 condition = cmp->OperIs(GT_LE) ? GT_LT : GT_GE;
2446 std::swap(loSrc1, loSrc2);
2447 std::swap(hiSrc1, hiSrc2);
2448 condition = GenTree::SwapRelop(condition);
2452 assert((condition == GT_LT) || (condition == GT_GE));
2454 if (loSrc2->IsIntegralConst(0))
2456 BlockRange().Remove(loSrc2);
2458 // Very conservative dead code removal... but it helps.
2460 if (loSrc1->OperIs(GT_CNS_INT, GT_LCL_VAR, GT_LCL_FLD))
2462 BlockRange().Remove(loSrc1);
2466 loSrc1->SetUnusedValue();
2469 hiCmp = comp->gtNewOperNode(GT_CMP, TYP_VOID, hiSrc1, hiSrc2);
2470 BlockRange().InsertBefore(cmp, hiCmp);
2471 ContainCheckCompare(hiCmp->AsOp());
// General form: a CMP of the low halves followed by a SUB_HI of the high halves.
2475 loCmp = comp->gtNewOperNode(GT_CMP, TYP_VOID, loSrc1, loSrc2);
2476 hiCmp = comp->gtNewOperNode(GT_SUB_HI, TYP_INT, hiSrc1, hiSrc2);
2477 BlockRange().InsertBefore(cmp, loCmp, hiCmp);
2478 ContainCheckCompare(loCmp->AsOp());
2479 ContainCheckBinary(hiCmp->AsOp());
2482 // Try to move the first SUB_HI operands right in front of it, this allows using
2483 // a single temporary register instead of 2 (one for CMP and one for SUB_HI). Do
2484 // this only for locals as they won't change condition flags. Note that we could
2485 // move constants (except 0 which generates XOR reg, reg) but it's extremely rare
2486 // to have a constant as the first operand.
2489 if (hiSrc1->OperIs(GT_LCL_VAR, GT_LCL_FLD))
2491 BlockRange().Remove(hiSrc1);
2492 BlockRange().InsertBefore(hiCmp, hiSrc1);
// hiCmp is marked as setting the flags that the JCC/SETCC built below consumes; if it
// produces a value, that value is marked unused.
2497 hiCmp->gtFlags |= GTF_SET_FLAGS;
2498 if (hiCmp->IsValue())
2500 hiCmp->SetUnusedValue();
// When the compare's user is a JTRUE, the compare node is removed and the JTRUE is
// rewritten into a JCC on 'condition'.
2504 if (BlockRange().TryGetUse(cmp, &cmpUse) && cmpUse.User()->OperIs(GT_JTRUE))
2506 BlockRange().Remove(cmp);
2508 GenTree* jcc = cmpUse.User();
2509 jcc->gtOp.gtOp1 = nullptr;
2510 jcc->ChangeOper(GT_JCC);
2511 jcc->gtFlags |= (cmp->gtFlags & GTF_UNSIGNED) | GTF_USE_FLAGS;
2512 jcc->AsCC()->gtCondition = condition;
// Rewrite the compare node in place as a SETCC on 'condition'; its operands are cleared
// first.
2516 cmp->gtOp.gtOp1 = nullptr;
2517 cmp->gtOp.gtOp2 = nullptr;
2518 cmp->ChangeOper(GT_SETCC);
2519 cmp->gtFlags |= GTF_USE_FLAGS;
2520 cmp->AsCC()->gtCondition = condition;
2525 #endif // !_TARGET_64BIT_
2527 //------------------------------------------------------------------------
2528 // Lowering::OptimizeConstCompare: Performs various "compare with const" optimizations.
//
// Arguments:
2531 // cmp - the compare node
//
// Return Value:
2534 // The original compare node if lowering should proceed as usual or the next node
2535 // to lower if the compare node was changed in such a way that lowering is no
// longer needed.
//
// Notes:
2539 // - Narrow operands to enable memory operand containment (XARCH specific).
2540 // - Transform cmp(and(x, y), 0) into test(x, y) (XARCH/Arm64 specific but could
2541 // be used for ARM as well if support for GT_TEST_EQ/GT_TEST_NE is added).
2542 // - Transform TEST(x, LSH(1, y)) into BT(x, y) (XARCH specific)
2543 // - Transform RELOP(OP, 0) into SETCC(OP) or JCC(OP) if OP can set the
2544 // condition flags appropriately (XARCH/ARM64 specific but could be extended
2545 // to ARM32 as well if ARM32 codegen supports GTF_SET_FLAGS).
//
2547 GenTree* Lowering::OptimizeConstCompare(GenTree* cmp)
// Precondition: the second operand is an integral constant.
2549 assert(cmp->gtGetOp2()->IsIntegralConst());
2551 #if defined(_TARGET_XARCH_) || defined(_TARGET_ARM64_)
2552 GenTree* op1 = cmp->gtGetOp1();
2553 var_types op1Type = op1->TypeGet();
2554 GenTreeIntCon* op2 = cmp->gtGetOp2()->AsIntCon();
2555 ssize_t op2Value = op2->IconValue();
2557 #ifdef _TARGET_XARCH_
2558 if (IsContainableMemoryOp(op1) && varTypeIsSmall(op1Type) && genSmallTypeCanRepresentValue(op1Type, op2Value))
2561 // If op1's type is small then try to narrow op2 so it has the same type as op1.
2562 // Small types are usually used by memory loads and if both compare operands have
2563 // the same type then the memory load can be contained. In certain situations
2564 // (e.g "cmp ubyte, 200") we also get a smaller instruction encoding.
2567 op2->gtType = op1Type;
2571 if (op1->OperIs(GT_CAST) && !op1->gtOverflow())
2573 GenTreeCast* cast = op1->AsCast();
2574 var_types castToType = cast->CastToType();
2575 GenTree* castOp = cast->gtGetOp1();
2577 if (((castToType == TYP_BOOL) || (castToType == TYP_UBYTE)) && FitsIn<UINT8>(op2Value))
2580 // Since we're going to remove the cast we need to be able to narrow the cast operand
2581 // to the cast type. This can be done safely only for certain opers (e.g AND, OR, XOR).
2582 // Some opers just can't be narrowed (e.g DIV, MUL) while other could be narrowed but
2583 // doing so would produce incorrect results (e.g. RSZ, RSH).
2585 // The below list of handled opers is conservative but enough to handle the most common
2586 // situations. In particular this includes CALL, sometimes the JIT unnecessarily widens
2587 // the result of bool returning calls.
2590 #ifdef _TARGET_ARM64_
2591 (op2Value == 0) && cmp->OperIs(GT_EQ, GT_NE, GT_GT) &&
2593 (castOp->OperIs(GT_CALL, GT_LCL_VAR) || castOp->OperIsLogical()
2594 #ifdef _TARGET_XARCH_
2595 || IsContainableMemoryOp(castOp)
2601 assert(!castOp->gtOverflowEx()); // Must not be an overflow checking operation
2603 #ifdef _TARGET_ARM64_
// ARM64: the compare becomes a TEST_EQ (for EQ) or TEST_NE (otherwise) against the
// constant 0xff, typed like the cast operand.
2604 bool cmpEq = cmp->OperIs(GT_EQ);
2606 cmp->SetOperRaw(cmpEq ? GT_TEST_EQ : GT_TEST_NE);
2607 op2->SetIconValue(0xff);
2608 op2->gtType = castOp->gtType;
// Narrow both the cast operand and the constant to the cast's target type.
2610 castOp->gtType = castToType;
2611 op2->gtType = castToType;
2613 // If we have any contained memory ops on castOp, they must now not be contained.
2614 if (castOp->OperIsLogical())
2616 GenTree* op1 = castOp->gtGetOp1();
2617 if ((op1 != nullptr) && !op1->IsCnsIntOrI())
2619 op1->ClearContained();
2621 GenTree* op2 = castOp->gtGetOp2();
2622 if ((op2 != nullptr) && !op2->IsCnsIntOrI())
2624 op2->ClearContained();
// The compare now consumes the cast operand directly and the cast node is removed.
2627 cmp->gtOp.gtOp1 = castOp;
2629 BlockRange().Remove(cast);
2633 else if (op1->OperIs(GT_AND) && cmp->OperIs(GT_EQ, GT_NE))
2636 // Transform ((x AND y) EQ|NE 0) into (x TEST_EQ|TEST_NE y) when possible.
2639 GenTree* andOp1 = op1->gtGetOp1();
2640 GenTree* andOp2 = op1->gtGetOp2();
2645 // If we don't have a 0 compare we can get one by transforming ((x AND mask) EQ|NE mask)
2646 // into ((x AND mask) NE|EQ 0) when mask is a single bit.
2649 if (isPow2(static_cast<size_t>(op2Value)) && andOp2->IsIntegralConst(op2Value))
2652 op2->SetIconValue(0);
2653 cmp->SetOperRaw(GenTree::ReverseRelop(cmp->OperGet()));
// Drop the AND node and the constant; the compare becomes a TEST over the AND's operands.
2659 BlockRange().Remove(op1);
2660 BlockRange().Remove(op2);
2662 cmp->SetOperRaw(cmp->OperIs(GT_EQ) ? GT_TEST_EQ : GT_TEST_NE);
2663 cmp->gtOp.gtOp1 = andOp1;
2664 cmp->gtOp.gtOp2 = andOp2;
2665 // We will re-evaluate containment below
2666 andOp1->ClearContained();
2667 andOp2->ClearContained();
2669 #ifdef _TARGET_XARCH_
2670 if (IsContainableMemoryOp(andOp1) && andOp2->IsIntegralConst())
2673 // For "test" we only care about the bits that are set in the second operand (mask).
2674 // If the mask fits in a small type then we can narrow both operands to generate a "test"
2675 // instruction with a smaller encoding ("test" does not have a r/m32, imm8 form) and avoid
2676 // a widening load in some cases.
2678 // For 16 bit operands we narrow only if the memory operand is already 16 bit. This matches
2679 // the behavior of a previous implementation and avoids adding more cases where we generate
2680 // 16 bit instructions that require a length changing prefix (0x66). These suffer from
2681 // significant decoder stalls on Intel CPUs.
2683 // We could also do this for 64 bit masks that fit into 32 bit but it doesn't help.
2684 // In such cases morph narrows down the existing GT_AND by inserting a cast between it and
2685 // the memory operand so we'd need to add more code to recognize and eliminate that cast.
2688 size_t mask = static_cast<size_t>(andOp2->AsIntCon()->IconValue());
2690 if (FitsIn<UINT8>(mask))
2692 andOp1->gtType = TYP_UBYTE;
2693 andOp2->gtType = TYP_UBYTE;
2695 else if (FitsIn<UINT16>(mask) && genTypeSize(andOp1) == 2)
2697 andOp1->gtType = TYP_USHORT;
2698 andOp2->gtType = TYP_USHORT;
2705 if (cmp->OperIs(GT_TEST_EQ, GT_TEST_NE))
2707 #ifdef _TARGET_XARCH_
2709 // Transform TEST_EQ|NE(x, LSH(1, y)) into BT(x, y) when possible. Using BT
2710 // results in smaller and faster code. It also doesn't have special register
2711 // requirements, unlike LSH that requires the shift count to be in ECX.
2712 // Note that BT has the same behavior as LSH when the bit index exceeds the
2713 // operand bit size - it uses (bit_index MOD bit_size).
2716 GenTree* lsh = cmp->gtGetOp2();
2719 if (lsh->OperIs(GT_LSH) && varTypeIsIntOrI(lsh->TypeGet()) && lsh->gtGetOp1()->IsIntegralConst(1) &&
2720 BlockRange().TryGetUse(cmp, &cmpUse))
// NOTE(review): TEST_NE maps to LT and TEST_EQ to GE, and the consumer is flagged
// GTF_UNSIGNED below - presumably because BT reports the tested bit through the carry
// flag; confirm against codegen.
2722 genTreeOps condition = cmp->OperIs(GT_TEST_NE) ? GT_LT : GT_GE;
2724 cmp->SetOper(GT_BT);
2725 cmp->gtType = TYP_VOID;
2726 cmp->gtFlags |= GTF_SET_FLAGS;
2727 cmp->gtOp.gtOp2 = lsh->gtGetOp2();
2728 cmp->gtGetOp2()->ClearContained();
// The constant 1 and the LSH node are no longer referenced.
2730 BlockRange().Remove(lsh->gtGetOp1());
2731 BlockRange().Remove(lsh);
// A JTRUE user is converted to a JCC; for any other user a new SETCC node is inserted
// after the BT and replaces the compare in that use.
2735 if (cmpUse.User()->OperIs(GT_JTRUE))
2737 cmpUse.User()->ChangeOper(GT_JCC);
2738 cc = cmpUse.User()->AsCC();
2739 cc->gtCondition = condition;
2743 cc = new (comp, GT_SETCC) GenTreeCC(GT_SETCC, condition, TYP_INT);
2744 BlockRange().InsertAfter(cmp, cc);
2745 cmpUse.ReplaceWith(comp, cc);
2748 cc->gtFlags |= GTF_USE_FLAGS | GTF_UNSIGNED;
2752 #endif // _TARGET_XARCH_
2754 else if (cmp->OperIs(GT_EQ, GT_NE))
2756 GenTree* op1 = cmp->gtGetOp1();
2757 GenTree* op2 = cmp->gtGetOp2();
2759 // TODO-CQ: right now the below peep is inexpensive and gets the benefit in most
2760 // cases because in majority of cases op1, op2 and cmp would be in that order in
2761 // execution. In general we should be able to check that all the nodes that come
2762 // after op1 do not modify the flags so that it is safe to avoid generating a
2763 // test instruction.
2765 if (op2->IsIntegralConst(0) && (op1->gtNext == op2) && (op2->gtNext == cmp) &&
2766 #ifdef _TARGET_XARCH_
2767 op1->OperIs(GT_AND, GT_OR, GT_XOR, GT_ADD, GT_SUB, GT_NEG))
2768 #else // _TARGET_ARM64_
2769 op1->OperIs(GT_AND, GT_ADD, GT_SUB))
// op1 itself is marked as setting the flags and its value as unused; the zero constant
// is removed.
2772 op1->gtFlags |= GTF_SET_FLAGS;
2773 op1->SetUnusedValue();
2775 BlockRange().Remove(op2);
2777 GenTree* next = cmp->gtNext;
2782 // Fast check for the common case - relop used by a JTRUE that immediately follows it.
2783 if ((next != nullptr) && next->OperIs(GT_JTRUE) && (next->gtGetOp1() == cmp))
2788 BlockRange().Remove(cmp);
2790 else if (BlockRange().TryGetUse(cmp, &cmpUse) && cmpUse.User()->OperIs(GT_JTRUE))
2795 BlockRange().Remove(cmp);
2797 else // The relop is not used by a JTRUE or it is not used at all.
2799 // Transform the relop node into a SETCC. If it's not used we could remove
2800 // it completely but that means doing more work to handle a rare case.
// The selected node takes over the relop's condition and unsigned-ness and is marked as a
// flags consumer.
2805 genTreeOps condition = cmp->OperGet();
2806 cc->ChangeOper(ccOp);
2807 cc->AsCC()->gtCondition = condition;
2808 cc->gtFlags |= GTF_USE_FLAGS | (cmp->gtFlags & GTF_UNSIGNED);
2813 #endif // defined(_TARGET_XARCH_) || defined(_TARGET_ARM64_)
2818 //------------------------------------------------------------------------
2819 // Lowering::LowerCompare: Lowers a compare node.
//
// Arguments:
2822 // cmp - the compare node
//
// Return Value:
2825 // The next node to lower.
//
2827 GenTree* Lowering::LowerCompare(GenTree* cmp)
2829 #ifndef _TARGET_64BIT_
// When _TARGET_64BIT_ is not defined, a compare whose first operand is TYP_LONG is handed
// off to DecomposeLongCompare.
2830 if (cmp->gtGetOp1()->TypeGet() == TYP_LONG)
2832 return DecomposeLongCompare(cmp);
// The constant-compare optimizations are attempted only when not compiling with MinOpts.
2836 if (cmp->gtGetOp2()->IsIntegralConst() && !comp->opts.MinOpts())
2838 GenTree* next = OptimizeConstCompare(cmp);
2840 // If OptimizeConstCompare returns the compare node as "next" then we need to continue lowering.
2847 #ifdef _TARGET_XARCH_
2848 if (cmp->gtGetOp1()->TypeGet() == cmp->gtGetOp2()->TypeGet())
2850 if (varTypeIsSmall(cmp->gtGetOp1()->TypeGet()) && varTypeIsUnsigned(cmp->gtGetOp1()->TypeGet()))
2853 // If both operands have the same type then codegen will use the common operand type to
2854 // determine the instruction type. For small types this would result in performing a
2855 // signed comparison of two small unsigned values without zero extending them to TYP_INT
2856 // which is incorrect. Note that making the comparison unsigned doesn't imply that codegen
2857 // has to generate a small comparison, it can still correctly generate a TYP_INT comparison.
2860 cmp->gtFlags |= GTF_UNSIGNED;
2863 #endif // _TARGET_XARCH_
// Finally, run the containment analysis for the compare.
2864 ContainCheckCompare(cmp->AsOp());
2868 //------------------------------------------------------------------------
2869 // Lowering::LowerJTrue: Lowers a JTRUE node.
//
// Arguments:
2872 // jtrue - the JTRUE node
//
// Return Value:
2875 // The next node to lower (usually nullptr).
//
// Notes:
2878 // On ARM64 this may remove the JTRUE node and transform its associated
2879 // relop into a JCMP node.
//
2881 GenTree* Lowering::LowerJTrue(GenTreeOp* jtrue)
2883 #ifdef _TARGET_ARM64_
2884 GenTree* relop = jtrue->gtGetOp1();
2885 GenTree* relopOp2 = relop->gtOp.gtGetOp2();
// Only a relop that immediately precedes the JTRUE and whose second operand is an integer
// constant is considered.
2887 if ((relop->gtNext == jtrue) && relopOp2->IsCnsIntOrI())
2889 bool useJCMP = false;
2892 if (relop->OperIs(GT_EQ, GT_NE) && relopOp2->IsIntegralConst(0))
2894 // Codegen will use cbz or cbnz in codegen which do not affect the flag register
2895 flags = relop->OperIs(GT_EQ) ? GTF_JCMP_EQ : 0;
2898 else if (relop->OperIs(GT_TEST_EQ, GT_TEST_NE) && isPow2(relopOp2->AsIntCon()->IconValue()))
2900 // Codegen will use tbz or tbnz in codegen which do not affect the flag register
2901 flags = GTF_JCMP_TST | (relop->OperIs(GT_TEST_EQ) ? GTF_JCMP_EQ : 0);
// The relop is turned into a void-typed JCMP: stale GTF_JCMP_* bits are cleared before the
// computed flags are applied, the constant operand is contained, and the JTRUE is removed.
2907 relop->SetOper(GT_JCMP);
2908 relop->gtFlags &= ~(GTF_JCMP_TST | GTF_JCMP_EQ);
2909 relop->gtFlags |= flags;
2910 relop->gtType = TYP_VOID;
2912 relopOp2->SetContained();
2914 BlockRange().Remove(jtrue);
// With the JTRUE removed, the JCMP is expected to be the last node in the range.
2916 assert(relop->gtNext == nullptr);
2920 #endif // _TARGET_ARM64_
2922 ContainCheckJTrue(jtrue);
// The JTRUE is expected to be the last node in the range.
2924 assert(jtrue->gtNext == nullptr);
//------------------------------------------------------------------------
2928 // Lower "jmp <method>" tail call to insert PInvoke method epilog if required.
//
// Arguments:
//    jmp - the GT_JMP node (asserted below)
//
2929 void Lowering::LowerJmpMethod(GenTree* jmp)
2931 assert(jmp->OperGet() == GT_JMP);
2933 JITDUMP("lowering GT_JMP\n");
2935 JITDUMP("============");
2937 // If PInvokes are in-lined, we have to remember to execute PInvoke method epilog anywhere that
2938 // a method returns.
2939 if (comp->info.compCallUnmanaged)
2941 InsertPInvokeMethodEpilog(comp->compCurBB DEBUGARG(jmp));
//------------------------------------------------------------------------
2945 // Lower GT_RETURN node to insert PInvoke method epilog if required.
//
// Arguments:
//    ret - the GT_RETURN node (asserted below)
//
2946 void Lowering::LowerRet(GenTree* ret)
2948 assert(ret->OperGet() == GT_RETURN);
2950 JITDUMP("lowering GT_RETURN\n");
2952 JITDUMP("============");
2954 #if defined(_TARGET_AMD64_) && defined(FEATURE_SIMD)
// A TYP_LONG return whose operand is TYP_SIMD8 gets an explicit GT_BITCAST to TYP_LONG
// inserted between the operand and the return node.
2955 GenTreeUnOp* const unOp = ret->AsUnOp();
2956 if ((unOp->TypeGet() == TYP_LONG) && (unOp->gtOp1->TypeGet() == TYP_SIMD8))
2958 GenTreeUnOp* bitcast = new (comp, GT_BITCAST) GenTreeOp(GT_BITCAST, TYP_LONG, unOp->gtOp1, nullptr);
2959 unOp->gtOp1 = bitcast;
2960 BlockRange().InsertBefore(unOp, bitcast);
2962 #endif // defined(_TARGET_AMD64_) && defined(FEATURE_SIMD)
2964 // Method doing PInvokes has exactly one return block unless it has tail calls.
2965 if (comp->info.compCallUnmanaged && (comp->compCurBB == comp->genReturnBB))
2967 InsertPInvokeMethodEpilog(comp->compCurBB DEBUGARG(ret));
// Finally, run the containment analysis for the return node.
2969 ContainCheckRet(ret->AsOp());
//------------------------------------------------------------------------
// LowerDirectCall: Determine how the target of a direct user or helper call
// is reached and build the tree that computes the target address when one is
// needed.
//
// Arguments:
//    call - the call to lower; must be CT_USER_FUNC or CT_HELPER.
//
// Return Value:
//    NOTE(review): presumably the tree built in 'result', which stays nullptr
//    when the target is an in-range direct address that was stashed in
//    call->gtDirectCallAddress - confirm against the return statement.
//
2972 GenTree* Lowering::LowerDirectCall(GenTreeCall* call)
2974 noway_assert(call->gtCallType == CT_USER_FUNC || call->gtCallType == CT_HELPER);
2976 // Don't support tail calling helper methods.
2977 // But we might encounter tail calls dispatched via JIT helper appear as a tail call to helper.
2978 noway_assert(!call->IsTailCall() || call->IsTailCallViaHelper() || call->gtCallType == CT_USER_FUNC);
2980 // Non-virtual direct/indirect calls: Work out if the address of the
2981 // call is known at JIT time. If not it is either an indirect call
2982 // or the address must be accessed via a single/double indirection.
2985 InfoAccessType accessType;
2986 CorInfoHelpFunc helperNum = comp->eeGetHelperNum(call->gtCallMethHnd);
2988 #ifdef FEATURE_READYTORUN_COMPILER
// A non-null entry point already recorded on the call takes precedence over
// asking the EE for the address.
2989 if (call->gtEntryPoint.addr != nullptr)
2991 accessType = call->gtEntryPoint.accessType;
2992 addr = call->gtEntryPoint.addr;
// Helper call: ask the EE for the helper's address.
2996 if (call->gtCallType == CT_HELPER)
2998 noway_assert(helperNum != CORINFO_HELP_UNDEF);
3000 // the convention on getHelperFtn seems to be (it's not documented)
3001 // that it returns an address or if it returns null, pAddr is set to
3002 // another address, which requires an indirection
3004 addr = comp->info.compCompHnd->getHelperFtn(helperNum, (void**)&pAddr);
3006 if (addr != nullptr)
3008 assert(pAddr == nullptr);
3009 accessType = IAT_VALUE;
3013 accessType = IAT_PVALUE;
// User function: the method handle must not map to a JIT helper.
3019 noway_assert(helperNum == CORINFO_HELP_UNDEF);
// Build the access flags passed to getFunctionEntryPoint from what is known
// about the call ('same this' and 'no null check needed').
3021 CORINFO_ACCESS_FLAGS aflags = CORINFO_ACCESS_ANY;
3023 if (call->IsSameThis())
3025 aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_THIS);
3028 if (!call->NeedsNullCheck())
3030 aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_NONNULL);
3033 CORINFO_CONST_LOOKUP addrInfo;
3034 comp->info.compCompHnd->getFunctionEntryPoint(call->gtCallMethHnd, &addrInfo, aflags);
3036 accessType = addrInfo.accessType;
3037 addr = addrInfo.addr;
// From here on, 'accessType' selects how 'addr' is turned into a call target.
3040 GenTree* result = nullptr;
3044 // Non-virtual direct call to known address
// Out-of-range targets and tail calls get an explicit address node.
3045 if (!IsCallTargetInRange(addr) || call->IsTailCall())
3047 result = AddrGen(addr);
3051 // a direct call within range of hardware relative call instruction
3052 // stash the address for codegen
3053 call->gtDirectCallAddress = addr;
3059 // Non-virtual direct calls to addresses accessed by
3060 // a single indirection.
3061 GenTree* cellAddr = AddrGen(addr);
3062 GenTree* indir = Ind(cellAddr);
3068 // Non-virtual direct calls to addresses accessed by
3069 // a double indirection.
3071 // Double-indirection. Load the address into a register
3072 // and call indirectly through the register
3073 noway_assert(helperNum == CORINFO_HELP_UNDEF);
3074 result = AddrGen(addr);
3075 result = Ind(Ind(result));
3080 // Non-virtual direct calls to addresses accessed by
3081 // a single relative indirection.
// The target is the value loaded from the cell plus the cell's own address.
3082 GenTree* cellAddr = AddrGen(addr);
3083 GenTree* indir = Ind(cellAddr);
3084 result = comp->gtNewOperNode(GT_ADD, TYP_I_IMPL, indir, AddrGen(addr));
3089 noway_assert(!"Bad accessType");
//------------------------------------------------------------------------
// LowerDelegateInvoke: Lower a call to a delegate's Invoke method.
//
// Arguments:
//    call - the call to lower; must be CT_USER_FUNC and is asserted to target
//           a method flagged CORINFO_FLG_DELEGATE_INVOKE | CORINFO_FLG_FINAL.
//
// Notes:
//    The 'this' argument is rewritten to [this + offsetOfDelegateInstance]
//    and a tree loading the control target from
//    [this + offsetOfDelegateFirstTarget] is built. That target tree is not
//    inserted into the block here; the caller is expected to do so.
//
3096 GenTree* Lowering::LowerDelegateInvoke(GenTreeCall* call)
3098 noway_assert(call->gtCallType == CT_USER_FUNC);
3100 assert((comp->info.compCompHnd->getMethodAttribs(call->gtCallMethHnd) &
3101 (CORINFO_FLG_DELEGATE_INVOKE | CORINFO_FLG_FINAL)) == (CORINFO_FLG_DELEGATE_INVOKE | CORINFO_FLG_FINAL));
// Locate the argument node that carries the 'this' pointer.
3103 GenTree* thisArgNode;
3104 if (call->IsTailCallViaHelper())
3106 #ifdef _TARGET_X86_ // x86 tailcall via helper follows normal calling convention, but with extra stack args.
3107 const unsigned argNum = 0;
3108 #else // !_TARGET_X86_
3109 // In case of helper dispatched tail calls, "thisptr" will be the third arg.
3110 // The first two args are: real call target and addr of args copy routine.
3111 const unsigned argNum = 2;
3112 #endif // !_TARGET_X86_
3114 fgArgTabEntry* thisArgTabEntry = comp->gtArgEntryByArgNum(call, argNum);
3115 thisArgNode = thisArgTabEntry->node;
3119 thisArgNode = comp->gtGetThisArg(call);
3122 assert(thisArgNode->gtOper == GT_PUTARG_REG);
3123 GenTree* originalThisExpr = thisArgNode->gtOp.gtOp1;
3124 GenTree* thisExpr = originalThisExpr;
3126 // We're going to use the 'this' expression multiple times, so make a local to copy it.
3131 if (call->IsTailCallViaHelper() && originalThisExpr->IsLocal())
3133 // For ordering purposes for the special tailcall arguments on x86, we forced the
3134 // 'this' pointer in this case to a local in Compiler::fgMorphTailCall().
3135 // We could possibly use this case to remove copies for all architectures and non-tailcall
3136 // calls by creating a new lcl var or lcl field reference, as is done in the
3137 // LowerVirtualVtableCall() code.
3138 assert(originalThisExpr->OperGet() == GT_LCL_VAR);
3139 lclNum = originalThisExpr->AsLclVarCommon()->GetLclNum();
3142 #endif // _TARGET_X86_
// Spill the 'this' expression to a fresh temp so it can be referenced again
// below; 'lclNum' then names that temp.
3144 unsigned delegateInvokeTmp = comp->lvaGrabTemp(true DEBUGARG("delegate invoke call"));
3146 LIR::Use thisExprUse(BlockRange(), &thisArgNode->gtOp.gtOp1, thisArgNode);
3147 ReplaceWithLclVar(thisExprUse, delegateInvokeTmp);
3149 thisExpr = thisExprUse.Def(); // it's changed; reload it.
3150 lclNum = delegateInvokeTmp;
3153 // replace original expression feeding into thisPtr with
3154 // [originalThis + offsetOfDelegateInstance]
3156 GenTree* newThisAddr = new (comp, GT_LEA)
3157 GenTreeAddrMode(TYP_BYREF, thisExpr, nullptr, 0, comp->eeGetEEInfo()->offsetOfDelegateInstance);
3159 GenTree* newThis = comp->gtNewOperNode(GT_IND, TYP_REF, newThisAddr);
// The address computation and the load are placed right after 'thisExpr' and
// the load becomes the new operand of the 'this' argument node.
3161 BlockRange().InsertAfter(thisExpr, newThisAddr, newThis);
3163 thisArgNode->gtOp.gtOp1 = newThis;
3164 ContainCheckIndir(newThis->AsIndir());
3166 // the control target is
3167 // [originalThis + firstTgtOffs]
// A second reference to the local 'lclNum' serves as the base of the target load.
3169 GenTree* base = new (comp, GT_LCL_VAR) GenTreeLclVar(originalThisExpr->TypeGet(), lclNum, BAD_IL_OFFSET);
3171 unsigned targetOffs = comp->eeGetEEInfo()->offsetOfDelegateFirstTarget;
3172 GenTree* result = new (comp, GT_LEA) GenTreeAddrMode(TYP_REF, base, nullptr, 0, targetOffs);
3173 GenTree* callTarget = Ind(result);
3175 // don't need to sequence and insert this tree, caller will do it
//------------------------------------------------------------------------
// LowerIndirectNonvirtCall: Lower an indirect non-virtual call.
//
// Arguments:
//    call - the call to lower.
//
// Notes:
//    A call that still carries a call cookie is reported through NYI_X86;
//    otherwise the cookie is asserted to be absent.
//
3180 GenTree* Lowering::LowerIndirectNonvirtCall(GenTreeCall* call)
3183 if (call->gtCallCookie != nullptr)
3185 NYI_X86("Morphing indirect non-virtual call with non-standard args");
3189 // Indirect cookie calls get transformed by fgMorphArgs into indirect calls with non-standard args.
3190 // Hence we should never see this type of call in lower.
3192 noway_assert(call->gtCallCookie == nullptr);
3197 //------------------------------------------------------------------------
3198 // CreateReturnTrapSeq: Create a tree to perform a "return trap", used in PInvoke
3199 // epilogs to invoke a GC under a condition. The return trap checks some global
3200 // location (the runtime tells us where that is and how many indirections to make),
3201 // then, based on the result, conditionally calls a GC helper. We use a special node
3202 // for this because at this time (late in the compilation phases), introducing flow
3203 // is tedious/difficult.
3205 // This is used for PInvoke inlining.
//
// Return Value:
3208 // Code tree to perform the action.
3210 GenTree* Lowering::CreateReturnTrapSeq()
3212 // The GT_RETURNTRAP node expands to this:
3213 // if (g_TrapReturningThreads)
3215 // RareDisablePreemptiveGC();
3218 // The only thing to do here is build up the expression that evaluates 'g_TrapReturningThreads'.
3220 void* pAddrOfCaptureThreadGlobal = nullptr;
3221 LONG* addrOfCaptureThreadGlobal = comp->info.compCompHnd->getAddrOfCaptureThreadGlobal(&pAddrOfCaptureThreadGlobal);
// If the EE returned the global's address directly, a single load reads it;
// otherwise the out-parameter is used and two loads are required.
3224 if (addrOfCaptureThreadGlobal != nullptr)
3226 testTree = Ind(AddrGen(addrOfCaptureThreadGlobal));
3230 testTree = Ind(Ind(AddrGen(pAddrOfCaptureThreadGlobal)));
3232 return comp->gtNewOperNode(GT_RETURNTRAP, TYP_INT, testTree);
3235 //------------------------------------------------------------------------
3236 // SetGCState: Create a tree that stores the given constant (0 or 1) into the
3237 // thread's GC state field.
3239 // This is used for PInvoke inlining.
//
// Arguments:
3242 // state - constant (0 or 1) to store into the thread's GC state field.
//
// Return Value:
3245 // Code tree to perform the action.
//
// Notes:
//    The returned GT_STOREIND is not inserted into any block here.
//
3247 GenTree* Lowering::SetGCState(int state)
3249 // Thread.offsetOfGcState = 0/1
3251 assert(state == 0 || state == 1);
3253 const CORINFO_EE_INFO* pInfo = comp->eeGetEEInfo();
// Base address: the frame list root local (compLvFrameListRoot).
3255 GenTree* base = new (comp, GT_LCL_VAR) GenTreeLclVar(TYP_I_IMPL, comp->info.compLvFrameListRoot, -1);
// The state is written as a byte-sized store to [base + offsetOfGCState].
3257 GenTree* stateNode = new (comp, GT_CNS_INT) GenTreeIntCon(TYP_BYTE, state);
3258 GenTree* addr = new (comp, GT_LEA) GenTreeAddrMode(TYP_I_IMPL, base, nullptr, 1, pInfo->offsetOfGCState);
3259 GenTree* storeGcState = new (comp, GT_STOREIND) GenTreeStoreInd(TYP_BYTE, addr, stateNode);
3260 return storeGcState;
3263 //------------------------------------------------------------------------
3264 // CreateFrameLinkUpdate: Create a tree that either links or unlinks the
3265 // locally-allocated InlinedCallFrame from the Frame list.
3267 // This is used for PInvoke inlining.
//
// Arguments:
3270 // action - whether to link (push) or unlink (pop) the Frame
//
// Return Value:
3273 // Code tree to perform the action.
3275 GenTree* Lowering::CreateFrameLinkUpdate(FrameLinkAction action)
3277 const CORINFO_EE_INFO* pInfo = comp->eeGetEEInfo();
3278 const CORINFO_EE_INFO::InlinedCallFrameInfo& callFrameInfo = pInfo->inlinedCallFrameInfo;
// The frame list root local serves as the base for the thread's frame slot.
3280 GenTree* TCB = new (comp, GT_LCL_VAR) GenTreeLclVar(GT_LCL_VAR, TYP_I_IMPL, comp->info.compLvFrameListRoot,
3281 (IL_OFFSET)-1); // cast to resolve ambiguity.
// Destination of the store: [TCB + offsetOfThreadFrame].
3284 GenTree* addr = new (comp, GT_LEA) GenTreeAddrMode(TYP_I_IMPL, TCB, nullptr, 1, pInfo->offsetOfThreadFrame);
3286 GenTree* data = nullptr;
3288 if (action == PushFrame)
3290 // Thread->m_pFrame = &inlinedCallFrame;
// The address stored is that of the frame's vptr field (offsetOfFrameVptr),
// not the start of the InlinedCallFrame local.
3291 data = new (comp, GT_LCL_FLD_ADDR)
3292 GenTreeLclFld(GT_LCL_FLD_ADDR, TYP_BYREF, comp->lvaInlinedPInvokeFrameVar, callFrameInfo.offsetOfFrameVptr);
3296 assert(action == PopFrame);
3297 // Thread->m_pFrame = inlinedCallFrame.m_pNext;
3299 data = new (comp, GT_LCL_FLD) GenTreeLclFld(GT_LCL_FLD, TYP_BYREF, comp->lvaInlinedPInvokeFrameVar,
3300 pInfo->inlinedCallFrameInfo.offsetOfFrameLink);
3302 GenTree* storeInd = new (comp, GT_STOREIND) GenTreeStoreInd(TYP_I_IMPL, addr, data);
3306 //------------------------------------------------------------------------
3307 // InsertPInvokeMethodProlog: Create the code that runs at the start of
3308 // every method that has PInvoke calls.
3310 // Initialize the TCB local and the InlinedCallFrame object. Then link ("push")
3311 // the InlinedCallFrame object on the Frame chain. The layout of InlinedCallFrame
3312 // is defined in vm/frames.h. See also vm/jitinterface.cpp for more information.
3313 // The offsets of these fields are returned by the VM in a call to ICorStaticInfo::getEEInfo().
3315 // The (current) layout is as follows:
3317 // 64-bit 32-bit CORINFO_EE_INFO
3318 // offset offset field name offset when set
3319 // -----------------------------------------------------------------------------------------
3320 // +00h +00h GS cookie offsetOfGSCookie
3321 // +08h +04h vptr for class InlinedCallFrame offsetOfFrameVptr method prolog
3322 // +10h +08h m_Next offsetOfFrameLink method prolog
3323 // +18h +0Ch m_Datum offsetOfCallTarget call site
3324 // +20h n/a m_StubSecretArg not set by JIT
3325 // +28h +10h m_pCallSiteSP offsetOfCallSiteSP x86: call site, and zeroed in method
3327 // non-x86: method prolog (SP remains
3328 // constant in function, after prolog: no
3329 // localloc and PInvoke in same function)
3330 // +30h +14h m_pCallerReturnAddress offsetOfReturnAddress call site
3331 // +38h +18h m_pCalleeSavedFP offsetOfCalleeSavedFP not set by JIT
3332 // +1Ch JIT retval spill area (int) before call_gc ???
3333 // +20h JIT retval spill area (long) before call_gc ???
3334 // +24h Saved value of EBP method prolog ???
3336 // Note that in the VM, InlinedCallFrame is a C++ class whose objects have a 'this' pointer that points
3337 // to the InlinedCallFrame vptr (the 2nd field listed above), and the GS cookie is stored *before*
3338 // the object. When we link the InlinedCallFrame onto the Frame chain, we must point at this location,
3339 // and not at the beginning of the InlinedCallFrame local, which is actually the GS cookie.
//
// Return Value:
//    None. All generated code is inserted into the method's first block.
//
3344 void Lowering::InsertPInvokeMethodProlog()
3346 noway_assert(comp->info.compCallUnmanaged);
3347 noway_assert(comp->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
// NOTE(review): presumably no inline prolog is generated when helper-based
// PInvoke transitions are in use - confirm.
3349 if (comp->opts.ShouldUsePInvokeHelpers())
3354 JITDUMP("======= Inserting PInvoke method prolog\n");
3356 // The first BB must be a scratch BB in order for us to be able to safely insert the P/Invoke prolog.
3357 assert(comp->fgFirstBBisScratch());
3359 LIR::Range& firstBlockRange = LIR::AsRange(comp->fgFirstBB);
3361 const CORINFO_EE_INFO* pInfo = comp->eeGetEEInfo();
3362 const CORINFO_EE_INFO::InlinedCallFrameInfo& callFrameInfo = pInfo->inlinedCallFrameInfo;
3364 // First arg: &compiler->lvaInlinedPInvokeFrameVar + callFrameInfo.offsetOfFrameVptr
3366 GenTree* frameAddr = new (comp, GT_LCL_FLD_ADDR)
3367 GenTreeLclFld(GT_LCL_FLD_ADDR, TYP_BYREF, comp->lvaInlinedPInvokeFrameVar, callFrameInfo.offsetOfFrameVptr);
3369 // Call runtime helper to fill in our InlinedCallFrame and push it on the Frame list:
3370 // TCB = CORINFO_HELP_INIT_PINVOKE_FRAME(&symFrameStart, secretArg);
3371 // for x86, don't pass the secretArg.
3372 CLANG_FORMAT_COMMENT_ANCHOR;
3374 #if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
3375 GenTreeArgList* argList = comp->gtNewArgList(frameAddr);
3377 GenTreeArgList* argList = comp->gtNewArgList(frameAddr, PhysReg(REG_SECRET_STUB_PARAM));
3380 GenTree* call = comp->gtNewHelperCallNode(CORINFO_HELP_INIT_PINVOKE_FRAME, TYP_I_IMPL, argList);
3382 // some sanity checks on the frame list root vardsc
3383 LclVarDsc* varDsc = &comp->lvaTable[comp->info.compLvFrameListRoot];
3384 noway_assert(!varDsc->lvIsParam);
3385 noway_assert(varDsc->lvType == TYP_I_IMPL);
// Store the helper's result into the frame list root local.
3388 new (comp, GT_STORE_LCL_VAR) GenTreeLclVar(GT_STORE_LCL_VAR, TYP_I_IMPL, comp->info.compLvFrameListRoot,
3389 (IL_OFFSET)-1); // cast to resolve ambiguity.
3390 store->gtOp.gtOp1 = call;
3391 store->gtFlags |= GTF_VAR_DEF;
// Everything generated below is inserted before the first node of the first
// block that is neither a phi nor a catch-arg node.
3393 GenTree* const insertionPoint = firstBlockRange.FirstNonPhiOrCatchArgNode();
3395 comp->fgMorphTree(store);
3396 firstBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, store));
3397 DISPTREERANGE(firstBlockRange, store);
3399 #if !defined(_TARGET_X86_) && !defined(_TARGET_ARM_)
3400 // For x86, this step is done at the call site (due to stack pointer not being static in the function).
3401 // For arm32, CallSiteSP is set up by the call to CORINFO_HELP_INIT_PINVOKE_FRAME.
3403 // --------------------------------------------------------
3404 // InlinedCallFrame.m_pCallSiteSP = @RSP;
3406 GenTreeLclFld* storeSP = new (comp, GT_STORE_LCL_FLD)
3407 GenTreeLclFld(GT_STORE_LCL_FLD, TYP_I_IMPL, comp->lvaInlinedPInvokeFrameVar, callFrameInfo.offsetOfCallSiteSP);
3408 storeSP->gtOp1 = PhysReg(REG_SPBASE);
3409 storeSP->gtFlags |= GTF_VAR_DEF;
3411 firstBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, storeSP));
3412 DISPTREERANGE(firstBlockRange, storeSP);
3414 #endif // !defined(_TARGET_X86_) && !defined(_TARGET_ARM_)
3416 #if !defined(_TARGET_ARM_)
3417 // For arm32, CalleeSavedFP is set up by the call to CORINFO_HELP_INIT_PINVOKE_FRAME.
3419 // --------------------------------------------------------
3420 // InlinedCallFrame.m_pCalleeSavedEBP = @RBP;
3422 GenTreeLclFld* storeFP =
3423 new (comp, GT_STORE_LCL_FLD) GenTreeLclFld(GT_STORE_LCL_FLD, TYP_I_IMPL, comp->lvaInlinedPInvokeFrameVar,
3424 callFrameInfo.offsetOfCalleeSavedFP);
3425 storeFP->gtOp1 = PhysReg(REG_FPBASE);
3426 storeFP->gtFlags |= GTF_VAR_DEF;
3428 firstBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, storeFP));
3429 DISPTREERANGE(firstBlockRange, storeFP);
3430 #endif // !defined(_TARGET_ARM_)
3432 // --------------------------------------------------------
3433 // On 32-bit targets, CORINFO_HELP_INIT_PINVOKE_FRAME initializes the PInvoke frame and then pushes it onto
3434 // the current thread's Frame stack. On 64-bit targets, it only initializes the PInvoke frame.
3435 CLANG_FORMAT_COMMENT_ANCHOR;
3437 #ifdef _TARGET_64BIT_
3438 if (comp->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_IL_STUB))
3440 // Push a frame - if we are NOT in an IL stub, this is done right before the call
3441 // The init routine sets InlinedCallFrame's m_pNext, so we just set the thread's top-of-stack
3442 GenTree* frameUpd = CreateFrameLinkUpdate(PushFrame);
3443 firstBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, frameUpd));
3444 ContainCheckStoreIndir(frameUpd->AsIndir());
3445 DISPTREERANGE(firstBlockRange, frameUpd);
3447 #endif // _TARGET_64BIT_
3450 //------------------------------------------------------------------------
3451 // InsertPInvokeMethodEpilog: Code that needs to be run when exiting any method
3452 // that has PInvoke inlines. This needs to be inserted any place you can exit the
3453 // function: returns, tailcalls and jmps.
//
// Arguments:
3456 // returnBB - basic block from which a method can return
3457 // lastExpr - GenTree of the last top level stmnt of returnBB (debug only arg)
//
// Return Value:
3460 // None. The generated code is inserted into returnBB before its last node.
3462 void Lowering::InsertPInvokeMethodEpilog(BasicBlock* returnBB DEBUGARG(GenTree* lastExpr))
3464 assert(returnBB != nullptr);
3465 assert(comp->info.compCallUnmanaged);
// NOTE(review): presumably no inline epilog is generated when helper-based
// PInvoke transitions are in use - confirm.
3467 if (comp->opts.ShouldUsePInvokeHelpers())
3472 JITDUMP("======= Inserting PInvoke method epilog\n");
3474 // Method doing PInvoke calls has exactly one return block unless it has "jmp" or tail calls.
3475 assert(((returnBB == comp->genReturnBB) && (returnBB->bbJumpKind == BBJ_RETURN)) ||
3476 returnBB->endsWithTailCallOrJmp(comp));
3478 LIR::Range& returnBlockRange = LIR::AsRange(returnBB);
// The epilog goes in front of the block's last node, which must be lastExpr.
3480 GenTree* insertionPoint = returnBlockRange.LastNode();
3481 assert(insertionPoint == lastExpr);
3483 // Note: PInvoke Method Epilog (PME) needs to be inserted just before GT_RETURN, GT_JMP or GT_CALL node in execution
3484 // order so that it is guaranteed that there will be no further PInvokes after that point in the method.
3486 // Example1: GT_RETURN(op1) - say execution order is: Op1, GT_RETURN. After inserting PME, execution order would be
3487 // Op1, PME, GT_RETURN
3489 // Example2: GT_CALL(arg side effect computing nodes, Stk Args Setup, Reg Args setup). The execution order would be
3490 // arg side effect computing nodes, Stk Args setup, Reg Args setup, GT_CALL
3491 // After inserting PME execution order would be:
3492 // arg side effect computing nodes, Stk Args setup, Reg Args setup, PME, GT_CALL
3494 // Example3: GT_JMP. After inserting PME execution order would be: PME, GT_JMP
3495 // That is after PME, args for GT_JMP call will be setup.
3497 // TODO-Cleanup: setting GCState to 1 seems to be redundant as InsertPInvokeCallProlog will set it to zero before a
3498 // PInvoke call and InsertPInvokeCallEpilog() will set it back to 1 after the PInvoke. Though this is redundant,
3500 // Note that liveness is artificially extending the life of compLvFrameListRoot var if the method being compiled has
3501 // PInvokes. Deleting the below stmnt would cause an assert in lsra.cpp::SetLastUses() since compLvFrameListRoot
3502 // will be live-in to a BBJ_RETURN block without any uses. Long term we need to fix liveness for x64 case to
3503 // properly extend the life of compLvFrameListRoot var.
3505 // Thread.offsetOfGcState = 0/1
3506 // That is [tcb + offsetOfGcState] = 1
3507 GenTree* storeGCState = SetGCState(1);
3508 returnBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, storeGCState));
3509 ContainCheckStoreIndir(storeGCState->AsIndir());
3511 // Pop the frame if necessary. This always happens in the epilog on 32-bit targets. For 64-bit targets, we only do
3512 // this in the epilog for IL stubs; for non-IL stubs the frame is popped after every PInvoke call.
3513 CLANG_FORMAT_COMMENT_ANCHOR;
3515 #ifdef _TARGET_64BIT_
3516 if (comp->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_IL_STUB))
3517 #endif // _TARGET_64BIT_
3519 GenTree* frameUpd = CreateFrameLinkUpdate(PopFrame);
3520 returnBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, frameUpd));
3521 ContainCheckStoreIndir(frameUpd->AsIndir());
3525 //------------------------------------------------------------------------
3526 // InsertPInvokeCallProlog: Emit the call-site prolog for direct calls to unmanaged code.
3527 // It does all the necessary call-site setup of the InlinedCallFrame.
//
// Arguments:
3530 // call - the call for which we are inserting the PInvoke prolog.
//
// Return Value:
//    None. The generated code is inserted into the current block range.
//
3535 void Lowering::InsertPInvokeCallProlog(GenTreeCall* call)
3537 JITDUMP("======= Inserting PInvoke call prolog\n");
// By default the prolog goes directly in front of the call. For indirect calls
// it goes in front of the first node of the tree computing the call address.
3539 GenTree* insertBefore = call;
3540 if (call->gtCallType == CT_INDIRECT)
3543 insertBefore = BlockRange().GetTreeRange(call->gtCallAddr, &isClosed).FirstNode();
3547 const CORINFO_EE_INFO::InlinedCallFrameInfo& callFrameInfo = comp->eeGetEEInfo()->inlinedCallFrameInfo;
3549 gtCallTypes callType = (gtCallTypes)call->gtCallType;
3551 noway_assert(comp->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
// Helper-based transitions: the whole prolog is a call to
// CORINFO_HELP_JIT_PINVOKE_BEGIN taking the frame variable's address.
3553 if (comp->opts.ShouldUsePInvokeHelpers())
3555 // First argument is the address of the frame variable.
3556 GenTree* frameAddr = new (comp, GT_LCL_VAR_ADDR)
3557 GenTreeLclVar(GT_LCL_VAR_ADDR, TYP_BYREF, comp->lvaInlinedPInvokeFrameVar, BAD_IL_OFFSET);
3559 // Insert call to CORINFO_HELP_JIT_PINVOKE_BEGIN
3560 GenTree* helperCall =
3561 comp->gtNewHelperCallNode(CORINFO_HELP_JIT_PINVOKE_BEGIN, TYP_VOID, comp->gtNewArgList(frameAddr));
3563 comp->fgMorphTree(helperCall);
3564 BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, helperCall));
3565 LowerNode(helperCall); // helper call is inserted before current node and should be lowered here.
3569 // Emit the following sequence:
3571 // InlinedCallFrame.callTarget = methodHandle // stored in m_Datum
3572 // InlinedCallFrame.m_pCallSiteSP = SP // x86 only
3573 // InlinedCallFrame.m_pCallerReturnAddress = return address
3574 // Thread.gcState = 0
3575 // (non-stub) - update top Frame on TCB // 64-bit targets only
3577 // ----------------------------------------------------------------------------------
3578 // Setup InlinedCallFrame.callSiteTarget (which is how the JIT refers to it).
3579 // The actual field is InlinedCallFrame.m_Datum which has many different uses and meanings.
// 'src' is the value to store into m_Datum; it remains nullptr on paths that
// do not assign it.
3581 GenTree* src = nullptr;
3583 if (callType == CT_INDIRECT)
3585 #if !defined(_TARGET_64BIT_)
3586 // On 32-bit targets, indirect calls need the size of the stack args in InlinedCallFrame.m_Datum.
3587 const unsigned numStkArgBytes = call->fgArgInfo->GetNextSlotNum() * TARGET_POINTER_SIZE;
3589 src = comp->gtNewIconNode(numStkArgBytes, TYP_INT);
3591 // On 64-bit targets, indirect calls may need the stub parameter value in InlinedCallFrame.m_Datum.
3592 // If the stub parameter value is not needed, m_Datum will be initialized by the VM.
3593 if (comp->info.compPublishStubParam)
3595 src = comp->gtNewLclvNode(comp->lvaStubArgumentVar, TYP_I_IMPL);
3597 #endif // !defined(_TARGET_64BIT_)
3601 assert(callType == CT_USER_FUNC);
3603 void* pEmbedMethodHandle = nullptr;
3604 CORINFO_METHOD_HANDLE embedMethodHandle =
3605 comp->info.compCompHnd->embedMethodHandle(call->gtCallMethHnd, &pEmbedMethodHandle);
// Exactly one of the direct handle and the indirection cell must be non-null.
3607 noway_assert((!embedMethodHandle) != (!pEmbedMethodHandle));
3609 if (embedMethodHandle != nullptr)
3611 // InlinedCallFrame.callSiteTarget = methodHandle
3612 src = AddrGen(embedMethodHandle);
3616 // InlinedCallFrame.callSiteTarget = *pEmbedMethodHandle
3617 src = Ind(AddrGen(pEmbedMethodHandle));
3623 // Store into InlinedCallFrame.m_Datum, the offset of which is given by offsetOfCallTarget.
3624 GenTreeLclFld* store =
3625 new (comp, GT_STORE_LCL_FLD) GenTreeLclFld(GT_STORE_LCL_FLD, TYP_I_IMPL, comp->lvaInlinedPInvokeFrameVar,
3626 callFrameInfo.offsetOfCallTarget);
3628 store->gtFlags |= GTF_VAR_DEF;
3630 InsertTreeBeforeAndContainCheck(insertBefore, store);
3635 // ----------------------------------------------------------------------------------
3636 // InlinedCallFrame.m_pCallSiteSP = SP
3638 GenTreeLclFld* storeCallSiteSP = new (comp, GT_STORE_LCL_FLD)
3639 GenTreeLclFld(GT_STORE_LCL_FLD, TYP_I_IMPL, comp->lvaInlinedPInvokeFrameVar, callFrameInfo.offsetOfCallSiteSP);
3641 storeCallSiteSP->gtOp1 = PhysReg(REG_SPBASE);
3642 storeCallSiteSP->gtFlags |= GTF_VAR_DEF;
3644 InsertTreeBeforeAndContainCheck(insertBefore, storeCallSiteSP);
3648 // ----------------------------------------------------------------------------------
3649 // InlinedCallFrame.m_pCallerReturnAddress = &label (the address of the instruction immediately following the call)
3651 GenTreeLclFld* storeLab =
3652 new (comp, GT_STORE_LCL_FLD) GenTreeLclFld(GT_STORE_LCL_FLD, TYP_I_IMPL, comp->lvaInlinedPInvokeFrameVar,
3653 callFrameInfo.offsetOfReturnAddress);
3655 // We don't have a real label, and inserting one is hard (even if we made a special node),
3656 // so for now we will just 'know' what this means in codegen.
3657 GenTreeLabel* labelRef = new (comp, GT_LABEL) GenTreeLabel(nullptr);
3658 labelRef->gtType = TYP_I_IMPL;
3659 storeLab->gtOp1 = labelRef;
3660 storeLab->gtFlags |= GTF_VAR_DEF;
3662 InsertTreeBeforeAndContainCheck(insertBefore, storeLab);
3664 // Push the PInvoke frame if necessary. On 32-bit targets this only happens in the method prolog if a method
3665 // contains PInvokes; on 64-bit targets this is necessary in non-stubs.
3666 CLANG_FORMAT_COMMENT_ANCHOR;
3668 #ifdef _TARGET_64BIT_
3669 if (!comp->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_IL_STUB))
3671 // Set the TCB's frame to be the one we just created.
3672 // Note the init routine for the InlinedCallFrame (CORINFO_HELP_INIT_PINVOKE_FRAME)
3673 // has prepended it to the linked list to maintain the stack of Frames.
3675 // Stubs do this once per stub, not once per call.
3676 GenTree* frameUpd = CreateFrameLinkUpdate(PushFrame);
3677 BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, frameUpd));
3678 ContainCheckStoreIndir(frameUpd->AsIndir());
3680 #endif // _TARGET_64BIT_
3682 // IMPORTANT **** This instruction must come last!!! ****
3683 // It changes the thread's state to Preemptive mode
3684 // ----------------------------------------------------------------------------------
3685 // [tcb + offsetOfGcState] = 0
3687 GenTree* storeGCState = SetGCState(0);
3688 BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, storeGCState));
3689 ContainCheckStoreIndir(storeGCState->AsIndir());
3692 //------------------------------------------------------------------------
3693 // InsertPInvokeCallEpilog: Insert the code that goes after every inlined pinvoke call.
//
// Arguments:
3696 // call - the call for which we are inserting the PInvoke epilog.
//
// Return Value:
//    None. The generated code is inserted into the current block range.
//
3701 void Lowering::InsertPInvokeCallEpilog(GenTreeCall* call)
3703 JITDUMP("======= Inserting PInvoke call epilog\n");
// Helper-based transitions: the whole epilog is a call to
// CORINFO_HELP_JIT_PINVOKE_END placed right after the PInvoke call.
3705 if (comp->opts.ShouldUsePInvokeHelpers())
3707 noway_assert(comp->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
3709 // First argument is the address of the frame variable.
// The node is created as GT_LCL_VAR and then re-tagged as GT_LCL_VAR_ADDR.
3710 GenTree* frameAddr =
3711 new (comp, GT_LCL_VAR) GenTreeLclVar(GT_LCL_VAR, TYP_BYREF, comp->lvaInlinedPInvokeFrameVar, BAD_IL_OFFSET);
3712 frameAddr->SetOperRaw(GT_LCL_VAR_ADDR);
3714 // Insert call to CORINFO_HELP_JIT_PINVOKE_END
3715 GenTreeCall* helperCall =
3716 comp->gtNewHelperCallNode(CORINFO_HELP_JIT_PINVOKE_END, TYP_VOID, comp->gtNewArgList(frameAddr));
3718 comp->fgMorphTree(helperCall);
3719 BlockRange().InsertAfter(call, LIR::SeqTree(comp, helperCall));
3720 ContainCheckCallOperands(helperCall);
// Inline transitions: each tree below is inserted in front of the node that
// currently follows the call, so the trees execute in the order written here.
3725 GenTree* insertionPoint = call->gtNext;
// [tcb + offsetOfGcState] = 1
3727 GenTree* tree = SetGCState(1);
3728 BlockRange().InsertBefore(insertionPoint, LIR::SeqTree(comp, tree));
3729 ContainCheckStoreIndir(tree->AsIndir());
// Return trap (GT_RETURNTRAP) testing the global built by CreateReturnTrapSeq.
3731 tree = CreateReturnTrapSeq();
3732 BlockRange().InsertBefore(insertionPoint, LIR::SeqTree(comp, tree));
3733 ContainCheckReturnTrap(tree->AsOp());
3735 // Pop the frame if necessary. On 32-bit targets this only happens in the method epilog; on 64-bit targets this
3736 // happens after every PInvoke call in non-stubs. 32-bit targets instead mark the frame as inactive.
3737 CLANG_FORMAT_COMMENT_ANCHOR;
3739 #ifdef _TARGET_64BIT_
3740 if (!comp->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_IL_STUB))
3742 tree = CreateFrameLinkUpdate(PopFrame);
3743 BlockRange().InsertBefore(insertionPoint, LIR::SeqTree(comp, tree));
3744 ContainCheckStoreIndir(tree->AsIndir());
3747 const CORINFO_EE_INFO::InlinedCallFrameInfo& callFrameInfo = comp->eeGetEEInfo()->inlinedCallFrameInfo;
3749 // ----------------------------------------------------------------------------------
3750 // InlinedCallFrame.m_pCallerReturnAddress = nullptr
3752 GenTreeLclFld* const storeCallSiteTracker =
3753 new (comp, GT_STORE_LCL_FLD) GenTreeLclFld(GT_STORE_LCL_FLD, TYP_I_IMPL, comp->lvaInlinedPInvokeFrameVar,
3754 callFrameInfo.offsetOfReturnAddress);
3756 GenTreeIntCon* const constantZero = new (comp, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, 0);
3758 storeCallSiteTracker->gtOp1 = constantZero;
3759 storeCallSiteTracker->gtFlags |= GTF_VAR_DEF;
3761 BlockRange().InsertBefore(insertionPoint, constantZero, storeCallSiteTracker);
3762 ContainCheckStoreLoc(storeCallSiteTracker);
3763 #endif // _TARGET_64BIT_
3766 //------------------------------------------------------------------------
3767 // LowerNonvirtPinvokeCall: Lower a non-virtual / indirect PInvoke call
//
// Arguments:
3770 // call - The call to lower.
//
// Return Value:
3773 // The lowered call tree.
3775 GenTree* Lowering::LowerNonvirtPinvokeCall(GenTreeCall* call)
3777 // PInvoke lowering varies depending on the flags passed in by the EE. By default,
3778 // GC transitions are generated inline; if CORJIT_FLAG_USE_PINVOKE_HELPERS is specified,
3779 // GC transitions are instead performed using helper calls. Examples of each case are given
3780 // below. Note that the data structure that is used to store information about a call frame
3781 // containing any P/Invoke calls is initialized in the method prolog (see
3782 // InsertPInvokeMethod{Prolog,Epilog} for details).
3784 // Inline transitions:
3785 // InlinedCallFrame inlinedCallFrame;
3789 // // Set up frame information
3790 // inlinedCallFrame.callTarget = methodHandle; // stored in m_Datum
3791 // inlinedCallFrame.m_pCallSiteSP = SP; // x86 only
3792 // inlinedCallFrame.m_pCallerReturnAddress = &label; (the address of the instruction immediately following the call)
3794 // Thread.m_pFrame = &inlinedCallFrame; (non-IL-stub only)
3796 // // Switch the thread's GC mode to preemptive mode
3797 // thread->m_fPreemptiveGCDisabled = 0;
3799 // // Call the unmanaged method
3802 // // Switch the thread's GC mode back to cooperative mode
3803 // thread->m_fPreemptiveGCDisabled = 1;
3805 // // Rendezvous with a running collection if necessary
3806 // if (g_TrapReturningThreads)
3807 // RareDisablePreemptiveGC();
3809 // Transitions using helpers:
3811 // OpaqueFrame opaqueFrame;
3815 // // Call the JIT_PINVOKE_BEGIN helper
3816 // JIT_PINVOKE_BEGIN(&opaqueFrame);
3818 // // Call the unmanaged method
3821 // // Call the JIT_PINVOKE_END helper
3822 // JIT_PINVOKE_END(&opaqueFrame);
3824 // Note that the JIT_PINVOKE_{BEGIN,END} helpers currently use the default calling convention for the target
3825 // platform. They may be changed in the future such that they preserve all register values.
3827 GenTree* result = nullptr;
3828 void* addr = nullptr;
3830 // assert we have seen one of these
3831 noway_assert(comp->info.compCallUnmanaged != 0);
3833 // All code generated by this function must not contain the randomly-inserted NOPs
3834 // that we insert to inhibit JIT spraying in partial trust scenarios.
3835 // The PINVOKE_PROLOG op signals this to the code generator/emitter.
// NOTE(review): the node is allocated with GT_NOP as the placement-new oper but
// constructed as GT_PINVOKE_PROLOG - presumably a deliberate node-size choice; confirm.
3837 GenTree* prolog = new (comp, GT_NOP) GenTree(GT_PINVOKE_PROLOG, TYP_VOID);
3838 BlockRange().InsertBefore(call, prolog);
3840 InsertPInvokeCallProlog(call);
// For non-indirect calls, ask the EE how the PInvoke target is reached and
// build the corresponding target expression.
3842 if (call->gtCallType != CT_INDIRECT)
3844 noway_assert(call->gtCallType == CT_USER_FUNC);
3845 CORINFO_METHOD_HANDLE methHnd = call->gtCallMethHnd;
3847 CORINFO_CONST_LOOKUP lookup;
3848 comp->info.compCompHnd->getAddressOfPInvokeTarget(methHnd, &lookup);
// NOTE(review): this appears to shadow the 'addr' declared at the top of the
// function, which would then be unused - confirm.
3850 void* addr = lookup.addr;
3851 switch (lookup.accessType)
// Direct address: out-of-range targets get an explicit address node.
3854 if (!IsCallTargetInRange(addr))
3856 result = AddrGen(addr);
3860 // a direct call within range of hardware relative call instruction
3861 // stash the address for codegen
3862 call->gtDirectCallAddress = addr;
3863 #ifdef FEATURE_READYTORUN_COMPILER
3864 call->gtEntryPoint.addr = nullptr;
3865 call->gtEntryPoint.accessType = IAT_VALUE;
// Target reached through a single indirection.
3871 result = Ind(AddrGen(addr));
// Target reached through a double indirection.
3875 result = Ind(Ind(AddrGen(addr)));
3883 InsertPInvokeCallEpilog(call);
3888 // Expand the code necessary to calculate the control target.
3889 // Returns: the expression needed to calculate the control target
3890 // May insert embedded statements
//
// Arguments:
//    call - the virtual (vtable) call being lowered; must be a CT_USER_FUNC call.
//
// Notes:
//    The 'this' pointer is taken from the operand of its GT_PUTARG_REG node. When that
//    operand is not already a local it is replaced by a read of 'vtableCallTemp', so that
//    the same value can be read again when building the vtable load.
//    The target is then built as a chain of indirections starting at [this + VPTR_OFFS].
3891 GenTree* Lowering::LowerVirtualVtableCall(GenTreeCall* call)
3893 noway_assert(call->gtCallType == CT_USER_FUNC);
3895 // If this is a tail call via helper, thisPtr will be the third argument.
3897 regNumber thisPtrArgReg;
3899 #ifndef _TARGET_X86_ // x86 tailcall via helper follows normal calling convention, but with extra stack args.
3900 if (call->IsTailCallViaHelper())
3903 thisPtrArgReg = REG_ARG_2;
3906 #endif // !_TARGET_X86_
3909 thisPtrArgReg = comp->codeGen->genGetThisArgReg(call);
3912 // get a reference to the thisPtr being passed
3913 fgArgTabEntry* argEntry = comp->gtArgEntryByArgNum(call, thisPtrArgNum);
3914 assert(argEntry->regNum == thisPtrArgReg);
3915 assert(argEntry->node->gtOper == GT_PUTARG_REG);
3916 GenTree* thisPtr = argEntry->node->gtOp.gtOp1;
3918 // If what we are passing as the thisptr is not already a local, make a new local to place it in
3919 // because we will be creating expressions based on it.
3921 if (thisPtr->IsLocal())
3923 lclNum = thisPtr->gtLclVarCommon.gtLclNum;
3927 // Split off the thisPtr and store to a temporary variable.
// The temp is grabbed lazily and only once: it is reused whenever vtableCallTemp is already valid.
3928 if (vtableCallTemp == BAD_VAR_NUM)
3930 vtableCallTemp = comp->lvaGrabTemp(true DEBUGARG("virtual vtable call"));
// Redirect the PUTARG_REG operand so that it reads the 'this' pointer from the temp.
3933 LIR::Use thisPtrUse(BlockRange(), &(argEntry->node->gtOp.gtOp1), argEntry->node);
3934 ReplaceWithLclVar(thisPtrUse, vtableCallTemp);
3936 lclNum = vtableCallTemp;
3939 // Get hold of the vtable offset (note: this might be expensive)
3940 unsigned vtabOffsOfIndirection;
3941 unsigned vtabOffsAfterIndirection;
3943 comp->info.compCompHnd->getMethodVTableOffset(call->gtCallMethHnd, &vtabOffsOfIndirection,
3944 &vtabOffsAfterIndirection, &isRelative);
3946 // If the thisPtr is a local field, then construct a local field type node
3948 if (thisPtr->isLclField())
3950 local = new (comp, GT_LCL_FLD)
3951 GenTreeLclFld(GT_LCL_FLD, thisPtr->TypeGet(), lclNum, thisPtr->AsLclFld()->gtLclOffs);
3955 local = new (comp, GT_LCL_VAR) GenTreeLclVar(GT_LCL_VAR, thisPtr->TypeGet(), lclNum, BAD_IL_OFFSET);
3958 // pointer to virtual table = [REG_CALL_THIS + offs]
3959 GenTree* result = Ind(Offset(local, VPTR_OFFS));
3961 // Get the appropriate vtable chunk
3962 if (vtabOffsOfIndirection != CORINFO_VIRTUALCALL_NO_CHUNK)
3966 // MethodTable offset is a relative pointer.
3968 // Additional temporary variable is used to store virtual table pointer.
3969 // Address of method is obtained by the next computations:
3971 // Save relative offset to tmp (vtab is virtual table pointer, vtabOffsOfIndirection is offset of
3972 // vtable-1st-level-indirection):
3975 // Save address of method to result (vtabOffsAfterIndirection is offset of vtable-2nd-level-indirection):
3976 // result = [tmp + vtabOffsOfIndirection + vtabOffsAfterIndirection + [tmp + vtabOffsOfIndirection]]
3979 // If relative pointers are also in second level indirection, additional temporary is used:
3981 // tmp2 = tmp1 + vtabOffsOfIndirection + vtabOffsAfterIndirection + [tmp1 + vtabOffsOfIndirection]
3982 // result = tmp2 + [tmp2]
3984 unsigned lclNumTmp = comp->lvaGrabTemp(true DEBUGARG("lclNumTmp"));
3985 unsigned lclNumTmp2 = comp->lvaGrabTemp(true DEBUGARG("lclNumTmp2"));
// lclNumTmp receives the vtable pointer computed above.
3987 GenTree* lclvNodeStore = comp->gtNewTempAssign(lclNumTmp, result);
// tmpTree = [lclNumTmp + vtabOffsOfIndirection], i.e. the relative offset stored in the vtable.
3989 GenTree* tmpTree = comp->gtNewLclvNode(lclNumTmp, result->TypeGet());
3990 tmpTree = Offset(tmpTree, vtabOffsOfIndirection);
3992 tmpTree = comp->gtNewOperNode(GT_IND, TYP_I_IMPL, tmpTree, false);
3993 GenTree* offs = comp->gtNewIconNode(vtabOffsOfIndirection + vtabOffsAfterIndirection, TYP_INT);
3994 result = comp->gtNewOperNode(GT_ADD, TYP_I_IMPL, comp->gtNewLclvNode(lclNumTmp, result->TypeGet()), offs);
// lclNumTmp2 receives (lclNumTmp + both offsets) + tmpTree, matching 'tmp2' in the comment above.
3996 GenTree* base = OffsetByIndexWithScale(result, tmpTree, 1);
3997 GenTree* lclvNodeStore2 = comp->gtNewTempAssign(lclNumTmp2, base);
// Both temp stores are sequenced into LIR ahead of the call; the second goes right after the first.
3999 LIR::Range range = LIR::SeqTree(comp, lclvNodeStore);
4000 JITDUMP("result of obtaining pointer to virtual table:\n");
4002 BlockRange().InsertBefore(call, std::move(range));
4004 LIR::Range range2 = LIR::SeqTree(comp, lclvNodeStore2);
4005 JITDUMP("result of obtaining pointer to virtual table 2nd level indirection:\n");
4007 BlockRange().InsertAfter(lclvNodeStore, std::move(range2));
4009 result = Ind(comp->gtNewLclvNode(lclNumTmp2, result->TypeGet()));
4011 comp->gtNewOperNode(GT_ADD, TYP_I_IMPL, result, comp->gtNewLclvNode(lclNumTmp2, result->TypeGet()));
4015 // result = [REG_CALL_IND_SCRATCH + vtabOffsOfIndirection]
4016 result = Ind(Offset(result, vtabOffsOfIndirection));
// Relative vtable entries are only expected together with a vtable chunk.
4021 assert(!isRelative);
4024 // Load the function address
4025 // result = [reg+vtabOffs]
4028 result = Ind(Offset(result, vtabOffsAfterIndirection));
4034 // Lower stub dispatched virtual calls.
//
// Arguments:
//    call - the call to lower; must satisfy call->IsVirtualStub().
//
// Notes:
//    Two shapes are handled:
//    - CT_INDIRECT: the stub address was computed (dictionary lookup); an indirection is
//      added on top of call->gtCallAddr and becomes the new call address.
//    - otherwise: a direct stub call through call->gtStubCallStubAddr, which the VM
//      guarantees to be a relative-indirect call.
4035 GenTree* Lowering::LowerVirtualStubCall(GenTreeCall* call)
4037 assert(call->IsVirtualStub());
4039 // An x86 JIT which uses full stub dispatch must generate only
4040 // the following stub dispatch calls:
4042 // (1) isCallRelativeIndirect:
4043 // call dword ptr [rel32] ; FF 15 ---rel32----
4044 // (2) isCallRelative:
4045 // call abc ; E8 ---rel32----
4046 // (3) isCallRegisterIndirect:
4048 // call dword ptr [eax] ; FF 10
4050 // THIS IS VERY TIGHTLY TIED TO THE PREDICATES IN
4051 // vm\i386\cGenCpu.h, esp. isCallRegisterIndirect.
4053 GenTree* result = nullptr;
4055 #ifdef _TARGET_64BIT_
4056 // Non-tail calls: Jump Stubs are not taken into account by VM for mapping an AV into a NullRef
4057 // exception. Therefore, JIT needs to emit an explicit null check. Note that Jit64 too generates
4058 // an explicit null check.
4060 // Tail calls: fgMorphTailCall() materializes null check explicitly and hence no need to emit
4063 // Non-64-bit: No need to null check the this pointer - the dispatch code will deal with this.
4064 // The VM considers exceptions that occur in stubs on 64-bit to be not managed exceptions and
4065 // it would be difficult to change this in a way so that it affects only the right stubs.
4067 if (!call->IsTailCallViaHelper())
4069 call->gtFlags |= GTF_CALL_NULLCHECK;
4073 // This is code to set up an indirect call to a stub address computed
4074 // via dictionary lookup.
4075 if (call->gtCallType == CT_INDIRECT)
4077 // The importer decided we needed a stub call via a computed
4078 // stub dispatch address, i.e. an address which came from a dictionary lookup.
4079 // - The dictionary lookup produces an indirected address, suitable for call
4080 // via "call [VirtualStubParam.reg]"
4082 // This combination will only be generated for shared generic code and when
4083 // stub dispatch is active.
4085 // fgMorphArgs will have created trees to pass the address in VirtualStubParam.reg.
4086 // All we have to do here is add an indirection to generate the actual call target.
// The new indirection is placed in LIR immediately after the address it dereferences.
4088 GenTree* ind = Ind(call->gtCallAddr);
4089 BlockRange().InsertAfter(call->gtCallAddr, ind);
4090 call->gtCallAddr = ind;
// Mark the indirection as needing its address in a register, then run the containment check on it.
4092 ind->gtFlags |= GTF_IND_REQ_ADDR_IN_REG;
4094 ContainCheckIndir(ind->AsIndir());
4098 // Direct stub call.
4099 // Get stub addr. This will return NULL if virtual call stubs are not active
4100 void* stubAddr = call->gtStubCallStubAddr;
4101 noway_assert(stubAddr != nullptr);
4103 // If not CT_INDIRECT, then it should always be relative indir call.
4104 // This is ensured by VM.
4105 noway_assert(call->IsVirtualStubRelativeIndir());
4107 // Direct stub calls, though the stubAddr itself may still need to be
4108 // accessed via an indirection.
4109 GenTree* addr = AddrGen(stubAddr);
4112 // On x86, for tailcall via helper, the JIT_TailCall helper takes the stubAddr as
4113 // the target address, and we set a flag that it's a VSD call. The helper then
4114 // handles any necessary indirection.
4115 if (call->IsTailCallViaHelper())
4119 #endif // _TARGET_X86_
// Reached when no earlier path (e.g. the x86 tailcall-via-helper case) has set 'result'.
4121 if (result == nullptr)
4127 // TODO-Cleanup: start emitting random NOPS
4131 //------------------------------------------------------------------------
4132 // AddrModeCleanupHelper: Remove the nodes that are no longer used after an
4133 // addressing mode is constructed
4136 // addrMode - A pointer to a new GenTreeAddrMode
4137 // node - The node currently being considered for removal
4143 // 'addrMode' and 'node' must be contained in the current block
//
// Notes:
//    Recurses over the operands of 'node' before removing 'node' itself from the block
//    range, so a whole subtree is removed bottom-up.
4145 void Lowering::AddrModeCleanupHelper(GenTreeAddrMode* addrMode, GenTree* node)
// The base and index of 'addrMode' get special handling: they are checked for before
// anything is visited or removed.
4147 if (node == addrMode->Base() || node == addrMode->Index())
4152 // TODO-LIR: change this to use the LIR mark bit and iterate instead of recursing
4153 node->VisitOperands([this, addrMode](GenTree* operand) -> GenTree::VisitResult {
4154 AddrModeCleanupHelper(addrMode, operand);
4155 return GenTree::VisitResult::Continue;
4158 BlockRange().Remove(node);
4161 //------------------------------------------------------------------------
4162 // Lowering::AreSourcesPossiblyModifiedLocals:
4163 // Given two nodes which will be used in an addressing mode (base,
4164 // index), check to see if they are lclVar reads, and if so, walk
4165 // backwards from the use until both reads have been visited to
4166 // determine if they are potentially modified in that range.
4169 // addr - the node that uses the base and index nodes
4170 // base - the base node
4171 // index - the index node
4173 // Returns: true if either the base or index may be modified between the
4176 bool Lowering::AreSourcesPossiblyModifiedLocals(GenTree* addr, GenTree* base, GenTree* index)
4178 assert(addr != nullptr);
// NOTE(review): 'markCount' is initialized here but no use of it is evident - confirm and remove if dead.
4180 unsigned markCount = 0;
// Only local reads are tracked: the side effects of 'base' are recorded so that the nodes
// visited below can be tested for interference with it.
4182 SideEffectSet baseSideEffects;
4183 if (base != nullptr)
4185 if (base->OperIsLocalRead())
4187 baseSideEffects.AddNode(comp, base);
// Same treatment for 'index'.
4195 SideEffectSet indexSideEffects;
4196 if (index != nullptr)
4198 if (index->OperIsLocalRead())
4200 indexSideEffects.AddNode(comp, index);
// Walk backwards through the linear order (gtPrev) starting at 'addr'. The loop has no
// condition of its own; it relies on the checks inside the body to terminate.
4208 for (GenTree* cursor = addr;; cursor = cursor->gtPrev)
4210 assert(cursor != nullptr);
4217 if (cursor == index)
4222 if ((base == nullptr) && (index == nullptr))
// Compute the side effects of the node being visited and test them against each read
// that is still being tracked.
4227 m_scratchSideEffects.Clear();
4228 m_scratchSideEffects.AddNode(comp, cursor);
4229 if ((base != nullptr) && m_scratchSideEffects.InterferesWith(baseSideEffects, false))
4234 if ((index != nullptr) && m_scratchSideEffects.InterferesWith(indexSideEffects, false))
4241 //------------------------------------------------------------------------
4242 // TryCreateAddrMode: recognize trees which can be implemented using an
4243 // addressing mode and transform them to a GT_LEA
4246 // use: the use of the address we want to transform
4247 // isIndir: true if this addressing mode is the child of an indir
4250 // The created LEA node or the original address node if an LEA could
//
// Notes:
//    On success the new GT_LEA is inserted after the original address node, the nodes it
//    subsumes are removed from the block, and 'use' is updated to refer to the LEA.
4253 GenTree* Lowering::TryCreateAddrMode(LIR::Use&& use, bool isIndir)
4255 GenTree* addr = use.Def();
4256 GenTree* base = nullptr;
4257 GenTree* index = nullptr;
4262 // TODO-1stClassStructs: This logic is here to preserve prior behavior. Note that previously
4263 // block ops were not considered for addressing modes, but an add under it may have been.
4264 // This should be replaced with logic that more carefully determines when an addressing mode
4265 // would be beneficial for a block op.
4268 GenTree* indir = use.User();
4269 if (indir->TypeGet() == TYP_STRUCT)
4273 else if (varTypeIsStruct(indir))
4275 // We can have an indirection on the rhs of a block copy (it is the source
4276 // object). This is not a "regular" indirection.
4277 // (Note that the user check could be costly.)
4279 if (BlockRange().TryGetUse(indir, &indirUse) && indirUse.User()->OperIsIndir())
4285 isIndir = !indir->OperIsBlk();
4290 // Find out if an addressing mode can be constructed
4291 bool doAddrMode = comp->codeGen->genCreateAddrMode(addr, // address
4293 &rev, // reverse ops
4295 &index, // index val
4296 #if SCALED_ADDR_MODES
4298 #endif // SCALED_ADDR_MODES
4299 &offset); // displacement
4308 // this is just a reg-const add
4309 if (index == nullptr)
4314 // this is just a reg-reg add
4315 if (scale == 1 && offset == 0)
4321 // make sure there are not any side effects between def of leaves and use
4322 if (!doAddrMode || AreSourcesPossiblyModifiedLocals(addr, base, index))
4324 JITDUMP("No addressing mode:\n ");
4329 GenTree* arrLength = nullptr;
4331 JITDUMP("Addressing mode:\n");
4332 JITDUMP(" Base\n ");
4334 if (index != nullptr)
4336 JITDUMP(" + Index * %u + %d\n ", scale, offset);
4341 JITDUMP(" + %d\n", offset);
// The LEA takes the type of the original address, except that a TYP_REF address
// produces a TYP_BYREF LEA.
4344 var_types addrModeType = addr->TypeGet();
4345 if (addrModeType == TYP_REF)
4347 addrModeType = TYP_BYREF;
4350 GenTreeAddrMode* addrMode = new (comp, GT_LEA) GenTreeAddrMode(addrModeType, base, index, scale, offset);
4352 // Neither the base nor the index should now be contained.
4353 if (base != nullptr)
4355 base->ClearContained();
4357 if (index != nullptr)
4359 index->ClearContained();
// Carry over the reserved registers and the indirection flags of the original address node.
4361 addrMode->gtRsvdRegs = addr->gtRsvdRegs;
4362 addrMode->gtFlags |= (addr->gtFlags & GTF_IND_FLAGS);
4363 addrMode->gtFlags &= ~GTF_ALL_EFFECT; // LEAs are side-effect-free.
4365 JITDUMP("New addressing mode node:\n");
4369 BlockRange().InsertAfter(addr, addrMode);
4371 // Now we need to remove all the nodes subsumed by the addrMode
4372 AddrModeCleanupHelper(addrMode, addr);
4374 // Replace the original address node with the addrMode.
4375 use.ReplaceWith(comp, addrMode);
4380 //------------------------------------------------------------------------
4381 // LowerAdd: turn this add into a GT_LEA if that would be profitable
4384 // node - the node we care about
4387 // The next node to lower if we have transformed the ADD; nullptr otherwise.
//
// Notes:
//    The LEA transformation is compiled out when _TARGET_ARMARCH_ is defined.
4389 GenTree* Lowering::LowerAdd(GenTree* node)
4391 GenTree* next = node->gtNext;
4393 #ifndef _TARGET_ARMARCH_
// Only integral (or native-int) adds that have a user are candidates.
4394 if (varTypeIsIntegralOrI(node))
4397 if (BlockRange().TryGetUse(node, &use))
4399 // If this is a child of an indir, let the parent handle it.
4400 // If there is a chain of adds, only look at the topmost one.
4401 GenTree* parent = use.User();
4402 if (!parent->OperIsIndir() && (parent->gtOper != GT_ADD))
// 'false': this address mode is not the child of an indirection.
4404 GenTree* addr = TryCreateAddrMode(std::move(use), false);
4407 return addr->gtNext;
4412 #endif // !_TARGET_ARMARCH_
4417 //------------------------------------------------------------------------
4418 // LowerUnsignedDivOrMod: Lowers a GT_UDIV/GT_UMOD node.
4421 // divMod - pointer to the GT_UDIV/GT_UMOD node to be lowered
4424 // Returns a boolean indicating whether the node was transformed.
4427 // - Transform UDIV/UMOD by power of 2 into RSZ/AND
4428 // - Transform UDIV by constant >= 2^(N-1) into GE
4429 // - Transform UDIV/UMOD by constant >= 3 into "magic division"
//   (the magic division path is only compiled for XARCH and ARM64, and is skipped under MinOpts)
4432 bool Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod)
4434 assert(divMod->OperIs(GT_UDIV, GT_UMOD));
4436 #if defined(USE_HELPERS_FOR_INT_DIV)
4437 if (!varTypeIsIntegral(divMod->TypeGet()))
4439 assert(!"unreachable: integral GT_UDIV/GT_UMOD should get morphed into helper calls");
4441 assert(varTypeIsFloating(divMod->TypeGet()));
4442 #endif // USE_HELPERS_FOR_INT_DIV
4443 #if defined(_TARGET_ARM64_)
4444 assert(divMod->OperGet() != GT_UMOD);
4445 #endif // _TARGET_ARM64_
4447 GenTree* next = divMod->gtNext;
4448 GenTree* dividend = divMod->gtGetOp1();
4449 GenTree* divisor = divMod->gtGetOp2();
4451 #if !defined(_TARGET_64BIT_)
4452 if (dividend->OperIs(GT_LONG))
// Only constant divisors are candidates for the transformations below.
4458 if (!divisor->IsCnsIntOrI())
4463 if (dividend->IsCnsIntOrI())
4465 // We shouldn't see a divmod with constant operands here but if we do then it's likely
4466 // because optimizations are disabled or it's a case that's supposed to throw an exception.
4467 // Don't optimize this.
4471 const var_types type = divMod->TypeGet();
4472 assert((type == TYP_INT) || (type == TYP_I_IMPL));
4474 size_t divisorValue = static_cast<size_t>(divisor->AsIntCon()->IconValue());
4476 if (type == TYP_INT)
4478 // Clear up the upper 32 bits of the value, they may be set to 1 because constants
4479 // are treated as signed and stored in ssize_t which is 64 bit in size on 64 bit targets.
4480 divisorValue &= UINT32_MAX;
4483 if (divisorValue == 0)
4488 const bool isDiv = divMod->OperIs(GT_UDIV);
4490 if (isPow2(divisorValue))
// The constant is rewritten in place: for a division it becomes the shift count log2(divisor).
4497 divisorValue = genLog2(divisorValue);
4505 divMod->SetOper(newOper);
4506 divisor->gtIntCon.SetIconValue(divisorValue);
4507 ContainCheckNode(divMod);
4512 // If the divisor is greater or equal than 2^(N - 1) then the result is 1
4513 // iff the dividend is greater or equal than the divisor.
// NOTE(review): 'type' was asserted above to be TYP_INT or TYP_I_IMPL, while this test names
// TYP_LONG; presumably it relies on TYP_I_IMPL being TYP_LONG on 64-bit targets - confirm.
4514 if (((type == TYP_INT) && (divisorValue > (UINT32_MAX / 2))) ||
4515 ((type == TYP_LONG) && (divisorValue > (UINT64_MAX / 2))))
4517 divMod->SetOper(GT_GE);
4518 divMod->gtFlags |= GTF_UNSIGNED;
4519 ContainCheckNode(divMod);
4524 // TODO-ARM-CQ: Currently there's no GT_MULHI for ARM32
4525 #if defined(_TARGET_XARCH_) || defined(_TARGET_ARM64_)
4526 if (!comp->opts.MinOpts() && (divisorValue >= 3))
4532 if (type == TYP_INT)
4534 magic = MagicDivide::GetUnsigned32Magic(static_cast<uint32_t>(divisorValue), &add, &shift);
4538 #ifdef _TARGET_64BIT_
4539 magic = MagicDivide::GetUnsigned64Magic(static_cast<uint64_t>(divisorValue), &add, &shift);
4545 // Depending on the "add" flag returned by MagicDivide::GetUnsigned32Magic/GetUnsigned64Magic we need to generate:
4546 // add == false (when divisor == 3 for example):
4547 // div = (dividend MULHI magic) RSZ shift
4548 // add == true (when divisor == 7 for example):
4549 // mulhi = dividend MULHI magic
4550 // div = (((dividend SUB mulhi) RSZ 1) ADD mulhi)) RSZ (shift - 1)
4551 const bool requiresAdjustment = add;
4552 const bool requiresDividendMultiuse = requiresAdjustment || !isDiv;
// NOTE(review): 'curBBWeight' does not appear to be used in this function - confirm and remove if dead.
4553 const unsigned curBBWeight = m_block->getBBWeight(comp);
4554 unsigned dividendLclNum = BAD_VAR_NUM;
// When the dividend is needed more than once (adjustment, or UMOD), it is first replaced by a
// local so that additional reads of the same value can be created below.
4556 if (requiresDividendMultiuse)
4558 LIR::Use dividendUse(BlockRange(), &divMod->gtOp1, divMod);
4559 dividendLclNum = ReplaceWithLclVar(dividendUse);
4560 dividend = divMod->gtGetOp1();
4563 // Insert a new GT_MULHI node before the existing GT_UDIV/GT_UMOD node.
4564 // The existing node will later be transformed into a GT_RSZ/GT_SUB that
4565 // computes the final result. This way we don't need to find and change the use
4566 // of the existing node.
4567 GenTree* mulhi = comp->gtNewOperNode(GT_MULHI, type, dividend, divisor);
4568 mulhi->gtFlags |= GTF_UNSIGNED;
// The original divisor constant node is reused as the MULHI operand and now holds the magic number.
4569 divisor->AsIntCon()->SetIconValue(magic);
4570 BlockRange().InsertBefore(divMod, mulhi);
4571 GenTree* firstNode = mulhi;
4573 if (requiresAdjustment)
// Note: the locals 'dividend' and (below) 'add' declared in this scope shadow outer names.
4575 GenTree* dividend = comp->gtNewLclvNode(dividendLclNum, type);
4576 GenTree* sub = comp->gtNewOperNode(GT_SUB, type, dividend, mulhi);
4577 BlockRange().InsertBefore(divMod, dividend, sub);
4579 GenTree* one = comp->gtNewIconNode(1, TYP_INT);
4580 GenTree* rsz = comp->gtNewOperNode(GT_RSZ, type, sub, one);
4581 BlockRange().InsertBefore(divMod, one, rsz);
// mulhi is used twice (by the SUB and by the ADD), so it is replaced by a local as well.
4583 LIR::Use mulhiUse(BlockRange(), &sub->gtOp.gtOp2, sub);
4584 unsigned mulhiLclNum = ReplaceWithLclVar(mulhiUse);
4586 GenTree* mulhiCopy = comp->gtNewLclvNode(mulhiLclNum, type);
4587 GenTree* add = comp->gtNewOperNode(GT_ADD, type, rsz, mulhiCopy);
4588 BlockRange().InsertBefore(divMod, mulhiCopy, add);
4594 GenTree* shiftBy = comp->gtNewIconNode(shift, TYP_INT);
4595 BlockRange().InsertBefore(divMod, shiftBy);
// UDIV: the existing node itself becomes the final right shift.
4599 divMod->SetOper(GT_RSZ);
4600 divMod->gtOp1 = mulhi;
4601 divMod->gtOp2 = shiftBy;
4605 GenTree* div = comp->gtNewOperNode(GT_RSZ, type, mulhi, shiftBy);
4607 // dividend UMOD divisor = dividend SUB (div MUL divisor)
4608 GenTree* divisor = comp->gtNewIconNode(divisorValue, type);
4609 GenTree* mul = comp->gtNewOperNode(GT_MUL, type, div, divisor);
4610 GenTree* dividend = comp->gtNewLclvNode(dividendLclNum, type);
4612 divMod->SetOper(GT_SUB);
4613 divMod->gtOp1 = dividend;
4614 divMod->gtOp2 = mul;
4616 BlockRange().InsertBefore(divMod, div, divisor, mul, dividend);
// Re-run containment analysis over the newly created node sequence.
4618 ContainCheckRange(firstNode, divMod);
4626 // LowerConstIntDivOrMod: Transform integer GT_DIV/GT_MOD nodes with a power of 2
4627 // const divisor into equivalent but faster sequences.
// Non power of 2 constant divisors are also handled, via a GT_MULHI based "magic number"
// sequence, on XARCH and ARM64 when not compiling with MinOpts.
4630 // node - pointer to the DIV or MOD node
4633 // nullptr if no transformation is done, or the next node in the transformed node sequence that
4634 // needs to be lowered.
4636 GenTree* Lowering::LowerConstIntDivOrMod(GenTree* node)
4638 assert((node->OperGet() == GT_DIV) || (node->OperGet() == GT_MOD));
4639 GenTree* divMod = node;
4640 GenTree* dividend = divMod->gtGetOp1();
4641 GenTree* divisor = divMod->gtGetOp2();
4643 const var_types type = divMod->TypeGet();
4644 assert((type == TYP_INT) || (type == TYP_LONG));
4646 #if defined(USE_HELPERS_FOR_INT_DIV)
4647 assert(!"unreachable: integral GT_DIV/GT_MOD should get morphed into helper calls");
4648 #endif // USE_HELPERS_FOR_INT_DIV
4649 #if defined(_TARGET_ARM64_)
4650 assert(node->OperGet() != GT_MOD);
4651 #endif // _TARGET_ARM64_
4653 if (!divisor->IsCnsIntOrI())
4655 return nullptr; // no transformations to make
4658 if (dividend->IsCnsIntOrI())
4660 // We shouldn't see a divmod with constant operands here but if we do then it's likely
4661 // because optimizations are disabled or it's a case that's supposed to throw an exception.
4662 // Don't optimize this.
4666 ssize_t divisorValue = divisor->gtIntCon.IconValue();
4668 if (divisorValue == -1 || divisorValue == 0)
4670 // x / 0 and x % 0 can't be optimized because they are required to throw an exception.
4672 // x / -1 can't be optimized because INT_MIN / -1 is required to throw an exception.
4674 // x % -1 is always 0 and the IL spec says that the rem instruction "can" throw an exception if x is
4675 // the minimum representable integer. However, the C# spec says that an exception "is" thrown in this
4676 // case so optimizing this case would break C# code.
4678 // A runtime check could be used to handle this case but it's probably too rare to matter.
4682 bool isDiv = divMod->OperGet() == GT_DIV;
4686 if ((type == TYP_INT && divisorValue == INT_MIN) || (type == TYP_LONG && divisorValue == INT64_MIN))
4688 // If the divisor is the minimum representable integer value then we can use a compare,
4689 // the result is 1 iff the dividend equals divisor.
4690 divMod->SetOper(GT_EQ);
// SSIZE_T_MIN is special-cased and converted directly rather than passed to abs().
4695 size_t absDivisorValue =
4696 (divisorValue == SSIZE_T_MIN) ? static_cast<size_t>(divisorValue) : static_cast<size_t>(abs(divisorValue));
4698 if (!isPow2(absDivisorValue))
4700 if (comp->opts.MinOpts())
4705 #if defined(_TARGET_XARCH_) || defined(_TARGET_ARM64_)
4709 if (type == TYP_INT)
4711 magic = MagicDivide::GetSigned32Magic(static_cast<int32_t>(divisorValue), &shift);
4715 #ifdef _TARGET_64BIT_
4716 magic = MagicDivide::GetSigned64Magic(static_cast<int64_t>(divisorValue), &shift);
4717 #else // !_TARGET_64BIT_
4719 #endif // !_TARGET_64BIT_
// The original divisor constant node is reused to hold the magic number.
4722 divisor->gtIntConCommon.SetIconValue(magic);
4724 // Insert a new GT_MULHI node in front of the existing GT_DIV/GT_MOD node.
4725 // The existing node will later be transformed into a GT_ADD/GT_SUB that
4726 // computes the final result. This way we don't need to find and change the
4727 // use of the existing node.
4728 GenTree* mulhi = comp->gtNewOperNode(GT_MULHI, type, divisor, dividend);
4729 BlockRange().InsertBefore(divMod, mulhi);
4731 // mulhi was the easy part. Now we need to generate different code depending
4732 // on the divisor value:
4734 // div = signbit(mulhi) + mulhi
4736 // div = signbit(mulhi) + sar(mulhi, 1) ; requires shift adjust
4738 // mulhi += dividend ; requires add adjust
4739 // div = signbit(mulhi) + sar(mulhi, 2) ; requires shift adjust
4741 // mulhi -= dividend ; requires sub adjust
4742 // div = signbit(mulhi) + sar(mulhi, 1) ; requires shift adjust
4743 bool requiresAddSubAdjust = signum(divisorValue) != signum(magic);
4744 bool requiresShiftAdjust = shift != 0;
4745 bool requiresDividendMultiuse = requiresAddSubAdjust || !isDiv;
// NOTE(review): 'curBBWeight' does not appear to be used on this path - confirm and remove if dead.
4746 unsigned curBBWeight = comp->compCurBB->getBBWeight(comp);
4747 unsigned dividendLclNum = BAD_VAR_NUM;
// The dividend is the second operand of mulhi; replace it by a local when it must be read again.
4749 if (requiresDividendMultiuse)
4751 LIR::Use dividendUse(BlockRange(), &mulhi->gtOp.gtOp2, mulhi);
4752 dividendLclNum = ReplaceWithLclVar(dividendUse);
4757 if (requiresAddSubAdjust)
// A positive divisor adds the dividend back, a negative divisor subtracts it.
4759 dividend = comp->gtNewLclvNode(dividendLclNum, type);
4760 adjusted = comp->gtNewOperNode(divisorValue > 0 ? GT_ADD : GT_SUB, type, mulhi, dividend);
4761 BlockRange().InsertBefore(divMod, dividend, adjusted);
// signBit = adjusted >>> (bit width - 1), i.e. the sign bit moved down to bit 0.
4768 GenTree* shiftBy = comp->gtNewIconNode(genTypeSize(type) * 8 - 1, type);
4769 GenTree* signBit = comp->gtNewOperNode(GT_RSZ, type, adjusted, shiftBy);
4770 BlockRange().InsertBefore(divMod, shiftBy, signBit);
// 'adjusted' is read twice (sign bit extraction and the final add), so it goes through a local.
4772 LIR::Use adjustedUse(BlockRange(), &signBit->gtOp.gtOp1, signBit);
4773 unsigned adjustedLclNum = ReplaceWithLclVar(adjustedUse);
4774 adjusted = comp->gtNewLclvNode(adjustedLclNum, type);
4775 BlockRange().InsertBefore(divMod, adjusted);
4777 if (requiresShiftAdjust)
4779 shiftBy = comp->gtNewIconNode(shift, TYP_INT);
4780 adjusted = comp->gtNewOperNode(GT_RSH, type, adjusted, shiftBy);
4781 BlockRange().InsertBefore(divMod, shiftBy, adjusted);
// DIV: the existing node itself becomes the final add.
4786 divMod->SetOperRaw(GT_ADD);
4787 divMod->gtOp.gtOp1 = adjusted;
4788 divMod->gtOp.gtOp2 = signBit;
4792 GenTree* div = comp->gtNewOperNode(GT_ADD, type, adjusted, signBit);
4794 dividend = comp->gtNewLclvNode(dividendLclNum, type);
4796 // dividend % divisor = dividend - divisor x div
4797 GenTree* divisor = comp->gtNewIconNode(divisorValue, type);
4798 GenTree* mul = comp->gtNewOperNode(GT_MUL, type, div, divisor);
4799 BlockRange().InsertBefore(divMod, dividend, div, divisor, mul);
4801 divMod->SetOperRaw(GT_SUB);
4802 divMod->gtOp.gtOp1 = dividend;
4803 divMod->gtOp.gtOp2 = mul;
4807 #elif defined(_TARGET_ARM_)
4808 // Currently there's no GT_MULHI for ARM32
4811 #error Unsupported or unset target architecture
4815 // We're committed to the conversion now. Go find the use if any.
4817 if (!BlockRange().TryGetUse(node, &use))
4822 // We need to use the dividend node multiple times so its value needs to be
4823 // computed once and stored in a temp variable.
// NOTE(review): 'curBBWeight' does not appear to be used on this path either - confirm and remove if dead.
4825 unsigned curBBWeight = comp->compCurBB->getBBWeight(comp);
4827 LIR::Use opDividend(BlockRange(), &divMod->gtOp.gtOp1, divMod);
4828 ReplaceWithLclVar(opDividend);
4830 dividend = divMod->gtGetOp1();
4831 assert(dividend->OperGet() == GT_LCL_VAR);
4833 unsigned dividendLclNum = dividend->gtLclVar.gtLclNum;
// adjustment = dividend >> (bit width - 1): all ones for a negative dividend, zero otherwise.
4835 GenTree* adjustment = comp->gtNewOperNode(GT_RSH, type, dividend, comp->gtNewIconNode(type == TYP_INT ? 31 : 63));
4837 if (absDivisorValue == 2)
4839 // If the divisor is +/-2 then we'd end up with a bitwise and between 0/-1 and 1.
4840 // We can get the same result by using GT_RSZ instead of GT_RSH.
4841 adjustment->SetOper(GT_RSZ);
4845 adjustment = comp->gtNewOperNode(GT_AND, type, adjustment, comp->gtNewIconNode(absDivisorValue - 1, type));
4848 GenTree* adjustedDividend =
4849 comp->gtNewOperNode(GT_ADD, type, adjustment, comp->gtNewLclvNode(dividendLclNum, type));
4855 // perform the division by right shifting the adjusted dividend
4856 divisor->gtIntCon.SetIconValue(genLog2(absDivisorValue));
4858 newDivMod = comp->gtNewOperNode(GT_RSH, type, adjustedDividend, divisor);
4859 ContainCheckShiftRotate(newDivMod->AsOp());
4861 if (divisorValue < 0)
4863 // negate the result if the divisor is negative
4864 newDivMod = comp->gtNewOperNode(GT_NEG, type, newDivMod);
4865 ContainCheckNode(newDivMod);
4870 // dividend % divisor = dividend - divisor x (dividend / divisor)
4871 // divisor x (dividend / divisor) translates to (dividend >> log2(divisor)) << log2(divisor)
4872 // which simply discards the low log2(divisor) bits, that's just dividend & ~(divisor - 1)
4873 divisor->gtIntCon.SetIconValue(~(absDivisorValue - 1));
4875 newDivMod = comp->gtNewOperNode(GT_SUB, type, comp->gtNewLclvNode(dividendLclNum, type),
4876 comp->gtNewOperNode(GT_AND, type, adjustedDividend, divisor));
4879 // Remove the divisor and dividend nodes from the linear order,
4880 // since we have reused them and will resequence the tree
4881 BlockRange().Remove(divisor);
4882 BlockRange().Remove(dividend);
4884 // linearize and insert the new tree before the original divMod node
4885 InsertTreeBeforeAndContainCheck(divMod, newDivMod);
4886 BlockRange().Remove(divMod);
4888 // replace the original divmod node with the new divmod tree
4889 use.ReplaceWith(comp, newDivMod);
4891 return newDivMod->gtNext;
4893 //------------------------------------------------------------------------
4894 // LowerSignedDivOrMod: transform integer GT_DIV/GT_MOD nodes with a power of 2
4895 // const divisor into equivalent but faster sequences.
4898 // node - the DIV or MOD node
4901 // The next node to lower.
//
// Notes:
//    Integral nodes are first offered to LowerConstIntDivOrMod. The containment check
//    ContainCheckDivOrMod is applied to the node as it stands afterwards.
4903 GenTree* Lowering::LowerSignedDivOrMod(GenTree* node)
4905 assert((node->OperGet() == GT_DIV) || (node->OperGet() == GT_MOD));
4906 GenTree* next = node->gtNext;
// NOTE(review): 'divMod', 'dividend' and 'divisor' do not appear to be used in this function -
// confirm and remove if dead.
4907 GenTree* divMod = node;
4908 GenTree* dividend = divMod->gtGetOp1();
4909 GenTree* divisor = divMod->gtGetOp2();
4911 if (varTypeIsIntegral(node->TypeGet()))
4913 // LowerConstIntDivOrMod will return nullptr if it doesn't transform the node.
4914 GenTree* newNode = LowerConstIntDivOrMod(node);
4915 if (newNode != nullptr)
4920 ContainCheckDivOrMod(node->AsOp());
4925 //------------------------------------------------------------------------
4926 // LowerShift: Lower shift nodes
4929 // shift - the shift node (GT_LSH, GT_RSH or GT_RSZ)
4932 // Remove unnecessary shift count masking, xarch shift instructions
4933 // mask the shift count to 5 bits (or 6 bits for 64 bit operations).
4935 void Lowering::LowerShift(GenTreeOp* shift)
4937 assert(shift->OperIs(GT_LSH, GT_RSH, GT_RSZ));
// 64-bit shifts are treated differently from 32-bit ones; long-typed shifts are only
// expected on 64-bit targets (see the assert below).
4940 #ifdef _TARGET_64BIT_
4941 if (varTypeIsLong(shift->TypeGet()))
4946 assert(!varTypeIsLong(shift->TypeGet()));
// Peel off a chain of GT_AND nodes on the shift count. Each one must have a constant second
// operand that keeps every bit of 'mask'; such an AND is replaced by its first operand.
4949 for (GenTree* andOp = shift->gtGetOp2(); andOp->OperIs(GT_AND); andOp = andOp->gtGetOp1())
4951 GenTree* maskOp = andOp->gtGetOp2();
4953 if (!maskOp->IsCnsIntOrI())
4958 if ((static_cast<size_t>(maskOp->AsIntCon()->IconValue()) & mask) != mask)
// Bypass the AND and drop both it and its constant from the block.
4963 shift->gtOp2 = andOp->gtGetOp1();
4964 BlockRange().Remove(andOp);
4965 BlockRange().Remove(maskOp);
4966 // The parent was replaced, clear contain and regOpt flag.
4967 shift->gtOp2->ClearContained();
4969 ContainCheckShiftRotate(shift);
//------------------------------------------------------------------------
// WidenSIMD12IfNecessary: Retype a TYP_SIMD12 local variable node as TYP_SIMD16
//    when the local it refers to can be treated as 16 bytes wide.
//
// Arguments:
//    node - the local variable node to examine
//
// Notes:
//    Only nodes of type TYP_SIMD12 are considered, and the type is changed only if
//    comp->lvaMapSimd12ToSimd16 accepts the node's local.
//
4972 void Lowering::WidenSIMD12IfNecessary(GenTreeLclVarCommon* node)
4975 if (node->TypeGet() == TYP_SIMD12)
4978 // RyuJit backend depends on the assumption that on 64-Bit targets Vector3 size is rounded off
4979 // to TARGET_POINTER_SIZE and hence Vector3 locals on stack can be treated as TYP_SIMD16 for
4980 // reading and writing purposes.
4983 // RyuJit backend is making another implicit assumption that Vector3 type args when passed in
4984 // registers or on stack, the upper most 4-bytes will be zero.
4986 // For P/Invoke return and Reverse P/Invoke argument passing, native compiler doesn't guarantee
4987 // that upper 4-bytes of a Vector3 type struct is zero initialized and hence assumption 2 is
4990 // RyuJIT x64 Windows: arguments are treated as passed by ref and hence read/written just 12
4991 // bytes. In case of Vector3 returns, Caller allocates a zero initialized Vector3 local and
4992 // passes it retBuf arg and Callee method writes only 12 bytes to retBuf. For this reason,
4993 // there is no need to clear upper 4-bytes of Vector3 type args.
4995 // RyuJIT x64 Unix: arguments are treated as passed by value and read/written as if TYP_SIMD16.
4996 // Vector3 return values are returned in two return registers and Caller assembles them into a
4997 // single xmm reg. Hence RyuJIT explicitly generates code to clear upper 4-bytes of Vector3
4998 // type args in prolog and Vector3 type return value of a call
5000 // RyuJIT x86 Windows: all non-param Vector3 local vars are allocated as 16 bytes. Vector3 arguments
5001 // are pushed as 12 bytes. For return values, a 16-byte local is allocated and the address passed
5002 // as a return buffer pointer. The callee doesn't write the high 4 bytes, and we don't need to clear
5005 unsigned varNum = node->AsLclVarCommon()->GetLclNum();
5006 LclVarDsc* varDsc = &comp->lvaTable[varNum];
5008 if (comp->lvaMapSimd12ToSimd16(varDsc))
5010 JITDUMP("Mapping TYP_SIMD12 lclvar node to TYP_SIMD16:\n");
5012 JITDUMP("============");
5014 node->gtType = TYP_SIMD16;
5017 #endif // FEATURE_SIMD
5020 //------------------------------------------------------------------------
5021 // LowerArrElem: Lower a GT_ARR_ELEM node
5024 // node - the GT_ARR_ELEM node to lower.
5027 // The next node to lower.
5030 // 'node' must be a GT_ARR_ELEM node (AsArrElem() asserts otherwise).
5033 // This performs the following lowering. We start with a node of the form:
5039 // First, we create temps for arrObj if it is not already a lclVar, and for any of the index
5040 // expressions that have side-effects.
5041 // We then transform the tree into:
5042 // <offset is null - no accumulated offset for the first index>
5045 // /--* ArrIndex[i, ]
5047 // /--| arrOffs[i, ]
5050 // +--* ArrIndex[*,j]
5052 // /--| arrOffs[*,j]
5053 // +--* lclVar NewTemp
5054 // /--* lea (scale = element size, offset = offset of first element)
5056 // The new stmtExpr may be omitted if the <arrObj> is a lclVar.
5057 // The new stmtExpr may be embedded if the <arrObj> is not the first tree in linear order for
5058 // the statement containing the original arrMD.
5059 // Note that the arrMDOffs is the INDEX of the lea, but is evaluated before the BASE (which is the second
5060 // reference to NewTemp), because that provides more accurate lifetimes.
5061 // There may be 1, 2 or 3 dimensions, with 1, 2 or 3 arrMDIdx nodes, respectively.
5063 GenTree* Lowering::LowerArrElem(GenTree* node)
5065 // This will assert if we don't have an ArrElem node
5066 GenTreeArrElem* arrElem = node->AsArrElem();
5067 const unsigned char rank = arrElem->gtArrElem.gtArrRank;
// NOTE(review): blockWeight is not referenced by any line of this function visible here - confirm whether it is still needed.
5068 const unsigned blockWeight = m_block->getBBWeight(comp);
5070 JITDUMP("Lowering ArrElem\n");
5071 JITDUMP("============\n");
5072 DISPTREERANGE(BlockRange(), arrElem);
5075 assert(arrElem->gtArrObj->TypeGet() == TYP_REF);
5077 // We need to have the array object in a lclVar.
5078 if (!arrElem->gtArrObj->IsLocal())
// Replace the array object operand with a use of a local variable.
5080 LIR::Use arrObjUse(BlockRange(), &arrElem->gtArrObj, arrElem);
5081 ReplaceWithLclVar(arrObjUse);
// Re-read the operand: it may have just been replaced above.
5084 GenTree* arrObjNode = arrElem->gtArrObj;
5085 assert(arrObjNode->IsLocal());
// NOTE(review): varDsc is not referenced by any line of this function visible here - confirm whether it is still needed.
5087 LclVarDsc* const varDsc = &comp->lvaTable[arrElem->gtArrObj->AsLclVarCommon()->gtLclNum];
// Every node created below is inserted immediately before the original arrElem node.
5089 GenTree* insertionPoint = arrElem;
5091 // The first ArrOffs node will have 0 for the offset of the previous dimension.
5092 GenTree* prevArrOffs = new (comp, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, 0);
5093 BlockRange().InsertBefore(insertionPoint, prevArrOffs);
// Remember the first newly inserted node; presumably this is the value returned to the caller
// (the return statement is not visible in this listing - confirm).
5094 GenTree* nextToLower = prevArrOffs;
// Build one ArrIndex/ArrOffs pair per array dimension, chaining each ArrOffs to the previous one.
5096 for (unsigned char dim = 0; dim < rank; dim++)
5098 GenTree* indexNode = arrElem->gtArrElem.gtArrInds[dim];
5100 // Use the original arrObjNode on the 0th ArrIndex node, and clone it for subsequent ones.
5101 GenTree* idxArrObjNode;
// NOTE(review): the condition selecting between the two assignments below is not visible in this
// listing; per the comment above it is presumably a test for the 0th dimension - confirm.
5104 idxArrObjNode = arrObjNode;
5108 idxArrObjNode = comp->gtClone(arrObjNode);
5109 BlockRange().InsertBefore(insertionPoint, idxArrObjNode);
5112 // Next comes the GT_ARR_INDEX node.
5113 GenTreeArrIndex* arrMDIdx = new (comp, GT_ARR_INDEX)
5114 GenTreeArrIndex(TYP_INT, idxArrObjNode, indexNode, dim, rank, arrElem->gtArrElem.gtArrElemType);
// The new node inherits the effect flags (GTF_ALL_EFFECT) of both of its operands.
5115 arrMDIdx->gtFlags |= ((idxArrObjNode->gtFlags | indexNode->gtFlags) & GTF_ALL_EFFECT);
5116 BlockRange().InsertBefore(insertionPoint, arrMDIdx);
// The ArrOffs node takes its own clone of the array object as an operand.
5118 GenTree* offsArrObjNode = comp->gtClone(arrObjNode);
5119 BlockRange().InsertBefore(insertionPoint, offsArrObjNode);
5121 GenTreeArrOffs* arrOffs =
5122 new (comp, GT_ARR_OFFSET) GenTreeArrOffs(TYP_I_IMPL, prevArrOffs, arrMDIdx, offsArrObjNode, dim, rank,
5123 arrElem->gtArrElem.gtArrElemType);
// Likewise, ArrOffs inherits the effect flags of all three operands.
5124 arrOffs->gtFlags |= ((prevArrOffs->gtFlags | arrMDIdx->gtFlags | offsArrObjNode->gtFlags) & GTF_ALL_EFFECT);
5125 BlockRange().InsertBefore(insertionPoint, arrOffs);
// This dimension's offset becomes the "previous offset" operand for the next dimension.
5127 prevArrOffs = arrOffs;
5130 // Generate the LEA and make it reverse evaluation, because we want to evaluate the index expression before the
5132 unsigned scale = arrElem->gtArrElem.gtArrElemSize;
5133 unsigned offset = comp->eeGetMDArrayDataOffset(arrElem->gtArrElem.gtArrElemType, arrElem->gtArrElem.gtArrRank);
// The accumulated offset of the last dimension is the index operand of the LEA.
5135 GenTree* leaIndexNode = prevArrOffs;
// When jitIsScaleIndexMul rejects the element size as a scale, multiply explicitly instead.
5136 if (!jitIsScaleIndexMul(scale))
5138 // We do the address arithmetic in TYP_I_IMPL, though note that the lower bounds and lengths in memory are
5140 GenTree* scaleNode = new (comp, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, scale);
5141 GenTree* mulNode = new (comp, GT_MUL) GenTreeOp(GT_MUL, TYP_I_IMPL, leaIndexNode, scaleNode);
5142 BlockRange().InsertBefore(insertionPoint, scaleNode, mulNode);
5143 leaIndexNode = mulNode;
// The LEA base is another clone of the array object local.
5147 GenTree* leaBase = comp->gtClone(arrObjNode);
5148 BlockRange().InsertBefore(insertionPoint, leaBase);
5150 GenTree* leaNode = new (comp, GT_LEA) GenTreeAddrMode(arrElem->TypeGet(), leaBase, leaIndexNode, scale, offset);
5152 BlockRange().InsertBefore(insertionPoint, leaNode);
// If arrElem has a user, redirect that use to the new LEA.
5154 LIR::Use arrElemUse;
5155 if (BlockRange().TryGetUse(arrElem, &arrElemUse))
5157 arrElemUse.ReplaceWith(comp, leaNode);
// NOTE(review): presumably the 'else' path (no user found); the 'else' itself is not visible here - confirm.
5161 leaNode->SetUnusedValue();
// The original GT_ARR_ELEM node is no longer needed.
5164 BlockRange().Remove(arrElem);
5166 JITDUMP("Results of lowering ArrElem:\n");
5167 DISPTREERANGE(BlockRange(), leaNode);
//------------------------------------------------------------------------
// DoPhase: Entry point of the lowering phase.
//
// Notes:
//    From the lines visible here: inserts the PInvoke method prolog when the method
//    calls unmanaged code, walks every basic block (decomposing longs on non-64-bit
//    targets), then recomputes local var ref counts and runs local var liveness,
//    and finally asserts that each block's LIR is well formed.
//
5173 void Lowering::DoPhase()
5175 // If we have any PInvoke calls, insert the one-time prolog code. We'll insert the epilog code in the
5176 // appropriate spots later. NOTE: there is a minor optimization opportunity here, as we still create p/invoke
5177 // data structures and setup/teardown even if we've eliminated all p/invoke calls due to dead code elimination.
5178 if (comp->info.compCallUnmanaged)
5180 InsertPInvokeMethodProlog();
5183 #if !defined(_TARGET_64BIT_)
5184 DecomposeLongs decomp(comp); // Initialize the long decomposition class.
// Decomposition is only prepared when the method actually uses longs.
5185 if (comp->compLongUsed)
5187 decomp.PrepareForDecomposition();
5189 #endif // !defined(_TARGET_64BIT_)
// Visit every basic block in bbNext order, starting at fgFirstBB.
5191 for (BasicBlock* block = comp->fgFirstBB; block; block = block->bbNext)
5193 /* Make the block publicly available */
5194 comp->compCurBB = block;
5196 #if !defined(_TARGET_64BIT_)
5197 if (comp->compLongUsed)
5199 decomp.DecomposeBlock(block);
5201 #endif //!_TARGET_64BIT_
5207 JITDUMP("Lower has completed modifying nodes.\n");
5210 comp->fgDispBasicBlocks(true);
5214 // Recompute local var ref counts before potentially sorting for liveness.
5215 // Note this does minimal work in cases where we are not going to sort.
5216 const bool isRecompute = true;
5217 const bool setSlotNumbers = false;
5218 comp->lvaComputeRefCounts(isRecompute, setSlotNumbers);
5220 comp->fgLocalVarLiveness();
5221 // local var liveness can delete code, which may create empty blocks
// The flow graph cleanup below is skipped for MinOpts and debuggable code.
5222 if (!comp->opts.MinOpts() && !comp->opts.compDbgCode)
5224 comp->optLoopsMarked = false;
5225 bool modified = comp->fgUpdateFlowGraph();
// NOTE(review): the second liveness pass is presumably guarded by 'modified'; the guard is not visible here - confirm.
5228 JITDUMP("had to run another liveness pass:\n");
5229 comp->fgLocalVarLiveness();
5233 // Recompute local var ref counts again after liveness to reflect
5234 // impact of any dead code removal. Note this may leave us with
5235 // tracked vars that have zero refs.
5236 comp->lvaComputeRefCounts(isRecompute, setSlotNumbers);
5239 JITDUMP("Liveness pass finished after lowering, IR:\n");
5242 comp->fgDispBasicBlocks(true);
// Final sanity pass: assert that every block's LIR range passes CheckLIR.
5245 for (BasicBlock* block = comp->fgFirstBB; block; block = block->bbNext)
5247 assert(LIR::AsRange(block).CheckLIR(comp, true));
5254 //------------------------------------------------------------------------
5255 // Lowering::CheckCallArg: check that a call argument is in an expected
5256 // form after lowering.
5259 // arg - the argument to check.
//
// Notes:
//    All checks are expressed as asserts.
5261 void Lowering::CheckCallArg(GenTree* arg)
// Arguments that are neither values nor PUTARG_STK nodes must be one of the forms asserted below.
5263 if (!arg->IsValue() && !arg->OperIsPutArgStk())
5265 assert((arg->OperIsStore() && !arg->IsValue()) || arg->IsArgPlaceHolderNode() || arg->IsNothingNode() ||
5266 arg->OperIsCopyBlkOp());
// Remaining arguments are checked according to their operator (the case labels are not visible in this listing).
5270 switch (arg->OperGet())
// A field list must be a contained list head whose every entry is a PUTARG node.
5274 GenTreeFieldList* list = arg->AsFieldList();
5275 assert(list->isContained());
5276 assert(list->IsFieldListHead());
5278 for (; list != nullptr; list = list->Rest())
5280 assert(list->Current()->OperIsPutArg());
// Any other argument must itself be a PUTARG node.
5286 assert(arg->OperIsPutArg());
5291 //------------------------------------------------------------------------
5292 // Lowering::CheckCall: check that a call is in an expected form after
5293 // lowering. Currently this amounts to checking its
5294 // arguments, but could be expanded to verify more
5295 // properties in the future.
5298 // call - the call to check.
5300 void Lowering::CheckCall(GenTreeCall* call)
// Check gtCallObjp, if the call has one.
5302 if (call->gtCallObjp != nullptr)
5304 CheckCallArg(call->gtCallObjp);
// Check every entry of the regular argument list.
5307 for (GenTreeArgList* args = call->gtCallArgs; args != nullptr; args = args->Rest())
5309 CheckCallArg(args->Current());
// Check every entry of the late argument list.
5312 for (GenTreeArgList* args = call->gtCallLateArgs; args != nullptr; args = args->Rest())
5314 CheckCallArg(args->Current());
5318 //------------------------------------------------------------------------
5319 // Lowering::CheckNode: check that an LIR node is in an expected form
5323 // compiler - the compiler context.
5324 // node - the node to check.
//
// Notes:
//    All checks are expressed as asserts. The case labels selecting each check
//    are only partially visible in this listing.
5326 void Lowering::CheckNode(Compiler* compiler, GenTree* node)
5328 switch (node->OperGet())
// Calls are validated through their arguments.
5331 CheckCall(node->AsCall());
// For the operators handled by this case, the node must not be typed TYP_SIMD12.
5336 assert(node->TypeGet() != TYP_SIMD12);
5338 #ifdef _TARGET_64BIT_
5340 case GT_STORE_LCL_VAR:
// On 64-bit targets a TYP_SIMD12 node is accepted here only when the local is a field of a
// dependently promoted struct.
5342 unsigned lclNum = node->AsLclVarCommon()->GetLclNum();
5343 LclVarDsc* lclVar = &compiler->lvaTable[lclNum];
5344 assert(node->TypeGet() != TYP_SIMD12 || compiler->lvaIsFieldOfDependentlyPromotedStruct(lclVar));
5347 #endif // _TARGET_64BIT_
5355 //------------------------------------------------------------------------
5356 // Lowering::CheckBlock: check that the contents of an LIR block are in an
5357 // expected form after lowering.
5360 // compiler - the compiler context.
5361 // block - the block to check.
//
// Notes:
//    The checks themselves are asserts. The function is declared to return bool,
//    but its return statement is not visible in this listing.
5363 bool Lowering::CheckBlock(Compiler* compiler, BasicBlock* block)
5365 assert(block->isEmpty() || block->IsLIR());
// Check each node of the block's LIR range individually.
5367 LIR::Range& blockRange = LIR::AsRange(block);
5368 for (GenTree* node : blockRange)
5370 CheckNode(compiler, node);
// Then validate the range as a whole.
5373 assert(blockRange.CheckLIR(compiler, true));
//------------------------------------------------------------------------
// LowerBlock: Lower every node of a basic block.
//
// Arguments:
//    block - the block to lower; it must already be comp->compCurBB and must be
//            empty or in LIR form (both are asserted).
//
5378 void Lowering::LowerBlock(BasicBlock* block)
5380 assert(block == comp->compCurBB); // compCurBB must already be set.
5381 assert(block->isEmpty() || block->IsLIR());
5385 // NOTE: some of the lowering methods insert calls before the node being
5386 // lowered (See e.g. InsertPInvoke{Method,Call}{Prolog,Epilog}). In
5387 // general, any code that is inserted before the current node should be
5388 // "pre-lowered" as they won't be subject to further processing.
5389 // Lowering::CheckBlock() runs some extra checks on call arguments in
5390 // order to help catch unlowered nodes.
// LowerNode returns the node to continue from; iteration stops when it returns nullptr.
5392 GenTree* node = BlockRange().FirstNode();
5393 while (node != nullptr)
5395 node = LowerNode(node);
// Validate the lowered block (assert-only check).
5398 assert(CheckBlock(comp, block));
5401 /** Verifies if both of these trees represent the same indirection.
5402 * Used by Lower to annotate if CodeGen generate an instruction of the
5403 * form *addrMode BinOp= expr
5405 * Preconditions: both trees are children of GT_INDs and their underlying children
5406 * have the same gtOper.
5408 * This is a first iteration to actually recognize trees that can be code-generated
5409 * as a single read-modify-write instruction on AMD64/x86. For now
5410 * this method only supports the recognition of simple addressing modes (through GT_LEA)
5411 * or local var indirections. Local fields, array access and other more complex nodes are
5412 * not yet supported.
5414 * TODO-CQ: Perform tree recognition by using the Value Numbering Package, that way we can recognize
5415 * arbitrary complex trees and support much more addressing patterns.
 *
 * Arguments:
 *    candidate - the load; asserted to be a GT_IND node.
 *    storeInd  - the store; asserted to be a GT_STOREIND node.
5417 bool Lowering::IndirsAreEquivalent(GenTree* candidate, GenTree* storeInd)
5419 assert(candidate->OperGet() == GT_IND);
5420 assert(storeInd->OperGet() == GT_STOREIND);
5422 // We should check the size of the indirections. If they are
5423 // different, say because of a cast, then we can't call them equivalent. Doing so could cause us
5425 // Signed-ness difference is okay and expected since a store indirection must always
5426 // be signed based on the CIL spec, but a load could be unsigned.
5427 if (genTypeSize(candidate->gtType) != genTypeSize(storeInd->gtType))
// Compare the address operands (op1) of the two indirections.
5432 GenTree* pTreeA = candidate->gtGetOp1();
5433 GenTree* pTreeB = storeInd->gtGetOp1();
5435 // This method will be called by codegen (as well as during lowering).
5436 // After register allocation, the sources may have been spilled and reloaded
5437 // to a different register, indicated by an inserted GT_RELOAD node.
5438 pTreeA = pTreeA->gtSkipReloadOrCopy();
5439 pTreeB = pTreeB->gtSkipReloadOrCopy();
// The two address trees must have the same operator to be comparable.
5443 if (pTreeA->OperGet() != pTreeB->OperGet())
5448 oper = pTreeA->OperGet();
5452 case GT_LCL_VAR_ADDR:
5453 case GT_CLS_VAR_ADDR:
// Leaf addresses are compared directly.
5455 return NodesAreEquivalentLeaves(pTreeA, pTreeB);
// Address modes match when base, index, scale and offset all match.
5459 GenTreeAddrMode* gtAddr1 = pTreeA->AsAddrMode();
5460 GenTreeAddrMode* gtAddr2 = pTreeB->AsAddrMode();
5461 return NodesAreEquivalentLeaves(gtAddr1->Base(), gtAddr2->Base()) &&
5462 NodesAreEquivalentLeaves(gtAddr1->Index(), gtAddr2->Index()) &&
5463 (gtAddr1->gtScale == gtAddr2->gtScale) && (gtAddr1->Offset() == gtAddr2->Offset());
5466 // We don't handle anything that is not either a constant,
5467 // a local var or LEA.
5472 /** Test whether the two given nodes are the same leaves.
5473 * Right now, only constant integers and local variables are supported
 *
 * Arguments:
 *    tree1, tree2 - the nodes to compare; either or both may be nullptr.
5475 bool Lowering::NodesAreEquivalentLeaves(GenTree* tree1, GenTree* tree2)
5477 if (tree1 == nullptr && tree2 == nullptr)
5482 // both null, they are equivalent, otherwise if either is null not equivalent
5483 if (tree1 == nullptr || tree2 == nullptr)
// Compare the underlying nodes, skipping over any reload/copy nodes.
5488 tree1 = tree1->gtSkipReloadOrCopy();
5489 tree2 = tree2->gtSkipReloadOrCopy();
// Type and operator are both compared before any operator-specific comparison.
5491 if (tree1->TypeGet() != tree2->TypeGet())
5496 if (tree1->OperGet() != tree2->OperGet())
// Both nodes are tested for being leaves.
5501 if (!tree1->OperIsLeaf() || !tree2->OperIsLeaf())
// Operator-specific comparison (some case labels are not visible in this listing).
5506 switch (tree1->OperGet())
// Integer constants: same value and same handle-ness.
5509 return tree1->gtIntCon.gtIconVal == tree2->gtIntCon.gtIconVal &&
5510 tree1->IsIconHandle() == tree2->IsIconHandle();
5512 case GT_LCL_VAR_ADDR:
// Locals: same local number.
5513 return tree1->gtLclVarCommon.gtLclNum == tree2->gtLclVarCommon.gtLclNum;
5514 case GT_CLS_VAR_ADDR:
// Class (static) variables: same handle.
5515 return tree1->gtClsVar.gtClsVarHnd == tree2->gtClsVar.gtClsVarHnd;
5521 //------------------------------------------------------------------------
5522 // Containment Analysis
5523 //------------------------------------------------------------------------
//------------------------------------------------------------------------
// ContainCheckNode: dispatch to the operator-specific ContainCheck* method for 'node'.
//
// Arguments:
//    node - the node whose operator selects the containment check to run.
//
// Notes:
//    Most case labels of the switch are not visible in this listing, so the
//    operator that selects each call below cannot be confirmed from here.
//
5524 void Lowering::ContainCheckNode(GenTree* node)
5526 switch (node->gtOper)
5528 case GT_STORE_LCL_VAR:
5529 case GT_STORE_LCL_FLD:
5530 ContainCheckStoreLoc(node->AsLclVarCommon());
5543 ContainCheckCompare(node->AsOp());
5547 ContainCheckJTrue(node->AsOp());
5552 #if !defined(_TARGET_64BIT_)
5561 ContainCheckBinary(node->AsOp());
5564 #if defined(_TARGET_X86_)
5569 ContainCheckMul(node->AsOp());
5575 ContainCheckDivOrMod(node->AsOp());
5582 #ifndef _TARGET_64BIT_
5586 ContainCheckShiftRotate(node->AsOp());
5589 ContainCheckArrOffset(node->AsArrOffs());
5592 ContainCheckLclHeap(node->AsOp());
5595 ContainCheckRet(node->AsOp());
5598 ContainCheckReturnTrap(node->AsOp());
5601 ContainCheckStoreIndir(node->AsIndir());
5603 ContainCheckIndir(node->AsIndir());
5607 #if FEATURE_ARG_SPLIT
5608 case GT_PUTARG_SPLIT:
5609 #endif // FEATURE_ARG_SPLIT
5610 // The regNum must have been set by the lowering of the call.
5611 assert(node->gtRegNum != REG_NA);
5613 #ifdef _TARGET_XARCH_
5615 ContainCheckIntrinsic(node->AsOp());
5617 #endif // _TARGET_XARCH_
5620 ContainCheckSIMD(node->AsSIMD());
5622 #endif // FEATURE_SIMD
5623 #ifdef FEATURE_HW_INTRINSICS
5624 case GT_HWIntrinsic:
5625 ContainCheckHWIntrinsic(node->AsHWIntrinsic());
5627 #endif // FEATURE_HW_INTRINSICS
5633 //------------------------------------------------------------------------
5634 // ContainCheckDivOrMod: determine which operands of a div/mod should be contained.
5637 // node - pointer to the GT_DIV/GT_MOD/GT_UDIV/GT_UMOD node
//
// Notes:
//    All containment decisions are made only under _TARGET_XARCH_; on other
//    targets only the operator assert runs.
5639 void Lowering::ContainCheckDivOrMod(GenTreeOp* node)
5641 assert(node->OperIs(GT_DIV, GT_MOD, GT_UDIV, GT_UMOD));
5643 #ifdef _TARGET_XARCH_
5644 GenTree* dividend = node->gtGetOp1();
5645 GenTree* divisor = node->gtGetOp2();
// Floating-point division is handled first.
5647 if (varTypeIsFloating(node->TypeGet()))
5649 // No implicit conversions at this stage as the expectation is that
5650 // everything is made explicit by adding casts.
5651 assert(dividend->TypeGet() == divisor->TypeGet());
// The divisor is contained when it is a containable memory operand or a non-zero
// floating-point constant.
5653 if (IsContainableMemoryOp(divisor) || divisor->IsCnsNonZeroFltOrDbl())
5655 MakeSrcContained(node, divisor);
5659 // If there are no containable operands, we can make an operand reg optional.
5660 // SSE2 allows only divisor to be a memory-op.
5661 divisor->SetRegOptional();
// Integer division from here on.
5665 bool divisorCanBeRegOptional = true;
// A GT_LONG dividend is contained, and in that case the divisor is not made reg-optional.
5667 if (dividend->OperGet() == GT_LONG)
5669 divisorCanBeRegOptional = false;
5670 MakeSrcContained(node, dividend);
5674 // divisor can be an r/m, but the memory indirection must be of the same size as the divide
5675 if (IsContainableMemoryOp(divisor) && (divisor->TypeGet() == node->TypeGet()))
5677 MakeSrcContained(node, divisor);
5679 else if (divisorCanBeRegOptional)
5681 // If there are no containable operands, we can make an operand reg optional.
5682 // Div instruction allows only divisor to be a memory op.
5683 divisor->SetRegOptional();
5685 #endif // _TARGET_XARCH_
5688 //------------------------------------------------------------------------
5689 // ContainCheckReturnTrap: determine whether the source of a RETURNTRAP should be contained.
5692 // node - pointer to the GT_RETURNTRAP node
//
// Notes:
//    The whole body is compiled only under _TARGET_XARCH_.
5694 void Lowering::ContainCheckReturnTrap(GenTreeOp* node)
5696 #ifdef _TARGET_XARCH_
5697 assert(node->OperIs(GT_RETURNTRAP));
5698 // This just turns into a compare of its child with an int + a conditional call
// The operand is contained only when it is an indirection.
5699 if (node->gtOp1->isIndir())
5701 MakeSrcContained(node, node->gtOp1);
5703 #endif // _TARGET_XARCH_
5706 //------------------------------------------------------------------------
5707 // ContainCheckArrOffset: determine whether the source of an ARR_OFFSET should be contained.
5710 // node - pointer to the GT_ARR_OFFSET node
//
// Notes:
//    Only the offset operand is considered, and only when it is the integral constant 0.
5712 void Lowering::ContainCheckArrOffset(GenTreeArrOffs* node)
5714 assert(node->OperIs(GT_ARR_OFFSET));
5715 // we don't want to generate code for this
5716 if (node->gtOffset->IsIntegralConst(0))
5718 MakeSrcContained(node, node->gtArrOffs.gtOffset);
5722 //------------------------------------------------------------------------
5723 // ContainCheckLclHeap: determine whether the source of a GT_LCLHEAP node should be contained.
5726 // node - pointer to the node
//
// Notes:
//    The size operand (op1) is contained when it is an integer constant.
5728 void Lowering::ContainCheckLclHeap(GenTreeOp* node)
5730 assert(node->OperIs(GT_LCLHEAP));
5731 GenTree* size = node->gtOp.gtOp1;
5732 if (size->IsCnsIntOrI())
5734 MakeSrcContained(node, size);
5738 //------------------------------------------------------------------------
5739 // ContainCheckRet: determine whether the source of a GT_RETURN node should be contained.
5742 // ret - pointer to the GT_RETURN node
5744 void Lowering::ContainCheckRet(GenTreeOp* ret)
5746 assert(ret->OperIs(GT_RETURN));
5748 #if !defined(_TARGET_64BIT_)
// On non-64-bit targets a TYP_LONG return must have a GT_LONG operand, which is contained.
5749 if (ret->TypeGet() == TYP_LONG)
5751 GenTree* op1 = ret->gtGetOp1();
5752 noway_assert(op1->OperGet() == GT_LONG);
5753 MakeSrcContained(ret, op1);
5755 #endif // !defined(_TARGET_64BIT_)
5756 #if FEATURE_MULTIREG_RET
// Struct-typed returns (only when multi-reg returns are supported).
5757 if (varTypeIsStruct(ret))
5759 GenTree* op1 = ret->gtGetOp1();
5760 // op1 must be either a lclvar or a multi-reg returning call
5761 if (op1->OperGet() == GT_LCL_VAR)
5763 GenTreeLclVarCommon* lclVarCommon = op1->AsLclVarCommon();
5764 LclVarDsc* varDsc = &(comp->lvaTable[lclVarCommon->gtLclNum]);
5765 assert(varDsc->lvIsMultiRegRet);
5767 // Mark var as contained if not enregistrable.
5768 if (!varTypeIsEnregisterableStruct(op1))
5770 MakeSrcContained(ret, op1);
5774 #endif // FEATURE_MULTIREG_RET
5777 //------------------------------------------------------------------------
5778 // ContainCheckJTrue: determine whether the source of a JTRUE should be contained.
5781 // node - pointer to the node
//
// Notes:
//    The visible lines do not call MakeSrcContained; they retype the operand
//    (op1) to TYP_VOID and set GTF_SET_FLAGS on it.
5783 void Lowering::ContainCheckJTrue(GenTreeOp* node)
5785 // The compare does not need to be generated into a register.
5786 GenTree* cmp = node->gtGetOp1();
5787 cmp->gtType = TYP_VOID;
5788 cmp->gtFlags |= GTF_SET_FLAGS;