1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
10 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
11 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
17 #pragma warning(disable : 4701)
20 /*****************************************************************************/
24 unsigned Compiler::optRangeChkRmv = 0;
26 unsigned Compiler::optRangeChkAll = 0;
29 /*****************************************************************************/
31 void Compiler::optInit()
33 optLoopsMarked = false;
36 /* Initialize the # of tracked loops to 0 */
38 /* Keep track of the number of calls and indirect calls made by this method */
40 optIndirectCallCount = 0;
41 optNativeCallCount = 0;
42 optAssertionCount = 0;
43 optAssertionDep = nullptr;
45 optCSECandidateTotal = 0;
46 optCSEstart = UINT_MAX;
48 #endif // FEATURE_ANYCSE
51 DataFlow::DataFlow(Compiler* pCompiler) : m_pCompiler(pCompiler)
55 /*****************************************************************************
59 void Compiler::optSetBlockWeights()
61 noway_assert(!opts.MinOpts() && !opts.compDbgCode);
62 assert(fgDomsComputed);
68 bool firstBBdomsRets = true;
72 for (block = fgFirstBB; (block != nullptr); block = block->bbNext)
74 /* Blocks that can't be reached via the first block are rarely executed */
75 if (!fgReachable(fgFirstBB, block))
77 block->bbSetRunRarely();
80 if (block->bbWeight != BB_ZERO_WEIGHT)
82 // Calculate our bbWeight:
84 // o BB_UNITY_WEIGHT if we dominate all BBJ_RETURN blocks
85 // o otherwise BB_UNITY_WEIGHT / 2
87 bool domsRets = true; // Assume that we will dominate
89 for (BasicBlockList* retBlocks = fgReturnBlocks; retBlocks != nullptr; retBlocks = retBlocks->next)
91 if (!fgDominate(block, retBlocks->block))
98 if (block == fgFirstBB)
100 firstBBdomsRets = domsRets;
103 // If we are not using profile weight then we lower the weight
104 // of blocks that do not dominate a return block
106 if (firstBBdomsRets && (fgIsUsingProfileWeights() == false) && (domsRets == false))
111 block->modifyBBWeight(block->bbWeight / 2);
112 noway_assert(block->bbWeight);
118 if (changed && verbose)
120 printf("\nAfter optSetBlockWeights:\n");
125 /* Check that the flowgraph data (bbNum, bbRefs, bbPreds) is up-to-date */
126 fgDebugCheckBBlist();
130 /*****************************************************************************
132 * Marks the blocks between 'begBlk' and 'endBlk' as part of a loop.
135 void Compiler::optMarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk, bool excludeEndBlk)
137 /* Calculate the 'loopWeight',
138 this is the amount to increase each block in the loop
139 Our heuristic is that loops are weighted eight times more
140 than straight line code.
141    Thus we increase each block's weight by 7 times the weight of
142    the loop header block, which,
143    if the loops are all properly formed, gives us:
144    (assuming that BB_LOOP_WEIGHT is 8)
146       1 -- non-loop basic block
147 8 -- single loop nesting
148 64 -- double loop nesting
149 512 -- triple loop nesting
153 noway_assert(begBlk->bbNum <= endBlk->bbNum);
154 noway_assert(begBlk->isLoopHead());
155 noway_assert(fgReachable(begBlk, endBlk));
160 printf("\nMarking loop L%02u", begBlk->bbLoopNum);
164 noway_assert(!opts.MinOpts());
166 /* Build list of backedges for block begBlk */
167 flowList* backedgeList = nullptr;
169 for (flowList* pred = begBlk->bbPreds; pred != nullptr; pred = pred->flNext)
171 /* Is this a backedge? */
172 if (pred->flBlock->bbNum >= begBlk->bbNum)
174 flowList* flow = new (this, CMK_FlowList) flowList();
176 #if MEASURE_BLOCK_SIZE
178 genFlowNodeSize += sizeof(flowList);
179 #endif // MEASURE_BLOCK_SIZE
181 flow->flNext = backedgeList;
182 flow->flBlock = pred->flBlock;
187 /* At least one backedge must have been found (the one from endBlk) */
188 noway_assert(backedgeList);
190 BasicBlock* curBlk = begBlk;
194 noway_assert(curBlk);
196 // For curBlk to be part of a loop that starts at begBlk
197 // curBlk must be reachable from begBlk and (since this is a loop)
198 // likewise begBlk must be reachable from curBlk.
201 if (fgReachable(curBlk, begBlk) && fgReachable(begBlk, curBlk))
203 /* If this block reaches any of the backedge blocks we set reachable */
204 /* If this block dominates any of the backedge blocks we set dominates */
205 bool reachable = false;
206 bool dominates = false;
208 for (flowList* tmp = backedgeList; tmp != nullptr; tmp = tmp->flNext)
210 BasicBlock* backedge = tmp->flBlock;
212 if (!curBlk->isRunRarely())
214 reachable |= fgReachable(curBlk, backedge);
215 dominates |= fgDominate(curBlk, backedge);
217 if (dominates && reachable)
226 noway_assert(curBlk->bbWeight > BB_ZERO_WEIGHT);
230 if (curBlk->hasProfileWeight())
232 // We have real profile weights, so we aren't going to change this block's weight
233 weight = curBlk->bbWeight;
239 weight = curBlk->bbWeight * BB_LOOP_WEIGHT;
243 weight = curBlk->bbWeight * (BB_LOOP_WEIGHT / 2);
247 // The multiplication may have caused us to overflow
249 if (weight < curBlk->bbWeight)
251 // The multiplication caused us to overflow
252 weight = BB_MAX_WEIGHT;
255 // Set the new weight
257 curBlk->modifyBBWeight(weight);
262 printf("\n BB%02u(wt=%s)", curBlk->bbNum, refCntWtd2str(curBlk->getBBWeight(this)));
268 /* Stop if we've reached the last block in the loop */
270 if (curBlk == endBlk)
275 curBlk = curBlk->bbNext;
277 /* If we are excluding the endBlk then stop if we've reached endBlk */
279 if (excludeEndBlk && (curBlk == endBlk))
286 /*****************************************************************************
288 * Unmark the blocks between 'begBlk' and 'endBlk' as part of a loop.
291 void Compiler::optUnmarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk)
293 /* A set of blocks that were previously marked as a loop are now
294    to be unmarked, since we have decided that for some reason this
295    loop no longer exists.
296    Basically we are just resetting the blocks' bbWeight to their previous values.
300 noway_assert(begBlk->bbNum <= endBlk->bbNum);
301 noway_assert(begBlk->isLoopHead());
303 noway_assert(!opts.MinOpts());
306 unsigned backEdgeCount = 0;
308 for (flowList* pred = begBlk->bbPreds; pred != nullptr; pred = pred->flNext)
310 curBlk = pred->flBlock;
312 /* is this a backward edge? (from curBlk to begBlk) */
314 if (begBlk->bbNum > curBlk->bbNum)
319 /* We only consider back-edges that are BBJ_COND or BBJ_ALWAYS for loops */
321 if ((curBlk->bbJumpKind != BBJ_COND) && (curBlk->bbJumpKind != BBJ_ALWAYS))
329 /* Only unmark the loop blocks if we have exactly one loop back edge */
330 if (backEdgeCount != 1)
335 if (backEdgeCount > 0)
337 printf("\nNot removing loop L%02u, due to an additional back edge", begBlk->bbLoopNum);
339 else if (backEdgeCount == 0)
341 printf("\nNot removing loop L%02u, due to no back edge", begBlk->bbLoopNum);
347 noway_assert(backEdgeCount == 1);
348 noway_assert(fgReachable(begBlk, endBlk));
353 printf("\nUnmarking loop L%02u", begBlk->bbLoopNum);
360 noway_assert(curBlk);
362 // For curBlk to be part of a loop that starts at begBlk
363 // curBlk must be reachable from begBlk and (since this is a loop)
364 // likewise begBlk must be reachable from curBlk.
366 if (!curBlk->isRunRarely() && fgReachable(curBlk, begBlk) && fgReachable(begBlk, curBlk))
368 unsigned weight = curBlk->bbWeight;
370 // Don't unmark blocks that are set to BB_MAX_WEIGHT
371 // Don't unmark blocks when we are using profile weights
373 if (!curBlk->isMaxBBWeight() && !curBlk->hasProfileWeight())
375 if (!fgDominate(curBlk, endBlk))
381 /* Merging of blocks can disturb the Dominates
382 information (see RAID #46649) */
383 if (weight < BB_LOOP_WEIGHT)
389 // We can overflow here so check for it
390 if (weight < curBlk->bbWeight)
392 weight = BB_MAX_WEIGHT;
395 assert(weight >= BB_LOOP_WEIGHT);
397 curBlk->modifyBBWeight(weight / BB_LOOP_WEIGHT);
403 printf("\n BB%02u(wt=%s)", curBlk->bbNum, refCntWtd2str(curBlk->getBBWeight(this)));
407 /* Stop if we've reached the last block in the loop */
409 if (curBlk == endBlk)
414 curBlk = curBlk->bbNext;
416 /* Stop if we go past the last block in the loop, as it may have been deleted */
417 if (curBlk->bbNum > endBlk->bbNum)
424 /*****************************************************************************************************
426 * Function called to update the loop table and bbWeight before removing a block
429 void Compiler::optUpdateLoopsBeforeRemoveBlock(BasicBlock* block, bool skipUnmarkLoop)
436 noway_assert(!opts.MinOpts());
438 bool removeLoop = false;
440 /* If an unreachable block was part of a loop entry or bottom then the loop is unreachable */
441 /* Special case: the block was the head of a loop - or pointing to a loop entry */
443 for (unsigned loopNum = 0; loopNum < optLoopCount; loopNum++)
445 /* Some loops may already have been removed by
446  * loop unrolling or conditional folding */
448 if (optLoopTable[loopNum].lpFlags & LPFLG_REMOVED)
453 if (block == optLoopTable[loopNum].lpEntry || block == optLoopTable[loopNum].lpBottom)
455 optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED;
462 printf("\nUpdateLoopsBeforeRemoveBlock Before: ");
463 optPrintLoopInfo(loopNum);
467 /* If the loop is still in the table
468 * any block in the loop must be reachable !!! */
470 noway_assert(optLoopTable[loopNum].lpEntry != block);
471 noway_assert(optLoopTable[loopNum].lpBottom != block);
473 if (optLoopTable[loopNum].lpExit == block)
475 optLoopTable[loopNum].lpExit = nullptr;
476 optLoopTable[loopNum].lpFlags &= ~LPFLG_ONE_EXIT;
480 /* If this points to the actual entry in the loop
481 * then the whole loop may become unreachable */
483 switch (block->bbJumpKind)
486 BasicBlock** jumpTab;
490 if (block->bbNext == optLoopTable[loopNum].lpEntry)
495 if (block->bbJumpKind == BBJ_NONE)
503 noway_assert(block->bbJumpDest);
504 if (block->bbJumpDest == optLoopTable[loopNum].lpEntry)
511 jumpCnt = block->bbJumpSwt->bbsCount;
512 jumpTab = block->bbJumpSwt->bbsDstTab;
516 noway_assert(*jumpTab);
517 if ((*jumpTab) == optLoopTable[loopNum].lpEntry)
521 } while (++jumpTab, --jumpCnt);
530 /* Check if the entry has other predecessors outside the loop
531 * TODO: Replace this when predecessors are available */
533 BasicBlock* auxBlock;
534 for (auxBlock = fgFirstBB; auxBlock; auxBlock = auxBlock->bbNext)
536 /* Ignore blocks in the loop */
538 if (auxBlock->bbNum > optLoopTable[loopNum].lpHead->bbNum &&
539 auxBlock->bbNum <= optLoopTable[loopNum].lpBottom->bbNum)
544 switch (auxBlock->bbJumpKind)
547 BasicBlock** jumpTab;
551 if (auxBlock->bbNext == optLoopTable[loopNum].lpEntry)
556 if (auxBlock->bbJumpKind == BBJ_NONE)
564 noway_assert(auxBlock->bbJumpDest);
565 if (auxBlock->bbJumpDest == optLoopTable[loopNum].lpEntry)
572 jumpCnt = auxBlock->bbJumpSwt->bbsCount;
573 jumpTab = auxBlock->bbJumpSwt->bbsDstTab;
577 noway_assert(*jumpTab);
578 if ((*jumpTab) == optLoopTable[loopNum].lpEntry)
582 } while (++jumpTab, --jumpCnt);
592 optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED;
595 else if (optLoopTable[loopNum].lpHead == block)
597 /* The loop has a new head - Just update the loop table */
598 optLoopTable[loopNum].lpHead = block->bbPrev;
604 printf("\nUpdateLoopsBeforeRemoveBlock After: ");
605 optPrintLoopInfo(loopNum);
610 if ((skipUnmarkLoop == false) && ((block->bbJumpKind == BBJ_ALWAYS) || (block->bbJumpKind == BBJ_COND)) &&
611 (block->bbJumpDest->isLoopHead()) && (block->bbJumpDest->bbNum <= block->bbNum) && fgDomsComputed &&
612 (fgCurBBEpochSize == fgDomBBcount + 1) && fgReachable(block->bbJumpDest, block))
614 optUnmarkLoopBlocks(block->bbJumpDest, block);
620 /*****************************************************************************
622 * Given the beginBlock of the loop, return the index of this loop
626 unsigned Compiler::optFindLoopNumberFromBeginBlock(BasicBlock* begBlk)
630 for (lnum = 0; lnum < optLoopCount; lnum++)
632 if (optLoopTable[lnum].lpHead->bbNext == begBlk)
639 noway_assert(!"Loop number not found.");
644 /*****************************************************************************
646  * Print loop info in a uniform way.
649 void Compiler::optPrintLoopInfo(unsigned loopInd,
654 BasicBlock* lpBottom,
655 unsigned char lpExitCnt,
659 noway_assert(lpHead);
662 // NOTE: we take "loopInd" as an argument instead of using the one
663 // stored in begBlk->bbLoopNum because sometimes begBlk->bbLoopNum
664 // has not been set correctly. For example, in optRecordLoop().
665 // However, in most cases, loops should have been recorded.
666 // Therefore the correct way is to call the Compiler::optPrintLoopInfo(unsigned lnum)
667 // version of this method.
669 printf("L%02u, from BB%02u", loopInd, lpFirst->bbNum);
670 if (lpTop != lpFirst)
672 printf(" (loop top is BB%02u)", lpTop->bbNum);
675 printf(" to BB%02u (Head=BB%02u, Entry=BB%02u, ExitCnt=%d", lpBottom->bbNum, lpHead->bbNum, lpEntry->bbNum,
680 printf(" at BB%02u", lpExit->bbNum);
683 if (parentLoop != BasicBlock::NOT_IN_LOOP)
685 printf(", parent loop = L%02u", parentLoop);
690 /*****************************************************************************
692 * Print loop information given the index of the loop in the loop table.
695 void Compiler::optPrintLoopInfo(unsigned lnum)
697 noway_assert(lnum < optLoopCount);
699 LoopDsc* ldsc = &optLoopTable[lnum]; // lnum is the INDEX to the loop table.
701 optPrintLoopInfo(lnum, ldsc->lpHead, ldsc->lpFirst, ldsc->lpTop, ldsc->lpEntry, ldsc->lpBottom, ldsc->lpExitCnt,
702 ldsc->lpExit, ldsc->lpParent);
707 //------------------------------------------------------------------------
708 // optPopulateInitInfo: Populate loop init info in the loop table.
711 // init - the tree that is supposed to initialize the loop iterator.
712 // iterVar - loop iteration variable.
715 // "false" if the loop table could not be populated with the loop iterVar init info.
718 //  The 'init' tree is checked to see whether its lhs is a local and its rhs is either
719 //  a const or a local.
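//
//  For example (an illustrative sketch of the two accepted shapes, not taken from any
//  particular test case):
//      i = 0;      // ASG(lclVar i, intCon 0)      -> LPFLG_CONST_INIT, lpConstInit = 0
//      i = start;  // ASG(lclVar i, lclVar start)  -> LPFLG_VAR_INIT,   lpVarInit = lclNum of "start"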
721 bool Compiler::optPopulateInitInfo(unsigned loopInd, GenTree* init, unsigned iterVar)
723 // Operator should be =
724 if (init->gtOper != GT_ASG)
729 GenTree* lhs = init->gtOp.gtOp1;
730 GenTree* rhs = init->gtOp.gtOp2;
731 // LHS has to be local and should equal iterVar.
732 if (lhs->gtOper != GT_LCL_VAR || lhs->gtLclVarCommon.gtLclNum != iterVar)
737 // RHS can be constant or local var.
738 // TODO-CQ: CLONE: Add arr length for descending loops.
739 if (rhs->gtOper == GT_CNS_INT && rhs->TypeGet() == TYP_INT)
741 optLoopTable[loopInd].lpFlags |= LPFLG_CONST_INIT;
742 optLoopTable[loopInd].lpConstInit = (int)rhs->gtIntCon.gtIconVal;
744 else if (rhs->gtOper == GT_LCL_VAR)
746 optLoopTable[loopInd].lpFlags |= LPFLG_VAR_INIT;
747 optLoopTable[loopInd].lpVarInit = rhs->gtLclVarCommon.gtLclNum;
756 //----------------------------------------------------------------------------------
757 // optCheckIterInLoopTest: Check if iter var is used in loop test.
760 // test "jtrue" tree or an asg of the loop iter termination condition
761 // from/to blocks (beg, end) which are part of the loop.
762 // iterVar loop iteration variable.
763 // loopInd loop index.
766 // The test tree is parsed to check if "iterVar" matches the lhs of the condition
767 // and the rhs limit is extracted from the "test" tree. The limit information is
768 // added to the loop table.
771 // "false" if the loop table could not be populated with the loop test info or
772 // if the test condition doesn't involve iterVar.
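//
//  For example (illustrative): for a test such as "i < n", opr1 is the iterator "i" and opr2
//  is the limit "n"; a constant, a local not assigned in the loop, or an array length on the
//  limit side sets LPFLG_CONST_LIMIT, LPFLG_VAR_LIMIT or LPFLG_ARRLEN_LIMIT respectively.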
774 bool Compiler::optCheckIterInLoopTest(
775 unsigned loopInd, GenTree* test, BasicBlock* from, BasicBlock* to, unsigned iterVar)
777 // Obtain the relop from the "test" tree.
779 if (test->gtOper == GT_JTRUE)
781 relop = test->gtGetOp1();
785 assert(test->gtOper == GT_ASG);
786 relop = test->gtGetOp2();
789 noway_assert(relop->OperKind() & GTK_RELOP);
791 GenTree* opr1 = relop->gtOp.gtOp1;
792 GenTree* opr2 = relop->gtOp.gtOp2;
797 // Make sure op1 or op2 is the iterVar.
798 if (opr1->gtOper == GT_LCL_VAR && opr1->gtLclVarCommon.gtLclNum == iterVar)
803 else if (opr2->gtOper == GT_LCL_VAR && opr2->gtLclVarCommon.gtLclNum == iterVar)
813 if (iterOp->gtType != TYP_INT)
818 // Mark the iterator node.
819 iterOp->gtFlags |= GTF_VAR_ITERATOR;
821 // Check what type of limit we have - constant, variable or arr-len.
822 if (limitOp->gtOper == GT_CNS_INT)
824 optLoopTable[loopInd].lpFlags |= LPFLG_CONST_LIMIT;
825 if ((limitOp->gtFlags & GTF_ICON_SIMD_COUNT) != 0)
827 optLoopTable[loopInd].lpFlags |= LPFLG_SIMD_LIMIT;
830 else if (limitOp->gtOper == GT_LCL_VAR && !optIsVarAssigned(from, to, nullptr, limitOp->gtLclVarCommon.gtLclNum))
832 optLoopTable[loopInd].lpFlags |= LPFLG_VAR_LIMIT;
834 else if (limitOp->gtOper == GT_ARR_LENGTH)
836 optLoopTable[loopInd].lpFlags |= LPFLG_ARRLEN_LIMIT;
842 // Save the type of the comparison between the iterator and the limit.
843 optLoopTable[loopInd].lpTestTree = relop;
847 //----------------------------------------------------------------------------------
848 // optIsLoopIncrTree: Check if the tree is a loop increment of the form v += 1 or v = v + 1
851 //      incr        The incr tree to be checked, i.e. either an oper-equal (+=, -=, ...)
852 //                  style node or a v = v + 1 style ASG node.
855 //  The incr tree is parsed to check that it updates a single local variable and that the
856 //  update amount is a constant int; if so, that local is taken to be the loop iteration
857 //  variable.
860 // iterVar local num if the iterVar is found, otherwise BAD_VAR_NUM.
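//
//  For example (illustrative): "i += 2" and "i = i + 1" both yield the local number of "i",
//  while an update by a non-constant amount yields BAD_VAR_NUM (see the TODO-CQ below).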
862 unsigned Compiler::optIsLoopIncrTree(GenTree* incr)
865 genTreeOps updateOper;
866 unsigned iterVar = incr->IsLclVarUpdateTree(&incrVal, &updateOper);
867 if (iterVar != BAD_VAR_NUM)
869 // We have v = v op y type asg node.
882 // Increment should be by a const int.
883 // TODO-CQ: CLONE: allow variable increments.
884 if ((incrVal->gtOper != GT_CNS_INT) || (incrVal->TypeGet() != TYP_INT))
893 //----------------------------------------------------------------------------------
894 // optComputeIterInfo: Check that the tree is a loop increment of a lcl that is not assigned elsewhere in the loop.
897 // from, to - are blocks (beg, end) which are part of the loop.
898 // incr - tree that increments the loop iterator. v+=1 or v=v+1.
899 // pIterVar - see return value.
902 //  Returns true if iterVar "v" can be returned in "pIterVar", otherwise returns false.
906 // Check if the "incr" tree is a "v=v+1 or v+=1" type tree and make sure it is not
907 // assigned in the loop.
909 bool Compiler::optComputeIterInfo(GenTree* incr, BasicBlock* from, BasicBlock* to, unsigned* pIterVar)
912 unsigned iterVar = optIsLoopIncrTree(incr);
913 if (iterVar == BAD_VAR_NUM)
917 if (optIsVarAssigned(from, to, incr, iterVar))
919 JITDUMP("iterVar is assigned in loop\n");
927 //----------------------------------------------------------------------------------
928 // optIsLoopTestEvalIntoTemp:
929 // Pattern match if the test tree is computed into a tmp
930 // and the "tmp" is used as jump condition for loop termination.
933 // testStmt - is the JTRUE statement that is of the form: jmpTrue (Vtmp != 0)
934 // where Vtmp contains the actual loop test result.
935 // newStmt - contains the statement that is the actual test stmt involving
936 // the loop iterator.
939 // Returns true if a new test tree can be obtained.
942 //  Check whether the current stmt is a jtrue with (Vtmp != 0) as its condition;
943 //  if so, return the rhs of the def of Vtmp as the "test" node.
946 //  This method just retrieves what it thinks is the "test" node;
947 // the callers are expected to verify that "iterVar" is used in the test.
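//
//  For example (an illustrative sketch of the pattern being matched):
//      tmp = (i < n);        // previous stmt: ASG whose rhs is the actual loop test
//      JTRUE(tmp != 0)       // testStmt; the relop "(i < n)" is returned through newTest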
949 bool Compiler::optIsLoopTestEvalIntoTemp(GenTree* testStmt, GenTree** newTest)
951 GenTree* test = testStmt->gtStmt.gtStmtExpr;
953 if (test->gtOper != GT_JTRUE)
958 GenTree* relop = test->gtGetOp1();
959 noway_assert(relop->OperIsCompare());
961 GenTree* opr1 = relop->gtOp.gtOp1;
962 GenTree* opr2 = relop->gtOp.gtOp2;
964 // Make sure we have jtrue (vtmp != 0)
965 if ((relop->OperGet() == GT_NE) && (opr1->OperGet() == GT_LCL_VAR) && (opr2->OperGet() == GT_CNS_INT) &&
966 opr2->IsIntegralConst(0))
968 // Get the previous statement to get the def (rhs) of Vtmp to see
969 // if the "test" is evaluated into Vtmp.
970 GenTree* prevStmt = testStmt->gtPrev;
971 if (prevStmt == nullptr)
976 GenTree* tree = prevStmt->gtStmt.gtStmtExpr;
977 if (tree->OperGet() == GT_ASG)
979 GenTree* lhs = tree->gtOp.gtOp1;
980 GenTree* rhs = tree->gtOp.gtOp2;
982 // Return as the new test node.
983 if (lhs->gtOper == GT_LCL_VAR && lhs->AsLclVarCommon()->GetLclNum() == opr1->AsLclVarCommon()->GetLclNum())
985 if (rhs->OperIsCompare())
996 //----------------------------------------------------------------------------------
997 // optExtractInitTestIncr:
998 // Extract the "init", "test" and "incr" nodes of the loop.
1001 // head - Loop head block
1002 // bottom - Loop bottom block
1003 // top - Loop top block
1004 // ppInit - The init stmt of the loop if found.
1005 // ppTest - The test stmt of the loop if found.
1006 // ppIncr - The incr stmt of the loop if found.
1009 // The results are put in "ppInit", "ppTest" and "ppIncr" if the method
1010 // returns true. Returns false if the information can't be extracted.
1013 // Check if the "test" stmt is the last stmt in the loop "bottom". If so, the "test"
1014 // stmt is found. Then try to find the "incr" stmt: check the previous stmt of "test"
1015 // to get the "incr" stmt. If it is not found there, it could be a loop of the form shown below:
1018 // +-------<-----------------<-----------+
1021 // BBinit(head) -> BBcond(top) -> BBLoopBody(bottom) ---^
1023 // Check if the "incr" tree is present in the loop "top" node as the last stmt.
1024 // Also check if the "test" tree is assigned to a tmp node and the tmp is used
1025 // in the jtrue condition.
1028 // This method just retrieves what it thinks is the "test" node;
1029 // the callers are expected to verify that "iterVar" is used in the test.
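//
//  For example (illustrative): for a source loop "for (i = 0; i < n; i++) {...}" shaped as a
//  do-while, "i = 0" is expected as the last statement of "head", and "i++" followed by the
//  JTRUE of "i < n" as the last two statements of "bottom".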
1031 bool Compiler::optExtractInitTestIncr(
1032 BasicBlock* head, BasicBlock* bottom, BasicBlock* top, GenTree** ppInit, GenTree** ppTest, GenTree** ppIncr)
1034 assert(ppInit != nullptr);
1035 assert(ppTest != nullptr);
1036 assert(ppIncr != nullptr);
1038 // Check if last two statements in the loop body are the increment of the iterator
1039 // and the loop termination test.
1040 noway_assert(bottom->bbTreeList != nullptr);
1041 GenTree* test = bottom->bbTreeList->gtPrev;
1042 noway_assert(test != nullptr && test->gtNext == nullptr);
1045 if (optIsLoopTestEvalIntoTemp(test, &newTest))
1050 // Check if we have the incr tree before the test tree, if we don't,
1051 // check if incr is part of the loop "top".
1052 GenTree* incr = test->gtPrev;
1053 if (incr == nullptr || optIsLoopIncrTree(incr->gtStmt.gtStmtExpr) == BAD_VAR_NUM)
1055 if (top == nullptr || top->bbTreeList == nullptr || top->bbTreeList->gtPrev == nullptr)
1060 // If the prev stmt to loop test is not incr, then check if we have loop test evaluated into a tmp.
1061 GenTree* topLast = top->bbTreeList->gtPrev;
1062 if (optIsLoopIncrTree(topLast->gtStmt.gtStmtExpr) != BAD_VAR_NUM)
1072 assert(test != incr);
1074 // Find the last statement in the loop pre-header which we expect to be the initialization of
1075 // the loop iterator.
1076 GenTree* phdr = head->bbTreeList;
1077 if (phdr == nullptr)
1082 GenTree* init = phdr->gtPrev;
1083 noway_assert(init != nullptr && (init->gtNext == nullptr));
1085 // If it is a duplicated loop condition, skip it.
1086 if (init->gtFlags & GTF_STMT_CMPADD)
1088 bool doGetPrev = true;
1092 // Previous optimization passes may have inserted compiler-generated
1093 // statements other than duplicated loop conditions.
1094 doGetPrev = (init->gtPrev != nullptr);
1098 // Must be a duplicated loop condition.
1099 noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
1104 init = init->gtPrev;
1106 noway_assert(init != nullptr);
1109 noway_assert(init->gtOper == GT_STMT);
1110 noway_assert(test->gtOper == GT_STMT);
1111 noway_assert(incr->gtOper == GT_STMT);
1113 *ppInit = init->gtStmt.gtStmtExpr;
1114 *ppTest = test->gtStmt.gtStmtExpr;
1115 *ppIncr = incr->gtStmt.gtStmtExpr;
1120 /*****************************************************************************
1122 * Record the loop in the loop table. Return true if successful, false if
1123 * out of entries in loop table.
1126 bool Compiler::optRecordLoop(BasicBlock* head,
1132 unsigned char exitCnt)
1134 // Record this loop in the table, if there's room.
1136 assert(optLoopCount <= MAX_LOOP_NUM);
1137 if (optLoopCount == MAX_LOOP_NUM)
1140 loopOverflowThisMethod = true;
1145 // Assumed preconditions on the loop we're adding.
1146 assert(first->bbNum <= top->bbNum);
1147 assert(top->bbNum <= entry->bbNum);
1148 assert(entry->bbNum <= bottom->bbNum);
1149 assert(head->bbNum < top->bbNum || head->bbNum > bottom->bbNum);
1151 // If the new loop contains any existing ones, add it in the right place.
1152 unsigned char loopInd = optLoopCount;
1153 for (unsigned char prevPlus1 = optLoopCount; prevPlus1 > 0; prevPlus1--)
1155 unsigned char prev = prevPlus1 - 1;
1156 if (optLoopTable[prev].lpContainedBy(first, bottom))
1161 // Move up any loops if necessary.
1162 for (unsigned j = optLoopCount; j > loopInd; j--)
1164 optLoopTable[j] = optLoopTable[j - 1];
1168 for (unsigned i = loopInd + 1; i < optLoopCount; i++)
1170 // The loop is well-formed.
1171 assert(optLoopTable[i].lpWellFormed());
1172 // Check for disjoint.
1173 if (optLoopTable[i].lpDisjoint(first, bottom))
1177 // Otherwise, assert complete containment (of optLoopTable[i] in new loop).
1178 assert(optLoopTable[i].lpContainedBy(first, bottom));
1182 optLoopTable[loopInd].lpHead = head;
1183 optLoopTable[loopInd].lpFirst = first;
1184 optLoopTable[loopInd].lpTop = top;
1185 optLoopTable[loopInd].lpBottom = bottom;
1186 optLoopTable[loopInd].lpEntry = entry;
1187 optLoopTable[loopInd].lpExit = exit;
1188 optLoopTable[loopInd].lpExitCnt = exitCnt;
1190 optLoopTable[loopInd].lpParent = BasicBlock::NOT_IN_LOOP;
1191 optLoopTable[loopInd].lpChild = BasicBlock::NOT_IN_LOOP;
1192 optLoopTable[loopInd].lpSibling = BasicBlock::NOT_IN_LOOP;
1194 optLoopTable[loopInd].lpFlags = 0;
1196 // We haven't yet recorded any side effects.
1197 for (MemoryKind memoryKind : allMemoryKinds())
1199 optLoopTable[loopInd].lpLoopHasMemoryHavoc[memoryKind] = false;
1201 optLoopTable[loopInd].lpFieldsModified = nullptr;
1202 optLoopTable[loopInd].lpArrayElemTypesModified = nullptr;
1204 // If DO-WHILE loop mark it as such.
1205 if (head->bbNext == entry)
1207 optLoopTable[loopInd].lpFlags |= LPFLG_DO_WHILE;
1210 // If single exit loop mark it as such.
1214 optLoopTable[loopInd].lpFlags |= LPFLG_ONE_EXIT;
1218 // Try to find loops that have an iterator (i.e. for-like loops) "for (init; test; incr){ ... }"
1219 // We have the following restrictions:
1220 // 1. The loop condition must be a simple one i.e. only one JTRUE node
1221 // 2. There must be a loop iterator (a local var) that is
1222 // incremented (decremented or lsh, rsh, mul) with a constant value
1223 // 3. The iterator is incremented exactly once
1224 // 4. The loop condition must use the iterator.
1226 if (bottom->bbJumpKind == BBJ_COND)
1231 if (!optExtractInitTestIncr(head, bottom, top, &init, &test, &incr))
1236 unsigned iterVar = BAD_VAR_NUM;
1237 if (!optComputeIterInfo(incr, head->bbNext, bottom, &iterVar))
1242 // Make sure the "iterVar" initialization is never skipped,
1243 // i.e. every pred of ENTRY other than HEAD is in the loop.
1244 for (flowList* predEdge = entry->bbPreds; predEdge; predEdge = predEdge->flNext)
1246 BasicBlock* predBlock = predEdge->flBlock;
1247 if ((predBlock != head) && !optLoopTable[loopInd].lpContains(predBlock))
1253 if (!optPopulateInitInfo(loopInd, init, iterVar))
1258 // Check that the iterator is used in the loop condition.
1259 if (!optCheckIterInLoopTest(loopInd, test, head->bbNext, bottom, iterVar))
1264 // We know the loop has an iterator at this point ->flag it as LPFLG_ITER
1265 // Record the iterator, the pointer to the test node
1266 // and the initial value of the iterator (constant or local var)
1267 optLoopTable[loopInd].lpFlags |= LPFLG_ITER;
1270 optLoopTable[loopInd].lpIterTree = incr;
1273 // Save the initial value of the iterator - can be lclVar or constant
1274 // Flag the loop accordingly.
1280 simpleTestLoopCount++;
1283 // Check if a constant iteration loop.
1284 if ((optLoopTable[loopInd].lpFlags & LPFLG_CONST_INIT) && (optLoopTable[loopInd].lpFlags & LPFLG_CONST_LIMIT))
1286 // This is a constant loop.
1287 optLoopTable[loopInd].lpFlags |= LPFLG_CONST;
1289 constIterLoopCount++;
1296 printf("\nConstant loop initializer:\n");
1299 printf("\nConstant loop body:\n");
1301 BasicBlock* block = head;
1304 block = block->bbNext;
1305 for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
1307 if (stmt->gtStmt.gtStmtExpr == incr)
1312 gtDispTree(stmt->gtStmt.gtStmtExpr);
1314 } while (block != bottom);
1320 DBEXEC(verbose, optPrintLoopRecording(loopInd));
1326 //------------------------------------------------------------------------
1327 // optPrintLoopRecording: Print a recording of the loop.
1330 // loopInd - loop index.
1332 void Compiler::optPrintLoopRecording(unsigned loopInd)
1334 printf("Recorded loop %s", (loopInd != optLoopCount ? "(extended) " : ""));
1335 optPrintLoopInfo(optLoopCount, // Not necessarily the loop index, but the number of loops that have been added.
1336 optLoopTable[loopInd].lpHead, optLoopTable[loopInd].lpFirst, optLoopTable[loopInd].lpTop,
1337 optLoopTable[loopInd].lpEntry, optLoopTable[loopInd].lpBottom, optLoopTable[loopInd].lpExitCnt,
1338 optLoopTable[loopInd].lpExit);
1340 // If an iterator loop print the iterator and the initialization.
1341 if (optLoopTable[loopInd].lpFlags & LPFLG_ITER)
1343 printf(" [over V%02u", optLoopTable[loopInd].lpIterVar());
1345 printf(GenTree::OpName(optLoopTable[loopInd].lpIterOper()));
1347 printf("%d )", optLoopTable[loopInd].lpIterConst());
1349 if (optLoopTable[loopInd].lpFlags & LPFLG_CONST_INIT)
1351 printf(" from %d", optLoopTable[loopInd].lpConstInit);
1353 if (optLoopTable[loopInd].lpFlags & LPFLG_VAR_INIT)
1355 printf(" from V%02u", optLoopTable[loopInd].lpVarInit);
1358 // If a simple test condition, print the operator and the limits.
1359 printf(GenTree::OpName(optLoopTable[loopInd].lpTestOper()));
1361 if (optLoopTable[loopInd].lpFlags & LPFLG_CONST_LIMIT)
1363 printf("%d ", optLoopTable[loopInd].lpConstLimit());
1366 if (optLoopTable[loopInd].lpFlags & LPFLG_VAR_LIMIT)
1368 printf("V%02u ", optLoopTable[loopInd].lpVarLimit());
1377 void Compiler::optCheckPreds()
1380 BasicBlock* blockPred;
1383 for (block = fgFirstBB; block; block = block->bbNext)
1385 for (pred = block->bbPreds; pred; pred = pred->flNext)
1387 // make sure this pred is part of the BB list
1388 for (blockPred = fgFirstBB; blockPred; blockPred = blockPred->bbNext)
1390 if (blockPred == pred->flBlock)
1395 noway_assert(blockPred);
1396 switch (blockPred->bbJumpKind)
1399 if (blockPred->bbJumpDest == block)
1405 noway_assert(blockPred->bbNext == block);
1407 case BBJ_EHFILTERRET:
1409 case BBJ_EHCATCHRET:
1410 noway_assert(blockPred->bbJumpDest == block);
1423 //------------------------------------------------------------------------
1424 // LoopSearch: Class that handles scanning a range of blocks to detect a loop,
1425 // moving blocks to make the loop body contiguous, and recording
1428 // We will use the following terminology:
1429 // HEAD - the basic block that flows into the loop ENTRY block (Currently MUST be lexically before entry).
1430 //              Not part of the loop itself.
1431 // FIRST - the lexically first basic block (in bbNext order) within this loop.
1432 // TOP - the target of the backward edge from BOTTOM. In most cases FIRST and TOP are the same.
1433 // BOTTOM - the lexically last block in the loop (i.e. the block from which we jump to the top)
1434 //    EXIT    - the predecessor of the loop's unique exit edge, if it has a unique exit edge; else nullptr
1435 //    ENTRY   - the entry in the loop (not necessarily the TOP), but there must be only one entry
1437 // We (currently) require the body of a loop to be a contiguous (in bbNext order) sequence of basic blocks.
1438 // When the loop is identified, blocks will be moved out to make it a compact contiguous region if possible,
1439 // and in cases where compaction is not possible, we'll subsequently treat all blocks in the lexical range
1440 // between TOP and BOTTOM as part of the loop even if they aren't part of the SCC.
1441 // Regarding nesting: Since a given block can only have one back-edge (we only detect loops with back-edges
1442 // from BBJ_COND or BBJ_ALWAYS blocks), no two loops will share the same BOTTOM. Two loops may share the
1443 // same FIRST/TOP/ENTRY as reported by LoopSearch, and optCanonicalizeLoopNest will subsequently re-write
1444 // the CFG so that no two loops share the same FIRST/TOP/ENTRY anymore.
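//
// For illustration only, the simplest (do-while style) shape, in which several of the labels
// above coincide (FIRST == TOP == ENTRY, and BOTTOM is also the single EXIT):
//
//      HEAD
//        |
//        v
//      FIRST/TOP/ENTRY  <------+
//        |      ...            |   back-edge (BBJ_COND in BOTTOM)
//        v                     |
//      BOTTOM/EXIT ------------+
//        |
//        v   (loop exit edge)
//
// In general TOP, FIRST and ENTRY need not all coincide, as described above.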
1474 // Keeping track of which blocks are in the loop requires two block sets since we may add blocks
1475 // as we go but the BlockSet type's max ID doesn't increase to accommodate them. Define a helper
1476 // struct to make the ensuing code more readable.
1480 // Keep track of blocks with bbNum <= oldBlockMaxNum in a regular BlockSet, since
1481 // it can hold all of them.
1482 BlockSet oldBlocksInLoop; // Blocks with bbNum <= oldBlockMaxNum
1484 // Keep track of blocks with bbNum > oldBlockMaxNum in a separate BlockSet, but
1485 // indexing them by (blockNum - oldBlockMaxNum); since we won't generate more than
1486 // one new block per old block, this must be sufficient to track any new blocks.
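// (For example, with oldBlockMaxNum == 100 -- an illustrative value, not taken from the
// code -- a newly created block numbered 103 would be tracked as element 3 of newBlocksInLoop.)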
1487 BlockSet newBlocksInLoop; // Blocks with bbNum > oldBlockMaxNum
1490 unsigned int oldBlockMaxNum;
1493 LoopBlockSet(Compiler* comp)
1494 : oldBlocksInLoop(BlockSetOps::UninitVal())
1495 , newBlocksInLoop(BlockSetOps::UninitVal())
1497 , oldBlockMaxNum(comp->fgBBNumMax)
1501 void Reset(unsigned int seedBlockNum)
1503 if (BlockSetOps::MayBeUninit(oldBlocksInLoop))
1505 // Either the block sets are uninitialized (and long), so we need to initialize
1506 // them (and allocate their backing storage), or they are short and empty, so
1507 // assigning MakeEmpty to them is as cheap as ClearD.
1508 oldBlocksInLoop = BlockSetOps::MakeEmpty(comp);
1509 newBlocksInLoop = BlockSetOps::MakeEmpty(comp);
1513 // We know the backing storage is already allocated, so just clear it.
1514 BlockSetOps::ClearD(comp, oldBlocksInLoop);
1515 BlockSetOps::ClearD(comp, newBlocksInLoop);
1517 assert(seedBlockNum <= oldBlockMaxNum);
1518 BlockSetOps::AddElemD(comp, oldBlocksInLoop, seedBlockNum);
1521 bool CanRepresent(unsigned int blockNum)
1523 // We can represent old blocks up to oldBlockMaxNum, and
1524 // new blocks up to 2 * oldBlockMaxNum.
1525 return (blockNum <= 2 * oldBlockMaxNum);
1528 bool IsMember(unsigned int blockNum)
1530 if (blockNum > oldBlockMaxNum)
1532 return BlockSetOps::IsMember(comp, newBlocksInLoop, blockNum - oldBlockMaxNum);
1534 return BlockSetOps::IsMember(comp, oldBlocksInLoop, blockNum);
1537 void Insert(unsigned int blockNum)
1539 if (blockNum > oldBlockMaxNum)
1541 BlockSetOps::AddElemD(comp, newBlocksInLoop, blockNum - oldBlockMaxNum);
1545 BlockSetOps::AddElemD(comp, oldBlocksInLoop, blockNum);
1549 bool TestAndInsert(unsigned int blockNum)
1551 if (blockNum > oldBlockMaxNum)
1553 unsigned int shiftedNum = blockNum - oldBlockMaxNum;
1554 if (!BlockSetOps::IsMember(comp, newBlocksInLoop, shiftedNum))
1556 BlockSetOps::AddElemD(comp, newBlocksInLoop, shiftedNum);
1562 if (!BlockSetOps::IsMember(comp, oldBlocksInLoop, blockNum))
1564 BlockSetOps::AddElemD(comp, oldBlocksInLoop, blockNum);
1572 LoopBlockSet loopBlocks; // Set of blocks identified as part of the loop
1575 // See LoopSearch class comment header for a diagram relating these fields:
1576 BasicBlock* head; // Predecessor of unique entry edge
1577 BasicBlock* first; // Lexically first in-loop block
1578 BasicBlock* top; // Successor of back-edge from BOTTOM
1579 BasicBlock* bottom; // Predecessor of back-edge to TOP, also lexically last in-loop block
1580 BasicBlock* entry; // Successor of unique entry edge
1582 BasicBlock* lastExit; // Most recently discovered exit block
1583 unsigned char exitCount; // Number of discovered exit edges
1584 unsigned int oldBlockMaxNum; // Used to identify new blocks created during compaction
1585 BlockSet bottomBlocks; // BOTTOM blocks of already-recorded loops
1587 bool forgotExit = false; // Flags a rare case where lastExit gets nulled out, for assertions
1589 bool changedFlowGraph = false; // Signals that loop compaction has modified the flow graph
1592 LoopSearch(Compiler* comp)
1593 : loopBlocks(comp), comp(comp), oldBlockMaxNum(comp->fgBBNumMax), bottomBlocks(BlockSetOps::MakeEmpty(comp))
1595 // Make sure we've renumbered such that the bitsets can hold all the bits
1596 assert(comp->fgBBNumMax <= comp->fgCurBBEpochSize);
1599 //------------------------------------------------------------------------
1600 // RecordLoop: Notify the Compiler that a loop has been found.
1603 // true - Loop successfully recorded.
1604 // false - Compiler has run out of loop descriptors; loop not recorded.
1608 /* At this point we have a compact loop - record it in the loop table
1609 * If we found only one exit, record it in the table too
1610 * (otherwise an exit = nullptr in the loop table means multiple exits) */
1612 BasicBlock* onlyExit = (exitCount == 1 ? lastExit : nullptr);
1613 if (comp->optRecordLoop(head, first, top, entry, bottom, onlyExit, exitCount))
1615 // Record the BOTTOM block for future reference before returning.
1616 assert(bottom->bbNum <= oldBlockMaxNum);
1617 BlockSetOps::AddElemD(comp, bottomBlocks, bottom->bbNum);
1621 // Unable to record this loop because the loop descriptor table overflowed.
1625 //------------------------------------------------------------------------
1626 // ChangedFlowGraph: Determine whether loop compaction has modified the flow graph.
1629 // true - The flow graph has been modified; fgUpdateChangedFlowGraph should
1630 // be called (which is the caller's responsibility).
1631 // false - The flow graph has not been modified by this LoopSearch.
1633 bool ChangedFlowGraph()
1635 return changedFlowGraph;
1638 //------------------------------------------------------------------------
1639 // FindLoop: Search for a loop with the given HEAD block and back-edge.
1642 // head - Block to be the HEAD of any loop identified
1643 // top - Block to be the TOP of any loop identified
1644 // bottom - Block to be the BOTTOM of any loop identified
1647 // true - Found a valid loop.
1648 // false - Did not find a valid loop.
1651 // May modify flow graph to make loop compact before returning.
1652 // Will set instance fields to track loop's extent and exits if a valid
1653 // loop is found, and potentially trash them otherwise.
1655 bool FindLoop(BasicBlock* head, BasicBlock* top, BasicBlock* bottom)
1657 /* Is this a loop candidate? - We look for "back edges", i.e. an edge from BOTTOM
1658 * to TOP (note that this is an abuse of notation since this is not necessarily a back edge
1659 * as the definition says, but merely an indication that we have a loop there).
1660 * Thus, we have to be very careful and after entry discovery check that it is indeed
1661 * the only place we enter the loop (especially for non-reducible flow graphs).
1664 if (top->bbNum > bottom->bbNum) // is this a backward edge? (from BOTTOM to TOP)
1666 // Edge from BOTTOM to TOP is not a backward edge
1670 if (bottom->bbNum > oldBlockMaxNum)
1672 // Not a true back-edge; bottom is a block added to reconnect fall-through during
1673 // loop processing, so its block number does not reflect its position.
1677 if ((bottom->bbJumpKind == BBJ_EHFINALLYRET) || (bottom->bbJumpKind == BBJ_EHFILTERRET) ||
1678 (bottom->bbJumpKind == BBJ_EHCATCHRET) || (bottom->bbJumpKind == BBJ_CALLFINALLY) ||
1679 (bottom->bbJumpKind == BBJ_SWITCH))
1681 /* BBJ_EHFINALLYRET, BBJ_EHFILTERRET, BBJ_EHCATCHRET, and BBJ_CALLFINALLY can never form a loop.
1682 * BBJ_SWITCH that has a backward jump appears only for labeled break. */
1686 /* The presence of a "back edge" is an indication that a loop might be present here
1689 * 1. A collection of STRONGLY CONNECTED nodes i.e. there is a path from any
1690 * node in the loop to any other node in the loop (wholly within the loop)
1691 * 2. The loop has a unique ENTRY, i.e. there is only one way to reach a node
1692 * in the loop from outside the loop, and that is through the ENTRY
1695 /* Let's find the loop ENTRY */
1696 BasicBlock* entry = FindEntry(head, top, bottom);
1698 if (entry == nullptr)
1700 // For now, we only recognize loops where HEAD has some successor ENTRY in the loop.
1704 // Passed the basic checks; initialize instance state for this back-edge.
1707 this->entry = entry;
1708 this->bottom = bottom;
1709 this->lastExit = nullptr;
1710 this->exitCount = 0;
1712 // Now we find the "first" block -- the earliest block reachable within the loop.
1713 // With our current algorithm, this is always the same as "top".
1716 if (!HasSingleEntryCycle())
1718 // There isn't actually a loop between TOP and BOTTOM
1722 if (!loopBlocks.IsMember(top->bbNum))
1724 // The "back-edge" we identified isn't actually part of the flow cycle containing ENTRY
1728 // Disqualify loops where the first block of the loop is less nested in EH than
1729 // the bottom block. That is, we don't want to handle loops where the back edge
1730 // goes from within an EH region to a first block that is outside that same EH
1731 // region. Note that we *do* handle loops where the first block is the *first*
1732 // block of a more nested EH region (since it is legal to branch to the first
1733 // block of an immediately more nested EH region). So, for example, disqualify
1740 // BB10 BBJ_COND => BB02
1744 // Here, BB10 is more nested than BB02.
1746 if (bottom->hasTryIndex() && !comp->bbInTryRegions(bottom->getTryIndex(), first))
1748 JITDUMP("Loop 'first' BB%02u is in an outer EH region compared to loop 'bottom' BB%02u. Rejecting "
1750 first->bbNum, bottom->bbNum);
1754 #if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
1755 // Disqualify loops where the first block of the loop is a finally target.
1756 // The main problem is when multiple loops share a 'first' block that is a finally
1757 // target and we canonicalize the loops by adding a new loop head. In that case, we
1758 // need to update the blocks so the finally target bit is moved to the newly created
1759 // block, and removed from the old 'first' block. This is 'hard', so at this point
1760 // in the RyuJIT codebase (when we don't expect to keep the "old" ARM32 code generator
1761 // long-term), it's easier to disallow the loop than to update the flow graph to
1762 // support this case.
1764 if ((first->bbFlags & BBF_FINALLY_TARGET) != 0)
1766 JITDUMP("Loop 'first' BB%02u is a finally target. Rejecting loop.\n", first->bbNum);
1769 #endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
1771 // Compact the loop (sweep through it and move out any blocks that aren't part of the
1772 // flow cycle), and find the exits.
1773 if (!MakeCompactAndFindExits())
1775 // Unable to preserve well-formed loop during compaction.
1779 // We have a valid loop.
1784 //------------------------------------------------------------------------
1785 // FindEntry: See if given HEAD flows to valid ENTRY between given TOP and BOTTOM
1788 // head - Block to be the HEAD of any loop identified
1789 // top - Block to be the TOP of any loop identified
1790 // bottom - Block to be the BOTTOM of any loop identified
1793 // Block to be the ENTRY of any loop identified, or nullptr if no
1794 // such entry meeting our criteria can be found.
1797 // Returns main entry if one is found, does not check for side-entries.
1799 BasicBlock* FindEntry(BasicBlock* head, BasicBlock* top, BasicBlock* bottom)
1801 if (head->bbJumpKind == BBJ_ALWAYS)
1803 if (head->bbJumpDest->bbNum <= bottom->bbNum && head->bbJumpDest->bbNum >= top->bbNum)
1805 /* OK - we enter somewhere within the loop */
1807 /* some useful asserts
1808  * Cannot enter at the top - should have been caught by redundant jumps */
1810 assert((head->bbJumpDest != top) || (head->bbFlags & BBF_KEEP_BBJ_ALWAYS));
1812 return head->bbJumpDest;
1816 /* special case - don't consider now */
1817 // assert (!"Loop entered in weird way!");
1821 // Can we fall through into the loop?
1822 else if (head->bbJumpKind == BBJ_NONE || head->bbJumpKind == BBJ_COND)
1824 /* The ENTRY is at the TOP (a do-while loop) */
1829 return nullptr; // head does not flow into the loop; bail for now
1833 //------------------------------------------------------------------------
1834 // HasSingleEntryCycle: Perform a reverse flow walk from ENTRY, visiting
1835 // only blocks between TOP and BOTTOM, to determine if such a cycle
1836 // exists and if it has a single entry.
1839 // true - Found a single-entry cycle.
1840 // false - Did not find a single-entry cycle.
1843 // Will mark (in `loopBlocks`) all blocks found to participate in the
1846 bool HasSingleEntryCycle()
1848 // Now do a backwards flow walk from entry to see if we have a single-entry loop
1849 bool foundCycle = false;
1851 // Seed the loop block set and worklist with the entry block.
1852 loopBlocks.Reset(entry->bbNum);
1853 jitstd::list<BasicBlock*> worklist(comp->getAllocator());
1854 worklist.push_back(entry);
1856 while (!worklist.empty())
1858 BasicBlock* block = worklist.back();
1859 worklist.pop_back();
1861 /* Make sure ENTRY dominates all blocks in the loop
1862 * This is necessary to ensure condition 2. above
1864 if (block->bbNum > oldBlockMaxNum)
1866 // This is a new block we added to connect fall-through, so the
1867 // recorded dominator information doesn't cover it. Just continue,
1868 // and when we process its unique predecessor we'll abort if ENTRY
1869 // doesn't dominate that.
1871 else if (!comp->fgDominate(entry, block))
1876 // Add preds to the worklist, checking for side-entries.
1877 for (flowList* predIter = block->bbPreds; predIter != nullptr; predIter = predIter->flNext)
1879 BasicBlock* pred = predIter->flBlock;
1881 unsigned int testNum = PositionNum(pred);
1883 if ((testNum < top->bbNum) || (testNum > bottom->bbNum))
1885 // Pred is out of loop range
1890 // This is the single entry we expect.
1893 // ENTRY has some pred other than head outside the loop. If ENTRY does not
1894 // dominate this pred, we'll consider this a side-entry and skip this loop;
1895 // otherwise the loop is still valid and this may be a (flow-wise) back-edge
1896 // of an outer loop. For the dominance test, if `pred` is a new block, use
1897 // its unique predecessor since the dominator tree has info for that.
1898 BasicBlock* effectivePred = (pred->bbNum > oldBlockMaxNum ? pred->bbPrev : pred);
1899 if (comp->fgDominate(entry, effectivePred))
1901 // Outer loop back-edge
1906 // There are multiple entries to this loop, don't consider it.
1913 // We have indeed found a cycle in the flow graph.
1914 isFirstVisit = !foundCycle;
1916 assert(loopBlocks.IsMember(pred->bbNum));
1918 else if (loopBlocks.TestAndInsert(pred->bbNum))
1920 // Already visited this pred
1921 isFirstVisit = false;
1925 // Add this pred to the worklist
1926 worklist.push_back(pred);
1927 isFirstVisit = true;
1930 if (isFirstVisit && (pred->bbNext != nullptr) && (PositionNum(pred->bbNext) == pred->bbNum))
1932 // We've created a new block immediately after `pred` to
1933 // reconnect what was fall-through. Mark it as in-loop also;
1934 // it needs to stay with `pred` and if it exits the loop we'd
1935 // just need to re-create it if we tried to move it out.
1936 loopBlocks.Insert(pred->bbNext->bbNum);
1944 //------------------------------------------------------------------------
1945 // PositionNum: Get the number identifying a block's position per the
1946 // lexical ordering that existed before searching for (and compacting)
1950 // block - Block whose position is desired.
1953 // A number indicating that block's position relative to others.
1956 // When the given block is a new one created during loop compaction,
1957 // the number of its unique predecessor is returned.
1959 unsigned int PositionNum(BasicBlock* block)
1961 if (block->bbNum > oldBlockMaxNum)
1963 // This must be a block we inserted to connect fall-through after moving blocks.
1964 // To determine if it's in the loop or not, use the number of its unique predecessor
1966 assert(block->bbPreds->flBlock == block->bbPrev);
1967 assert(block->bbPreds->flNext == nullptr);
1968 return block->bbPrev->bbNum;
1970 return block->bbNum;
1973 //------------------------------------------------------------------------
1974 // MakeCompactAndFindExits: Compact the loop (sweep through it and move out
1975 // any blocks that aren't part of the flow cycle), and find the exits (set
1976 // lastExit and exitCount).
1979 // true - Loop successfully compacted (or `loopBlocks` expanded to
1980 // include all blocks in the lexical range), exits enumerated.
1981 // false - Loop cannot be made compact and remain well-formed.
1983 bool MakeCompactAndFindExits()
1985 // Compaction (if it needs to happen) will require an insertion point.
1986 BasicBlock* moveAfter = nullptr;
1988 for (BasicBlock* previous = top->bbPrev; previous != bottom;)
1990 BasicBlock* block = previous->bbNext;
1992 if (loopBlocks.IsMember(block->bbNum))
1994 // This block is a member of the loop. Check to see if it may exit the loop.
1995 CheckForExit(block);
1997 // Done processing this block; move on to the next.
2002 // This block is lexically between TOP and BOTTOM, but it does not
2003 // participate in the flow cycle. Check for a run of consecutive
2005 BasicBlock* lastNonLoopBlock = block;
2006 BasicBlock* nextLoopBlock = block->bbNext;
2007 while (!loopBlocks.IsMember(nextLoopBlock->bbNum))
2009 lastNonLoopBlock = nextLoopBlock;
2010 nextLoopBlock = nextLoopBlock->bbNext;
2011 // This loop must terminate because we know BOTTOM is in loopBlocks.
2014 // Choose an insertion point for non-loop blocks if we haven't yet done so.
2015 if (moveAfter == nullptr)
2017 moveAfter = FindInsertionPoint();
2020 if (!BasicBlock::sameEHRegion(previous, nextLoopBlock) || !BasicBlock::sameEHRegion(previous, moveAfter))
2022 // EH regions would be ill-formed if we moved these blocks out.
2023 // See if we can consider them loop blocks without introducing
2025 if (CanTreatAsLoopBlocks(block, lastNonLoopBlock))
2027 // The call to `CanTreatAsLoopBlocks` marked these blocks as part of the loop;
2028 // iterate without updating `previous` so that we'll analyze them as part
2034 // We can't move these out of the loop or leave them in, so just give up on this loop.
2040 // Now physically move the blocks.
2041 BasicBlock* moveBefore = moveAfter->bbNext;
2043 comp->fgUnlinkRange(block, lastNonLoopBlock);
2044 comp->fgMoveBlocksAfter(block, lastNonLoopBlock, moveAfter);
2045 comp->ehUpdateLastBlocks(moveAfter, lastNonLoopBlock);
2047 // Apply any adjustments needed for fallthrough at the boundaries of the moved region.
2048 FixupFallThrough(moveAfter, moveBefore, block);
2049 FixupFallThrough(lastNonLoopBlock, nextLoopBlock, moveBefore);
2050 // Also apply any adjustments needed where the blocks were snipped out of the loop.
2051 BasicBlock* newBlock = FixupFallThrough(previous, block, nextLoopBlock);
2052 if (newBlock != nullptr)
2054 // This new block is in the loop and is a loop exit.
2055 loopBlocks.Insert(newBlock->bbNum);
2056 lastExit = newBlock;
2060 // Update moveAfter for the next insertion.
2061 moveAfter = lastNonLoopBlock;
2063 // Note that we've changed the flow graph, and continue without updating
2064 // `previous` so that we'll process nextLoopBlock.
2065 changedFlowGraph = true;
2068 if ((exitCount == 1) && (lastExit == nullptr))
2070 // If we happen to have a loop with two exits, one of which goes to an
2071 // infinite loop that's lexically nested inside it, where the inner loop
2072 // can't be moved out, we can end up in this situation (because
2073 // CanTreatAsLoopBlocks will have decremented the count expecting to find
2074 // another exit later). Bump the exit count to 2, since downstream code
2075 // will not be prepared for null lastExit with exitCount of 1.
2080 // Loop compaction was successful
2084 //------------------------------------------------------------------------
2085 // FindInsertionPoint: Find an appropriate spot to which blocks that are
2086 // lexically between TOP and BOTTOM but not part of the flow cycle
2090 // Block after which to insert moved blocks.
2092 BasicBlock* FindInsertionPoint()
2094 // Find an insertion point for blocks we're going to move. Move them down
2095 // out of the loop, and if possible find a spot that won't break up fall-through.
2096 BasicBlock* moveAfter = bottom;
2097 while (moveAfter->bbFallsThrough())
2099 // Keep looking for a better insertion point if we can.
2100 BasicBlock* newMoveAfter = TryAdvanceInsertionPoint(moveAfter);
2102 if (newMoveAfter == nullptr)
2104 // Ran out of candidate insertion points, so just split up the fall-through.
2108 moveAfter = newMoveAfter;
2114 //------------------------------------------------------------------------
2115 // TryAdvanceInsertionPoint: Find the next legal insertion point after
2116 // the given one, if one exists.
2119 // oldMoveAfter - Prior insertion point; find the next after this.
2122 // The next block after `oldMoveAfter` that is a legal insertion point
2123 // (i.e. blocks being swept out of the loop can be moved immediately
2124 // after it), if one exists, else nullptr.
2126 BasicBlock* TryAdvanceInsertionPoint(BasicBlock* oldMoveAfter)
2128 BasicBlock* newMoveAfter = oldMoveAfter->bbNext;
2130 if (!BasicBlock::sameEHRegion(oldMoveAfter, newMoveAfter))
2132 // Don't cross an EH region boundary.
2136 if ((newMoveAfter->bbJumpKind == BBJ_ALWAYS) || (newMoveAfter->bbJumpKind == BBJ_COND))
2138 unsigned int destNum = newMoveAfter->bbJumpDest->bbNum;
2139 if ((destNum >= top->bbNum) && (destNum <= bottom->bbNum) && !loopBlocks.IsMember(destNum))
2141 // Reversing this branch out of block `newMoveAfter` could confuse this algorithm
2142 // (in particular, the edge would still be numerically backwards but no longer be
2143 // lexically backwards, so a lexical forward walk from TOP would not find BOTTOM),
2144 // so don't do that.
2145 // We're checking for BBJ_ALWAYS and BBJ_COND only here -- we don't need to
2146 // check for BBJ_SWITCH because we'd never consider it a loop back-edge.
2151 // Similarly check to see if advancing to `newMoveAfter` would reverse the lexical order
2152 // of an edge from the run of blocks being moved to `newMoveAfter` -- doing so would
2153 // introduce a new lexical back-edge, which could (maybe?) confuse the loop search
2154 // algorithm, and isn't desirable layout anyway.
2155 for (flowList* predIter = newMoveAfter->bbPreds; predIter != nullptr; predIter = predIter->flNext)
2157 unsigned int predNum = predIter->flBlock->bbNum;
2159 if ((predNum >= top->bbNum) && (predNum <= bottom->bbNum) && !loopBlocks.IsMember(predNum))
2161 // Don't make this forward edge a backwards edge.
2166 if (IsRecordedBottom(newMoveAfter))
2168 // This is the BOTTOM of another loop; don't move any blocks past it, to avoid moving them
2169 // out of that loop (we should have already done so when processing that loop if it were legal).
2173 // Advancing the insertion point is ok, except that we can't split up any CallFinally/BBJ_ALWAYS
2174 // pair, so if we've got such a pair recurse to see if we can move past the whole thing.
2175 return (newMoveAfter->isBBCallAlwaysPair() ? TryAdvanceInsertionPoint(newMoveAfter) : newMoveAfter);
2178 //------------------------------------------------------------------------
2179 // IsRecordedBottom: Determine if the given block is the BOTTOM of a previously
2183 // block - Block to check for BOTTOM-ness.
2186 //    true  - The block was recorded as `bottom` of some earlier-processed loop.
2187 // false - No loops yet recorded have this block as their `bottom`.
2189 bool IsRecordedBottom(BasicBlock* block)
2191 if (block->bbNum > oldBlockMaxNum)
2193 // This is a new block, which can't be an outer bottom block because we only allow old blocks to be recorded as loop bottoms.
2197 return BlockSetOps::IsMember(comp, bottomBlocks, block->bbNum);
2200 //------------------------------------------------------------------------
2201 // CanTreatAsLoopBlocks: If the given range of blocks can be treated as
2202 // loop blocks, add them to loopBlockSet and return true. Otherwise,
2206 // firstNonLoopBlock - First block in the run to be subsumed.
2207 // lastNonLoopBlock - Last block in the run to be subsumed.
2210 // true - The blocks from `firstNonLoopBlock` to `lastNonLoopBlock` were
2211 // successfully added to `loopBlocks`.
2212 // false - Treating the blocks from `firstNonLoopBlock` to `lastNonLoopBlock`
2213 // as loop blocks would not be legal (it would induce a side-entry).
2216 // `loopBlocks` may be modified even if `false` is returned.
2217 // `exitCount` and `lastExit` may be modified if this process identifies
2218 // in-loop edges that were previously counted as exits.
2220 bool CanTreatAsLoopBlocks(BasicBlock* firstNonLoopBlock, BasicBlock* lastNonLoopBlock)
2222 BasicBlock* nextLoopBlock = lastNonLoopBlock->bbNext;
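// Check every block in the run [firstNonLoopBlock .. lastNonLoopBlock]; a predecessor that is
// neither already in the loop nor inside this run constitutes a side-entry.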
2223 for (BasicBlock* testBlock = firstNonLoopBlock; testBlock != nextLoopBlock; testBlock = testBlock->bbNext)
2225 for (flowList* predIter = testBlock->bbPreds; predIter != nullptr; predIter = predIter->flNext)
2227 BasicBlock* testPred = predIter->flBlock;
2228 unsigned int predPosNum = PositionNum(testPred);
2229 unsigned int firstNonLoopPosNum = PositionNum(firstNonLoopBlock);
2230 unsigned int lastNonLoopPosNum = PositionNum(lastNonLoopBlock);
2232 if (loopBlocks.IsMember(predPosNum) ||
2233 ((predPosNum >= firstNonLoopPosNum) && (predPosNum <= lastNonLoopPosNum)))
2235 // This pred is in the loop (or what will be the loop if we determine this
2236 // run of exit blocks doesn't include a side-entry).
2238 if (predPosNum < firstNonLoopPosNum)
2240 // We've already counted this block as an exit, so decrement the count.
2242 if (lastExit == testPred)
2244 // Erase this now-bogus `lastExit` entry.
2246 INDEBUG(forgotExit = true);
2252 // This pred is not in the loop, so this constitutes a side-entry.
2257 // Either we're going to abort the loop on a subsequent testBlock, or this
2258 // testBlock is part of the loop.
2259 loopBlocks.Insert(testBlock->bbNum);
2262 // All blocks were ok to leave in the loop.
2266 //------------------------------------------------------------------------
2267 // FixupFallThrough: Re-establish any broken control flow connectivity
2268 // and eliminate any "goto-next"s that were created by changing the
2269 // given block's lexical follower.
2272 // block - Block whose `bbNext` has changed.
2273 // oldNext - Previous value of `block->bbNext`.
2274 // newNext - New value of `block->bbNext`.
2277 // If a new block is created to reconnect flow, the new block is
2278 // returned; otherwise, nullptr.
2280 BasicBlock* FixupFallThrough(BasicBlock* block, BasicBlock* oldNext, BasicBlock* newNext)
2282 // If we create a new block, that will be our return value.
2283 BasicBlock* newBlock = nullptr;
2285 if (block->bbFallsThrough())
2287 // Need to reconnect the flow from `block` to `oldNext`.
2289 if ((block->bbJumpKind == BBJ_COND) && (block->bbJumpDest == newNext))
2291 /* Reverse the jump condition */
2292 GenTree* test = block->lastNode();
2293 noway_assert(test->OperIsConditionalJump());
2295 if (test->OperGet() == GT_JTRUE)
2297 GenTree* cond = comp->gtReverseCond(test->gtOp.gtOp1);
2298 assert(cond == test->gtOp.gtOp1); // Ensure `gtReverseCond` did not create a new node.
2299 test->gtOp.gtOp1 = cond;
2303 comp->gtReverseCond(test);
2306 // Redirect the Conditional JUMP to go to `oldNext`
2307 block->bbJumpDest = oldNext;
2311 // Insert an unconditional jump to `oldNext` just after `block`.
2312 newBlock = comp->fgConnectFallThrough(block, oldNext);
2313 noway_assert((newBlock == nullptr) || loopBlocks.CanRepresent(newBlock->bbNum));
2316 else if ((block->bbJumpKind == BBJ_ALWAYS) && (block->bbJumpDest == newNext))
2318 // We've made `block`'s jump target its bbNext, so remove the jump.
2319 if (!comp->fgOptimizeBranchToNext(block, newNext, block->bbPrev))
2321 // If optimizing away the goto-next failed for some reason, mark it KEEP_BBJ_ALWAYS to
2322 // prevent assertions from complaining about it.
2323 block->bbFlags |= BBF_KEEP_BBJ_ALWAYS;
2327 // Make sure we don't leave around a goto-next unless it's marked KEEP_BBJ_ALWAYS.
2328 assert(((block->bbJumpKind != BBJ_COND) && (block->bbJumpKind != BBJ_ALWAYS)) || (block->bbJumpDest != newNext) ||
2329 ((block->bbFlags & BBF_KEEP_BBJ_ALWAYS) != 0));
2333 //------------------------------------------------------------------------
2334 // CheckForExit: Check if the given block has any successor edges that are
2335 // loop exits, and update `lastExit` and `exitCount` if so.
2338 // block - Block whose successor edges are to be checked.
2341 // If one block has multiple exiting successor edges, those are counted
2342 // as multiple exits in `exitCount`.
2344 void CheckForExit(BasicBlock* block)
2346 BasicBlock* exitPoint;
2348 switch (block->bbJumpKind)
2351 case BBJ_CALLFINALLY:
2353 case BBJ_EHCATCHRET:
2354 assert(block->bbJumpDest);
2355 exitPoint = block->bbJumpDest;
2357 if (!loopBlocks.IsMember(exitPoint->bbNum))
2359 /* exit from a block other than BOTTOM */
2368 case BBJ_EHFINALLYRET:
2369 case BBJ_EHFILTERRET:
2370 /* The "try" associated with this "finally" must be in the
2371 * same loop, so the finally block will return control inside the loop */
2376 /* those are exits from the loop */
2384 jumpCnt = block->bbJumpSwt->bbsCount;
2385 BasicBlock** jumpTab;
2386 jumpTab = block->bbJumpSwt->bbsDstTab;
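// Walk every target in the switch's jump table; each target outside the loop counts as a separate exit.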
2390 noway_assert(*jumpTab);
2391 exitPoint = *jumpTab;
2393 if (!loopBlocks.IsMember(exitPoint->bbNum))
2398 } while (++jumpTab, --jumpCnt);
2402 noway_assert(!"Unexpected bbJumpKind");
2406 if (block->bbFallsThrough() && !loopBlocks.IsMember(block->bbNext->bbNum))
2408 // Found a fall-through exit.
2416 /*****************************************************************************
2417 * Find the natural loops, using dominators. Note that the test for
2418 * a loop is slightly different from the standard one, because we have
2419 * not done a depth first reordering of the basic blocks.
2422 void Compiler::optFindNaturalLoops()
2427 printf("*************** In optFindNaturalLoops()\n");
2431 noway_assert(fgDomsComputed);
2435 hasMethodLoops = false;
2436 loopsThisMethod = 0;
2437 loopOverflowThisMethod = false;
2440 LoopSearch search(this);
2442 for (BasicBlock* head = fgFirstBB; head->bbNext; head = head->bbNext)
2444 BasicBlock* top = head->bbNext;
2446 // Blocks that are rarely run have a zero bbWeight and should
2447 // never be optimized here
2449 if (top->bbWeight == BB_ZERO_WEIGHT)
2454 for (flowList* pred = top->bbPreds; pred; pred = pred->flNext)
2456 if (search.FindLoop(head, top, pred->flBlock))
2458 // Found a loop; record it and see if we've hit the limit.
2459 bool recordedLoop = search.RecordLoop();
2461 (void)recordedLoop; // avoid unused variable warnings in COUNT_LOOPS and !DEBUG
2464 if (!hasMethodLoops)
2466 /* mark the method as containing natural loops */
2468 hasMethodLoops = true;
2471 /* increment total number of loops found */
2475 /* keep track of the number of exits */
2476 loopExitCountTable.record(static_cast<unsigned>(exitCount));
2477 #else // COUNT_LOOPS
2478 assert(recordedLoop);
2479 if (optLoopCount == MAX_LOOP_NUM)
2481 // We won't be able to record any more loops, so stop looking.
2484 #endif // COUNT_LOOPS
2486 // Continue searching preds of `top` to see if any other are
2487 // back-edges (this can happen for nested loops). The iteration
2488 // is safe because the compaction we do only modifies predecessor
2489 // lists of blocks that gain or lose fall-through from their
2490 // `bbPrev`, but since the motion is from within the loop to below
2491 // it, we know we're not altering the relationship between `top`
2492 // and its `bbPrev`.
2499 loopCountTable.record(loopsThisMethod);
2500 if (maxLoopsPerMethod < loopsThisMethod)
2502 maxLoopsPerMethod = loopsThisMethod;
2504 if (loopOverflowThisMethod)
2506 totalLoopOverflows++;
2508 #endif // COUNT_LOOPS
2510 bool mod = search.ChangedFlowGraph();
2514 // Need to renumber blocks now since loop canonicalization
2515 // depends on it; can defer the rest of fgUpdateChangedFlowGraph()
2516 // until after canonicalizing loops. Dominator information is
2517 // recorded in terms of block numbers, so flag it invalid.
2518 fgDomsComputed = false;
2522 // Now the loop indices are stable. We can figure out parent/child relationships
2523 // (using table indices to name loops), and label blocks.
2524 for (unsigned char loopInd = 1; loopInd < optLoopCount; loopInd++)
2526 for (unsigned char possibleParent = loopInd; possibleParent > 0;)
2529 if (optLoopTable[possibleParent].lpContains(optLoopTable[loopInd]))
2531 optLoopTable[loopInd].lpParent = possibleParent;
2532 optLoopTable[loopInd].lpSibling = optLoopTable[possibleParent].lpChild;
2533 optLoopTable[possibleParent].lpChild = loopInd;
2539 // Now label the blocks with the innermost loop to which they belong. Since parents
2540 // precede children in the table, doing the labeling for each loop in order will achieve
2541 // this -- the innermost loop labeling will be done last.
2542 for (unsigned char loopInd = 0; loopInd < optLoopCount; loopInd++)
2544 BasicBlock* first = optLoopTable[loopInd].lpFirst;
2545 BasicBlock* bottom = optLoopTable[loopInd].lpBottom;
2546 for (BasicBlock* blk = first; blk != nullptr; blk = blk->bbNext)
2548 blk->bbNatLoopNum = loopInd;
2553 assert(blk->bbNext != nullptr); // We should never reach nullptr.
2557 // Make sure that loops are canonical: that every loop has a unique "top", by creating an empty "nop"
2558 // one, if necessary, for loops containing others that share a "top."
2559 for (unsigned char loopInd = 0; loopInd < optLoopCount; loopInd++)
2561 // Traverse the outermost loops as entries into the loop nest; so skip non-outermost.
2562 if (optLoopTable[loopInd].lpParent != BasicBlock::NOT_IN_LOOP)
2568 if (optCanonicalizeLoopNest(loopInd))
2575 fgUpdateChangedFlowGraph();
2579 if (verbose && optLoopCount > 0)
2581 printf("\nFinal natural loop table:\n");
2582 for (unsigned loopInd = 0; loopInd < optLoopCount; loopInd++)
2584 optPrintLoopInfo(loopInd);
2591 void Compiler::optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap)
2593 BasicBlock* newJumpDest = nullptr;
2594 switch (blk->bbJumpKind)
2599 case BBJ_EHFILTERRET:
2600 case BBJ_EHFINALLYRET:
2601 case BBJ_EHCATCHRET:
2602 // These have no jump destination to update.
2607 case BBJ_CALLFINALLY:
2609 // All of these have a single jump destination to update.
2610 if (redirectMap->Lookup(blk->bbJumpDest, &newJumpDest))
2612 blk->bbJumpDest = newJumpDest;
2618 bool redirected = false;
2619 for (unsigned i = 0; i < blk->bbJumpSwt->bbsCount; i++)
2621 if (redirectMap->Lookup(blk->bbJumpSwt->bbsDstTab[i], &newJumpDest))
2623 blk->bbJumpSwt->bbsDstTab[i] = newJumpDest;
2627 // If any redirections happened, invalidate the switch table map for the switch.
2630 // Don't create a new map just to try to remove an entry.
2631 BlockToSwitchDescMap* switchMap = GetSwitchDescMap(/* createIfNull */ false);
2632 if (switchMap != nullptr)
2634 switchMap->Remove(blk);
2645 // TODO-Cleanup: This should be a static member of the BasicBlock class.
2646 void Compiler::optCopyBlkDest(BasicBlock* from, BasicBlock* to)
2648 assert(from->bbJumpKind == to->bbJumpKind); // Precondition.
2650 // copy the jump destination(s) from "from" to "to".
2651 switch (to->bbJumpKind)
2655 case BBJ_CALLFINALLY:
2657 // All of these have a single jump destination to update.
2658 to->bbJumpDest = from->bbJumpDest;
2663 to->bbJumpSwt = new (this, CMK_BasicBlock) BBswtDesc();
2664 to->bbJumpSwt->bbsCount = from->bbJumpSwt->bbsCount;
2665 to->bbJumpSwt->bbsDstTab = new (this, CMK_BasicBlock) BasicBlock*[from->bbJumpSwt->bbsCount];
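// Deep-copy the jump table entries so that 'to' does not share the destination array with 'from'.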
2667 for (unsigned i = 0; i < from->bbJumpSwt->bbsCount; i++)
2669 to->bbJumpSwt->bbsDstTab[i] = from->bbJumpSwt->bbsDstTab[i];
2679 // Canonicalize the loop nest rooted at parent loop 'loopInd'.
2680 // Returns 'true' if the flow graph is modified.
2681 bool Compiler::optCanonicalizeLoopNest(unsigned char loopInd)
2683 bool modified = false;
2685 // Is the top of the current loop not in any nested loop?
2686 if (optLoopTable[loopInd].lpTop->bbNatLoopNum != loopInd)
2688 if (optCanonicalizeLoop(loopInd))
2694 for (unsigned char child = optLoopTable[loopInd].lpChild; child != BasicBlock::NOT_IN_LOOP;
2695 child = optLoopTable[child].lpSibling)
2697 if (optCanonicalizeLoopNest(child))
2706 bool Compiler::optCanonicalizeLoop(unsigned char loopInd)
2708 // Is the top uniquely part of the current loop?
2709 BasicBlock* t = optLoopTable[loopInd].lpTop;
2711 if (t->bbNatLoopNum == loopInd)
2716 JITDUMP("in optCanonicalizeLoop: L%02u has top BB%02u (bottom BB%02u) with natural loop number L%02u: need to "
2718 loopInd, t->bbNum, optLoopTable[loopInd].lpBottom->bbNum, t->bbNatLoopNum);
2720 // Otherwise, the top of this loop is also part of a nested loop.
2722 // Insert a new unique top for this loop. We must be careful to put this new
2723 // block in the correct EH region. Note that f->bbPrev might be in a different
2724 // EH region. For example:
2732 // In this case, first->bbPrev is BB07, which is in a different 'try' region.
2733 // On the other hand, the first block of multiple loops might be the first
2734 // block of a 'try' region that is completely contained in the multiple loops.
2739 // BB10 BBJ_ALWAYS => BB08
2741 // BB12 BBJ_ALWAYS => BB08
2743 // Here, we have two loops, both with BB08 as the "first" block. Block BB08
2744 // is a single-block "try" region. Neither loop "bottom" block is in the same
2745 // "try" region as BB08. This is legal because you can jump to the first block
2746 // of a try region. With EH normalization, no two "try" regions will share
2747 // this block. In this case, we need to insert a new block for the outer loop
2748 // in the same EH region as the branch from the "bottom":
2753 // BB10 BBJ_ALWAYS => BB08
2755 // BB12 BBJ_ALWAYS => BB30
2757 // Another possibility is that the "first" block of the loop nest can be the first block
2758 // of a "try" region that also has other predecessors than those in the loop, or even in
2759 // the "try" region (since blocks can target the first block of a "try" region). For example:
2763 // BB10 BBJ_ALWAYS => BB08
2765 // BB12 BBJ_ALWAYS => BB08
2768 // BB20 BBJ_ALWAYS => BB08
2770 // BB25 BBJ_ALWAYS => BB08
2772 // Here, BB08 has 4 flow graph predecessors: BB10, BB12, BB20, BB25. These are all potential loop
2773 // bottoms, for four possible nested loops. However, we require all the loop bottoms to be in the
2774 // same EH region. For loops BB08..BB10 and BB08..BB12, we need to add a new "top" block within
2775 // the try region, immediately before BB08. The bottom of the loop BB08..BB10 loop will target the
2776 // old BB08, and the bottom of the BB08..BB12 loop will target the new loop header. The other branches
2777 // (BB20, BB25) must target the new loop header, both for correctness, and to avoid the illegal
2778 // situation of branching to a non-first block of a 'try' region.
2780 // We can also have a loop nest where the "first" block is outside of a "try" region
2781 // and the back edges are inside a "try" region, for example:
2785 // BB09 try { BBJ_COND => BB02
2787 // BB15 BBJ_COND => BB02
2789 // BB21 } // end of "try"
2791 // In this case, both loop back edges were formed by "leave" instructions that were
2792 // imported into branches that were later made conditional. In this case, we don't
2793 // want to copy the EH region of the back edge, since that would create a block
2794 // outside of and disjoint with the "try" region of the back edge. However, to
2795 // simplify things, we disqualify this type of loop, so we should never see this here.
2797 BasicBlock* h = optLoopTable[loopInd].lpHead;
2798 BasicBlock* f = optLoopTable[loopInd].lpFirst;
2799 BasicBlock* b = optLoopTable[loopInd].lpBottom;
2801 // The loop must be entirely contained within a single handler region.
2802 assert(BasicBlock::sameHndRegion(f, b));
2804 // If the bottom block is in the same "try" region, then we extend the EH
2805 // region. Otherwise, we add the new block outside the "try" region.
2806 bool extendRegion = BasicBlock::sameTryRegion(f, b);
2807 BasicBlock* newT = fgNewBBbefore(BBJ_NONE, f, extendRegion);
2810 // We need to set the EH region manually. Set it to be the same
2811 // as the bottom block.
2812 newT->copyEHRegion(b);
2815 // The new block can reach the same set of blocks as the old one, but don't try to reflect
2816 // that in its reachability set here -- creating the new block may have changed the BlockSet
2817 // representation from short to long, and canonicalizing loops is immediately followed by
2818 // a call to fgUpdateChangedFlowGraph which will recompute the reachability sets anyway.
2820 // Redirect the "bottom" of the current loop to "newT".
2821 BlockToBlockMap* blockMap = new (getAllocatorLoopHoist()) BlockToBlockMap(getAllocatorLoopHoist());
2822 blockMap->Set(t, newT);
2823 optRedirectBlock(b, blockMap);
2825 // Redirect non-loop preds of "t" to also go to "newT". Inner loops that also branch to "t" should continue
2827 // to do so. However, there may be other predecessors from outside the loop nest that need to be updated
2827 // to point to "newT". This normally wouldn't happen, since they too would be part of the loop nest. However,
2828 // they might have been prevented from participating in the loop nest due to different EH nesting, or some
2831 // Note that optRedirectBlock doesn't update the predecessors list. So, if the same 't' block is processed
2832 // multiple times while canonicalizing multiple loop nests, we'll attempt to redirect a predecessor multiple times.
2833 // This is ok, because after the first redirection, the topPredBlock branch target will no longer match the source
2834 // edge of the blockMap, so nothing will happen.
2835 bool firstPred = true;
2836 for (flowList* topPred = t->bbPreds; topPred != nullptr; topPred = topPred->flNext)
2838 BasicBlock* topPredBlock = topPred->flBlock;
2840 // Skip if topPredBlock is in the loop.
2841 // Note that this uses block number to detect membership in the loop. We are adding blocks during
2842 // canonicalization, and those block numbers will be new, and larger than previous blocks. However, we work
2843 // outside-in, so we shouldn't encounter the new blocks at the loop boundaries, or in the predecessor lists.
2844 if (t->bbNum <= topPredBlock->bbNum && topPredBlock->bbNum <= b->bbNum)
2846 JITDUMP("in optCanonicalizeLoop: 'top' predecessor BB%02u is in the range of L%02u (BB%02u..BB%02u); not "
2847 "redirecting its bottom edge\n",
2848 topPredBlock->bbNum, loopInd, t->bbNum, b->bbNum);
2852 JITDUMP("in optCanonicalizeLoop: redirect top predecessor BB%02u to BB%02u\n", topPredBlock->bbNum,
2854 optRedirectBlock(topPredBlock, blockMap);
2856 // When we have profile data then the 'newT' block will inherit topPredBlock profile weight
2857 if (topPredBlock->hasProfileWeight())
2859 // This corrects an issue when the topPredBlock has a profile based weight
2863 JITDUMP("in optCanonicalizeLoop: block BB%02u will inheritWeight from BB%02u\n", newT->bbNum,
2864 topPredBlock->bbNum);
2866 newT->inheritWeight(topPredBlock);
2871 JITDUMP("in optCanonicalizeLoop: block BB%02u will also contribute to the weight of BB%02u\n",
2872 newT->bbNum, topPredBlock->bbNum);
2874 BasicBlock::weight_t newWeight = newT->getBBWeight(this) + topPredBlock->getBBWeight(this);
2875 newT->setBBWeight(newWeight);
2880 assert(newT->bbNext == f);
2883 newT->bbJumpKind = BBJ_ALWAYS;
2884 newT->bbJumpDest = t;
2885 newT->bbTreeList = nullptr;
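// Add a GT_NOP statement so the new top block is not left empty.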
2886 fgInsertStmtAtEnd(newT, fgNewStmtFromTree(gtNewOperNode(GT_NOP, TYP_VOID, nullptr)));
2889 // If it had been a do-while loop (top == entry), update entry, as well.
2890 BasicBlock* origE = optLoopTable[loopInd].lpEntry;
2891 if (optLoopTable[loopInd].lpTop == origE)
2893 optLoopTable[loopInd].lpEntry = newT;
2895 optLoopTable[loopInd].lpTop = newT;
2896 optLoopTable[loopInd].lpFirst = newT;
2898 newT->bbNatLoopNum = loopInd;
2900 JITDUMP("in optCanonicalizeLoop: made new block BB%02u [%p] the new unique top of loop %d.\n", newT->bbNum,
2901 dspPtr(newT), loopInd);
2903 // Make sure the head block still goes to the entry...
2904 if (h->bbJumpKind == BBJ_NONE && h->bbNext != optLoopTable[loopInd].lpEntry)
2906 h->bbJumpKind = BBJ_ALWAYS;
2907 h->bbJumpDest = optLoopTable[loopInd].lpEntry;
2909 else if (h->bbJumpKind == BBJ_COND && h->bbNext == newT && newT != optLoopTable[loopInd].lpEntry)
2911 BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, h, /*extendRegion*/ true);
2912 optLoopTable[loopInd].lpHead = h2;
2913 h2->bbJumpDest = optLoopTable[loopInd].lpEntry;
2914 h2->bbTreeList = nullptr;
2915 fgInsertStmtAtEnd(h2, fgNewStmtFromTree(gtNewOperNode(GT_NOP, TYP_VOID, nullptr)));
2918 // If any loops nested in "loopInd" have the same head and entry as "loopInd",
2919 // it must be the case that they were do-while's (since "h" fell through to the entry).
2920 // The new node "newT" becomes the head of such loops.
2921 for (unsigned char childLoop = optLoopTable[loopInd].lpChild; childLoop != BasicBlock::NOT_IN_LOOP;
2922 childLoop = optLoopTable[childLoop].lpSibling)
2924 if (optLoopTable[childLoop].lpEntry == origE && optLoopTable[childLoop].lpHead == h &&
2925 newT->bbJumpKind == BBJ_NONE && newT->bbNext == origE)
2927 optUpdateLoopHead(childLoop, h, newT);
2933 bool Compiler::optLoopContains(unsigned l1, unsigned l2)
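// Decide containment by walking l2's lpParent chain looking for l1.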
2935 assert(l1 != BasicBlock::NOT_IN_LOOP);
2940 else if (l2 == BasicBlock::NOT_IN_LOOP)
2946 return optLoopContains(l1, optLoopTable[l2].lpParent);
2950 void Compiler::optUpdateLoopHead(unsigned loopInd, BasicBlock* from, BasicBlock* to)
2952 assert(optLoopTable[loopInd].lpHead == from);
2953 optLoopTable[loopInd].lpHead = to;
2954 for (unsigned char childLoop = optLoopTable[loopInd].lpChild; childLoop != BasicBlock::NOT_IN_LOOP;
2955 childLoop = optLoopTable[childLoop].lpSibling)
2957 if (optLoopTable[childLoop].lpHead == from)
2959 optUpdateLoopHead(childLoop, from, to);
2964 /*****************************************************************************
2965 * If the "i += const" will cause an overflow exception for the small types.
2968 bool jitIterSmallOverflow(int iterAtExit, var_types incrType)
2975 type_MAX = SCHAR_MAX;
2978 type_MAX = UCHAR_MAX;
2981 type_MAX = SHRT_MAX;
2984 type_MAX = USHRT_MAX;
2987 case TYP_UINT: // Detected by checking for 32bit ....
2989 return false; // ... overflow same as done for TYP_INT
2995 if (iterAtExit > type_MAX)
3005 /*****************************************************************************
3006 * If the "i -= const" will cause an underflow exception for the small types
3009 bool jitIterSmallUnderflow(int iterAtExit, var_types decrType)
3016 type_MIN = SCHAR_MIN;
3019 type_MIN = SHRT_MIN;
3028 case TYP_UINT: // Detected by checking for 32bit ....
3030 return false; // ... underflow same as done for TYP_INT
3036 if (iterAtExit < type_MIN)
3046 /*****************************************************************************
3048 * Helper for unroll loops - Computes the number of repetitions
3049 * in a constant loop. If it cannot prove the number is constant returns false
3052 bool Compiler::optComputeLoopRep(int constInit,
3055 genTreeOps iterOper,
3056 var_types iterOperType,
3057 genTreeOps testOper,
3060 unsigned* iterCount)
3062 noway_assert(genActualType(iterOperType) == TYP_INT);
3065 __int64 constLimitX;
3070 // Using this, we can just do a signed comparison with other 32 bit values.
3073 constLimitX = (unsigned int)constLimit;
3077 constLimitX = (signed int)constLimit;
3080 switch (iterOperType)
3082 // For small types, the iteration operator will narrow these values if big
3084 #define INIT_ITER_BY_TYPE(type) \
3085 constInitX = (type)constInit; \
3086 iterInc = (type)iterInc;
3089 INIT_ITER_BY_TYPE(signed char);
3092 INIT_ITER_BY_TYPE(unsigned char);
3095 INIT_ITER_BY_TYPE(signed short);
3098 INIT_ITER_BY_TYPE(unsigned short);
3101 // For the big types, 32 bit arithmetic is performed
3107 constInitX = (unsigned int)constInit;
3111 constInitX = (signed int)constInit;
3116 noway_assert(!"Bad type");
3120 /* If iterInc is zero we have an infinite loop */
3126 /* Set iterSign to +1 for positive iterInc and -1 for negative iterInc */
3127 iterSign = (iterInc > 0) ? +1 : -1;
3129 /* Initialize loopCount to zero */
3132 // If dupCond is true then the loop head contains a test which skips
3133 // this loop, if the constInit does not pass the loop test
3134 // Such a loop can execute zero times.
3135 // If dupCond is false then we have a true do-while loop which we
3136 // always execute the loop once before performing the loop test
3140 constInitX += iterInc;
3143 // bail if count is based on wrap-around math
3146 if (constLimitX < constInitX)
3151 else if (constLimitX > constInitX)
3156 /* Compute the number of repetitions */
3160 __int64 iterAtExitX;
3163 /* something like "for (i=init; i == lim; i++)" doesn't make any sense */
3167 /* "for (i=init; i != lim; i+=const)" - this is tricky since it may
3168 * have a constant number of iterations or loop forever -
3169 * we have to compute (lim-init) mod iterInc to see if it is zero.
3170 * If mod iterInc is not zero then the limit test will miss and a wrap will occur
3171 * which is probably not what the end user wanted, but it is legal.
3176 /* Stepping by one, i.e. Mod with 1 is always zero */
3179 if (((constLimitX - constInitX) % iterInc) != 0)
3187 noway_assert(iterInc < 0);
3188 /* Stepping by -1, i.e. Mod with 1 is always zero */
3191 if (((constInitX - constLimitX) % (-iterInc)) != 0)
3200 #ifdef LEGACY_BACKEND
3207 #ifdef LEGACY_BACKEND
3211 if (constInitX != constLimitX)
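// (constLimitX - constInitX) is an exact nonzero multiple of iterInc here (otherwise we
// returned false above), so this computes exactly (limit - init) / iterInc iterations; the
// -iterSign/+1 form gives the same result with truncating division for either sign of iterInc.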
3213 loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1;
3216 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
3220 iterAtExitX = (unsigned)iterAtExitX;
3223 // Check if iteration incr will cause overflow for small types
3224 if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
3229 // iterator with 32bit overflow. Bad for TYP_(U)INT
3230 if (iterAtExitX < constLimitX)
3235 *iterCount = loopCount;
3238 #ifdef LEGACY_BACKEND
3253 noway_assert(!"Unknown operator for loop iterator");
3260 #ifdef LEGACY_BACKEND
3267 #ifdef LEGACY_BACKEND
3271 if (constInitX < constLimitX)
3273 loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1;
3276 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
3280 iterAtExitX = (unsigned)iterAtExitX;
3283 // Check if iteration incr will cause overflow for small types
3284 if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
3289 // iterator with 32bit overflow. Bad for TYP_(U)INT
3290 if (iterAtExitX < constLimitX)
3295 *iterCount = loopCount;
3298 #ifdef LEGACY_BACKEND
3313 noway_assert(!"Unknown operator for loop iterator");
3320 #ifdef LEGACY_BACKEND
3327 #ifdef LEGACY_BACKEND
3331 if (constInitX <= constLimitX)
3333 loopCount += (unsigned)((constLimitX - constInitX) / iterInc) + 1;
3336 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
3340 iterAtExitX = (unsigned)iterAtExitX;
3343 // Check if iteration incr will cause overflow for small types
3344 if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
3349 // iterator with 32bit overflow. Bad for TYP_(U)INT
3350 if (iterAtExitX <= constLimitX)
3355 *iterCount = loopCount;
3358 #ifdef LEGACY_BACKEND
3373 noway_assert(!"Unknown operator for loop iterator");
3380 #ifdef LEGACY_BACKEND
3387 #ifdef LEGACY_BACKEND
3391 if (constInitX > constLimitX)
3393 loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1;
3396 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
3400 iterAtExitX = (unsigned)iterAtExitX;
3403 // Check if small types will underflow
3404 if (jitIterSmallUnderflow((int)iterAtExitX, iterOperType))
3409 // iterator with 32bit underflow. Bad for TYP_INT and unsigneds
3410 if (iterAtExitX > constLimitX)
3415 *iterCount = loopCount;
3418 #ifdef LEGACY_BACKEND
3433 noway_assert(!"Unknown operator for loop iterator");
3440 #ifdef LEGACY_BACKEND
3447 #ifdef LEGACY_BACKEND
3451 if (constInitX >= constLimitX)
3453 loopCount += (unsigned)((constLimitX - constInitX) / iterInc) + 1;
3456 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
3460 iterAtExitX = (unsigned)iterAtExitX;
3463 // Check if small types will underflow
3464 if (jitIterSmallUnderflow((int)iterAtExitX, iterOperType))
3469 // iterator with 32bit underflow. Bad for TYP_INT and unsigneds
3470 if (iterAtExitX >= constLimitX)
3475 *iterCount = loopCount;
3478 #ifdef LEGACY_BACKEND
3493 noway_assert(!"Unknown operator for loop iterator");
3498 noway_assert(!"Unknown operator for loop condition");
3504 /*****************************************************************************
3506 * Look for loop unrolling candidates and unroll them
3510 #pragma warning(push)
3511 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
3513 void Compiler::optUnrollLoops()
3515 if (compCodeOpt() == SMALL_CODE)
3520 if (optLoopCount == 0)
3526 if (JitConfig.JitNoUnroll())
3535 printf("*************** In optUnrollLoops()\n");
3538 /* Look for loop unrolling candidates */
3540 bool change = false;
3542 // Visit loops from highest to lowest number to visit them in innermost
3543 // to outermost order
3544 for (unsigned lnum = optLoopCount - 1; lnum != ~0U; --lnum)
3546 // This is necessary due to an apparent analysis limitation since
3547 // optLoopCount must be strictly greater than 0 upon entry and lnum
3548 // cannot wrap due to the loop termination condition.
3549 PREFAST_ASSUME(lnum != 0U - 1);
3563 int lbeg; // initial value for iterator
3564 int llim; // limit value for iterator
3565 unsigned lvar; // iterator lclVar #
3566 int iterInc; // value to increment the iterator
3567 genTreeOps iterOper; // type of iterator increment (i.e. ADD, SUB, etc.)
3568 var_types iterOperType; // type result of the oper (for overflow instrs)
3569 genTreeOps testOper; // type of loop test (i.e. GT_LE, GT_GE, etc.)
3570 bool unsTest; // Is the comparison u/int
3572 unsigned loopRetCount; // number of BBJ_RETURN blocks in loop
3573 unsigned totalIter; // total number of iterations in the constant loop
3574 unsigned loopFlags; // actual lpFlags
3575 unsigned requiredFlags; // required lpFlags
3577 static const int ITER_LIMIT[COUNT_OPT_CODE + 1] = {
3584 noway_assert(ITER_LIMIT[SMALL_CODE] == 0);
3585 noway_assert(ITER_LIMIT[COUNT_OPT_CODE] == 0);
3587 unsigned iterLimit = (unsigned)ITER_LIMIT[compCodeOpt()];
3590 if (compStressCompile(STRESS_UNROLL_LOOPS, 50))
3596 static const int UNROLL_LIMIT_SZ[COUNT_OPT_CODE + 1] = {
3597 300, // BLENDED_CODE
3603 noway_assert(UNROLL_LIMIT_SZ[SMALL_CODE] == 0);
3604 noway_assert(UNROLL_LIMIT_SZ[COUNT_OPT_CODE] == 0);
3606 int unrollLimitSz = (unsigned)UNROLL_LIMIT_SZ[compCodeOpt()];
3608 loopFlags = optLoopTable[lnum].lpFlags;
3609 // Check for required flags:
3610 // LPFLG_DO_WHILE - required because this transform only handles loops of this form
3611 // LPFLG_CONST - required because this transform only handles full unrolls
3612 // LPFLG_SIMD_LIMIT - included here as a heuristic, not for correctness/structural reasons
3613 requiredFlags = LPFLG_DO_WHILE | LPFLG_CONST | LPFLG_SIMD_LIMIT;
3616 if (compStressCompile(STRESS_UNROLL_LOOPS, 50))
3618 // In stress mode, quadruple the size limit, and drop
3619 // the restriction that loop limit must be Vector<T>.Count.
3622 requiredFlags &= ~LPFLG_SIMD_LIMIT;
3626 /* Ignore the loop if we don't have a do-while
3627 that has a constant number of iterations */
3629 if ((loopFlags & requiredFlags) != requiredFlags)
3634 /* ignore if removed or marked as not unrollable */
3636 if (loopFlags & (LPFLG_DONT_UNROLL | LPFLG_REMOVED))
3641 head = optLoopTable[lnum].lpHead;
3643 bottom = optLoopTable[lnum].lpBottom;
3644 noway_assert(bottom);
3646 /* Get the loop data:
3650 - iterator increment
3651 - increment operation type (i.e. ADD, SUB, etc...)
3652 - loop test type (i.e. GT_GE, GT_LT, etc...)
3655 lbeg = optLoopTable[lnum].lpConstInit;
3656 llim = optLoopTable[lnum].lpConstLimit();
3657 testOper = optLoopTable[lnum].lpTestOper();
3659 lvar = optLoopTable[lnum].lpIterVar();
3660 iterInc = optLoopTable[lnum].lpIterConst();
3661 iterOper = optLoopTable[lnum].lpIterOper();
3663 iterOperType = optLoopTable[lnum].lpIterOperType();
3664 unsTest = (optLoopTable[lnum].lpTestTree->gtFlags & GTF_UNSIGNED) != 0;
3666 if (lvaTable[lvar].lvAddrExposed)
3667 { // If the loop iteration variable is address-exposed then bail
3670 if (lvaTable[lvar].lvIsStructField)
3671 { // If the loop iteration variable is a promoted field from a struct then
3676 /* Locate the pre-header and initialization and increment/test statements */
3678 phdr = head->bbTreeList;
3680 loop = bottom->bbTreeList;
3683 init = head->lastStmt();
3684 noway_assert(init && (init->gtNext == nullptr));
3685 test = bottom->lastStmt();
3686 noway_assert(test && (test->gtNext == nullptr));
3687 incr = test->gtPrev;
3690 if (init->gtFlags & GTF_STMT_CMPADD)
3692 /* Must be a duplicated loop condition */
3693 noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
3696 init = init->gtPrev;
3704 /* Find the number of iterations - the function returns false if not a constant number */
3706 if (!optComputeLoopRep(lbeg, llim, iterInc, iterOper, iterOperType, testOper, unsTest, dupCond, &totalIter))
3711 /* Forget it if there are too many repetitions or not a constant loop */
3713 if (totalIter > iterLimit)
3718 noway_assert(init->gtOper == GT_STMT);
3719 init = init->gtStmt.gtStmtExpr;
3720 noway_assert(test->gtOper == GT_STMT);
3721 test = test->gtStmt.gtStmtExpr;
3722 noway_assert(incr->gtOper == GT_STMT);
3723 incr = incr->gtStmt.gtStmtExpr;
3725 // Don't unroll loops we don't understand.
3726 if (incr->gtOper != GT_ASG)
3730 incr = incr->gtOp.gtOp2;
3732 /* Make sure everything looks ok */
3733 if ((init->gtOper != GT_ASG) || (init->gtOp.gtOp1->gtOper != GT_LCL_VAR) ||
3734 (init->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) || (init->gtOp.gtOp2->gtOper != GT_CNS_INT) ||
3735 (init->gtOp.gtOp2->gtIntCon.gtIconVal != lbeg) ||
3737 !((incr->gtOper == GT_ADD) || (incr->gtOper == GT_SUB)) || (incr->gtOp.gtOp1->gtOper != GT_LCL_VAR) ||
3738 (incr->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) || (incr->gtOp.gtOp2->gtOper != GT_CNS_INT) ||
3739 (incr->gtOp.gtOp2->gtIntCon.gtIconVal != iterInc) ||
3741 (test->gtOper != GT_JTRUE))
3743 noway_assert(!"Bad precondition in Compiler::optUnrollLoops()");
3747 /* heuristic - Estimated cost in code size of the unrolled loop */
3750 ClrSafeInt<unsigned> loopCostSz; // Cost is size of one iteration
3752 block = head->bbNext;
3753 auto tryIndex = block->bbTryIndex;
3756 for (;; block = block->bbNext)
3758 if (block->bbTryIndex != tryIndex)
3760 // Unrolling would require cloning EH regions
3764 if (block->bbJumpKind == BBJ_RETURN)
3769 /* Visit all the statements in the block */
3771 for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
3773 /* Calculate gtCostSz */
3774 gtSetStmtInfo(stmt);
3776 /* Update loopCostSz */
3777 loopCostSz += stmt->gtCostSz;
3780 if (block == bottom)
3786 #ifdef JIT32_GCENCODER
3787 if (fgReturnCount + loopRetCount * (totalIter - 1) > SET_EPILOGCNT_MAX)
3789 // Jit32 GC encoder can't report more than SET_EPILOGCNT_MAX epilogs.
3792 #endif // JIT32_GCENCODER
3794 /* Compute the estimated increase in code size for the unrolled loop */
3796 ClrSafeInt<unsigned> fixedLoopCostSz(8);
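// Rough fixed cost of the loop control (induction update, test, and branch) that disappears when
// the loop is fully unrolled; the current loop is modeled as one body copy plus this overhead.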
3798 ClrSafeInt<int> unrollCostSz = ClrSafeInt<int>(loopCostSz * ClrSafeInt<unsigned>(totalIter)) -
3799 ClrSafeInt<int>(loopCostSz + fixedLoopCostSz);
3801 /* Don't unroll if too much code duplication would result. */
3803 if (unrollCostSz.IsOverflow() || (unrollCostSz.Value() > unrollLimitSz))
3808 /* Looks like a good idea to unroll this loop, let's do it! */
3809 CLANG_FORMAT_COMMENT_ANCHOR;
3814 printf("\nUnrolling loop BB%02u", head->bbNext->bbNum);
3815 if (head->bbNext->bbNum != bottom->bbNum)
3817 printf("..BB%02u", bottom->bbNum);
3819 printf(" over V%02u from %u to %u", lvar, lbeg, llim);
3820 printf(" unrollCostSz = %d\n", unrollCostSz);
3826 /* Create the unrolled loop statement list */
3828 BlockToBlockMap blockMap(getAllocator());
3829 BasicBlock* insertAfter = bottom;
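// Clone the loop body once per iteration, appending each cloned block after 'bottom'
// (and after any blocks cloned on earlier iterations).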
3831 for (lval = lbeg; totalIter; totalIter--)
3833 for (block = head->bbNext;; block = block->bbNext)
3835 BasicBlock* newBlock = insertAfter =
3836 fgNewBBafter(block->bbJumpKind, insertAfter, /*extendRegion*/ true);
3837 blockMap.Set(block, newBlock);
3839 if (!BasicBlock::CloneBlockState(this, newBlock, block, lvar, lval))
3841 // cloneExpr doesn't handle everything
3842 BasicBlock* oldBottomNext = insertAfter->bbNext;
3843 bottom->bbNext = oldBottomNext;
3844 oldBottomNext->bbPrev = bottom;
3845 optLoopTable[lnum].lpFlags |= LPFLG_DONT_UNROLL;
3848 // Block weight should no longer have the loop multiplier
3849 newBlock->modifyBBWeight(newBlock->bbWeight / BB_LOOP_WEIGHT);
3850 // Jump dests are set in a post-pass; make sure CloneBlockState hasn't tried to set them.
3851 assert(newBlock->bbJumpDest == nullptr);
3853 if (block == bottom)
3855 // Remove the test; we're doing a full unroll.
3857 GenTreeStmt* testCopyStmt = newBlock->lastStmt();
3858 GenTree* testCopyExpr = testCopyStmt->gtStmt.gtStmtExpr;
3859 assert(testCopyExpr->gtOper == GT_JTRUE);
3860 GenTree* sideEffList = nullptr;
3861 gtExtractSideEffList(testCopyExpr, &sideEffList, GTF_SIDE_EFFECT | GTF_ORDER_SIDEEFF);
3862 if (sideEffList == nullptr)
3864 fgRemoveStmt(newBlock, testCopyStmt);
3868 testCopyStmt->gtStmt.gtStmtExpr = sideEffList;
3870 newBlock->bbJumpKind = BBJ_NONE;
3872 // Exit this loop; we've walked all the blocks.
3877 // Now redirect any branches within the newly-cloned iteration
3878 for (block = head->bbNext; block != bottom; block = block->bbNext)
3880 BasicBlock* newBlock = blockMap[block];
3881 optCopyBlkDest(block, newBlock);
3882 optRedirectBlock(newBlock, &blockMap);
3885 /* update the new value for the unrolled iterator */
3899 noway_assert(!"Unrolling not implemented for this loop iterator");
3903 noway_assert(!"Unknown operator for constant loop iterator");
3908 // Gut the old loop body
3909 for (block = head->bbNext;; block = block->bbNext)
3911 block->bbTreeList = nullptr;
3912 block->bbJumpKind = BBJ_NONE;
3913 block->bbFlags &= ~(BBF_NEEDS_GCPOLL | BBF_LOOP_HEAD);
3914 if (block->bbJumpDest != nullptr)
3916 block->bbJumpDest = nullptr;
3919 if (block == bottom)
3925 /* if the HEAD is a BBJ_COND drop the condition (and make HEAD a BBJ_NONE block) */
3927 if (head->bbJumpKind == BBJ_COND)
3929 phdr = head->bbTreeList;
3931 test = phdr->gtPrev;
3933 noway_assert(test && (test->gtNext == nullptr));
3934 noway_assert(test->gtOper == GT_STMT);
3935 noway_assert(test->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
3937 init = test->gtPrev;
3938 noway_assert(init && (init->gtNext == test));
3939 noway_assert(init->gtOper == GT_STMT);
3941 init->gtNext = nullptr;
3942 phdr->gtPrev = init;
3943 head->bbJumpKind = BBJ_NONE;
3944 head->bbFlags &= ~BBF_NEEDS_GCPOLL;
3948 /* the loop must execute */
3949 noway_assert(head->bbJumpKind == BBJ_NONE);
3955 printf("Whole unrolled loop:\n");
3959 fgDumpTrees(head->bbNext, insertAfter);
3963 /* Remember that something has changed */
3967 /* Make sure to update loop table */
3969 /* Use the LPFLG_REMOVED flag and update the bbLoopMask accordingly
3970 * (also make head and bottom NULL - to hit an assert or GPF) */
3972 optLoopTable[lnum].lpFlags |= LPFLG_REMOVED;
3973 optLoopTable[lnum].lpHead = optLoopTable[lnum].lpBottom = nullptr;
3975 // Note if we created new BBJ_RETURNs
3976 fgReturnCount += loopRetCount * (totalIter - 1);
3984 fgUpdateChangedFlowGraph();
3988 fgDebugCheckBBlist(true);
3992 #pragma warning(pop)
3995 /*****************************************************************************
3997 * Return false if there is a code path from 'topBB' to 'botBB' that might
3998 * not execute a method call.
4001 bool Compiler::optReachWithoutCall(BasicBlock* topBB, BasicBlock* botBB)
4003 // TODO-Cleanup: Currently BBF_GC_SAFE_POINT is not set for helper calls,
4004 // as some helper calls are neither interruptible nor hijackable.
4005 // When we can determine this, then we can set BBF_GC_SAFE_POINT for
4006 // those helpers too.
4008 noway_assert(topBB->bbNum <= botBB->bbNum);
4010 // We can always check topBB and botBB for any gc safe points and early out
4012 if ((topBB->bbFlags | botBB->bbFlags) & BBF_GC_SAFE_POINT)
4017 // Otherwise we will need to rely upon the dominator sets
4019 if (!fgDomsComputed)
4021 // return a conservative answer of true when we don't have the dominator sets
4025 BasicBlock* curBB = topBB;
4028 noway_assert(curBB);
4030 // If we added a loop pre-header block then we will
4031 // have a bbNum greater than fgLastBB, and we won't have
4032 // any dominator information about this block, so skip it.
4034 if (curBB->bbNum <= fgLastBB->bbNum)
4036 noway_assert(curBB->bbNum <= botBB->bbNum);
4038 // Does this block contain a gc safe point?
4040 if (curBB->bbFlags & BBF_GC_SAFE_POINT)
4042 // Will this block always execute on the way to botBB ?
4044 // Since we are checking every block in [topBB .. botBB] and we are using
4045 // a lexical definition of a loop
4046 // (all that we know is that botBB is a back-edge to topBB),
4047 // while walking blocks in this range we may encounter some blocks
4048 // that are not really part of the loop, and so we need to perform
4049 // some additional checks:
4051 // We will check that the current 'curBB' is reachable from 'topBB'
4052 // and that it dominates the block containing the back-edge 'botBB'
4053 // When both of these are true then we know that the gcsafe point in 'curBB'
4054 // will be encountered in the loop and we can return false
4056 if (fgDominate(curBB, botBB) && fgReachable(topBB, curBB))
4063 // If we've reached the destination block, then we're done
4072 curBB = curBB->bbNext;
4075 // If we didn't find any blocks that contained a gc safe point and
4076 // also met the fgDominate and fgReachable criteria then we must return true
4081 /*****************************************************************************
4083 * Find the loop termination test at the bottom of the loop
4086 static GenTree* optFindLoopTermTest(BasicBlock* bottom)
4088 GenTree* testt = bottom->bbTreeList;
4090 assert(testt && testt->gtOper == GT_STMT);
4092 GenTree* result = testt->gtPrev;
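// The first statement's gtPrev links to the last statement in the block, so 'result' is the
// block's last statement; the walk below just verifies that.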
4095 while (testt->gtNext)
4097 testt = testt->gtNext;
4100 assert(testt == result);
4106 /*****************************************************************************
4107 * Optimize "jmp C; do{} C:while(cond);" loops to "if (cond){ do{}while(cond}; }"
4110 void Compiler::fgOptWhileLoop(BasicBlock* block)
4112 noway_assert(!opts.MinOpts() && !opts.compDbgCode);
4113 noway_assert(compCodeOpt() != SMALL_CODE);
4116 Optimize while loops into do { } while loop
4117 Our loop hoisting logic requires do { } while loops.
4118 Specifically, we're looking for the following case:
4129 If we find this, and the condition is simple enough, we change
4130 the loop to the following:
4135 // else fall-through
4146 /* Does the BB end with an unconditional jump? */
4148 if (block->bbJumpKind != BBJ_ALWAYS || (block->bbFlags & BBF_KEEP_BBJ_ALWAYS))
4149 { // It can't be one of the ones we use for our exception magic
4153 // It has to be a forward jump
4154 // TODO-CQ: Check if we can optimize the backwards jump as well.
4156 if (fgIsForwardBranch(block) == false)
4161 // Get hold of the jump target
4162 BasicBlock* bTest = block->bbJumpDest;
4164 // Does the block consist of 'jtrue(cond) block' ?
4165 if (bTest->bbJumpKind != BBJ_COND)
4170 // bTest must be a backwards jump to block->bbNext
4171 if (bTest->bbJumpDest != block->bbNext)
4176 // Since bTest is a BBJ_COND it will have a bbNext
4177 noway_assert(bTest->bbNext);
4179 // 'block' must be in the same try region as the condition, since we're going to insert
4180 // a duplicated condition in 'block', and the condition might include exception throwing code.
4181 if (!BasicBlock::sameTryRegion(block, bTest))
4186 // We're going to change 'block' to branch to bTest->bbNext, so that also better be in the
4187 // same try region (or no try region) to avoid generating illegal flow.
4188 BasicBlock* bTestNext = bTest->bbNext;
4189 if (bTestNext->hasTryIndex() && !BasicBlock::sameTryRegion(block, bTestNext))
4194 GenTree* condStmt = optFindLoopTermTest(bTest);
4196 // bTest must contain only a jtrue with no other stmts; we will only clone
4197 // the conditional, so any other statements will not get cloned
4198 // TODO-CQ: consider cloning the whole bTest block as inserting it after block.
4200 if (bTest->bbTreeList != condStmt)
4205 /* Get to the condition node from the statement tree */
4207 noway_assert(condStmt->gtOper == GT_STMT);
4209 GenTree* condTree = condStmt->gtStmt.gtStmtExpr;
4210 noway_assert(condTree->gtOper == GT_JTRUE);
4212 condTree = condTree->gtOp.gtOp1;
4214 // The condTree has to be a RelOp comparison
4215 // TODO-CQ: Check if we can optimize the backwards jump as well.
4217 if (condTree->OperIsCompare() == false)
4222 /* We call gtPrepareCost to measure the cost of duplicating this tree */
4224 gtPrepareCost(condTree);
4225 unsigned estDupCostSz = condTree->gtCostSz;
4227 double loopIterations = (double)BB_LOOP_WEIGHT;
4229 bool allProfileWeightsAreValid = false;
4230 BasicBlock::weight_t weightBlock = block->bbWeight;
4231 BasicBlock::weight_t weightTest = bTest->bbWeight;
4232 BasicBlock::weight_t weightNext = block->bbNext->bbWeight;
4234 // If we have profile data then we calculate the number of times
4235 // the loop will iterate into loopIterations
4236 if (fgIsUsingProfileWeights())
4238 // Only rely upon the profile weight when all three of these blocks
4239 // have good profile weights
4240 if (block->hasProfileWeight() && bTest->hasProfileWeight() && block->bbNext->hasProfileWeight())
4242 allProfileWeightsAreValid = true;
4244 // If this while loop never iterates then don't bother transforming
4245 if (weightNext == 0)
4250 // with (weightNext > 0) we should also have (weightTest >= weightBlock)
4251 // if the profile weights are all valid.
4253 // weightNext is the number of times this loop iterates
4254 // weightBlock is the number of times that we enter the while loop
4255 // loopIterations is the average number of times that this loop iterates
4257 if (weightTest >= weightBlock)
4259 loopIterations = (double)block->bbNext->bbWeight / (double)block->bbWeight;
4264 unsigned maxDupCostSz = 32;
4266 // optFastCodeOrBlendedLoop(bTest->bbWeight) does not work here as we have not
4267 // set loop weights yet
4268 if ((compCodeOpt() == FAST_CODE) || compStressCompile(STRESS_DO_WHILE_LOOPS, 30))
4273 // If this loop iterates a lot then raise the maxDupCost
4274 if (loopIterations >= 12.0)
4278 if (loopIterations >= 96.0)
4283 // If the loop condition has a shared static helper, we really want this loop converted
4284 // as not converting the loop will disable loop hoisting, meaning the shared helper will
4285 // be executed on every loop iteration.
4286 int countOfHelpers = 0;
4287 fgWalkTreePre(&condTree, CountSharedStaticHelper, &countOfHelpers);
4289 if (countOfHelpers > 0 && compCodeOpt() != SMALL_CODE)
4291 maxDupCostSz += 24 * min(countOfHelpers, (int)(loopIterations + 1.5));
4294 // If the compare has too high cost then we don't want to dup
4296 bool costIsTooHigh = (estDupCostSz > maxDupCostSz);
4301 printf("\nDuplication of loop condition [%06u] is %s, because the cost of duplication (%i) is %s than %i,"
4302 "\n loopIterations = %7.3f, countOfHelpers = %d, validProfileWeights = %s\n",
4303 condTree->gtTreeID, costIsTooHigh ? "not done" : "performed", estDupCostSz,
4304 costIsTooHigh ? "greater" : "less or equal", maxDupCostSz, loopIterations, countOfHelpers,
4305 allProfileWeightsAreValid ? "true" : "false");
4314 /* Looks good - duplicate the condition test */
4316 condTree->gtFlags |= GTF_RELOP_ZTT;
4318 condTree = gtCloneExpr(condTree);
4319 gtReverseCond(condTree);
4321 // Make sure clone expr copied the flag
4322 assert(condTree->gtFlags & GTF_RELOP_ZTT);
4324 condTree = gtNewOperNode(GT_JTRUE, TYP_VOID, condTree);
4326 /* Create a statement entry out of the condition and
4327 append the condition test at the end of 'block' */
4329 GenTree* copyOfCondStmt = fgInsertStmtAtEnd(block, condTree);
4331 copyOfCondStmt->gtFlags |= GTF_STMT_CMPADD;
4333 if (opts.compDbgInfo)
4335 copyOfCondStmt->gtStmt.gtStmtILoffsx = condStmt->gtStmt.gtStmtILoffsx;
4338 // Flag the block that received the copy as potentially having an array/vtable
4339 // reference if the block copied from did; this is a conservative guess.
4340 if (auto copyFlags = bTest->bbFlags & (BBF_HAS_VTABREF | BBF_HAS_IDX_LEN))
4342 block->bbFlags |= copyFlags;
4345 // If we have profile data for all blocks and we know that we are cloning the
4346 // bTest block into block and thus changing the control flow from block so
4347 // that it no longer goes directly to bTest anymore, we have to adjust the
4348 // weight of bTest by subtracting out the weight of block.
4350 if (allProfileWeightsAreValid)
4353 // Some additional sanity checks before adjusting the weight of bTest
4355 if ((weightNext > 0) && (weightTest >= weightBlock) && (weightTest != BB_MAX_WEIGHT))
4357 // Get the two edges that flow out of bTest
4358 flowList* edgeToNext = fgGetPredForBlock(bTest->bbNext, bTest);
4359 flowList* edgeToJump = fgGetPredForBlock(bTest->bbJumpDest, bTest);
4361 // Calculate the new weight for block bTest
4363 BasicBlock::weight_t newWeightTest =
4364 (weightTest > weightBlock) ? (weightTest - weightBlock) : BB_ZERO_WEIGHT;
4365 bTest->bbWeight = newWeightTest;
4367 if (newWeightTest == BB_ZERO_WEIGHT)
4369 bTest->bbFlags |= BBF_RUN_RARELY;
4370 // All out edge weights are set to zero
4371 edgeToNext->flEdgeWeightMin = BB_ZERO_WEIGHT;
4372 edgeToNext->flEdgeWeightMax = BB_ZERO_WEIGHT;
4373 edgeToJump->flEdgeWeightMin = BB_ZERO_WEIGHT;
4374 edgeToJump->flEdgeWeightMax = BB_ZERO_WEIGHT;
4378 // Update our edge weights
4379 edgeToNext->flEdgeWeightMin = BB_ZERO_WEIGHT;
4380 edgeToNext->flEdgeWeightMax = min(edgeToNext->flEdgeWeightMax, newWeightTest);
4381 edgeToJump->flEdgeWeightMin = BB_ZERO_WEIGHT;
4382 edgeToJump->flEdgeWeightMax = min(edgeToJump->flEdgeWeightMax, newWeightTest);
4387 /* Change the block to end with a conditional jump */
4389 block->bbJumpKind = BBJ_COND;
4390 block->bbJumpDest = bTest->bbNext;
4392 /* Mark the jump dest block as being a jump target */
4393 block->bbJumpDest->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
4395 /* Update bbRefs and bbPreds for 'block->bbNext' 'bTest' and 'bTest->bbNext' */
4397 fgAddRefPred(block->bbNext, block);
4399 fgRemoveRefPred(bTest, block);
4400 fgAddRefPred(bTest->bbNext, block);
4405 printf("\nDuplicating loop condition in BB%02u for loop (BB%02u - BB%02u)", block->bbNum, block->bbNext->bbNum,
4407 printf("\nEstimated code size expansion is %d\n ", estDupCostSz);
4409 gtDispTree(copyOfCondStmt);
4415 /*****************************************************************************
4417 * Optimize the BasicBlock layout of the method
4420 void Compiler::optOptimizeLayout()
4422 noway_assert(!opts.MinOpts() && !opts.compDbgCode);
4427 printf("*************** In optOptimizeLayout()\n");
4431 /* Check that the flowgraph data (bbNum, bbRefs, bbPreds) is up-to-date */
4432 fgDebugCheckBBlist();
4435 noway_assert(fgModified == false);
4437 for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
4439 /* Make sure the appropriate fields are initialized */
4441 if (block->bbWeight == BB_ZERO_WEIGHT)
4443 /* Zero weighted block can't have a LOOP_HEAD flag */
4444 noway_assert(block->isLoopHead() == false);
4448 assert(block->bbLoopNum == 0);
4450 if (compCodeOpt() != SMALL_CODE)
4452 /* Optimize "while(cond){}" loops to "cond; do{}while(cond);" */
4454 fgOptWhileLoop(block);
4460 // Recompute the edge weight if we have modified the flow graph in fgOptWhileLoop
4461 fgComputeEdgeWeights();
4464 fgUpdateFlowGraph(true);
4466 fgUpdateFlowGraph();
4469 /*****************************************************************************
4471 * Perform loop inversion, find and classify natural loops
4474 void Compiler::optOptimizeLoops()
4476 noway_assert(!opts.MinOpts() && !opts.compDbgCode);
4481 printf("*************** In optOptimizeLoops()\n");
4485 optSetBlockWeights();
4487 /* Were there any loops in the flow graph? */
4491 /* now that we have dominator information we can find loops */
4493 optFindNaturalLoops();
4495 unsigned loopNum = 0;
4497 /* Iterate over the flow graph, marking all loops */
4499 /* We will use the following terminology:
4500 * top - the first basic block in the loop (i.e. the head of the backward edge)
4501 * bottom - the last block in the loop (i.e. the block from which we jump to the top)
4502 * lastBottom - used when we have multiple back-edges to the same top
4509 for (top = fgFirstBB; top; top = top->bbNext)
4511 BasicBlock* foundBottom = nullptr;
4513 for (pred = top->bbPreds; pred; pred = pred->flNext)
4515 /* Is this a loop candidate? - We look for "back edges" */
4517 BasicBlock* bottom = pred->flBlock;
4519 /* is this a backward edge? (from BOTTOM to TOP) */
4521 if (top->bbNum > bottom->bbNum)
4526 /* 'top' also must have the BBF_LOOP_HEAD flag set */
4528 if (top->isLoopHead() == false)
4533 /* We only consider back-edges that are BBJ_COND or BBJ_ALWAYS for loops */
4535 if ((bottom->bbJumpKind != BBJ_COND) && (bottom->bbJumpKind != BBJ_ALWAYS))
4540 /* the top block must be able to reach the bottom block */
4541 if (!fgReachable(top, bottom))
4546 /* Found a new loop, record the longest backedge in foundBottom */
4548 if ((foundBottom == nullptr) || (bottom->bbNum > foundBottom->bbNum))
4550 foundBottom = bottom;
4558 /* Mark the loop header as such */
4559 assert(FitsIn<unsigned char>(loopNum));
4560 top->bbLoopNum = (unsigned char)loopNum;
4563 /* Mark all blocks between 'top' and 'bottom' */
4565 optMarkLoopBlocks(top, foundBottom, false);
4568 // We track at most 255 loops
4572 totalUnnatLoopOverflows++;
4579 totalUnnatLoopCount += loopNum;
4587 printf("\nFound a total of %d loops.", loopNum);
4588 printf("\nAfter loop weight marking:\n");
4589 fgDispBasicBlocks();
4594 optLoopsMarked = true;
4598 //------------------------------------------------------------------------
4599 // optDeriveLoopCloningConditions: Derive loop cloning conditions.
4602 // loopNum - the current loop index for which conditions are derived.
4603 // context - data structure where all loop cloning info is kept.
4606 // "false" if conditions cannot be obtained. "true" otherwise.
4607 // The cloning conditions are updated in the "conditions"[loopNum] field
4608 // of the "context" parameter.
4611 // Inspect the loop cloning optimization candidates and populate the conditions necessary
4612 // for each optimization candidate. Checks if the loop stride is "> 0" if the loop
4613 // condition is "less than". If the initializer is "var" init then adds condition
4614 // "var >= 0", and if the loop is var limit then, "var >= 0" and "var <= a.len"
4615 // are added to "context". These conditions are checked in the pre-header block
4616 // and the cloning choice is made.
4619 // Callers should assume AND operation is used i.e., if all conditions are
4620 // true, then take the fast path.
4622 bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext* context)
4624 JITDUMP("------------------------------------------------------------\n");
4625 JITDUMP("Deriving cloning conditions for L%02u\n", loopNum);
4627 LoopDsc* loop = &optLoopTable[loopNum];
4628 JitExpandArrayStack<LcOptInfo*>* optInfos = context->GetLoopOptInfo(loopNum);
4630 if (loop->lpTestOper() == GT_LT)
4632 // Stride conditions
4633 if (loop->lpIterConst() <= 0)
4635 JITDUMP("> Stride %d is invalid\n", loop->lpIterConst());
4640 if (loop->lpFlags & LPFLG_CONST_INIT)
4642 // Only allowing const init at this time.
4643 if (loop->lpConstInit < 0)
4645 JITDUMP("> Init %d is invalid\n", loop->lpConstInit);
4649 else if (loop->lpFlags & LPFLG_VAR_INIT)
4652 LC_Condition geZero(GT_GE, LC_Expr(LC_Ident(loop->lpVarInit, LC_Ident::Var)),
4653 LC_Expr(LC_Ident(0, LC_Ident::Const)));
4654 context->EnsureConditions(loopNum)->Push(geZero);
4658 JITDUMP("> Not variable init\n");
4664 if (loop->lpFlags & LPFLG_CONST_LIMIT)
4666 int limit = loop->lpConstLimit();
4669 JITDUMP("> limit %d is invalid\n", limit);
4672 ident = LC_Ident(static_cast<unsigned>(limit), LC_Ident::Const);
4674 else if (loop->lpFlags & LPFLG_VAR_LIMIT)
4676 unsigned limitLcl = loop->lpVarLimit();
4677 ident = LC_Ident(limitLcl, LC_Ident::Var);
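// For a variable limit, additionally require that the limit itself is non-negative.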
4679 LC_Condition geZero(GT_GE, LC_Expr(ident), LC_Expr(LC_Ident(0, LC_Ident::Const)));
4681 context->EnsureConditions(loopNum)->Push(geZero);
4683 else if (loop->lpFlags & LPFLG_ARRLEN_LIMIT)
4685 ArrIndex* index = new (getAllocator()) ArrIndex(getAllocator());
4686 if (!loop->lpArrLenLimit(this, index))
4688 JITDUMP("> ArrLen not matching\n");
4691 ident = LC_Ident(LC_Array(LC_Array::Jagged, index, LC_Array::ArrLen));
4693 // Ensure that this array can be dereferenced before executing the actual condition.
4694 LC_Array array(LC_Array::Jagged, index, LC_Array::None);
4695 context->EnsureDerefs(loopNum)->Push(array);
4699 JITDUMP("> Undetected limit\n");
4703 for (unsigned i = 0; i < optInfos->Size(); ++i)
4705 LcOptInfo* optInfo = optInfos->GetRef(i);
4706 switch (optInfo->GetOptType())
4708 case LcOptInfo::LcJaggedArray:
4711 LcJaggedArrayOptInfo* arrIndexInfo = optInfo->AsLcJaggedArrayOptInfo();
4712 LC_Array arrLen(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::ArrLen);
4713 LC_Ident arrLenIdent = LC_Ident(arrLen);
4715 LC_Condition cond(GT_LE, LC_Expr(ident), LC_Expr(arrLenIdent));
4716 context->EnsureConditions(loopNum)->Push(cond);
4718 // Ensure that this array can be dereferenced before executing the actual condition.
4719 LC_Array array(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::None);
4720 context->EnsureDerefs(loopNum)->Push(array);
4723 case LcOptInfo::LcMdArray:
4725 // limit <= mdArrLen
4726 LcMdArrayOptInfo* mdArrInfo = optInfo->AsLcMdArrayOptInfo();
4727 LC_Condition cond(GT_LE, LC_Expr(ident),
4728 LC_Expr(LC_Ident(LC_Array(LC_Array::MdArray,
4729 mdArrInfo->GetArrIndexForDim(getAllocator()),
4730 mdArrInfo->dim, LC_Array::None))));
4731 context->EnsureConditions(loopNum)->Push(cond);
4736 JITDUMP("Unknown opt\n");
4740 JITDUMP("Conditions: (");
4741 DBEXEC(verbose, context->PrintConditions(loopNum));
4748 //------------------------------------------------------------------------------------
4749 // optComputeDerefConditions: Derive loop cloning conditions for dereferencing arrays.
4752 // loopNum - the current loop index for which conditions are derived.
4753 // context - data structure where all loop cloning info is kept.
4756 // "false" if conditions cannot be obtained. "true" otherwise.
4757 // The deref conditions are updated in the "derefConditions"[loopNum] field
4758 // of the "context" parameter.
4760 // Definition of Deref Conditions:
4761 // To be able to check for the loop cloning condition that (limitVar <= a.len)
4762 //  we should first be able to dereference "a", i.e., "a" must be non-null.
4768 //      for (k in 0..n)      // Innermost loop is being cloned. Cloning needs to check if
4769 //                           // (n <= a[i][j].len) and the other safety conditions to take the fast path
4772 //  Now, we want to deref a[i][j] to invoke the length operator on it to perform the cloning fast-path check.
4773 // This involves deref of (a), (a[i]), (a[i][j]), therefore, the following should first
4774 // be true to do the deref.
4776 // (a != null) && (i < a.len) && (a[i] != null) && (j < a[i].len) && (a[i][j] != null) --> (1)
4778 //  Note the short-circuiting AND. Implication: these conditions should be evaluated in separate
4779 //  blocks, each of which will branch to the slow path if its condition evaluates to false.
4781 // Now, imagine a situation where we have
4782 // a[x][y][k] = 20 and a[i][j][k] = 0
4783 //  also in the innermost loop, where x and y are parameters, then our conditions will have
4787 // in addition to the above conditions (1) to get rid of bounds check on index 'k'
4789 // But these conditions can be checked together with conditions
4790 // (i < a.len) without a need for a separate block. In summary, the conditions will be:
4793 // ((i < a.len) & (x < a.len)) && <-- Note the bitwise AND here.
4794 // (a[i] != null & a[x] != null) && <-- Note the bitwise AND here.
4795 // (j < a[i].len & y < a[x].len) && <-- Note the bitwise AND here.
4796 // (a[i][j] != null & a[x][y] != null) <-- Note the bitwise AND here.
4798 //  This naturally yields a tree-style pattern, where the nodes of the tree are
4799 //  the array and its indices, respectively.
4815 //  Notice that variables at the same level can have their conditions combined in the
4816 //  same block with a bitwise AND, whereas the conditions in consecutive levels will be
4817 //  combined with a short-circuiting AND (i.e., different basic blocks).
4820 // Construct a tree of array indices and the array which will generate the optimal
4821 // conditions for loop cloning.
4823 // a[i][j][k], b[i] and a[i][y][k] are the occurrences in the loop. Then, the tree should be:
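//  A sketch of that tree (reconstructed from the description above; the original ASCII
//  diagram is abbreviated here):
//
//      a => { i => { j => { k => { } }
//                    y => { k => { } } } }
//      b => { i => { } }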
4838 // In this method, we will construct such a tree by descending depth first into the array
4839 // index operation and forming a tree structure as we encounter the array or the index variables.
4841 // This tree structure will then be used to generate conditions like below:
4842 // (a != null) & (b != null) && // from the first level of the tree.
4844 // (i < a.len) & (i < b.len) && // from the second level of the tree. Levels can be combined.
4845 // (a[i] != null) & (b[i] != null) && // from the second level of the tree.
4847 // (j < a[i].len) & (y < a[i].len) && // from the third level.
4848 // (a[i][j] != null) & (a[i][y] != null) && // from the third level.
4853 bool Compiler::optComputeDerefConditions(unsigned loopNum, LoopCloneContext* context)
4855 JitExpandArrayStack<LC_Deref*> nodes(getAllocator());
4858 // Get the dereference-able arrays.
4859 JitExpandArrayStack<LC_Array>* deref = context->EnsureDerefs(loopNum);
4861 // For each array in the dereference list, construct a tree,
4862 // where the nodes are array and index variables and an edge 'u-v'
4863 // exists if a node 'v' indexes node 'u' directly as in u[v] or an edge
4864 // 'u-v-w' transitively if u[v][w] occurs.
4865 for (unsigned i = 0; i < deref->Size(); ++i)
4867 LC_Array& array = (*deref)[i];
4869 // First populate the array base variable.
4870 LC_Deref* node = LC_Deref::Find(&nodes, array.arrIndex->arrLcl);
4871 if (node == nullptr)
4873 node = new (getAllocator()) LC_Deref(array, 0 /*level*/);
4877 // For each dimension (level) for the array, populate the tree with the variable
4878 // from that dimension.
4879 unsigned rank = (unsigned)array.GetDimRank();
4880 for (unsigned dim = 0; dim < rank; ++dim)
4882 node->EnsureChildren(getAllocator());
4883 LC_Deref* tmp = node->Find(array.arrIndex->indLcls[dim]);
4886 tmp = new (getAllocator()) LC_Deref(array, node->level + 1);
4887 node->children->Push(tmp);
4890 // Descend one level down.
4894 // Keep the maxRank of all array dereferences.
4895 maxRank = max((int)rank, maxRank);
4901 for (unsigned i = 0; i < nodes.Size(); ++i)
4918 // The first level will always yield just the null-checks, since it is made of the array base variables.
4919 // All other levels (dimensions) will yield two conditions each, e.g.: (i < a.length && a[i] != null).
4920 // So the total is rank * 2 + 1.
4921 unsigned condBlocks = (unsigned)maxRank * 2 + 1;
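// For example (illustrative): with maxRank == 2 (say, a jagged access a[i][j]), condBlocks == 5:
// one block for the base null-checks, then two blocks per dimension -- one for the index bound
// checks (combined bitwise) and one for the element null-checks at that level.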
4923 // Heuristic to avoid creating too many blocks.
4929 // Derive conditions into an 'array of level x array of conditions' i.e., levelCond[levels][conds]
4930 JitExpandArrayStack<JitExpandArrayStack<LC_Condition>*>* levelCond =
4931 context->EnsureBlockConditions(loopNum, condBlocks);
4932 for (unsigned i = 0; i < nodes.Size(); ++i)
4934 nodes[i]->DeriveLevelConditions(levelCond);
4937 DBEXEC(verbose, context->PrintBlockConditions(loopNum));
4942 //----------------------------------------------------------------------------
4943 // optDebugLogLoopCloning: Insert a call to the JIT helper that prints a message.
4946 // block - the block in which the helper call needs to be inserted.
4947 // insertBefore - the tree before which the helper call will be inserted.
4949 void Compiler::optDebugLogLoopCloning(BasicBlock* block, GenTree* insertBefore)
4951 if (JitConfig.JitDebugLogLoopCloning() == 0)
4955 GenTree* logCall = gtNewHelperCallNode(CORINFO_HELP_DEBUG_LOG_LOOP_CLONING, TYP_VOID);
4956 GenTree* stmt = fgNewStmtFromTree(logCall);
4957 fgInsertStmtBefore(block, insertBefore, stmt);
4958 fgMorphBlockStmt(block, stmt->AsStmt() DEBUGARG("Debug log loop cloning"));
4962 //------------------------------------------------------------------------
4963 // optPerformStaticOptimizations: Perform the optimizations for the optimization
4964 // candidates gathered during the cloning phase.
4967 // loopNum - the current loop index for which the optimizations are performed.
4968 // context - data structure where all loop cloning info is kept.
4969 // dynamicPath - If true, the optimization is performed in the fast path among the
4970 // cloned loops. If false, it means this is the only path (i.e.,
4971 // there is no slow path.)
4974 // Perform the optimizations on the fast path i.e., the path in which the
4975 // optimization candidates were collected at the time of identifying them.
4976 // The candidates store all the information necessary (the tree/stmt/block
4977 // they are from) to perform the optimization.
4980 // The unoptimized path is either already cloned when this method is called or
4981 //      there is no unoptimized path (it was eliminated statically). So this method
4982 // performs the optimizations assuming that the path in which the candidates
4983 // were collected is the fast path in which the optimizations will be performed.
4985 void Compiler::optPerformStaticOptimizations(unsigned loopNum, LoopCloneContext* context DEBUGARG(bool dynamicPath))
4987 JitExpandArrayStack<LcOptInfo*>* optInfos = context->GetLoopOptInfo(loopNum);
4988 for (unsigned i = 0; i < optInfos->Size(); ++i)
4990 LcOptInfo* optInfo = optInfos->GetRef(i);
4991 switch (optInfo->GetOptType())
4993 case LcOptInfo::LcJaggedArray:
4995 LcJaggedArrayOptInfo* arrIndexInfo = optInfo->AsLcJaggedArrayOptInfo();
4996 compCurBB = arrIndexInfo->arrIndex.useBlock;
4997 optRemoveRangeCheck(arrIndexInfo->arrIndex.bndsChks[arrIndexInfo->dim], arrIndexInfo->stmt);
4998 DBEXEC(dynamicPath, optDebugLogLoopCloning(arrIndexInfo->arrIndex.useBlock, arrIndexInfo->stmt));
5001 case LcOptInfo::LcMdArray:
5002 // TODO-CQ: CLONE: Implement.
5010 //----------------------------------------------------------------------------
5011 // optCanCloneLoops: Use the environment flag to determine whether loop
5012 // cloning is allowed to be performed.
5015 //      Returns true if loop cloning is allowed: always true in retail builds; in debug
5016 //      builds, true unless the COMPlus_JitCloneLoops config value is set to zero.
5018 bool Compiler::optCanCloneLoops()
5020 // Enabled for retail builds now.
5021 unsigned cloneLoopsFlag = 1;
5023 cloneLoopsFlag = JitConfig.JitCloneLoops();
5025 return (cloneLoopsFlag != 0);
5028 //----------------------------------------------------------------------------
5029 // optIsLoopClonable: Determine whether this loop can be cloned.
5032 //      loopInd     loop index to check for clonability.
5035 //      Returns true if the loop can be cloned. If it returns false,
5036 //      it prints a message to the JIT dump describing why the loop can't be cloned.
5038 bool Compiler::optIsLoopClonable(unsigned loopInd)
5040 // First, for now, make sure the loop doesn't have any embedded exception handling -- I don't want to tackle
5041 // inserting new EH regions in the exception table yet.
5042 BasicBlock* stopAt = optLoopTable[loopInd].lpBottom->bbNext;
5043 unsigned loopRetCount = 0;
5044 for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != stopAt; blk = blk->bbNext)
5046 if (blk->bbJumpKind == BBJ_RETURN)
5050 if (bbIsTryBeg(blk))
5052 JITDUMP("Loop cloning: rejecting loop %d in %s, because it has a try begin.\n", loopInd, info.compFullName);
5057 // Is the entry block a handler or filter start? If so, then if we cloned, we could create a jump
5058 // into the middle of a handler (to go to the cloned copy.) Reject.
5059 if (bbIsHandlerBeg(optLoopTable[loopInd].lpEntry))
5061 JITDUMP("Loop cloning: rejecting loop because entry block is a handler start.\n");
5065 // If the head and entry are in different EH regions, reject.
5066 if (!BasicBlock::sameEHRegion(optLoopTable[loopInd].lpHead, optLoopTable[loopInd].lpEntry))
5068 JITDUMP("Loop cloning: rejecting loop because head and entry blocks are in different EH regions.\n");
5072 // Is the first block after the last block of the loop a handler or filter start?
5073 // Usually, we create a dummy block after the original loop, to skip over the loop clone
5074 // and go to where the original loop did. That raises problems when we don't actually go to
5075 // that block; this is one of those cases. This could be fixed fairly easily; for example,
5076 // we could add a dummy nop block after the (cloned) loop bottom, in the same handler scope as the
5077 // loop. This is just a corner to cut to get this working faster.
5078 BasicBlock* bbAfterLoop = optLoopTable[loopInd].lpBottom->bbNext;
5079 if (bbAfterLoop != nullptr && bbIsHandlerBeg(bbAfterLoop))
5081 JITDUMP("Loop cloning: rejecting loop because next block after bottom is a handler start.\n");
5085 // We've previously made a decision whether to have separate return epilogs, or branch to one.
5086 // There's a GCInfo limitation in the x86 case, so that there can be no more than SET_EPILOGCNT_MAX separate
5087 // epilogs. Other architectures have a limit of 4 here for "historical reasons", but this should be revisited
5088 // (or return blocks should not be considered part of the loop, rendering this issue moot).
5089 unsigned epilogLimit = 4;
5090 #ifdef JIT32_GCENCODER
5091 epilogLimit = SET_EPILOGCNT_MAX;
5092 #endif // JIT32_GCENCODER
5093 if (fgReturnCount + loopRetCount > epilogLimit)
5095 JITDUMP("Loop cloning: rejecting loop because it has %d returns; if added to previously-existing %d returns, "
5096 "would exceed the limit of %d.\n",
5097 loopRetCount, fgReturnCount, epilogLimit);
5101 // Otherwise, we're going to add those return blocks.
5102 fgReturnCount += loopRetCount;
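// Worked example (illustrative, using the default epilogLimit of 4): a loop containing 2
// BBJ_RETURN blocks in a method that already has 3 return blocks is rejected above, since
// 3 + 2 > 4; otherwise the loop's return blocks are counted into fgReturnCount here.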
5107 /*****************************************************************************
5109 * Identify loop cloning opportunities, derive loop cloning conditions,
5110  *  perform loop cloning, and use the derived conditions to choose which path to take.
5113 void Compiler::optCloneLoops()
5115 JITDUMP("\n*************** In optCloneLoops()\n");
5116 if (optLoopCount == 0 || !optCanCloneLoops())
5124 printf("Blocks/Trees at start of phase\n");
5125 fgDispBasicBlocks(true);
5129 LoopCloneContext context(optLoopCount, getAllocator());
5131 // Obtain array optimization candidates in the context.
5132 optObtainLoopCloningOpts(&context);
5134 // For each loop, derive cloning conditions for the optimization candidates.
5135 for (unsigned i = 0; i < optLoopCount; ++i)
5137 JitExpandArrayStack<LcOptInfo*>* optInfos = context.GetLoopOptInfo(i);
5138 if (optInfos == nullptr)
5143 if (!optDeriveLoopCloningConditions(i, &context) || !optComputeDerefConditions(i, &context))
5145 JITDUMP("> Conditions could not be obtained\n");
5146 context.CancelLoopOptInfo(i);
5150 bool allTrue = false;
5151 bool anyFalse = false;
5152 context.EvaluateConditions(i, &allTrue, &anyFalse DEBUGARG(verbose));
5155 context.CancelLoopOptInfo(i);
5159 // Perform static optimizations on the fast path since we always
5160 // have to take the cloned path.
5161 optPerformStaticOptimizations(i, &context DEBUGARG(false));
5163 // No need to clone.
5164 context.CancelLoopOptInfo(i);
5170 // The code in this #if has been useful in debugging loop cloning issues, by
5171 // allowing selective enablement of the loop cloning optimization according to method hash.
5174 unsigned methHash = info.compMethodHash();
5175 char* lostr = getenv("loopclonehashlo");
5176 unsigned methHashLo = 0;
5179 sscanf_s(lostr, "%x", &methHashLo);
5180 // methHashLo = (unsigned(atoi(lostr)) << 2); // So we don't have to use negative numbers.
5182 char* histr = getenv("loopclonehashhi");
5183 unsigned methHashHi = UINT32_MAX;
5186 sscanf_s(histr, "%x", &methHashHi);
5187 // methHashHi = (unsigned(atoi(histr)) << 2); // So we don't have to use negative numbers.
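// Example usage (assumed; only relevant when this #if block is enabled): to restrict cloning to
// methods whose hash falls in [0x1000, 0x2000], set the environment variables
//     loopclonehashlo=1000   and   loopclonehashhi=2000
// before running; the values are parsed as hex via sscanf_s("%x").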
5189 if (methHash < methHashLo || methHash > methHashHi)
5194 for (unsigned i = 0; i < optLoopCount; ++i)
5196 if (context.GetLoopOptInfo(i) != nullptr)
5199 context.OptimizeConditions(i DEBUGARG(verbose));
5200 context.OptimizeBlockConditions(i DEBUGARG(verbose));
5201 optCloneLoop(i, &context);
5208 printf("\nAfter loop cloning:\n");
5209 fgDispBasicBlocks(/*dumpTrees*/ true);
5214 void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context)
5216 assert(loopInd < optLoopCount);
5218 JITDUMP("\nCloning loop %d: [h: %d, f: %d, t: %d, e: %d, b: %d].\n", loopInd, optLoopTable[loopInd].lpHead->bbNum,
5219 optLoopTable[loopInd].lpFirst->bbNum, optLoopTable[loopInd].lpTop->bbNum,
5220 optLoopTable[loopInd].lpEntry->bbNum, optLoopTable[loopInd].lpBottom->bbNum);
5222 // Determine the depth of the loop, so we can properly weight blocks added (outside the cloned loop blocks).
5223 unsigned depth = optLoopDepth(loopInd);
5224 unsigned ambientWeight = 1;
5225 for (unsigned j = 0; j < depth; j++)
5227 unsigned lastWeight = ambientWeight;
5228 ambientWeight *= BB_LOOP_WEIGHT;
5229 // If the multiplication overflowed, stick at max.
5230 // (Strictly speaking, a multiplication could overflow and still have a result
5231 // that is >= lastWeight...but if so, the original weight must be pretty large,
5232 // and it got bigger, so that's OK.)
5233 if (ambientWeight < lastWeight)
5235 ambientWeight = BB_MAX_WEIGHT;
5240 // If we're in a non-natural loop, the ambient weight might be higher than we computed above.
5241 // Be safe by taking the max with the head block's weight.
5242 ambientWeight = max(ambientWeight, optLoopTable[loopInd].lpHead->bbWeight);
5244 // This is the containing loop, if any -- to label any blocks we create that are outside
5245 // the loop being cloned.
5246 unsigned char ambientLoop = optLoopTable[loopInd].lpParent;
5248 // First, make sure that the loop has a unique header block, creating an empty one if necessary.
5249 optEnsureUniqueHead(loopInd, ambientWeight);
5251 // We're going to make
5263 // H2--> E (Optional; if E == T == F, let H fall through to F/T/E)
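// Rough sketch of the result (reconstructed; the full diagram is abbreviated here): 'h' becomes
// the start of a BBJ_COND chain that tests the cloning conditions. Each condition block proceeds
// toward the original loop -- the fast path, whose bounds checks get removed -- and branches to
// 'slowHead' on failure; 'slowHead' jumps to E2, the entry of the cloned (slow-path) loop laid
// out after the original one. X2, inserted after B, jumps over the cloned loop to X so the
// original loop still exits to the same place.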
5275 BasicBlock* h = optLoopTable[loopInd].lpHead;
5276 if (h->bbJumpKind != BBJ_NONE && h->bbJumpKind != BBJ_ALWAYS)
5278 // Make a new block to be the unique entry to the loop.
5279 assert(h->bbJumpKind == BBJ_COND && h->bbNext == optLoopTable[loopInd].lpEntry);
5280 BasicBlock* newH = fgNewBBafter(BBJ_NONE, h,
5281 /*extendRegion*/ true);
5282 newH->bbWeight = (newH->isRunRarely() ? 0 : ambientWeight);
5283 BlockSetOps::Assign(this, newH->bbReach, h->bbReach);
5284 // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
5285 newH->bbNatLoopNum = ambientLoop;
5287 optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h);
5290 // First, make X2 after B, if necessary. (Not necessary if b is a BBJ_ALWAYS.)
5291 // "newPred" will be the predecessor of the blocks of the cloned loop.
5292 BasicBlock* b = optLoopTable[loopInd].lpBottom;
5293 BasicBlock* newPred = b;
5294 if (b->bbJumpKind != BBJ_ALWAYS)
5296 BasicBlock* x = b->bbNext;
5299 BasicBlock* x2 = fgNewBBafter(BBJ_ALWAYS, b, /*extendRegion*/ true);
5300 x2->bbWeight = (x2->isRunRarely() ? 0 : ambientWeight);
5302 // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
5303 x2->bbNatLoopNum = ambientLoop;
5306 BlockSetOps::Assign(this, x2->bbReach, h->bbReach);
5311 // Now we'll make "h2", after "h" to go to "e" -- unless the loop is a do-while,
5312 // so that "h" already falls through to "e" (e == t == f).
5313 BasicBlock* h2 = nullptr;
5314 if (optLoopTable[loopInd].lpHead->bbNext != optLoopTable[loopInd].lpEntry)
5316 h2 = fgNewBBafter(BBJ_ALWAYS, optLoopTable[loopInd].lpHead, // assign to the outer 'h2'; don't shadow it
5317 /*extendRegion*/ true);
5318 h2->bbWeight = (h2->isRunRarely() ? 0 : ambientWeight);
5320 // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
5321 h2->bbNatLoopNum = ambientLoop;
5323 h2->bbJumpDest = optLoopTable[loopInd].lpEntry;
5324 optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h2);
5327 // Now we'll clone the blocks of the loop body.
5328 BasicBlock* newFirst = nullptr;
5329 BasicBlock* newBot = nullptr;
5331 BlockToBlockMap* blockMap = new (getAllocator()) BlockToBlockMap(getAllocator());
5332 for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != optLoopTable[loopInd].lpBottom->bbNext;
5335 BasicBlock* newBlk = fgNewBBafter(blk->bbJumpKind, newPred,
5336 /*extendRegion*/ true);
5338 // Call CloneBlockState to make a copy of the block's statements (and attributes), and assert that it
5339 // has a return value indicating success, because optCanOptimizeByLoopCloningVisitor has already
5340 // checked them to guarantee they are clonable.
5341 bool cloneOk = BasicBlock::CloneBlockState(this, newBlk, blk);
5342 noway_assert(cloneOk);
5343 // TODO-Cleanup: The above clones the bbNatLoopNum, which is incorrect. Eventually, we should probably insert
5344 // the cloned loop in the loop table. For now, however, we'll just make these blocks be part of the surrounding
5345 // loop, if one exists -- the parent of the loop we're cloning.
5346 newBlk->bbNatLoopNum = optLoopTable[loopInd].lpParent;
5348 if (newFirst == nullptr)
5352 newBot = newBlk; // Continually overwrite to make sure we get the last one.
5354 blockMap->Set(blk, newBlk);
5357 // Perform the static optimizations on the fast path.
5358 optPerformStaticOptimizations(loopInd, context DEBUGARG(true));
5360 // Now go through the new blocks, remapping their jump targets within the loop.
5361 for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != optLoopTable[loopInd].lpBottom->bbNext;
5365 BasicBlock* newblk = nullptr;
5366 bool b = blockMap->Lookup(blk, &newblk);
5367 assert(b && newblk != nullptr);
5369 assert(blk->bbJumpKind == newblk->bbJumpKind);
5371 // First copy the jump destination(s) from "blk".
5372 optCopyBlkDest(blk, newblk);
5374 // Now redirect the new block according to "blockMap".
5375 optRedirectBlock(newblk, blockMap);
5378 assert((h->bbJumpKind == BBJ_NONE && (h->bbNext == h2 || h->bbNext == optLoopTable[loopInd].lpEntry)) ||
5379 (h->bbJumpKind == BBJ_ALWAYS));
5381 // If all the conditions are true, go to E2.
5382 BasicBlock* e2 = nullptr;
5383 bool foundIt = blockMap->Lookup(optLoopTable[loopInd].lpEntry, &e2);
5385 h->bbJumpKind = BBJ_COND;
5387 // We will create the following structure
5389 // cond0 (in h) -?> cond1
5390 // slow --> e2 (slow) always
5397 // We should always have block conditions, at the minimum, the array should be deref-able
5398 assert(context->HasBlockConditions(loopInd));
5400 // Create a unique header for the slow path.
5401 BasicBlock* slowHead = fgNewBBafter(BBJ_ALWAYS, h, true);
5402 slowHead->bbWeight = (h->isRunRarely() ? 0 : ambientWeight);
5403 slowHead->bbNatLoopNum = ambientLoop;
5404 slowHead->bbJumpDest = e2;
5406 BasicBlock* condLast = optInsertLoopChoiceConditions(context, loopInd, h, slowHead);
5407 condLast->bbJumpDest = slowHead;
5409 // If h2 is present it is already the head; otherwise, replace 'h' by 'condLast'.
5412 optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, condLast);
5414 assert(foundIt && e2 != nullptr);
5416 // Don't unroll loops that we've cloned -- the unroller expects any loop it should unroll to
5417 // initialize the loop counter immediately before entering the loop, but we've left a shared
5418 // initialization of the loop counter up above the test that determines which version of the loop gets taken.
5420 optLoopTable[loopInd].lpFlags |= LPFLG_DONT_UNROLL;
5422 fgUpdateChangedFlowGraph();
5425 //--------------------------------------------------------------------------------------------------
5426 // optInsertLoopChoiceConditions - Insert the loop conditions for a loop between loop head and entry
5429 // context loop cloning context variable
5430 // loopNum the loop index
5431 // head loop head for "loopNum"
5432 // slowHead the slow path loop head
5438 // Create the following structure.
5440 //      Note below that cond0 is inverted in the head block, i.e., if it is true we jump to cond1. This is
5441 //      because condn cannot 'jtrue' to the loop head h2; that jump has to come from a direct pred block.
5443 // cond0 (in h) -?> cond1
5444 // slowHead --> e2 (slowHead) always
5445 // !cond1 -?> slowHead
5446 // !cond2 -?> slowHead
5448 // !condn -?> slowHead
5451 //      Insert condition 0 in 'h', create the other condition blocks, and insert their conditions in them.
5453 BasicBlock* Compiler::optInsertLoopChoiceConditions(LoopCloneContext* context,
5456 BasicBlock* slowHead)
5458 JITDUMP("Inserting loop cloning conditions\n");
5459 assert(context->HasBlockConditions(loopNum));
5461 BasicBlock* curCond = head;
5462 JitExpandArrayStack<JitExpandArrayStack<LC_Condition>*>* levelCond = context->GetBlockConditions(loopNum);
5463 for (unsigned i = 0; i < levelCond->Size(); ++i)
5465 bool isHeaderBlock = (curCond == head);
5467 // Flip the condition if header block.
5468 context->CondToStmtInBlock(this, *((*levelCond)[i]), curCond, isHeaderBlock);
5470 // Create each condition block ensuring wiring between them.
5471 BasicBlock* tmp = fgNewBBafter(BBJ_COND, isHeaderBlock ? slowHead : curCond, true);
5472 curCond->bbJumpDest = isHeaderBlock ? tmp : slowHead;
5475 curCond->inheritWeight(head);
5476 curCond->bbNatLoopNum = head->bbNatLoopNum;
5477 JITDUMP("Created new block %02d for new level\n", curCond->bbNum);
5480 // Finally insert cloning conditions after all deref conditions have been inserted.
5481 context->CondToStmtInBlock(this, *(context->GetConditions(loopNum)), curCond, false);
5485 void Compiler::optEnsureUniqueHead(unsigned loopInd, unsigned ambientWeight)
5487 BasicBlock* h = optLoopTable[loopInd].lpHead;
5488 BasicBlock* t = optLoopTable[loopInd].lpTop;
5489 BasicBlock* e = optLoopTable[loopInd].lpEntry;
5490 BasicBlock* b = optLoopTable[loopInd].lpBottom;
5492 // If "h" dominates the entry block, then it is the unique header.
5493 if (fgDominate(h, e))
5498 // Otherwise, create a new empty header block, make it the pred of the entry block,
5499 // and redirect the preds of the entry block to go to this.
5501 BasicBlock* beforeTop = t->bbPrev;
5502 // Make sure that the new block is in the same region as the loop.
5503 // (We will only create loops that are entirely within a region.)
5504 BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, beforeTop, true);
5505 // This is in the containing loop.
5506 h2->bbNatLoopNum = optLoopTable[loopInd].lpParent;
5507 h2->bbWeight = (h2->isRunRarely() ? 0 : ambientWeight);
5509 // We don't care where it was put; splice it between beforeTop and top.
5510 if (beforeTop->bbNext != h2)
5512 h2->bbPrev->setNext(h2->bbNext); // Splice h2 out.
5513 beforeTop->setNext(h2); // Splice h2 in, between beforeTop and t.
5517 if (h2->bbNext != e)
5519 h2->bbJumpKind = BBJ_ALWAYS;
5522 BlockSetOps::Assign(this, h2->bbReach, e->bbReach);
5524 // Redirect paths from preds of "e" to go to "h2" instead of "e".
5525 BlockToBlockMap* blockMap = new (getAllocator()) BlockToBlockMap(getAllocator());
5526 blockMap->Set(e, h2);
5528 for (flowList* predEntry = e->bbPreds; predEntry; predEntry = predEntry->flNext)
5530 BasicBlock* predBlock = predEntry->flBlock;
5532 // Skip if predBlock is in the loop.
5533 if (t->bbNum <= predBlock->bbNum && predBlock->bbNum <= b->bbNum)
5537 optRedirectBlock(predBlock, blockMap);
5540 optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h2);
5543 /*****************************************************************************
5545 * Determine the kind of interference for the call.
5548 /* static */ inline Compiler::callInterf Compiler::optCallInterf(GenTreeCall* call)
5550 // if not a helper, kills everything
5551 if (call->gtCallType != CT_HELPER)
5556 // setfield and array address store kill all indirections
5557 switch (eeGetHelperNum(call->gtCallMethHnd))
5559 case CORINFO_HELP_ASSIGN_REF: // Not strictly needed as we don't make a GT_CALL with this
5560 case CORINFO_HELP_CHECKED_ASSIGN_REF: // Not strictly needed as we don't make a GT_CALL with this
5561 case CORINFO_HELP_ASSIGN_BYREF: // Not strictly needed as we don't make a GT_CALL with this
5562 case CORINFO_HELP_SETFIELDOBJ:
5563 case CORINFO_HELP_ARRADDR_ST:
5565 return CALLINT_REF_INDIRS;
5567 case CORINFO_HELP_SETFIELDFLOAT:
5568 case CORINFO_HELP_SETFIELDDOUBLE:
5569 case CORINFO_HELP_SETFIELD8:
5570 case CORINFO_HELP_SETFIELD16:
5571 case CORINFO_HELP_SETFIELD32:
5572 case CORINFO_HELP_SETFIELD64:
5574 return CALLINT_SCL_INDIRS;
5576 case CORINFO_HELP_ASSIGN_STRUCT: // Not strictly needed as we don't use this
5577 case CORINFO_HELP_MEMSET: // Not strictly needed as we don't make a GT_CALL with this
5578 case CORINFO_HELP_MEMCPY: // Not strictly needed as we don't make a GT_CALL with this
5579 case CORINFO_HELP_SETFIELDSTRUCT:
5581 return CALLINT_ALL_INDIRS;
5587 // other helpers kill nothing
5588 return CALLINT_NONE;
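// For example (illustrative): a store helper such as CORINFO_HELP_SETFIELDOBJ reports
// CALLINT_REF_INDIRS, so optIsSetAssgLoop later in this file treats any GC-ref indirection in
// the loop as potentially modified by the call and refuses to hoist across it.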
5591 /*****************************************************************************
5593 * See if the given tree can be computed in the given precision (which must
5594 * be smaller than the type of the tree for this to make sense). If 'doit'
5595 * is false, we merely check to see whether narrowing is possible; if we
5596 * get called with 'doit' being true, we actually perform the narrowing.
5599 bool Compiler::optNarrowTree(GenTree* tree, var_types srct, var_types dstt, ValueNumPair vnpNarrow, bool doit)
5605 noway_assert(genActualType(tree->gtType) == genActualType(srct));
5607 /* Assume we're only handling integer types */
5608 noway_assert(varTypeIsIntegral(srct));
5609 noway_assert(varTypeIsIntegral(dstt));
5611 unsigned srcSize = genTypeSize(srct);
5612 unsigned dstSize = genTypeSize(dstt);
5614 /* dstt must be smaller than srct to narrow */
5615 if (dstSize >= srcSize)
5620 /* Figure out what kind of a node we have */
5621 oper = tree->OperGet();
5622 kind = tree->OperKind();
5624 if (GenTree::OperIsAssignment(oper))
5626 noway_assert(doit == false);
5630 ValueNumPair NoVNPair = ValueNumPair();
5632 if (kind & GTK_LEAF)
5636 /* Constants can usually be narrowed by changing their value */
5637 CLANG_FORMAT_COMMENT_ANCHOR;
5639 #ifndef _TARGET_64BIT_
5644 lval = tree->gtIntConCommon.LngValue();
5673 if ((lval & lmask) != lval)
5678 tree->ChangeOperConst(GT_CNS_INT);
5679 tree->gtType = TYP_INT;
5680 tree->gtIntCon.gtIconVal = (int)lval;
5681 if (vnStore != nullptr)
5683 fgValueNumberTreeConst(tree);
5693 ival = tree->gtIntCon.gtIconVal;
5712 #ifdef _TARGET_64BIT_
5719 #endif // _TARGET_64BIT_
5724 if ((ival & imask) != ival)
5729 #ifdef _TARGET_64BIT_
5732 tree->gtType = TYP_INT;
5733 tree->gtIntCon.gtIconVal = (int)ival;
5734 if (vnStore != nullptr)
5736 fgValueNumberTreeConst(tree);
5739 #endif // _TARGET_64BIT_
5743 /* Operands that are in memory can usually be narrowed
5744 simply by changing their gtType */
5747 /* We only allow narrowing long -> int for a GT_LCL_VAR */
5748 if (dstSize == sizeof(int))
5761 noway_assert(doit == false);
5765 if (kind & (GTK_BINOP | GTK_UNOP))
5768 op1 = tree->gtOp.gtOp1;
5770 op2 = tree->gtOp.gtOp2;
5772 switch (tree->gtOper)
5775 noway_assert(genActualType(tree->gtType) == genActualType(op2->gtType));
5777 // Is op2 a small constant that can be narrowed into dstt?
5778 // If so, the result of the GT_AND will also fit into 'dstt' and can be narrowed.
5779 if ((op2->gtOper == GT_CNS_INT) && optNarrowTree(op2, srct, dstt, NoVNPair, false))
5781 // We will change the type of the tree and narrow op2
5785 tree->gtType = genActualType(dstt);
5786 tree->SetVNs(vnpNarrow);
5788 optNarrowTree(op2, srct, dstt, NoVNPair, true);
5789 // We may also need to cast away the upper bits of op1
5792 assert(tree->gtType == TYP_INT);
5793 op1 = gtNewCastNode(TYP_INT, op1, false, TYP_INT);
5795 op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
5797 tree->gtOp.gtOp1 = op1;
5808 if (tree->gtOverflow() || varTypeIsSmall(dstt))
5810 noway_assert(doit == false);
5818 noway_assert(genActualType(tree->gtType) == genActualType(op1->gtType));
5819 noway_assert(genActualType(tree->gtType) == genActualType(op2->gtType));
5821 if (gtIsActiveCSE_Candidate(op1) || gtIsActiveCSE_Candidate(op2) ||
5822 !optNarrowTree(op1, srct, dstt, NoVNPair, doit) || !optNarrowTree(op2, srct, dstt, NoVNPair, doit))
5824 noway_assert(doit == false);
5828 /* Simply change the type of the tree */
5832 if (tree->gtOper == GT_MUL && (tree->gtFlags & GTF_MUL_64RSLT))
5834 tree->gtFlags &= ~GTF_MUL_64RSLT;
5837 tree->gtType = genActualType(dstt);
5838 tree->SetVNs(vnpNarrow);
5846 /* Simply change the type of the tree */
5848 if (doit && (dstSize <= genTypeSize(tree->gtType)))
5850 tree->gtType = genSignedType(dstt);
5851 tree->SetVNs(vnpNarrow);
5853 /* Make sure we don't mess up the variable type */
5854 if ((oper == GT_LCL_VAR) || (oper == GT_LCL_FLD))
5856 tree->gtFlags |= GTF_VAR_CAST;
5869 /* These can always be narrowed since they only represent 0 or 1 */
5874 var_types cast = tree->CastToType();
5875 var_types oprt = op1->TypeGet();
5876 unsigned oprSize = genTypeSize(oprt);
5883 if (varTypeIsIntegralOrI(dstt) != varTypeIsIntegralOrI(oprt))
5888 if (tree->gtOverflow())
5893 /* Is this a cast from the type we're narrowing to or a smaller one? */
5895 if (oprSize <= dstSize)
5897 /* Bash the target type of the cast */
5901 dstt = genSignedType(dstt);
5903 if (oprSize == dstSize)
5905 // Same size: change the CAST into a NOP
5906 tree->ChangeOper(GT_NOP);
5907 tree->gtType = dstt;
5908 tree->gtOp.gtOp2 = nullptr;
5909 tree->gtVNPair = op1->gtVNPair; // Set to op1's ValueNumber
5913 // oprSize is smaller
5914 assert(oprSize < dstSize);
5916 // Change the CastToType in the GT_CAST node
5917 tree->CastToType() = dstt;
5919 // The result type of a GT_CAST is never a small type.
5920 // Use genActualType to widen dstt when it is a small type.
5921 tree->gtType = genActualType(dstt);
5922 tree->SetVNs(vnpNarrow);
5932 if (!gtIsActiveCSE_Candidate(op2) && optNarrowTree(op2, srct, dstt, vnpNarrow, doit))
5934 /* Simply change the type of the tree */
5938 tree->gtType = genActualType(dstt);
5939 tree->SetVNs(vnpNarrow);
5946 noway_assert(doit == false);
5954 /*****************************************************************************
5956 * The following logic figures out whether the given variable is assigned
5957 * somewhere in a list of basic blocks (or in an entire loop).
5960 Compiler::fgWalkResult Compiler::optIsVarAssgCB(GenTree** pTree, fgWalkData* data)
5962 GenTree* tree = *pTree;
5964 if (tree->OperIsAssignment())
5966 GenTree* dest = tree->gtOp.gtOp1;
5967 genTreeOps destOper = dest->OperGet();
5969 isVarAssgDsc* desc = (isVarAssgDsc*)data->pCallbackData;
5970 assert(desc && desc->ivaSelf == desc);
5972 if (destOper == GT_LCL_VAR)
5974 unsigned tvar = dest->gtLclVarCommon.gtLclNum;
5975 if (tvar < lclMAX_ALLSET_TRACKED)
5977 AllVarSetOps::AddElemD(data->compiler, desc->ivaMaskVal, tvar);
5981 desc->ivaMaskIncomplete = true;
5984 if (tvar == desc->ivaVar)
5986 if (tree != desc->ivaSkip)
5992 else if (destOper == GT_LCL_FLD)
5994 /* We can't track every field of every var. Moreover, indirections
5995 may access different parts of the var as different (but
5996 overlapping) fields. So just treat them as indirect accesses */
5998 // unsigned lclNum = dest->gtLclFld.gtLclNum;
5999 // noway_assert(lvaTable[lclNum].lvAddrTaken);
6001 varRefKinds refs = varTypeIsGC(tree->TypeGet()) ? VR_IND_REF : VR_IND_SCL;
6002 desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | refs);
6004 else if (destOper == GT_CLS_VAR)
6006 desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | VR_GLB_VAR);
6008 else if (destOper == GT_IND)
6010 /* Set the proper indirection bits */
6012 varRefKinds refs = varTypeIsGC(tree->TypeGet()) ? VR_IND_REF : VR_IND_SCL;
6013 desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | refs);
6016 else if (tree->gtOper == GT_CALL)
6018 isVarAssgDsc* desc = (isVarAssgDsc*)data->pCallbackData;
6019 assert(desc && desc->ivaSelf == desc);
6021 desc->ivaMaskCall = optCallInterf(tree->AsCall());
6024 return WALK_CONTINUE;
6027 /*****************************************************************************/
6029 bool Compiler::optIsVarAssigned(BasicBlock* beg, BasicBlock* end, GenTree* skip, unsigned var)
6034 desc.ivaSkip = skip;
6036 desc.ivaSelf = &desc;
6039 desc.ivaMaskCall = CALLINT_NONE;
6040 AllVarSetOps::AssignNoCopy(this, desc.ivaMaskVal, AllVarSetOps::MakeEmpty(this));
6046 for (GenTreeStmt* stmt = beg->firstStmt(); stmt; stmt = stmt->gtNextStmt)
6048 noway_assert(stmt->gtOper == GT_STMT);
6049 if (fgWalkTreePre(&stmt->gtStmtExpr, optIsVarAssgCB, &desc))
6071 /*****************************************************************************/
6072 int Compiler::optIsSetAssgLoop(unsigned lnum, ALLVARSET_VALARG_TP vars, varRefKinds inds)
6076 /* Get hold of the loop descriptor */
6078 noway_assert(lnum < optLoopCount);
6079 loop = optLoopTable + lnum;
6081 /* Do we already know what variables are assigned within this loop? */
6083 if (!(loop->lpFlags & LPFLG_ASGVARS_YES))
6090 /* Prepare the descriptor used by the tree walker call-back */
6092 desc.ivaVar = (unsigned)-1;
6093 desc.ivaSkip = nullptr;
6095 desc.ivaSelf = &desc;
6097 AllVarSetOps::AssignNoCopy(this, desc.ivaMaskVal, AllVarSetOps::MakeEmpty(this));
6098 desc.ivaMaskInd = VR_NONE;
6099 desc.ivaMaskCall = CALLINT_NONE;
6100 desc.ivaMaskIncomplete = false;
6102 /* Now walk all the statements of the loop */
6104 beg = loop->lpHead->bbNext;
6105 end = loop->lpBottom;
6107 for (/**/; /**/; beg = beg->bbNext)
6111 for (GenTreeStmt* stmt = beg->FirstNonPhiDef(); stmt; stmt = stmt->gtNextStmt)
6113 noway_assert(stmt->gtOper == GT_STMT);
6114 fgWalkTreePre(&stmt->gtStmtExpr, optIsVarAssgCB, &desc);
6116 if (desc.ivaMaskIncomplete)
6118 loop->lpFlags |= LPFLG_ASGVARS_INC;
6128 AllVarSetOps::Assign(this, loop->lpAsgVars, desc.ivaMaskVal);
6129 loop->lpAsgInds = desc.ivaMaskInd;
6130 loop->lpAsgCall = desc.ivaMaskCall;
6132 /* Now we know what variables are assigned in the loop */
6134 loop->lpFlags |= LPFLG_ASGVARS_YES;
6137 /* Now we can finally test the caller's mask against the loop's */
6138 if (!AllVarSetOps::IsEmptyIntersection(this, loop->lpAsgVars, vars) || (loop->lpAsgInds & inds))
6143 switch (loop->lpAsgCall)
6147 /* Can't hoist if the call might have a side effect on an indirection. */
6149 if (loop->lpAsgInds != VR_NONE)
6156 case CALLINT_REF_INDIRS:
6158 /* Can't hoist if the call might have a side effect on a ref indirection. */
6160 if (loop->lpAsgInds & VR_IND_REF)
6167 case CALLINT_SCL_INDIRS:
6169 /* Can't hoist if the call might have a side effect on a non-ref indirection. */
6171 if (loop->lpAsgInds & VR_IND_SCL)
6178 case CALLINT_ALL_INDIRS:
6180 /* Can't hoist if the call might have a side effect on any indirection. */
6182 if (loop->lpAsgInds & (VR_IND_REF | VR_IND_SCL))
6191 /* Other helpers kill nothing */
6196 noway_assert(!"Unexpected lpAsgCall value");
6202 void Compiler::optPerformHoistExpr(GenTree* origExpr, unsigned lnum)
6207 printf("\nHoisting a copy of ");
6208 printTreeID(origExpr);
6209 printf(" into PreHeader for loop L%02u <BB%02u..BB%02u>:\n", lnum, optLoopTable[lnum].lpFirst->bbNum,
6210 optLoopTable[lnum].lpBottom->bbNum);
6211 gtDispTree(origExpr);
6216 // This loop has to be in a form that is approved for hoisting.
6217 assert(optLoopTable[lnum].lpFlags & LPFLG_HOISTABLE);
6219 // Create a copy of the expression and mark it for CSE's.
6220 GenTree* hoistExpr = gtCloneExpr(origExpr, GTF_MAKE_CSE);
6222 // At this point we should have a cloned expression, marked with the GTF_MAKE_CSE flag
6223 assert(hoistExpr != origExpr);
6224 assert(hoistExpr->gtFlags & GTF_MAKE_CSE);
6226 GenTree* hoist = hoistExpr;
6227 // The value of the expression isn't used (unless it's an assignment).
6228 if (hoistExpr->OperGet() != GT_ASG)
6230 hoist = gtUnusedValNode(hoistExpr);
6233 /* Put the statement in the preheader */
6235 fgCreateLoopPreHeader(lnum);
6237 BasicBlock* preHead = optLoopTable[lnum].lpHead;
6238 assert(preHead->bbJumpKind == BBJ_NONE);
6240 // fgMorphTree and lvaRecursiveIncRefCounts require that compCurBB be the block that contains
6241 // (or in this case, will contain) the expression.
6242 compCurBB = preHead;
6244 // Increment the ref counts of any local vars appearing in "hoist".
6245 // Note that we need to do this before fgMorphTree() as fgMorph() could constant
6246 // fold away some of the lcl vars referenced by "hoist".
6247 lvaRecursiveIncRefCounts(hoist);
6249 hoist = fgMorphTree(hoist);
6251 GenTree* hoistStmt = gtNewStmt(hoist);
6252 hoistStmt->gtFlags |= GTF_STMT_CMPADD;
6254 /* simply append the statement at the end of the preHead's list */
6256 GenTree* treeList = preHead->bbTreeList;
6260 /* append after last statement */
6262 GenTree* last = treeList->gtPrev;
6263 assert(last->gtNext == nullptr);
6265 last->gtNext = hoistStmt;
6266 hoistStmt->gtPrev = last;
6267 treeList->gtPrev = hoistStmt;
6271 /* Empty pre-header - store the single statement in the block */
6273 preHead->bbTreeList = hoistStmt;
6274 hoistStmt->gtPrev = hoistStmt;
6277 hoistStmt->gtNext = nullptr;
6282 printf("This hoisted copy placed in PreHeader (BB%02u):\n", preHead->bbNum);
6287 if (fgStmtListThreaded)
6289 gtSetStmtInfo(hoistStmt);
6290 fgSetStmtSeq(hoistStmt);
6294 if (m_nodeTestData != nullptr)
6297 // What is the depth of the loop "lnum"?
6299 unsigned lnumIter = lnum;
6300 while (optLoopTable[lnumIter].lpParent != BasicBlock::NOT_IN_LOOP)
6303 lnumIter = optLoopTable[lnumIter].lpParent;
6306 NodeToTestDataMap* testData = GetNodeTestData();
6308 TestLabelAndNum tlAndN;
6309 if (testData->Lookup(origExpr, &tlAndN) && tlAndN.m_tl == TL_LoopHoist)
6311 if (tlAndN.m_num == -1)
6314 printTreeID(origExpr);
6315 printf(" was declared 'do not hoist', but is being hoisted.\n");
6318 else if (tlAndN.m_num != depth)
6321 printTreeID(origExpr);
6322 printf(" was declared as hoistable from loop at nesting depth %d; actually hoisted from loop at depth "
6324 tlAndN.m_num, depth);
6329 // We've correctly hoisted this, so remove the annotation. Later, we'll check for any remaining "must
6330 // hoist" annotations.
6331 testData->Remove(origExpr);
6332 // Now we insert an annotation to make sure that "hoistExpr" is actually CSE'd.
6333 tlAndN.m_tl = TL_CSE_Def;
6334 tlAndN.m_num = m_loopHoistCSEClass++;
6335 testData->Set(hoistExpr, tlAndN);
6341 #if LOOP_HOIST_STATS
6342 if (!m_curLoopHasHoistedExpression)
6344 m_loopsWithHoistedExpressions++;
6345 m_curLoopHasHoistedExpression = true;
6347 m_totalHoistedExpressions++;
6348 #endif // LOOP_HOIST_STATS
6351 void Compiler::optHoistLoopCode()
6353 // If we don't have any loops in the method then take an early out now.
6354 if (optLoopCount == 0)
6360 unsigned jitNoHoist = JitConfig.JitNoHoist();
6368 // The code in this #if has been useful in debugging loop hoisting issues, by
6369 // allowing selective enablement of the loop hoisting optimization according to method hash.
6372 unsigned methHash = info.compMethodHash();
6373 char* lostr = getenv("loophoisthashlo");
6374 unsigned methHashLo = 0;
6377 sscanf_s(lostr, "%x", &methHashLo);
6378 // methHashLo = (unsigned(atoi(lostr)) << 2); // So we don't have to use negative numbers.
6380 char* histr = getenv("loophoisthashhi");
6381 unsigned methHashHi = UINT32_MAX;
6384 sscanf_s(histr, "%x", &methHashHi);
6385 // methHashHi = (unsigned(atoi(histr)) << 2); // So we don't have to use negative numbers.
6387 if (methHash < methHashLo || methHash > methHashHi)
6389 printf("Doing loop hoisting in %s (0x%x).\n", info.compFullName, methHash);
6391 #endif // 0 -- debugging loop hoisting issues
6396 printf("\n*************** In optHoistLoopCode()\n");
6397 printf("Blocks/Trees before phase\n");
6398 fgDispBasicBlocks(true);
6403 // Consider all the loop nests, in outer-to-inner order (thus hoisting expressions outside the largest loop in which
6404 // they are invariant.)
6405 LoopHoistContext hoistCtxt(this);
6406 for (unsigned lnum = 0; lnum < optLoopCount; lnum++)
6408 if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED)
6413 if (optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP)
6415 optHoistLoopNest(lnum, &hoistCtxt);
6424 printf("Blocks/Trees after optHoistLoopCode() modified flowgraph\n");
6425 fgDispBasicBlocks(true);
6429 // Make sure that the predecessor lists are accurate
6430 fgDebugCheckBBlist();
6435 // Test Data stuff..
6436 // If we have no test data, early out.
6437 if (m_nodeTestData == nullptr)
6441 NodeToTestDataMap* testData = GetNodeTestData();
6442 for (NodeToTestDataMap::KeyIterator ki = testData->Begin(); !ki.Equal(testData->End()); ++ki)
6444 TestLabelAndNum tlAndN;
6445 GenTree* node = ki.Get();
6446 bool b = testData->Lookup(node, &tlAndN);
6448 if (tlAndN.m_tl != TL_LoopHoist)
6452 // Otherwise, it is a loop hoist annotation.
6453 assert(tlAndN.m_num < 100); // >= 100 indicates nested static field address, should already have been moved.
6454 if (tlAndN.m_num >= 0)
6458 printf(" was declared 'must hoist', but has not been hoisted.\n");
6465 void Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt)
6467 // Do this loop, then recursively do all nested loops.
6468 CLANG_FORMAT_COMMENT_ANCHOR;
6470 #if LOOP_HOIST_STATS
6472 m_curLoopHasHoistedExpression = false;
6473 m_loopsConsidered++;
6474 #endif // LOOP_HOIST_STATS
6476 optHoistThisLoop(lnum, hoistCtxt);
6478 VNSet* hoistedInCurLoop = hoistCtxt->ExtractHoistedInCurLoop();
6480 if (optLoopTable[lnum].lpChild != BasicBlock::NOT_IN_LOOP)
6482 // Add the ones hoisted in "lnum" to "hoistedInParents" for any nested loops.
6483 // TODO-Cleanup: we should have a set abstraction for loops.
6484 if (hoistedInCurLoop != nullptr)
6486 for (VNSet::KeyIterator keys = hoistedInCurLoop->Begin(); !keys.Equal(hoistedInCurLoop->End()); ++keys)
6490 assert(!hoistCtxt->m_hoistedInParentLoops.Lookup(keys.Get(), &b));
6492 hoistCtxt->m_hoistedInParentLoops.Set(keys.Get(), true);
6496 for (unsigned child = optLoopTable[lnum].lpChild; child != BasicBlock::NOT_IN_LOOP;
6497 child = optLoopTable[child].lpSibling)
6499 optHoistLoopNest(child, hoistCtxt);
6503 // TODO-Cleanup: we should have a set abstraction for loops.
6504 if (hoistedInCurLoop != nullptr)
6506 for (VNSet::KeyIterator keys = hoistedInCurLoop->Begin(); !keys.Equal(hoistedInCurLoop->End()); ++keys)
6508 // Note that we asserted when we added these that they hadn't been members, so removing is appropriate.
6509 hoistCtxt->m_hoistedInParentLoops.Remove(keys.Get());
6515 void Compiler::optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt)
6517 LoopDsc* pLoopDsc = &optLoopTable[lnum];
6519 /* If loop was removed continue */
6521 if (pLoopDsc->lpFlags & LPFLG_REMOVED)
6526 /* Get the head and tail of the loop */
6528 BasicBlock* head = pLoopDsc->lpHead;
6529 BasicBlock* tail = pLoopDsc->lpBottom;
6530 BasicBlock* lbeg = pLoopDsc->lpEntry;
6532 // We must have a do-while loop
6533 if ((pLoopDsc->lpFlags & LPFLG_DO_WHILE) == 0)
6538 // The loop-head must dominate the loop-entry.
6539 // TODO-CQ: Couldn't we make this true if it's not?
6540 if (!fgDominate(head, lbeg))
6545 // if lbeg is the start of a new try block then we won't be able to hoist
6546 if (!BasicBlock::sameTryRegion(head, lbeg))
6551 // We don't bother hoisting when inside of a catch block
6552 if ((lbeg->bbCatchTyp != BBCT_NONE) && (lbeg->bbCatchTyp != BBCT_FINALLY))
6557 pLoopDsc->lpFlags |= LPFLG_HOISTABLE;
6559 unsigned begn = lbeg->bbNum;
6560 unsigned endn = tail->bbNum;
6562 // Ensure the per-loop sets/tables are empty.
6563 hoistCtxt->m_curLoopVnInvariantCache.RemoveAll();
6568 printf("optHoistLoopCode for loop L%02u <BB%02u..BB%02u>:\n", lnum, begn, endn);
6569 printf(" Loop body %s a call\n", pLoopDsc->lpContainsCall ? "contains" : "does not contain");
6573 VARSET_TP loopVars(VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, pLoopDsc->lpVarUseDef));
6575 pLoopDsc->lpVarInOutCount = VarSetOps::Count(this, pLoopDsc->lpVarInOut);
6576 pLoopDsc->lpLoopVarCount = VarSetOps::Count(this, loopVars);
6577 pLoopDsc->lpHoistedExprCount = 0;
6579 #ifndef _TARGET_64BIT_
6580 unsigned longVarsCount = VarSetOps::Count(this, lvaLongVars);
6582 if (longVarsCount > 0)
6584 // Since 64-bit variables take up two registers on 32-bit targets, we increase
6585 // the counts such that each TYP_LONG variable counts twice.
6587 VARSET_TP loopLongVars(VarSetOps::Intersection(this, loopVars, lvaLongVars));
6588 VARSET_TP inOutLongVars(VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, lvaLongVars));
6593 printf("\n LONGVARS(%d)=", VarSetOps::Count(this, lvaLongVars));
6594 lvaDispVarSet(lvaLongVars);
6597 pLoopDsc->lpLoopVarCount += VarSetOps::Count(this, loopLongVars);
6598 pLoopDsc->lpVarInOutCount += VarSetOps::Count(this, inOutLongVars);
6600 #endif // !_TARGET_64BIT_
6605 printf("\n USEDEF (%d)=", VarSetOps::Count(this, pLoopDsc->lpVarUseDef));
6606 lvaDispVarSet(pLoopDsc->lpVarUseDef);
6608 printf("\n INOUT (%d)=", pLoopDsc->lpVarInOutCount);
6609 lvaDispVarSet(pLoopDsc->lpVarInOut);
6611 printf("\n LOOPVARS(%d)=", pLoopDsc->lpLoopVarCount);
6612 lvaDispVarSet(loopVars);
6617 unsigned floatVarsCount = VarSetOps::Count(this, lvaFloatVars);
6619 if (floatVarsCount > 0)
6621 VARSET_TP loopFPVars(VarSetOps::Intersection(this, loopVars, lvaFloatVars));
6622 VARSET_TP inOutFPVars(VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, lvaFloatVars));
6624 pLoopDsc->lpLoopVarFPCount = VarSetOps::Count(this, loopFPVars);
6625 pLoopDsc->lpVarInOutFPCount = VarSetOps::Count(this, inOutFPVars);
6626 pLoopDsc->lpHoistedFPExprCount = 0;
6628 pLoopDsc->lpLoopVarCount -= pLoopDsc->lpLoopVarFPCount;
6629 pLoopDsc->lpVarInOutCount -= pLoopDsc->lpVarInOutFPCount;
6634 printf(" INOUT-FP(%d)=", pLoopDsc->lpVarInOutFPCount);
6635 lvaDispVarSet(inOutFPVars);
6637 printf("\n LOOPV-FP(%d)=", pLoopDsc->lpLoopVarFPCount);
6638 lvaDispVarSet(loopFPVars);
6642 else // (floatVarsCount == 0)
6644 pLoopDsc->lpLoopVarFPCount = 0;
6645 pLoopDsc->lpVarInOutFPCount = 0;
6646 pLoopDsc->lpHoistedFPExprCount = 0;
6649 // Find the set of definitely-executed blocks.
6650 // Ideally, the definitely-executed blocks are the ones that post-dominate the entry block.
6651 // Until we have post-dominators, we'll special-case for single-exit blocks.
6652 JitExpandArrayStack<BasicBlock*> defExec(getAllocatorLoopHoist());
6653 if (pLoopDsc->lpFlags & LPFLG_ONE_EXIT)
6655 assert(pLoopDsc->lpExit != nullptr);
6656 BasicBlock* cur = pLoopDsc->lpExit;
6657 // Push dominators, until we reach "entry" or exit the loop.
6658 while (cur != nullptr && pLoopDsc->lpContains(cur) && cur != pLoopDsc->lpEntry)
6663 // If we didn't reach the entry block, give up and *just* push the entry block.
6664 if (cur != pLoopDsc->lpEntry)
6668 defExec.Push(pLoopDsc->lpEntry);
6670 else // More than one exit
6672 // We'll assume that only the entry block is definitely executed.
6673 // We could in the future do better.
6674 defExec.Push(pLoopDsc->lpEntry);
6677 while (defExec.Size() > 0)
6679 // Consider in reverse order: dominator before dominatee.
6680 BasicBlock* blk = defExec.Pop();
6681 optHoistLoopExprsForBlock(blk, lnum, hoistCtxt);
6685 // Hoist any expressions in "blk" that are invariant in loop "lnum" outside of "blk" and into a PreHead for loop "lnum".
6686 void Compiler::optHoistLoopExprsForBlock(BasicBlock* blk, unsigned lnum, LoopHoistContext* hoistCtxt)
6688 LoopDsc* pLoopDsc = &optLoopTable[lnum];
6689 bool firstBlockAndBeforeSideEffect = (blk == pLoopDsc->lpEntry);
6690 unsigned blkWeight = blk->getBBWeight(this);
6695 printf(" optHoistLoopExprsForBlock BB%02u (weight=%6s) of loop L%02u <BB%02u..BB%02u>, firstBlock is %s\n",
6696 blk->bbNum, refCntWtd2str(blkWeight), lnum, pLoopDsc->lpFirst->bbNum, pLoopDsc->lpBottom->bbNum,
6697 firstBlockAndBeforeSideEffect ? "true" : "false");
6698 if (blkWeight < (BB_UNITY_WEIGHT / 10))
6700 printf(" block weight is too small to perform hoisting.\n");
6705 if (blkWeight < (BB_UNITY_WEIGHT / 10))
6707 // Block weight is too small to perform hoisting.
6711 for (GenTreeStmt* stmt = blk->FirstNonPhiDef(); stmt; stmt = stmt->gtNextStmt)
6713 GenTree* stmtTree = stmt->gtStmtExpr;
6715 bool cctorDependent;
6716 (void)optHoistLoopExprsForTree(stmtTree, lnum, hoistCtxt, &firstBlockAndBeforeSideEffect, &hoistable,
6720 // we will try to hoist the top-level stmtTree
6721 optHoistCandidate(stmtTree, lnum, hoistCtxt);
6726 bool Compiler::optIsProfitableToHoistableTree(GenTree* tree, unsigned lnum)
6728 LoopDsc* pLoopDsc = &optLoopTable[lnum];
6730 bool loopContainsCall = pLoopDsc->lpContainsCall;
6733 int hoistedExprCount;
6737 if (varTypeIsFloating(tree->TypeGet()))
6739 hoistedExprCount = pLoopDsc->lpHoistedFPExprCount;
6740 loopVarCount = pLoopDsc->lpLoopVarFPCount;
6741 varInOutCount = pLoopDsc->lpVarInOutFPCount;
6743 availRegCount = CNT_CALLEE_SAVED_FLOAT;
6744 if (!loopContainsCall)
6746 availRegCount += CNT_CALLEE_TRASH_FLOAT - 1;
6749 // For ARM each double takes two FP registers
6750 // For now on ARM we won't track singles/doubles
6751 // and instead just assume that we always have doubles.
6758 hoistedExprCount = pLoopDsc->lpHoistedExprCount;
6759 loopVarCount = pLoopDsc->lpLoopVarCount;
6760 varInOutCount = pLoopDsc->lpVarInOutCount;
6762 availRegCount = CNT_CALLEE_SAVED - 1;
6763 if (!loopContainsCall)
6765 availRegCount += CNT_CALLEE_TRASH - 1;
6767 #ifndef _TARGET_64BIT_
6768 // For our 32-bit targets Long types take two registers.
6769 if (varTypeIsLong(tree->TypeGet()))
6771 availRegCount = (availRegCount + 1) / 2;
6776 // Decrement availRegCount by the number of expressions that we have already hoisted.
6777 availRegCount -= hoistedExprCount;
6779 // the variables that are read/written inside the loop should
6780 // always be a subset of the InOut variables for the loop
6781 assert(loopVarCount <= varInOutCount);
6783 // When loopVarCount >= availRegCount we believe that all of the
6784 // available registers will get used to hold LclVars inside the loop.
6785 // This pessimistically assumes that each loopVar has a conflicting
6786 // lifetime with every other loopVar.
6787 // For this case we will hoist the expression only if is profitable
6788 // to place it in a stack home location (gtCostEx >= 2*IND_COST_EX)
6789 // as we believe it will be placed in the stack or one of the other
6790 // loopVars will be spilled into the stack
6792 if (loopVarCount >= availRegCount)
6794 // Don't hoist expressions that are not heavy: tree->gtCostEx < (2*IND_COST_EX)
6795 if (tree->gtCostEx < (2 * IND_COST_EX))
6801 // When varInOutCount < availRegCount we know that there are
6802 // some available register(s) when we enter the loop body.
6803 // When varInOutCount == availRegCount there often will be a register
6804 // available when we enter the loop body, since a loop often defines a
6805 // LclVar on exit or there is often at least one LclVar that is worth
6806 // spilling to the stack to make way for this hoisted expression.
6807 // So we are willing to hoist an expression with gtCostEx == MIN_CSE_COST
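// Illustrative numbers (assumed, not target-specific): if availRegCount works out to 7 after
// subtracting already-hoisted expressions, then with loopVarCount == 9 (>= 7) we only hoist
// trees costing at least 2*IND_COST_EX, while with loopVarCount == 3 and varInOutCount == 5
// (both <= 7) no extra cost bar applies and even a minimal-CSE-cost tree may be hoisted.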
6809 if (varInOutCount > availRegCount)
6811 // Don't hoist expressions that barely meet CSE cost requirements: tree->gtCostEx == MIN_CSE_COST
6812 if (tree->gtCostEx <= MIN_CSE_COST + 1)
6822 // This function returns true if 'tree' is a loop invariant expression.
6823 // It also sets '*pHoistable' to true if 'tree' can be hoisted into a loop PreHeader block,
6824 // and sets '*pCctorDependent' if 'tree' is a function of a static field that must not be
6825 // hoisted (even if '*pHoistable' is true) unless a preceding corresponding cctor init helper
6826 // call is also hoisted.
6828 bool Compiler::optHoistLoopExprsForTree(GenTree* tree,
6830 LoopHoistContext* hoistCtxt,
6831 bool* pFirstBlockAndBeforeSideEffect,
6833 bool* pCctorDependent)
6835 // First do the children.
6836 // We must keep track of whether each child node was hoistable or not
6838 unsigned nChildren = tree->NumChildren();
6839 bool childrenHoistable[GenTree::MAX_CHILDREN];
6840 bool childrenCctorDependent[GenTree::MAX_CHILDREN];
6842 // Initialize the array elements of childrenHoistable[] and childrenCctorDependent[] to false
6843 for (unsigned i = 0; i < nChildren; i++)
6845 childrenHoistable[i] = false;
6846 childrenCctorDependent[i] = false;
6849 // Initclass CLS_VARs and IconHandles are the base cases of cctor dependent trees.
6850 // In the IconHandle case, it's of course the dereference, rather than the constant itself, that is
6851 // truly dependent on the cctor. So a more precise approach would be to separately propagate
6852 // isCctorDependent and isAddressWhoseDereferenceWouldBeCctorDependent, but we don't for simplicity/throughput;
6853 // the constant itself would be considered non-hoistable anyway, since optIsCSEcandidate returns
6854 // false for constants.
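// For example (a sketch): a read of a static field on a class with a precise-init cctor
// typically appears as COMMA(call to a cctor-running helper, CLS_VAR field), with the CLS_VAR
// flagged GTF_CLS_VAR_INITCLASS. Hoisting the whole COMMA is fine, since it carries the cctor
// call along; hoisting just the CLS_VAR would not be, which is what the cctor-dependent
// tracking below prevents.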
6855 bool treeIsCctorDependent = ((tree->OperIs(GT_CLS_VAR) && ((tree->gtFlags & GTF_CLS_VAR_INITCLASS) != 0)) ||
6856 (tree->OperIs(GT_CNS_INT) && ((tree->gtFlags & GTF_ICON_INITCLASS) != 0)));
6857 bool treeIsInvariant = true;
6858 for (unsigned childNum = 0; childNum < nChildren; childNum++)
6860 if (!optHoistLoopExprsForTree(tree->GetChild(childNum), lnum, hoistCtxt, pFirstBlockAndBeforeSideEffect,
6861 &childrenHoistable[childNum], &childrenCctorDependent[childNum]))
6863 treeIsInvariant = false;
6866 if (childrenCctorDependent[childNum])
6868 // Normally, a parent of a cctor-dependent tree is also cctor-dependent.
6869 treeIsCctorDependent = true;
6871 // Check for the case where we can stop propagating cctor-dependent upwards.
6872 if (tree->OperIs(GT_COMMA) && (childNum == 1))
6874 GenTree* op1 = tree->gtGetOp1();
6875 if (op1->OperIs(GT_CALL))
6877 GenTreeCall* call = op1->AsCall();
6878 if ((call->gtCallType == CT_HELPER) &&
6879 s_helperCallProperties.MayRunCctor(eeGetHelperNum(call->gtCallMethHnd)))
6881 // Hoisting the comma is ok because it would hoist the initialization along
6882 // with the static field reference.
6883 treeIsCctorDependent = false;
6884 // Hoisting the static field without hoisting the initialization would be
6885 // incorrect, make sure we consider the field (which we flagged as
6886 // cctor-dependent) non-hoistable.
6887 noway_assert(!childrenHoistable[childNum]);
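// Illustrative tree shape for this case (a sketch, not a real dump):
//     COMMA(CALL <cctor-triggering helper>, CLS_VAR <static field, cctor-dependent>)
// Hoisting the whole COMMA keeps the init helper together with the static field read,
// so the COMMA itself no longer needs to be treated as cctor-dependent.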
6894 // If all the children of "tree" are hoistable, then "tree" itself can be hoisted,
6895 // unless it has a static var reference that can't be hoisted past its cctor call.
6896 bool treeIsHoistable = treeIsInvariant && !treeIsCctorDependent;
6898 // But we must see if anything else prevents "tree" from being hoisted.
6900 if (treeIsInvariant)
6902 // Tree must be a suitable CSE candidate for us to be able to hoist it.
6903 treeIsHoistable &= optIsCSEcandidate(tree);
6905 // If it's a call, it must be a helper call, and be pure.
6906 // Further, if it may run a cctor, it must be labeled as "Hoistable"
6907 // (meaning it won't run a cctor because the class is not precise-init).
6908 if (treeIsHoistable && tree->OperGet() == GT_CALL)
6910 GenTreeCall* call = tree->AsCall();
6911 if (call->gtCallType != CT_HELPER)
6913 treeIsHoistable = false;
6917 CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
6918 if (!s_helperCallProperties.IsPure(helpFunc))
6920 treeIsHoistable = false;
6922 else if (s_helperCallProperties.MayRunCctor(helpFunc) && (call->gtFlags & GTF_CALL_HOISTABLE) == 0)
6924 treeIsHoistable = false;
6929 if (treeIsHoistable)
6931 if (!(*pFirstBlockAndBeforeSideEffect))
6933 // For now, we give up on an expression that might raise an exception if it is after the
6934 // first possible global side effect (and we assume we're after that if we're not in the first block).
6935 // TODO-CQ: this is when we might do loop cloning.
6937 if ((tree->gtFlags & GTF_EXCEPT) != 0)
6939 treeIsHoistable = false;
6944 // Is the value of the whole tree loop invariant?
6946 treeIsInvariant = optVNIsLoopInvariant(tree->gtVNPair.GetLiberal(), lnum, &hoistCtxt->m_curLoopVnInvariantCache);
6948 // If the value of the whole tree is not loop invariant, then it is not hoistable.
6949 if (!treeIsInvariant)
6951 treeIsHoistable = false;
6955 // Check if we need to set '*pFirstBlockAndBeforeSideEffect' to false.
6956 // If we encounter a tree with a call in it,
6957 // or if we see an assignment to a global, we set it to false.
6959 // If we are already set to false then we can skip these checks
6961 if (*pFirstBlockAndBeforeSideEffect)
6963 // For this purpose, we only care about memory side effects. We assume that expressions will
6964 // be hoisted so that they are evaluated in the same order as they would have been in the loop,
6965 // and therefore throw exceptions in the same order. (So we don't use GTF_GLOBALLY_VISIBLE_SIDE_EFFECTS
6966 // here, since that includes exceptions.)
6969 // If it's a call, it must be a helper call that does not mutate the heap.
6970 // Further, if it may run a cctor, it must be labeled as "Hoistable"
6971 // (meaning it won't run a cctor because the class is not precise-init).
6972 GenTreeCall* call = tree->AsCall();
6973 if (call->gtCallType != CT_HELPER)
6975 *pFirstBlockAndBeforeSideEffect = false;
6979 CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
6980 if (s_helperCallProperties.MutatesHeap(helpFunc))
6982 *pFirstBlockAndBeforeSideEffect = false;
6984 else if (s_helperCallProperties.MayRunCctor(helpFunc) && (call->gtFlags & GTF_CALL_HOISTABLE) == 0)
6986 *pFirstBlockAndBeforeSideEffect = false;
6990 else if (tree->OperIsAssignment())
6992 // If the LHS of the assignment has a global reference, then assume it's a global side effect.
6993 GenTree* lhs = tree->gtOp.gtOp1;
6994 if (lhs->gtFlags & GTF_GLOB_REF)
6996 *pFirstBlockAndBeforeSideEffect = false;
6999 else if (tree->OperIsCopyBlkOp())
7001 GenTree* args = tree->gtOp.gtOp1;
7002 assert(args->OperGet() == GT_LIST);
7003 if (args->gtOp.gtOp1->gtFlags & GTF_GLOB_REF)
7005 *pFirstBlockAndBeforeSideEffect = false;
7010 // If this 'tree' is hoistable then we return and the caller will
7011 // decide whether to hoist it as part of a larger hoistable expression.
7013 if (!treeIsHoistable)
7015 // This tree is not hoistable, so we will now hoist any hoistable children.
7017 for (unsigned childNum = 0; childNum < nChildren; childNum++)
7019 if (childrenHoistable[childNum])
7021 // We can't hoist the LHS of an assignment; it isn't a real use.
7022 if (childNum == 0 && (tree->OperIsAssignment()))
7027 GenTree* child = tree->GetChild(childNum);
7029 // We try to hoist this 'child' tree
7030 optHoistCandidate(child, lnum, hoistCtxt);
7035 *pHoistable = treeIsHoistable;
7036 *pCctorDependent = treeIsCctorDependent;
7037 return treeIsInvariant;
7040 void Compiler::optHoistCandidate(GenTree* tree, unsigned lnum, LoopHoistContext* hoistCtxt)
7042 if (lnum == BasicBlock::NOT_IN_LOOP)
7044 // The hoisted expression isn't valid at any loop head so don't hoist this expression.
7048 // The outer loop also must be suitable for hoisting...
7049 if ((optLoopTable[lnum].lpFlags & LPFLG_HOISTABLE) == 0)
7054 // If the hoisted expression isn't valid at this loop head then return
7055 if (!optTreeIsValidAtLoopHead(tree, lnum))
7060 // It must pass the hoisting profitability tests for this loop level
7061 if (!optIsProfitableToHoistableTree(tree, lnum))
7067 if (hoistCtxt->m_hoistedInParentLoops.Lookup(tree->gtVNPair.GetLiberal(), &b))
7069 // already hoisted in a parent loop, so don't hoist this expression.
7073 if (hoistCtxt->GetHoistedInCurLoop(this)->Lookup(tree->gtVNPair.GetLiberal(), &b))
7075 // already hoisted this expression in the current loop, so don't hoist this expression.
7079 // Expression can be hoisted
7080 optPerformHoistExpr(tree, lnum);
7082 // Increment lpHoistedExprCount or lpHoistedFPExprCount
7083 if (!varTypeIsFloating(tree->TypeGet()))
7085 optLoopTable[lnum].lpHoistedExprCount++;
7086 #ifndef _TARGET_64BIT_
7087 // For our 32-bit targets Long types take two registers.
7088 if (varTypeIsLong(tree->TypeGet()))
7090 optLoopTable[lnum].lpHoistedExprCount++;
7094 else // Floating point expr hoisted
7096 optLoopTable[lnum].lpHoistedFPExprCount++;
7099 // Record the hoisted expression in hoistCtxt
7100 hoistCtxt->GetHoistedInCurLoop(this)->Set(tree->gtVNPair.GetLiberal(), true);
7103 bool Compiler::optVNIsLoopInvariant(ValueNum vn, unsigned lnum, VNToBoolMap* loopVnInvariantCache)
7105 // If it is not a VN, it is not loop-invariant.
7106 if (vn == ValueNumStore::NoVN)
7111 // We'll always short-circuit constants.
7112 if (vnStore->IsVNConstant(vn) || vn == vnStore->VNForVoid())
7117 // If we've done this query previously, don't repeat.
7118 bool previousRes = false;
7119 if (loopVnInvariantCache->Lookup(vn, &previousRes))
7126 if (vnStore->GetVNFunc(vn, &funcApp))
7128 if (funcApp.m_func == VNF_PhiDef)
7130 // First, make sure it's a "proper" phi -- the definition is a Phi application.
7131 VNFuncApp phiDefValFuncApp;
7132 if (!vnStore->GetVNFunc(funcApp.m_args[2], &phiDefValFuncApp) || phiDefValFuncApp.m_func != VNF_Phi)
7134 // It's not *really* a definition, rather a pass-through of some other VN.
7135 // (This could occur, say if both sides of an if-then-else diamond made the
7136 // same assignment to a variable.)
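// Illustrative example (assumed source): in
//     if (cond) { x = a; } else { x = a; }
// both incoming SSA defs of 'x' carry the same VN, so the PhiDef merely passes that
// VN through and we recurse on it below.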
7137 res = optVNIsLoopInvariant(funcApp.m_args[2], lnum, loopVnInvariantCache);
7141 // Is the definition within the loop? If so, it is not loop-invariant.
7142 unsigned lclNum = funcApp.m_args[0];
7143 unsigned ssaNum = funcApp.m_args[1];
7144 LclSsaVarDsc* ssaDef = lvaTable[lclNum].GetPerSsaData(ssaNum);
7145 res = !optLoopContains(lnum, ssaDef->m_defLoc.m_blk->bbNatLoopNum);
7148 else if (funcApp.m_func == VNF_PhiMemoryDef)
7150 BasicBlock* defnBlk = reinterpret_cast<BasicBlock*>(vnStore->ConstantValue<ssize_t>(funcApp.m_args[0]));
7151 res = !optLoopContains(lnum, defnBlk->bbNatLoopNum);
7155 for (unsigned i = 0; i < funcApp.m_arity; i++)
7157 // TODO-CQ: We need to either make sure that *all* VN functions
7158 // always take VN args, or else have a list of arg positions to exempt, as implicitly
7160 if (!optVNIsLoopInvariant(funcApp.m_args[i], lnum, loopVnInvariantCache))
7170 // Non-function "new, unique" VN's may be annotated with the loop nest where
7171 // their definition occurs.
7172 BasicBlock::loopNumber vnLoopNum = vnStore->LoopOfVN(vn);
7174 if (vnLoopNum == MAX_LOOP_NUM)
7180 res = !optLoopContains(lnum, vnLoopNum);
7184 loopVnInvariantCache->Set(vn, res);
7188 bool Compiler::optTreeIsValidAtLoopHead(GenTree* tree, unsigned lnum)
7190 if (tree->OperIsLocal())
7192 GenTreeLclVarCommon* lclVar = tree->AsLclVarCommon();
7193 unsigned lclNum = lclVar->gtLclNum;
7195 // The lclVar must have an SSA-tracked lifetime
7196 if (fgExcludeFromSsa(lclNum))
7201 // If the loop does not contain the SSA def we can hoist it.
7202 if (!optLoopTable[lnum].lpContains(lvaTable[lclNum].GetPerSsaData(lclVar->GetSsaNum())->m_defLoc.m_blk))
7207 else if (tree->OperIsConst())
7211 else // Check whether every one of the child nodes is valid at this loop's head.
7213 unsigned nChildren = tree->NumChildren();
7214 for (unsigned childNum = 0; childNum < nChildren; childNum++)
7216 if (!optTreeIsValidAtLoopHead(tree->GetChild(childNum), lnum))
7226 /*****************************************************************************
7228 * Creates a pre-header block for the given loop - a preheader is a BBJ_NONE
7229 * header. The pre-header will replace the current lpHead in the loop table.
7230 * The loop has to be a do-while loop. Thus, all blocks dominated by lpHead
7231 * will also be dominated by the loop-top, lpHead->bbNext.
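 *  Illustrative flowgraph sketch (not a real dump):
 *
 *      before:  head (BBJ_COND) -> top/entry -> ... -> bottom -> (back-edge to top)
 *      after:   head -> preHead (BBJ_NONE) -> top/entry -> ... -> bottom -> (back-edge to top)
 *
 *  Hoisted expressions can then be placed in 'preHead', which executes once per entry
 *  to the loop.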
7235 void Compiler::fgCreateLoopPreHeader(unsigned lnum)
7237 LoopDsc* pLoopDsc = &optLoopTable[lnum];
7239 /* This loop has to be a "do-while" loop */
7241 assert(pLoopDsc->lpFlags & LPFLG_DO_WHILE);
7243 /* Have we already created a loop-preheader block? */
7245 if (pLoopDsc->lpFlags & LPFLG_HAS_PREHEAD)
7250 BasicBlock* head = pLoopDsc->lpHead;
7251 BasicBlock* top = pLoopDsc->lpTop;
7252 BasicBlock* entry = pLoopDsc->lpEntry;
7254 // if 'entry' and 'head' are in different try regions then we won't be able to hoist
7255 if (!BasicBlock::sameTryRegion(head, entry))
7260 // Ensure that lpHead always dominates lpEntry
7262 noway_assert(fgDominate(head, entry));
7264 /* Get hold of the first block of the loop body */
7266 assert(top == entry);
7268 /* Allocate a new basic block */
7270 BasicBlock* preHead = bbNewBasicBlock(BBJ_NONE);
7271 preHead->bbFlags |= BBF_INTERNAL | BBF_LOOP_PREHEADER;
7273 // Must set IL code offset
7274 preHead->bbCodeOffs = top->bbCodeOffs;
7276 // Set the default value of the preHead weight in case we don't have
7277 // valid profile data. Since this block's weight is just an estimate,
7278 // we clear any BBF_PROF_WEIGHT flag that we may have picked up from head.
7280 preHead->inheritWeight(head);
7281 preHead->bbFlags &= ~BBF_PROF_WEIGHT;
7286 printf("\nCreated PreHeader (BB%02u) for loop L%02u (BB%02u - BB%02u), with weight = %s\n", preHead->bbNum,
7287 lnum, top->bbNum, pLoopDsc->lpBottom->bbNum, refCntWtd2str(preHead->getBBWeight(this)));
7291 // The preheader block is part of the containing loop (if any).
7292 preHead->bbNatLoopNum = pLoopDsc->lpParent;
7294 if (fgIsUsingProfileWeights() && (head->bbJumpKind == BBJ_COND))
7296 if ((head->bbWeight == 0) || (head->bbNext->bbWeight == 0))
7298 preHead->bbWeight = 0;
7299 preHead->bbFlags |= BBF_RUN_RARELY;
7303 bool allValidProfileWeights =
7304 (head->hasProfileWeight() && head->bbJumpDest->hasProfileWeight() && head->bbNext->hasProfileWeight());
7306 if (allValidProfileWeights)
7308 double loopEnteredCount;
7309 double loopSkippedCount;
7311 if (fgHaveValidEdgeWeights)
7313 flowList* edgeToNext = fgGetPredForBlock(head->bbNext, head);
7314 flowList* edgeToJump = fgGetPredForBlock(head->bbJumpDest, head);
7315 noway_assert(edgeToNext != nullptr);
7316 noway_assert(edgeToJump != nullptr);
7319 ((double)edgeToNext->flEdgeWeightMin + (double)edgeToNext->flEdgeWeightMax) / 2.0;
7321 ((double)edgeToJump->flEdgeWeightMin + (double)edgeToJump->flEdgeWeightMax) / 2.0;
7325 loopEnteredCount = (double)head->bbNext->bbWeight;
7326 loopSkippedCount = (double)head->bbJumpDest->bbWeight;
7329 double loopTakenRatio = loopEnteredCount / (loopEnteredCount + loopSkippedCount);
7331 // Calculate a good approximation of the preHead's block weight
7332 unsigned preHeadWeight = (unsigned)(((double)head->bbWeight * loopTakenRatio) + 0.5);
7333 preHead->setBBWeight(max(preHeadWeight, 1));
7334 noway_assert(!preHead->isRunRarely());
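// Illustrative numbers (not from a real profile): with head->bbWeight == 100,
// loopEnteredCount == 90 and loopSkippedCount == 10, loopTakenRatio is 0.9 and
// preHeadWeight becomes (unsigned)(100 * 0.9 + 0.5) == 90.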
7339 // Link in the preHead block.
7340 fgInsertBBbefore(top, preHead);
7342 // Ideally we would re-run SSA and VN if we optimized by doing loop hoisting.
7343 // However, that is too expensive at this point. Instead, we update the phi
7344 // node block references, if we created a pre-header block due to hoisting.
7345 // This is sufficient because any definition participating in SSA that flowed
7346 // into the phi via the loop header block will now flow through the preheader
7347 // block from the header block.
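// Illustrative phi update (a sketch): an entry-block statement such as
//     V01 = phi(V01/pred:head, V01/pred:bottom)
// becomes
//     V01 = phi(V01/pred:preHead, V01/pred:bottom)
// because flow that previously arrived from 'head' now arrives through 'preHead'.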
7349 for (GenTree* stmt = top->bbTreeList; stmt; stmt = stmt->gtNext)
7351 GenTree* tree = stmt->gtStmt.gtStmtExpr;
7352 if (tree->OperGet() != GT_ASG)
7356 GenTree* op2 = tree->gtGetOp2();
7357 if (op2->OperGet() != GT_PHI)
7361 GenTreeArgList* args = op2->gtGetOp1()->AsArgList();
7362 while (args != nullptr)
7364 GenTreePhiArg* phiArg = args->Current()->AsPhiArg();
7365 if (phiArg->gtPredBB == head)
7367 phiArg->gtPredBB = preHead;
7369 args = args->Rest();
7373 // The handler can't begin at the top of the loop. If it did, it would be incorrect
7374 // to set the handler index on the pre header without updating the exception table.
7375 noway_assert(!top->hasHndIndex() || fgFirstBlockOfHandler(top) != top);
7377 // Update the EH table to make the hoisted block part of the loop's EH block.
7378 fgExtendEHRegionBefore(top);
7380 // TODO-CQ: set dominators for this block, to allow loop optimizations requiring them
7381 // (e.g.: hoisting an expression in a loop with the same 'head' as this one)
7383 /* Update the loop entry */
7385 pLoopDsc->lpHead = preHead;
7386 pLoopDsc->lpFlags |= LPFLG_HAS_PREHEAD;
7388 /* The new block becomes the 'head' of the loop - update bbRefs and bbPreds.
7389 All predecessors of 'beg' (which is the entry of the loop)
7390 now have to jump to 'preHead', unless they are dominated by 'head' */
7392 preHead->bbRefs = 0;
7393 fgAddRefPred(preHead, head);
7394 bool checkNestedLoops = false;
7396 for (flowList* pred = top->bbPreds; pred; pred = pred->flNext)
7398 BasicBlock* predBlock = pred->flBlock;
7400 if (fgDominate(top, predBlock))
7402 // note: if 'top' dominates predBlock, 'head' dominates predBlock too
7403 // (we know that 'head' dominates 'top'), but using 'top' instead of
7404 // 'head' in the test allows us to not enter here if 'predBlock == head'
7406 if (predBlock != pLoopDsc->lpBottom)
7408 noway_assert(predBlock != head);
7409 checkNestedLoops = true;
7414 switch (predBlock->bbJumpKind)
7417 noway_assert(predBlock == head);
7421 if (predBlock == head)
7423 noway_assert(predBlock->bbJumpDest != top);
7429 case BBJ_EHCATCHRET:
7430 noway_assert(predBlock->bbJumpDest == top);
7431 predBlock->bbJumpDest = preHead;
7432 preHead->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
7434 if (predBlock == head)
7436 // This is essentially the same case as predBlock being a BBJ_NONE. We may not be
7437 // able to make this a BBJ_NONE if it's an internal block (for example, a leave).
7438 // Just break, pred will be removed after switch.
7442 fgRemoveRefPred(top, predBlock);
7443 fgAddRefPred(preHead, predBlock);
7449 jumpCnt = predBlock->bbJumpSwt->bbsCount;
7450 BasicBlock** jumpTab;
7451 jumpTab = predBlock->bbJumpSwt->bbsDstTab;
7456 if ((*jumpTab) == top)
7458 (*jumpTab) = preHead;
7460 fgRemoveRefPred(top, predBlock);
7461 fgAddRefPred(preHead, predBlock);
7462 preHead->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
7464 } while (++jumpTab, --jumpCnt);
7467 noway_assert(!"Unexpected bbJumpKind");
7472 noway_assert(!fgGetPredForBlock(top, preHead));
7473 fgRemoveRefPred(top, head);
7474 fgAddRefPred(top, preHead);
7477 If we found at least one back-edge in the flowgraph pointing to the top/entry of the loop
7478 (other than the back-edge of the loop we are considering) then we likely have nested
7479 do-while loops with the same entry block and inserting the preheader block changes the head
7480 of all the nested loops. Now we will update this piece of information in the loop table, and
7481 mark all nested loops as having a preheader (the preheader block can be shared among all nested
7482 do-while loops with the same entry block).
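Illustrative case (a sketch): two do-while loops recorded with the same 'head' and the
same entry block - for example an outer do-while whose first block is also the top of an
inner do-while. Once 'preHead' is inserted, both loop table entries must name it as their
head, and both can share it as their preheader.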
7484 if (checkNestedLoops)
7486 for (unsigned l = 0; l < optLoopCount; l++)
7488 if (optLoopTable[l].lpHead == head)
7490 noway_assert(l != lnum); // pLoopDsc->lpHead was already changed from 'head' to 'preHead'
7491 noway_assert(optLoopTable[l].lpEntry == top);
7492 optUpdateLoopHead(l, optLoopTable[l].lpHead, preHead);
7493 optLoopTable[l].lpFlags |= LPFLG_HAS_PREHEAD;
7497 printf("Same PreHeader (BB%02u) can be used for loop L%02u (BB%02u - BB%02u)\n\n", preHead->bbNum,
7498 l, top->bbNum, optLoopTable[l].lpBottom->bbNum);
7506 bool Compiler::optBlockIsLoopEntry(BasicBlock* blk, unsigned* pLnum)
7508 for (unsigned lnum = blk->bbNatLoopNum; lnum != BasicBlock::NOT_IN_LOOP; lnum = optLoopTable[lnum].lpParent)
7510 if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED)
7514 if (optLoopTable[lnum].lpEntry == blk)
7523 void Compiler::optComputeLoopSideEffects()
7526 for (lnum = 0; lnum < optLoopCount; lnum++)
7528 VarSetOps::AssignNoCopy(this, optLoopTable[lnum].lpVarInOut, VarSetOps::MakeEmpty(this));
7529 VarSetOps::AssignNoCopy(this, optLoopTable[lnum].lpVarUseDef, VarSetOps::MakeEmpty(this));
7530 optLoopTable[lnum].lpContainsCall = false;
7533 for (lnum = 0; lnum < optLoopCount; lnum++)
7535 if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED)
7540 if (optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP)
7541 { // Is outermost...
7542 optComputeLoopNestSideEffects(lnum);
7546 VarSetOps::AssignNoCopy(this, lvaFloatVars, VarSetOps::MakeEmpty(this));
7547 #ifndef _TARGET_64BIT_
7548 VarSetOps::AssignNoCopy(this, lvaLongVars, VarSetOps::MakeEmpty(this));
7551 for (unsigned i = 0; i < lvaCount; i++)
7553 LclVarDsc* varDsc = &lvaTable[i];
7554 if (varDsc->lvTracked)
7556 if (varTypeIsFloating(varDsc->lvType))
7558 VarSetOps::AddElemD(this, lvaFloatVars, varDsc->lvVarIndex);
7560 #ifndef _TARGET_64BIT_
7561 else if (varTypeIsLong(varDsc->lvType))
7563 VarSetOps::AddElemD(this, lvaLongVars, varDsc->lvVarIndex);
7570 void Compiler::optComputeLoopNestSideEffects(unsigned lnum)
7572 assert(optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP); // Requires: lnum is outermost.
7573 BasicBlock* botNext = optLoopTable[lnum].lpBottom->bbNext;
7574 for (BasicBlock* bbInLoop = optLoopTable[lnum].lpFirst; bbInLoop != botNext; bbInLoop = bbInLoop->bbNext)
7576 optComputeLoopSideEffectsOfBlock(bbInLoop);
7580 void Compiler::optComputeLoopSideEffectsOfBlock(BasicBlock* blk)
7582 unsigned mostNestedLoop = blk->bbNatLoopNum;
7583 assert(mostNestedLoop != BasicBlock::NOT_IN_LOOP);
7585 AddVariableLivenessAllContainingLoops(mostNestedLoop, blk);
7587 // MemoryKinds for which an in-loop call or store has arbitrary effects.
7588 MemoryKindSet memoryHavoc = emptyMemoryKindSet;
7590 // Now iterate over the remaining statements, and their trees.
7591 for (GenTree* stmts = blk->FirstNonPhiDef(); (stmts != nullptr); stmts = stmts->gtNext)
7593 for (GenTree* tree = stmts->gtStmt.gtStmtList; (tree != nullptr); tree = tree->gtNext)
7595 genTreeOps oper = tree->OperGet();
7597 // Even after we set memoryHavoc we still may want to know if a loop contains calls
7598 if (memoryHavoc == fullMemoryKindSet)
7600 if (oper == GT_CALL)
7602 // Record that this loop contains a call
7603 AddContainsCallAllContainingLoops(mostNestedLoop);
7606 // If we just set lpContainsCall or it was previously set
7607 if (optLoopTable[mostNestedLoop].lpContainsCall)
7609 // We can exit early once both memoryHavoc and lpContainsCall are set to true.
7613 // We are just looking for GT_CALL nodes after memoryHavoc was set.
7617 // otherwise memoryHavoc is not set for at least one heap ID
7618 assert(memoryHavoc != fullMemoryKindSet);
7620 // This body is a distillation of the memory side-effect code of value numbering.
7621 // We also do a very limited analysis of byref PtrTo values, to cover some cases
7622 // that the compiler creates.
7624 if (GenTree::OperIsAssignment(oper))
7626 GenTree* lhs = tree->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true);
7628 if (lhs->OperGet() == GT_IND)
7630 GenTree* arg = lhs->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true);
7631 FieldSeqNode* fldSeqArrElem = nullptr;
7633 if ((tree->gtFlags & GTF_IND_VOLATILE) != 0)
7635 memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
7641 if (arg->TypeGet() == TYP_BYREF && arg->OperGet() == GT_LCL_VAR)
7643 // If it's a local byref for which we recorded a value number, use that...
7644 GenTreeLclVar* argLcl = arg->AsLclVar();
7645 if (!fgExcludeFromSsa(argLcl->GetLclNum()))
7648 lvaTable[argLcl->GetLclNum()].GetPerSsaData(argLcl->GetSsaNum())->m_vnPair.GetLiberal();
7650 if (argVN != ValueNumStore::NoVN && vnStore->GetVNFunc(argVN, &funcApp) &&
7651 funcApp.m_func == VNF_PtrToArrElem)
7653 assert(vnStore->IsVNHandle(funcApp.m_args[0]));
7654 CORINFO_CLASS_HANDLE elemType =
7655 CORINFO_CLASS_HANDLE(vnStore->ConstantValue<size_t>(funcApp.m_args[0]));
7656 AddModifiedElemTypeAllContainingLoops(mostNestedLoop, elemType);
7657 // Don't set memoryHavoc for GcHeap below. Do set memoryHavoc for ByrefExposed
7658 // (conservatively assuming that a byref may alias the array element)
7659 memoryHavoc |= memoryKindSet(ByrefExposed);
7664 memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
7666 // Is the LHS an array index expression?
7667 else if (lhs->ParseArrayElemForm(this, &arrInfo, &fldSeqArrElem))
7669 // We actually ignore "fldSeq" -- any modification to an S[], at any
7670 // field of "S", will lose all information about the array type.
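// Illustrative example (assumed source): for an element store such as 'a[i].f = v'
// where 'a' is an S[], the whole element type S is recorded as modified for every
// enclosing loop; we do not try to disambiguate by the particular field.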
7671 CORINFO_CLASS_HANDLE elemTypeEq = EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType);
7672 AddModifiedElemTypeAllContainingLoops(mostNestedLoop, elemTypeEq);
7673 // Conservatively assume byrefs may alias this array element
7674 memoryHavoc |= memoryKindSet(ByrefExposed);
7678 // We are only interested in IsFieldAddr()'s fldSeq out parameter.
7680 GenTree* obj = nullptr; // unused
7681 GenTree* staticOffset = nullptr; // unused
7682 FieldSeqNode* fldSeq = nullptr;
7684 if (arg->IsFieldAddr(this, &obj, &staticOffset, &fldSeq) &&
7685 (fldSeq != FieldSeqStore::NotAField()))
7687 // Get the first (object) field from field seq. GcHeap[field] will yield the "field map".
7688 assert(fldSeq != nullptr);
7689 if (fldSeq->IsFirstElemFieldSeq())
7691 fldSeq = fldSeq->m_next;
7692 assert(fldSeq != nullptr);
7695 AddModifiedFieldAllContainingLoops(mostNestedLoop, fldSeq->m_fieldHnd);
7696 // Conservatively assume byrefs may alias this object.
7697 memoryHavoc |= memoryKindSet(ByrefExposed);
7701 memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
7705 else if (lhs->OperIsBlk())
7707 GenTreeLclVarCommon* lclVarTree;
7709 if (!tree->DefinesLocal(this, &lclVarTree, &isEntire))
7711 // For now, assume arbitrary side effects on GcHeap/ByrefExposed...
7712 memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
7714 else if (lvaVarAddrExposed(lclVarTree->gtLclNum))
7716 memoryHavoc |= memoryKindSet(ByrefExposed);
7719 else if (lhs->OperGet() == GT_CLS_VAR)
7721 AddModifiedFieldAllContainingLoops(mostNestedLoop, lhs->gtClsVar.gtClsVarHnd);
7722 // Conservatively assume byrefs may alias this static field
7723 memoryHavoc |= memoryKindSet(ByrefExposed);
7725 // Otherwise, must be local lhs form. I should assert that.
7726 else if (lhs->OperGet() == GT_LCL_VAR)
7728 GenTreeLclVar* lhsLcl = lhs->AsLclVar();
7729 GenTree* rhs = tree->gtOp.gtOp2;
7730 ValueNum rhsVN = rhs->gtVNPair.GetLiberal();
7731 // If we gave the RHS a value number, propagate it.
7732 if (rhsVN != ValueNumStore::NoVN)
7734 rhsVN = vnStore->VNNormVal(rhsVN);
7735 if (!fgExcludeFromSsa(lhsLcl->GetLclNum()))
7737 lvaTable[lhsLcl->GetLclNum()]
7738 .GetPerSsaData(lhsLcl->GetSsaNum())
7739 ->m_vnPair.SetLiberal(rhsVN);
7742 // If the local is address-exposed, count this as ByrefExposed havoc
7743 if (lvaVarAddrExposed(lhsLcl->gtLclNum))
7745 memoryHavoc |= memoryKindSet(ByrefExposed);
7749 else // not GenTree::OperIsAssignment(oper)
7754 tree->gtVNPair = tree->gtOp.gtOp2->gtVNPair;
7758 // Is it an addr of an array index expression?
7760 GenTree* addrArg = tree->gtOp.gtOp1;
7761 if (addrArg->OperGet() == GT_IND)
7763 // Is the LHS an array index expression?
7764 if (addrArg->gtFlags & GTF_IND_ARR_INDEX)
7767 bool b = GetArrayInfoMap()->Lookup(addrArg, &arrInfo);
7769 CORINFO_CLASS_HANDLE elemType =
7770 EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType);
7771 tree->gtVNPair.SetBoth(
7772 vnStore->VNForFunc(TYP_BYREF, VNF_PtrToArrElem,
7773 vnStore->VNForHandle(ssize_t(elemType), GTF_ICON_CLASS_HDL),
7774 // The rest are dummy arguments.
7775 vnStore->VNForNull(), vnStore->VNForNull(),
7776 vnStore->VNForNull()));
7782 case GT_LOCKADD: // Binop
7783 case GT_XADD: // Binop
7784 case GT_XCHG: // Binop
7785 case GT_CMPXCHG: // Specialop
7787 memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
7793 GenTreeCall* call = tree->AsCall();
7795 // Record that this loop contains a call
7796 AddContainsCallAllContainingLoops(mostNestedLoop);
7798 if (call->gtCallType == CT_HELPER)
7800 CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
7801 if (s_helperCallProperties.MutatesHeap(helpFunc))
7803 memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
7805 else if (s_helperCallProperties.MayRunCctor(helpFunc))
7807 // If the call is labeled as "Hoistable", then we've checked the
7808 // class that would be constructed, and it is not precise-init, so
7809 // the cctor will not be run by this call. Otherwise, it might be,
7810 // and might have arbitrary side effects.
7811 if ((tree->gtFlags & GTF_CALL_HOISTABLE) == 0)
7813 memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
7819 memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
7825 // All other gtOper node kinds, leave 'memoryHavoc' unchanged (i.e. false)
7832 if (memoryHavoc != emptyMemoryKindSet)
7834 // Record that all loops containing this block have memory havoc effects.
7835 unsigned lnum = mostNestedLoop;
7836 while (lnum != BasicBlock::NOT_IN_LOOP)
7838 for (MemoryKind memoryKind : allMemoryKinds())
7840 if ((memoryHavoc & memoryKindSet(memoryKind)) != 0)
7842 optLoopTable[lnum].lpLoopHasMemoryHavoc[memoryKind] = true;
7845 lnum = optLoopTable[lnum].lpParent;
7850 // Adds the containsCall information to "lnum" and any parent loops.
7851 void Compiler::AddContainsCallAllContainingLoops(unsigned lnum)
7853 assert(0 <= lnum && lnum < optLoopCount);
7854 while (lnum != BasicBlock::NOT_IN_LOOP)
7856 optLoopTable[lnum].lpContainsCall = true;
7857 lnum = optLoopTable[lnum].lpParent;
7861 // Adds the variable liveness information for 'blk' to 'this' LoopDsc
7862 void Compiler::LoopDsc::AddVariableLiveness(Compiler* comp, BasicBlock* blk)
7864 VarSetOps::UnionD(comp, this->lpVarInOut, blk->bbLiveIn);
7865 VarSetOps::UnionD(comp, this->lpVarInOut, blk->bbLiveOut);
7867 VarSetOps::UnionD(comp, this->lpVarUseDef, blk->bbVarUse);
7868 VarSetOps::UnionD(comp, this->lpVarUseDef, blk->bbVarDef);
7871 // Adds the variable liveness information for 'blk' to "lnum" and any parent loops.
7872 void Compiler::AddVariableLivenessAllContainingLoops(unsigned lnum, BasicBlock* blk)
7874 assert(0 <= lnum && lnum < optLoopCount);
7875 while (lnum != BasicBlock::NOT_IN_LOOP)
7877 optLoopTable[lnum].AddVariableLiveness(this, blk);
7878 lnum = optLoopTable[lnum].lpParent;
7882 // Adds "fldHnd" to the set of modified fields of "lnum" and any parent loops.
7883 void Compiler::AddModifiedFieldAllContainingLoops(unsigned lnum, CORINFO_FIELD_HANDLE fldHnd)
7885 assert(0 <= lnum && lnum < optLoopCount);
7886 while (lnum != BasicBlock::NOT_IN_LOOP)
7888 optLoopTable[lnum].AddModifiedField(this, fldHnd);
7889 lnum = optLoopTable[lnum].lpParent;
7893 // Adds "elemType" to the set of modified array element types of "lnum" and any parent loops.
7894 void Compiler::AddModifiedElemTypeAllContainingLoops(unsigned lnum, CORINFO_CLASS_HANDLE elemClsHnd)
7896 assert(0 <= lnum && lnum < optLoopCount);
7897 while (lnum != BasicBlock::NOT_IN_LOOP)
7899 optLoopTable[lnum].AddModifiedElemType(this, elemClsHnd);
7900 lnum = optLoopTable[lnum].lpParent;
7904 /*****************************************************************************
7906 * Helper passed to Compiler::fgWalkAllTreesPre() to decrement the LclVar usage counts
7907 * The 'keepList' is either a single tree or a list of trees that are formed by
7908 * one or more GT_COMMA nodes. It is the kept side-effects as returned by the
7909 * gtExtractSideEffList method.
7913 Compiler::fgWalkResult Compiler::optRemoveTreeVisitor(GenTree** pTree, fgWalkData* data)
7915 GenTree* tree = *pTree;
7916 Compiler* comp = data->compiler;
7917 GenTree* keepList = (GenTree*)(data->pCallbackData);
7919 // We may have a non-NULL side effect list that is being kept
7923 GenTree* keptTree = keepList;
7924 while (keptTree->OperGet() == GT_COMMA)
7926 assert(keptTree->OperKind() & GTK_SMPOP);
7927 GenTree* op1 = keptTree->gtOp.gtOp1;
7928 GenTree* op2 = keptTree->gtGetOp2();
7930 // For the GT_COMMA case, op1 is part of the original CSE tree
7931 // that is being kept because it contains some side-effect
7935 // This tree and all of its sub trees are being kept.
7936 return WALK_SKIP_SUBTREES;
7939 // For the GT_COMMA case, op2 holds the remaining side-effects of the original CSE tree
7940 // which can again be another GT_COMMA or the final side-effect part
7944 if (tree == keptTree)
7946 // This tree and all of its sub trees are being kept.
7947 return WALK_SKIP_SUBTREES;
7951 // This node is being removed from the graph of GenTree*
7953 // Look for any local variable references
7955 if (tree->gtOper == GT_LCL_VAR && comp->lvaLocalVarRefCounted)
7960 /* This variable ref is going away, decrease its ref counts */
7962 lclNum = tree->gtLclVarCommon.gtLclNum;
7963 assert(lclNum < comp->lvaCount);
7964 varDsc = comp->lvaTable + lclNum;
7966 // make sure it's been initialized
7967 assert(comp->compCurBB != nullptr);
7968 assert(comp->compCurBB->bbWeight <= BB_MAX_WEIGHT);
7970 /* Decrement its lvRefCnt and lvRefCntWtd */
7972 // Use getBBWeight to determine the proper block weight.
7973 // This impacts the block weights when we have IBC data.
7974 varDsc->decRefCnts(comp->compCurBB->getBBWeight(comp), comp);
7977 return WALK_CONTINUE;
7980 /*****************************************************************************
7982 * Routine called to decrement the LclVar ref counts when removing a tree
7983 * during the remove RangeCheck phase.
7984 * This method will decrement the refcounts for any LclVars used below 'deadTree',
7985 * unless the node is found in the 'keepList' (which are saved side effects)
7986 * The keepList is communicated using the walkData.pCallbackData field
7987 * Also the compCurBB must be set to the current BasicBlock which contains
7988 * 'deadTree' as we need to fetch the block weight when decrementing the ref counts.
7991 void Compiler::optRemoveTree(GenTree* deadTree, GenTree* keepList)
7993 // We communicate this value using the walkData.pCallbackData field
7995 fgWalkTreePre(&deadTree, optRemoveTreeVisitor, (void*)keepList);
7998 //------------------------------------------------------------------------------
7999 // optRemoveRangeCheck : Given an array index node, mark it as not needing a range check.
8002 // tree - Range check tree
8003 // stmt - Statement the tree belongs to
8005 void Compiler::optRemoveRangeCheck(GenTree* tree, GenTree* stmt)
8008 noway_assert(!"can't remove range checks without REARRANGE_ADDS right now");
8011 noway_assert(stmt->gtOper == GT_STMT);
8012 noway_assert(tree->gtOper == GT_COMMA);
8014 GenTree* bndsChkTree = tree->gtOp.gtOp1;
8016 noway_assert(bndsChkTree->OperIsBoundsCheck());
8018 GenTreeBoundsChk* bndsChk = tree->gtOp.gtOp1->AsBoundsChk();
8023 printf("Before optRemoveRangeCheck:\n");
8028 GenTree* sideEffList = nullptr;
8030 gtExtractSideEffList(bndsChkTree, &sideEffList, GTF_ASG);
8032 // Decrement the ref counts for any LclVars that are being deleted
8034 optRemoveTree(bndsChkTree, sideEffList);
8036 // Just replace the bndsChk with a NOP as an operand to the GT_COMMA, if there are no side effects.
8037 tree->gtOp.gtOp1 = (sideEffList != nullptr) ? sideEffList : gtNewNothingNode();
8038 // TODO-CQ: We should also remove the GT_COMMA, but in any case we can no longer CSE the GT_COMMA.
8039 tree->gtFlags |= GTF_DONT_CSE;
8041 gtUpdateSideEffects(stmt, tree);
8043 /* Recalculate the gtCostSz, etc... */
8044 gtSetStmtInfo(stmt);
8046 /* Re-thread the nodes if necessary */
8047 if (fgStmtListThreaded)
8055 printf("After optRemoveRangeCheck:\n");
8061 /*****************************************************************************
8062 * Return the scale in an array reference, given a pointer to the
8063 * multiplication node.
8066 ssize_t Compiler::optGetArrayRefScaleAndIndex(GenTree* mul, GenTree** pIndex DEBUGARG(bool bRngChk))
8069 assert(mul->gtOper == GT_MUL || mul->gtOper == GT_LSH);
8070 assert(mul->gtOp.gtOp2->IsCnsIntOrI());
8072 ssize_t scale = mul->gtOp.gtOp2->gtIntConCommon.IconValue();
8074 if (mul->gtOper == GT_LSH)
8076 scale = ((ssize_t)1) << scale;
8079 GenTree* index = mul->gtOp.gtOp1;
8081 if (index->gtOper == GT_MUL && index->gtOp.gtOp2->IsCnsIntOrI())
8083 // case of two cascading multiplications for constant int (e.g. * 20 morphed to * 5 * 4):
8084 // When index->gtOper is GT_MUL and index->gtOp.gtOp2->gtOper is GT_CNS_INT (i.e. * 5),
8085 // we can bump up the scale from 4 to 5*4, and then change index to index->gtOp.gtOp1.
8086 // Otherwise, we cannot optimize it. We will simply keep the original scale and index.
8087 scale *= index->gtOp.gtOp2->gtIntConCommon.IconValue();
8088 index = index->gtOp.gtOp1;
8091 assert(!bRngChk || index->gtOper != GT_COMMA);
8101 /*****************************************************************************
8102 * Find the last assignment to the local variable in the block. Return
8103 * RHS or NULL. If any local variable in the RHS has been killed in
8104 * intervening code, return NULL. If the variable being searched for is killed
8105 * in the intervening code, return NULL.
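 *  Illustrative example (a sketch): in a block containing
 *      V03 = V01 + V02; ...; use of V03
 *  a search for V03 returns the tree 'V01 + V02', unless V01, V02 or V03 is assigned
 *  again in the intervening code, in which case NULL is returned.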
8109 GenTree* Compiler::optFindLocalInit(BasicBlock* block,
8111 VARSET_TP* pKilledInOut,
8112 bool* pLhsRhsKilledAfterInit)
8114 assert(pKilledInOut);
8115 assert(pLhsRhsKilledAfterInit);
8117 *pLhsRhsKilledAfterInit = false;
8119 unsigned LclNum = local->gtLclVarCommon.gtLclNum;
8121 GenTree* list = block->bbTreeList;
8122 if (list == nullptr)
8127 GenTree* rhs = nullptr;
8128 GenTree* stmt = list;
8131 stmt = stmt->gtPrev;
8132 if (stmt == nullptr)
8137 GenTree* tree = stmt->gtStmt.gtStmtExpr;
8138 // If we encounter an assignment to a local variable,
8139 if (tree->OperIsAssignment() && tree->gtOp.gtOp1->gtOper == GT_LCL_VAR)
8141 // And the assigned variable equals the input local,
8142 if (tree->gtOp.gtOp1->gtLclVarCommon.gtLclNum == LclNum)
8144 // If the assignment is '=' and it is not a conditional, then return rhs.
8145 if (tree->gtOper == GT_ASG && !(tree->gtFlags & GTF_COLON_COND))
8147 rhs = tree->gtOp.gtOp2;
8149 // If the assignment is 'op=' or a conditional equal, then the search ends here,
8150 // as we found a kill to the input local.
8153 *pLhsRhsKilledAfterInit = true;
8154 assert(rhs == nullptr);
8160 LclVarDsc* varDsc = optIsTrackedLocal(tree->gtOp.gtOp1);
8161 if (varDsc == nullptr)
8165 VarSetOps::AddElemD(this, *pKilledInOut, varDsc->lvVarIndex);
8168 } while (stmt != list);
8175 // If any local in the RHS is killed in intervening code, or RHS has an indirection, return NULL.
8176 varRefKinds rhsRefs = VR_NONE;
8177 VARSET_TP rhsLocals(VarSetOps::UninitVal());
8178 bool b = lvaLclVarRefs(rhs, nullptr, &rhsRefs, &rhsLocals);
8179 if (!b || !VarSetOps::IsEmptyIntersection(this, rhsLocals, *pKilledInOut) || (rhsRefs != VR_NONE))
8181 // If RHS has been indirectly referenced, consider it a write and a kill.
8182 *pLhsRhsKilledAfterInit = true;
8189 //------------------------------------------------------------------------------
8190 // optObtainLoopCloningOpts: Identify optimization candidates and update
8191 // the "context" for array optimizations.
8194 // context - data structure where all loop cloning info is kept. The
8195 // optInfo fields of the context are updated with the
8196 // identified optimization candidates.
8198 void Compiler::optObtainLoopCloningOpts(LoopCloneContext* context)
8200 for (unsigned i = 0; i < optLoopCount; i++)
8202 JITDUMP("Considering loop %d to clone for optimizations.\n", i);
8203 if (optIsLoopClonable(i))
8205 if (!(optLoopTable[i].lpFlags & LPFLG_REMOVED))
8207 optIdentifyLoopOptInfo(i, context);
8210 JITDUMP("------------------------------------------------------------\n");
8215 //------------------------------------------------------------------------
8216 // optIdentifyLoopOptInfo: Identify loop optimization candidates and also
8217 // check if the loop is suitable for the optimizations performed.
8220 // loopNum - the current loop index for which conditions are derived.
8221 // context - data structure where all loop cloning candidates will be
8225 // If the loop is not suitable for the optimizations, return false - context
8226 // should not contain any optimization candidate for the loop if false.
8227 // Else return true.
8230 // Check if the loop is well formed for this optimization and identify the
8231 // optimization candidates and update the "context" parameter with all the
8232 // contextual information necessary to perform the optimization later.
8234 bool Compiler::optIdentifyLoopOptInfo(unsigned loopNum, LoopCloneContext* context)
8236 noway_assert(loopNum < optLoopCount);
8238 LoopDsc* pLoop = &optLoopTable[loopNum];
8240 if (!(pLoop->lpFlags & LPFLG_ITER))
8242 JITDUMP("> No iter flag on loop %d.\n", loopNum);
8246 unsigned ivLclNum = pLoop->lpIterVar();
8247 if (lvaVarAddrExposed(ivLclNum))
8249 JITDUMP("> Rejected V%02u as iter var because is address-exposed.\n", ivLclNum);
8253 BasicBlock* head = pLoop->lpHead;
8254 BasicBlock* end = pLoop->lpBottom;
8255 BasicBlock* beg = head->bbNext;
8257 if (end->bbJumpKind != BBJ_COND)
8259 JITDUMP("> Couldn't find termination test.\n");
8263 if (end->bbJumpDest != beg)
8265 JITDUMP("> Branch at loop 'end' not looping to 'begin'.\n");
8269 // TODO-CQ: CLONE: Mark increasing or decreasing loops.
8271 #ifdef LEGACY_BACKEND
8272 pLoop->lpIterOper() != GT_ASG_ADD &&
8274 pLoop->lpIterOper() != GT_ADD) ||
8275 (pLoop->lpIterConst() != 1))
8277 JITDUMP("> Loop iteration operator not matching\n");
8281 if ((pLoop->lpFlags & LPFLG_CONST_LIMIT) == 0 && (pLoop->lpFlags & LPFLG_VAR_LIMIT) == 0 &&
8282 (pLoop->lpFlags & LPFLG_ARRLEN_LIMIT) == 0)
8284 JITDUMP("> Loop limit is neither constant, variable or array length\n");
8288 if (!(((pLoop->lpTestOper() == GT_LT || pLoop->lpTestOper() == GT_LE) && (pLoop->lpIterOper() == GT_ADD
8289 #ifdef LEGACY_BACKEND
8290 || pLoop->lpIterOper() == GT_ASG_ADD
8293 ((pLoop->lpTestOper() == GT_GT || pLoop->lpTestOper() == GT_GE) && (pLoop->lpIterOper() == GT_SUB
8294 #ifdef LEGACY_BACKEND
8295 || pLoop->lpIterOper() == GT_ASG_SUB
8299 JITDUMP("> Loop test (%s) doesn't agree with the direction (%s) of the pLoop->\n",
8300 GenTree::OpName(pLoop->lpTestOper()), GenTree::OpName(pLoop->lpIterOper()));
8304 if (!(pLoop->lpTestTree->OperKind() & GTK_RELOP) || !(pLoop->lpTestTree->gtFlags & GTF_RELOP_ZTT))
8306 JITDUMP("> Loop inversion NOT present, loop test [%06u] may not protect entry from head.\n",
8307 pLoop->lpTestTree->gtTreeID);
8312 GenTree* op1 = pLoop->lpIterator();
8313 noway_assert((op1->gtOper == GT_LCL_VAR) && (op1->gtLclVarCommon.gtLclNum == ivLclNum));
8316 JITDUMP("Checking blocks BB%02d..BB%02d for optimization candidates\n", beg->bbNum,
8317 end->bbNext ? end->bbNext->bbNum : 0);
8319 LoopCloneVisitorInfo info(context, loopNum, nullptr);
8320 for (BasicBlock* block = beg; block != end->bbNext; block = block->bbNext)
8323 for (GenTree* stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
8326 const bool lclVarsOnly = false;
8327 const bool computeStack = false;
8328 fgWalkTreePre(&stmt->gtStmt.gtStmtExpr, optCanOptimizeByLoopCloningVisitor, &info, lclVarsOnly,
8336 //---------------------------------------------------------------------------------------------------------------
8337 // optExtractArrIndex: Try to extract the array index from "tree".
8340 // tree the tree to be checked if it is the array [] operation.
8341 // result the extracted GT_INDEX information is updated in result.
8342 // lhsNum for the root level (function is recursive) callers should be BAD_VAR_NUM.
8345 // Returns true if array index can be extracted, else, return false. See assumption about
8346 // what will be extracted. The "result" variable's rank parameter is advanced for every
8347 // dimension of [] encountered.
8350 // Given a "tree" extract the GT_INDEX node in "result" as ArrIndex. In FlowGraph morph
8351 // we have converted a GT_INDEX tree into a scaled index base offset expression. We need
8352 // to reconstruct this to be able to know if this is an array access.
8355 // The method extracts only if the array base and indices are GT_LCL_VAR.
8357 // TODO-CQ: CLONE: After morph make sure this method extracts values before morph.
8359 // [000000001AF828D8] ---XG------- indir int
8360 // [000000001AF872C8] ------------ const long 16 Fseq[#FirstElem]
8361 // [000000001AF87340] ------------ + byref
8362 // [000000001AF87160] -------N---- const long 2
8363 // [000000001AF871D8] ------------ << long
8364 // [000000001AF870C0] ------------ cast long <- int
8365 // [000000001AF86F30] i----------- lclVar int V04 loc0
8366 // [000000001AF87250] ------------ + byref
8367 // [000000001AF86EB8] ------------ lclVar ref V01 arg1
8368 // [000000001AF87468] ---XG------- comma int
8369 // [000000001AF87020] ---X-------- arrBndsChk void
8370 // [000000001AF86FA8] ---X-------- arrLen int
8371 // [000000001AF827E8] ------------ lclVar ref V01 arg1
8372 // [000000001AF82860] ------------ lclVar int V04 loc0
8373 // [000000001AF829F0] -A-XG------- = int
8374 // [000000001AF82978] D------N---- lclVar int V06 tmp0
8376 bool Compiler::optExtractArrIndex(GenTree* tree, ArrIndex* result, unsigned lhsNum)
8378 if (tree->gtOper != GT_COMMA)
8382 GenTree* before = tree->gtGetOp1();
8383 if (before->gtOper != GT_ARR_BOUNDS_CHECK)
8387 GenTreeBoundsChk* arrBndsChk = before->AsBoundsChk();
8388 if (arrBndsChk->gtIndex->gtOper != GT_LCL_VAR)
8393 // For span we may see gtArrLen is a local var or local field.
8394 // We won't try and extract those.
8395 const genTreeOps arrayOp = arrBndsChk->gtArrLen->gtOper;
8397 if ((arrayOp == GT_LCL_VAR) || (arrayOp == GT_LCL_FLD))
8401 if (arrBndsChk->gtArrLen->gtGetOp1()->gtOper != GT_LCL_VAR)
8405 unsigned arrLcl = arrBndsChk->gtArrLen->gtGetOp1()->gtLclVarCommon.gtLclNum;
8406 if (lhsNum != BAD_VAR_NUM && arrLcl != lhsNum)
8411 unsigned indLcl = arrBndsChk->gtIndex->gtLclVarCommon.gtLclNum;
8413 GenTree* after = tree->gtGetOp2();
8415 if (after->gtOper != GT_IND)
8419 // It used to be the case that arrBndsChks for struct types would fail the previous check because
8420 // after->gtOper was an address (for a block op). In order to avoid asmDiffs we will for now
8421 // return false if the type of 'after' is a struct type. (This was causing us to clone loops
8422 // that we were not previously cloning.)
8423 // TODO-1stClassStructs: Remove this check to enable optimization of array bounds checks for struct
8425 if (varTypeIsStruct(after))
8430 GenTree* sibo = after->gtGetOp1();
8431 if (sibo->gtOper != GT_ADD)
8435 GenTree* sib = sibo->gtGetOp1();
8436 GenTree* ofs = sibo->gtGetOp2();
8437 if (ofs->gtOper != GT_CNS_INT)
8441 if (sib->gtOper != GT_ADD)
8445 GenTree* si = sib->gtGetOp2();
8446 GenTree* base = sib->gtGetOp1();
8447 if (si->gtOper != GT_LSH)
8451 if (base->OperGet() != GT_LCL_VAR || base->gtLclVarCommon.gtLclNum != arrLcl)
8455 GenTree* scale = si->gtGetOp2();
8456 GenTree* index = si->gtGetOp1();
8457 if (scale->gtOper != GT_CNS_INT)
8461 #ifdef _TARGET_AMD64_
8462 if (index->gtOper != GT_CAST)
8466 GenTree* indexVar = index->gtGetOp1();
8468 GenTree* indexVar = index;
8470 if (indexVar->gtOper != GT_LCL_VAR || indexVar->gtLclVarCommon.gtLclNum != indLcl)
8474 if (lhsNum == BAD_VAR_NUM)
8476 result->arrLcl = arrLcl;
8478 result->indLcls.Push(indLcl);
8479 result->bndsChks.Push(tree);
8480 result->useBlock = compCurBB;
8486 //---------------------------------------------------------------------------------------------------------------
8487 // optReconstructArrIndex: Reconstruct array index.
8490 // tree the tree to be checked if it is an array [][][] operation.
8491 // result the extracted GT_INDEX information.
8492 // lhsNum for the root level (function is recursive) callers should be BAD_VAR_NUM.
8495 // Returns true if array index can be extracted, else, return false. "rank" field in
8496 // "result" contains the array access depth. The "indLcls" fields contain the indices.
8499 // Recursively look for a list of array indices. In the example below, we encounter,
8500 // V03 = ((V05 = V00[V01]), (V05[V02])) which corresponds to access of V00[V01][V02]
8501 // The return value would then be:
8502 // ArrIndex result { arrLcl: V00, indLcls: [V01, V02], rank: 2 }
8504 // V00[V01][V02] would be morphed as:
8506 // [000000001B366848] ---XG------- indir int
8507 // [000000001B36BC50] ------------ V05 + (V02 << 2) + 16
8508 // [000000001B36C200] ---XG------- comma int
8509 // [000000001B36BDB8] ---X-------- arrBndsChk(V05, V02)
8510 // [000000001B36C278] -A-XG------- comma int
8511 // [000000001B366730] R--XG------- indir ref
8512 // [000000001B36C2F0] ------------ V00 + (V01 << 3) + 24
8513 // [000000001B36C818] ---XG------- comma ref
8514 // [000000001B36C458] ---X-------- arrBndsChk(V00, V01)
8515 // [000000001B36BB60] -A-XG------- = ref
8516 // [000000001B36BAE8] D------N---- lclVar ref V05 tmp2
8517 // [000000001B36A668] -A-XG------- = int
8518 // [000000001B36A5F0] D------N---- lclVar int V03 tmp0
8521 // The method extracts only if the array base and indices are GT_LCL_VAR.
8523 bool Compiler::optReconstructArrIndex(GenTree* tree, ArrIndex* result, unsigned lhsNum)
8525 // If we can extract "tree" (which is a top level comma) return.
8526 if (optExtractArrIndex(tree, result, lhsNum))
8530 // We have a comma (check if array base expr is computed in "before"), descend further.
8531 else if (tree->OperGet() == GT_COMMA)
8533 GenTree* before = tree->gtGetOp1();
8534 // "before" should evaluate an array base for the "after" indexing.
8535 if (before->OperGet() != GT_ASG)
8539 GenTree* lhs = before->gtGetOp1();
8540 GenTree* rhs = before->gtGetOp2();
8542 // "rhs" should contain an GT_INDEX
8543 if (!lhs->IsLocal() || !optReconstructArrIndex(rhs, result, lhsNum))
8547 unsigned lhsNum = lhs->gtLclVarCommon.gtLclNum;
8548 GenTree* after = tree->gtGetOp2();
8549 // Pass the "lhsNum", so we can verify if indeed it is used as the array base.
8550 return optExtractArrIndex(after, result, lhsNum);
8556 Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloningVisitor(GenTree** pTree, Compiler::fgWalkData* data)
8558 return data->compiler->optCanOptimizeByLoopCloning(*pTree, (LoopCloneVisitorInfo*)data->pCallbackData);
8561 //-------------------------------------------------------------------------
8562 // optIsStackLocalInvariant: Is stack local invariant in loop.
8565 // loopNum The loop in which the variable is tested for invariance.
8566 // lclNum The local that is tested for invariance in the loop.
8569 // Returns true if the variable is loop invariant in loopNum.
8571 bool Compiler::optIsStackLocalInvariant(unsigned loopNum, unsigned lclNum)
8573 if (lvaVarAddrExposed(lclNum))
8577 if (optIsVarAssgLoop(loopNum, lclNum))
8584 //----------------------------------------------------------------------------------------------
8585 // optCanOptimizeByLoopCloning: Check if the tree can be optimized by loop cloning and if so,
8586 // identify as potential candidate and update the loop context.
8589 // tree The tree encountered during the tree walk.
8590 // info Supplies information about the current block or stmt in which the tree is.
8591 // Also supplies the "context" pointer for updating with loop cloning
8592 // candidates. Also supplies loopNum.
8595 // If array index can be reconstructed, check if the iter var of the loop matches the
8596 // array index var in some dim. Also ensure other index vars before the identified
8597 // dim are loop invariant.
8600 // Skip sub trees if the optimization candidate is identified or else continue walking
8602 Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTree* tree, LoopCloneVisitorInfo* info)
8604 ArrIndex arrIndex(getAllocator());
8606 // Check if array index can be optimized.
8607 if (optReconstructArrIndex(tree, &arrIndex, BAD_VAR_NUM))
8609 assert(tree->gtOper == GT_COMMA);
8613 JITDUMP("Found ArrIndex at tree ");
8615 printf(" which is equivalent to: ");
8620 if (!optIsStackLocalInvariant(info->loopNum, arrIndex.arrLcl))
8622 return WALK_SKIP_SUBTREES;
8625 // Walk the dimensions and see if iterVar of the loop is used as index.
8626 for (unsigned dim = 0; dim < arrIndex.rank; ++dim)
8628 // Is the index variable also used as the loop iter var?
8629 if (arrIndex.indLcls[dim] == optLoopTable[info->loopNum].lpIterVar())
8631 // Check the previous indices are all loop invariant.
8632 for (unsigned dim2 = 0; dim2 < dim; ++dim2)
8634 if (optIsVarAssgLoop(info->loopNum, arrIndex.indLcls[dim2]))
8636 JITDUMP("V%02d is assigned in loop\n", arrIndex.indLcls[dim2]);
8637 return WALK_SKIP_SUBTREES;
8643 JITDUMP("Loop %d can be cloned for ArrIndex ", info->loopNum);
8645 JITDUMP(" on dim %d\n", dim);
8648 // Update the loop context.
8649 info->context->EnsureLoopOptInfo(info->loopNum)
8650 ->Push(new (this, CMK_LoopOpt) LcJaggedArrayOptInfo(arrIndex, dim, info->stmt));
8654 JITDUMP("Induction V%02d is not used as index on dim %d\n", optLoopTable[info->loopNum].lpIterVar(),
8658 return WALK_SKIP_SUBTREES;
8660 else if (tree->gtOper == GT_ARR_ELEM)
8662 // TODO-CQ: CLONE: Implement.
8663 return WALK_SKIP_SUBTREES;
8665 return WALK_CONTINUE;
8668 struct optRangeCheckDsc
8670 Compiler* pCompiler;
8674 Walk to make sure that only locals and constants are contained in the index
8677 Compiler::fgWalkResult Compiler::optValidRangeCheckIndex(GenTree** pTree, fgWalkData* data)
8679 GenTree* tree = *pTree;
8680 optRangeCheckDsc* pData = (optRangeCheckDsc*)data->pCallbackData;
8682 if (tree->gtOper == GT_IND || tree->gtOper == GT_CLS_VAR || tree->gtOper == GT_FIELD || tree->gtOper == GT_LCL_FLD)
8684 pData->bValidIndex = false;
8688 if (tree->gtOper == GT_LCL_VAR)
8690 if (pData->pCompiler->lvaTable[tree->gtLclVarCommon.gtLclNum].lvAddrExposed)
8692 pData->bValidIndex = false;
8697 return WALK_CONTINUE;
8701 returns true if a range check can legally be removed (for the moment it checks
8702 that the array is a local array (not subject to race conditions) and that the
8703 index is either a constant or a local
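Illustrative example (assumed source): a check for 'localArr[i]', where both 'localArr'
and 'i' are non-address-exposed locals, qualifies; a check for 'arr[obj.Field]' does not,
because the index contains an indirection.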
8705 bool Compiler::optIsRangeCheckRemovable(GenTree* tree)
8707 noway_assert(tree->gtOper == GT_ARR_BOUNDS_CHECK);
8708 GenTreeBoundsChk* bndsChk = tree->AsBoundsChk();
8709 GenTree* pArray = bndsChk->GetArray();
8710 if (pArray == nullptr && !bndsChk->gtArrLen->IsCnsIntOrI())
8714 GenTree* pIndex = bndsChk->gtIndex;
8716 // The length must be a constant (the pArray == NULL case) or the array reference must be a local.
8717 // Otherwise we can be targeted by malicious race-conditions.
8718 if (pArray != nullptr)
8720 if (pArray->gtOper != GT_LCL_VAR)
8726 printf("Can't remove range check if the array isn't referenced with a local\n");
8734 noway_assert(pArray->gtType == TYP_REF);
8735 noway_assert(pArray->gtLclVarCommon.gtLclNum < lvaCount);
8737 if (lvaTable[pArray->gtLclVarCommon.gtLclNum].lvAddrExposed)
8739 // If the array address has been taken, don't do the optimization
8740 // (this restriction can be lowered a bit, but I don't think it's worth it)
8741 CLANG_FORMAT_COMMENT_ANCHOR;
8745 printf("Can't remove range check if the array has its address taken\n");
8754 optRangeCheckDsc Data;
8755 Data.pCompiler = this;
8756 Data.bValidIndex = true;
8758 fgWalkTreePre(&pIndex, optValidRangeCheckIndex, &Data);
8760 if (!Data.bValidIndex)
8765 printf("Can't remove range check with this index");
8776 /******************************************************************************
8778 * Replace x==null with (x|x)==0 if x is a GC-type.
8779 * This will stress code-gen and the emitter to make sure they support such trees.
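 * Illustrative transformation (a sketch): a conditional such as
 *     JTRUE(EQ(lclVar ref V01, null))
 * is rewritten under this stress mode as
 *     JTRUE(EQ(OR(lclVar ref V01, <clone of V01>), 0))
 * which is semantically equivalent but forces code-gen to handle GC-typed OR/AND operands.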
8784 void Compiler::optOptimizeBoolsGcStress(BasicBlock* condBlock)
8786 if (!compStressCompile(STRESS_OPT_BOOLS_GC, 20))
8791 noway_assert(condBlock->bbJumpKind == BBJ_COND);
8792 GenTree* condStmt = condBlock->bbTreeList->gtPrev->gtStmt.gtStmtExpr;
8794 noway_assert(condStmt->gtOper == GT_JTRUE);
8799 GenTree* comparand = optIsBoolCond(condStmt, &relop, &isBool);
8801 if (comparand == nullptr || !varTypeIsGC(comparand->TypeGet()))
8806 if (comparand->gtFlags & (GTF_ASG | GTF_CALL | GTF_ORDER_SIDEEFF))
8811 GenTree* comparandClone = gtCloneExpr(comparand);
8813 // Bump up the ref-counts of any variables in 'comparandClone'
8814 compCurBB = condBlock;
8815 IncLclVarRefCountsVisitor::WalkTree(this, comparandClone);
8817 noway_assert(relop->gtOp.gtOp1 == comparand);
8818 genTreeOps oper = compStressCompile(STRESS_OPT_BOOLS_GC, 50) ? GT_OR : GT_AND;
8819 relop->gtOp.gtOp1 = gtNewOperNode(oper, TYP_I_IMPL, comparand, comparandClone);
8821 // Comparand type is already checked, and we have a const int; there is no harm
8822 // morphing it into a TYP_I_IMPL.
8823 noway_assert(relop->gtOp.gtOp2->gtOper == GT_CNS_INT);
8824 relop->gtOp.gtOp2->gtType = TYP_I_IMPL;
8829 /******************************************************************************
8830 * Function used by folding of boolean conditionals
8831 * Given a GT_JTRUE node, checks that it is a boolean comparison of the form
8832 * "if (boolVal ==/!= 0/1)". This is translated into a GT_EQ node with "op1"
8833 * being a boolean lclVar and "op2" the const 0/1.
8834 * On success, the comparand (ie. boolVal) is returned. Else NULL.
8835 * compPtr returns the compare node (i.e. GT_EQ or GT_NE node)
8836 * boolPtr returns whether the comparand is a boolean value (must be 0 or 1).
8837 * When the returned boolPtr equals true, if the comparison was against a 1 (i.e. true)
8838 * value then we morph the tree by reversing the GT_EQ/GT_NE and change the 1 to 0.
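 * Illustrative example (a sketch): for 'if (b == 1)' where 'b' is a boolean lclVar V02,
 * the comparison GT_EQ(V02, 1) is reversed to GT_NE(V02, 0); the comparand V02 is
 * returned and *boolPtr is set to true.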
8841 GenTree* Compiler::optIsBoolCond(GenTree* condBranch, GenTree** compPtr, bool* boolPtr)
8843 bool isBool = false;
8845 noway_assert(condBranch->gtOper == GT_JTRUE);
8846 GenTree* cond = condBranch->gtOp.gtOp1;
8848 /* The condition must be "!= 0" or "== 0" */
8850 if ((cond->gtOper != GT_EQ) && (cond->gtOper != GT_NE))
8855 /* Return the compare node to the caller */
8859 /* Get hold of the comparands */
8861 GenTree* opr1 = cond->gtOp.gtOp1;
8862 GenTree* opr2 = cond->gtOp.gtOp2;
8864 if (opr2->gtOper != GT_CNS_INT)
8869 if (!opr2->IsIntegralConst(0) && !opr2->IsIntegralConst(1))
8874 ssize_t ival2 = opr2->gtIntCon.gtIconVal;
8876 /* Is the value a boolean?
8877 * We can either have a boolean expression (marked GTF_BOOLEAN) or
8878 * a local variable that is marked as being boolean (lvIsBoolean) */
8880 if (opr1->gtFlags & GTF_BOOLEAN)
8884 else if ((opr1->gtOper == GT_CNS_INT) && (opr1->IsIntegralConst(0) || opr1->IsIntegralConst(1)))
8888 else if (opr1->gtOper == GT_LCL_VAR)
8890 /* is it a boolean local variable */
8892 unsigned lclNum = opr1->gtLclVarCommon.gtLclNum;
8893 noway_assert(lclNum < lvaCount);
8895 if (lvaTable[lclNum].lvIsBoolean)
8901 /* Was our comparison against the constant 1 (i.e. true)? */
8904 // If this is a boolean expression tree we can reverse the relop
8905 // and change the true to false.
8908 gtReverseCond(cond);
8909 opr2->gtIntCon.gtIconVal = 0;
8921 void Compiler::optOptimizeBools()
8926 printf("*************** In optOptimizeBools()\n");
8929 printf("Blocks/Trees before phase\n");
8930 fgDispBasicBlocks(true);
8940 for (BasicBlock* b1 = fgFirstBB; b1; b1 = b1->bbNext)
8942 /* We're only interested in conditional jumps here */
8944 if (b1->bbJumpKind != BBJ_COND)
8949 /* If there is no next block, we're done */
8951 BasicBlock* b2 = b1->bbNext;
8957 /* The next block must not be marked as BBF_DONT_REMOVE */
8958 if (b2->bbFlags & BBF_DONT_REMOVE)
8963 /* The next block must also end in a conditional jump */
8965 if (b2->bbJumpKind != BBJ_COND)
8968 optOptimizeBoolsGcStress(b1);
8973 bool sameTarget; // Do b1 and b2 have the same bbJumpDest?
8975 if (b1->bbJumpDest == b2->bbJumpDest)
8977 /* Given the following sequence of blocks :
8981 we will try to fold it to :
8982 B1: brtrue(t1|t2, BX)
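               (roughly the source-level pattern "if (t1) goto BX; if (t2) goto BX;"
                folded into "if (t1 || t2) goto BX;")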
8988 else if (b1->bbJumpDest == b2->bbNext) /*b1->bbJumpDest->bbNum == n1+2*/
8990 /* Given the following sequence of blocks :
8994 we will try to fold it to :
8995 B1: brtrue((!t1)&&t2, BX)
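               (roughly the source-level pattern "if (t1) goto B3; if (t2) goto BX; B3: ..."
                folded into "if (!t1 && t2) goto BX; B3: ...")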
9006 /* The second block must contain a single statement */
9008 GenTree* s2 = b2->bbTreeList;
9009 if (s2->gtPrev != s2)
9014 noway_assert(s2->gtOper == GT_STMT);
9015 GenTree* t2 = s2->gtStmt.gtStmtExpr;
9016 noway_assert(t2->gtOper == GT_JTRUE);
9018 /* Find the condition for the first block */
9020 GenTree* s1 = b1->bbTreeList->gtPrev;
9022 noway_assert(s1->gtOper == GT_STMT);
9023 GenTree* t1 = s1->gtStmt.gtStmtExpr;
9024 noway_assert(t1->gtOper == GT_JTRUE);
9026 if (b2->countOfInEdges() > 1)
9031 /* Find the branch conditions of b1 and b2 */
9035 GenTree* c1 = optIsBoolCond(t1, &t1, &bool1);
9041 GenTree* c2 = optIsBoolCond(t2, &t2, &bool2);
9047 noway_assert((t1->gtOper == GT_EQ || t1->gtOper == GT_NE) && (t1->gtOp.gtOp1 == c1));
9048 noway_assert((t2->gtOper == GT_EQ || t2->gtOper == GT_NE) && (t2->gtOp.gtOp1 == c2));
9050 // Leave out floats where the bit-representation is more complicated
9051 // - there are two representations for 0.
9053 if (varTypeIsFloating(c1->TypeGet()) || varTypeIsFloating(c2->TypeGet()))
9058 // Make sure the types involved are of the same sizes
9059 if (genTypeSize(c1->TypeGet()) != genTypeSize(c2->TypeGet()))
9063 if (genTypeSize(t1->TypeGet()) != genTypeSize(t2->TypeGet()))
9067 #ifdef _TARGET_ARMARCH_
9068 // Skip the small operand which we cannot encode.
9069 if (varTypeIsSmall(c1->TypeGet()))
9072 /* The second condition must not contain side effects */
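// After the fold, c2 is evaluated unconditionally as part of b1's condition (previously it
// ran only when control actually reached b2), so it must be safe and cheap to evaluate
// speculatively; hence the side-effect and cost checks that follow.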
9074 if (c2->gtFlags & GTF_GLOB_EFFECT)
9079 /* The second condition must not be too expensive */
9083 if (c2->gtCostEx > 12)
9090 var_types foldType = c1->TypeGet();
9091 if (varTypeIsGC(foldType))
9093 foldType = TYP_I_IMPL;
9098 /* Both conditions must be the same */
9100 if (t1->gtOper != t2->gtOper)
9105 if (t1->gtOper == GT_EQ)
9107 /* t1:c1==0 t2:c2==0 ==> Branch to BX if either value is 0
9108 So we will branch to BX if (c1&c2)==0 */
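               (either branch taken lands on BX; with boolean values c1 and c2,
                at least one of them is 0 exactly when (c1&c2) == 0)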
9115 /* t1:c1!=0 t2:c2!=0 ==> Branch to BX if either value is non-0
9116 So we will branch to BX if (c1|c2)!=0 */
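               (either branch taken lands on BX; at least one of c1,c2 is non-zero
                exactly when (c1|c2) != 0)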
9124 /* The b1 condition must be the reverse of the b2 condition */
9126 if (t1->gtOper == t2->gtOper)
9131 if (t1->gtOper == GT_EQ)
9133 /* t1:c1==0 t2:c2!=0 ==> Branch to BX if both values are non-0
9134 So we will branch to BX if (c1&c2)!=0 */
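               (BX is reached only when b1 falls through, i.e. c1 != 0, and b2 then
                branches, i.e. c2 != 0; for boolean values that is (c1&c2) != 0)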
9141 /* t1:c1!=0 t2:c2==0 ==> Branch to BX if both values are 0
9142 So we will branch to BX if (c1|c2)==0 */
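               (BX is reached only when b1 falls through, i.e. c1 == 0, and b2 then
                branches, i.e. c2 == 0; that is exactly (c1|c2) == 0)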
9149 // Anding requires both values to be 0 or 1
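// (e.g. with c1 == 2 and c2 == 1 both values are non-zero, yet (c1 & c2) == 0, so the
//  AND-based folds are only valid when both operands are known to be 0 or 1; the
//  OR-based folds hold for arbitrary integer values.)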
9151 if ((foldOp == GT_AND) && (!bool1 || !bool2))
9157 // Now update the trees
9159 GenTree* cmpOp1 = gtNewOperNode(foldOp, foldType, c1, c2);
9162 /* When we 'OR'/'AND' two booleans, the result is boolean as well */
9163 cmpOp1->gtFlags |= GTF_BOOLEAN;
9167 t1->gtOp.gtOp1 = cmpOp1;
9168 t1->gtOp.gtOp2->gtType = foldType; // Could have been varTypeIsGC()
9170 #if FEATURE_SET_FLAGS
9171 // For comparisons against zero we will have the GTF_SET_FLAGS set
9172 // and this can cause an assert to fire in fgMoveOpsLeft(GenTree* tree)
9173 // during the CSE phase.
9175 // So make sure to clear any GTF_SET_FLAGS bit on these operations
9176 // as they are no longer feeding directly into a comparison against zero.
9178 // Make sure that the GTF_SET_FLAGS bit is cleared.
9179 // Fix 388436 ARM JitStress WP7
9180 c1->gtFlags &= ~GTF_SET_FLAGS;
9181 c2->gtFlags &= ~GTF_SET_FLAGS;
9183 // The new top level node that we just created does feed directly into
9184 // a comparison against zero, so set the GTF_SET_FLAGS bit so that
9185 // we generate an instruction that sets the flags, which allows us
9186 // to omit the cmp with zero instruction.
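// (On a target with FEATURE_SET_FLAGS, e.g. ARM, the AND/OR can typically be emitted
//  in a flag-setting form such as ANDS/ORRS, so the separate compare-with-zero
//  instruction can be dropped.)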
9188 // Request that the codegen for cmpOp1 sets the condition flags
9189 // when it generates the code for cmpOp1.
9191 cmpOp1->gtRequestSetFlags();
9194 flowList* edge1 = fgGetPredForBlock(b1->bbJumpDest, b1);
9197 /* Modify the target of the conditional jump and update bbRefs and bbPreds */
9201 edge2 = fgGetPredForBlock(b2->bbJumpDest, b2);
9205 edge2 = fgGetPredForBlock(b2->bbNext, b2);
9207 fgRemoveRefPred(b1->bbJumpDest, b1);
9209 b1->bbJumpDest = b2->bbJumpDest;
9211 fgAddRefPred(b2->bbJumpDest, b1);
9214 noway_assert(edge1 != nullptr);
9215 noway_assert(edge2 != nullptr);
9217 BasicBlock::weight_t edgeSumMin = edge1->flEdgeWeightMin + edge2->flEdgeWeightMin;
9218 BasicBlock::weight_t edgeSumMax = edge1->flEdgeWeightMax + edge2->flEdgeWeightMax;
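// Unsigned addition: if edgeSumMax wrapped around it will be smaller than one of its
// operands, so the check below effectively detects overflow; on overflow we fall back
// to the widest possible weight range.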
9219 if ((edgeSumMax >= edge1->flEdgeWeightMax) && (edgeSumMax >= edge2->flEdgeWeightMax))
9221 edge1->flEdgeWeightMin = edgeSumMin;
9222 edge1->flEdgeWeightMax = edgeSumMax;
9226 edge1->flEdgeWeightMin = BB_ZERO_WEIGHT;
9227 edge1->flEdgeWeightMax = BB_MAX_WEIGHT;
9230 /* Get rid of the second block (which is a BBJ_COND) */
9232 noway_assert(b1->bbJumpKind == BBJ_COND);
9233 noway_assert(b2->bbJumpKind == BBJ_COND);
9234 noway_assert(b1->bbJumpDest == b2->bbJumpDest);
9235 noway_assert(b1->bbNext == b2);
9236 noway_assert(b2->bbNext);
9239 b2->bbFlags |= BBF_REMOVED;
9241 // If b2 was the last block of a try or handler, update the EH table.
9243 ehUpdateForDeletedBlock(b2);
9245 /* Update bbRefs and bbPreds */
9247 /* Replace pred 'b2' for 'b2->bbNext' with 'b1'
9248 * Remove pred 'b2' for 'b2->bbJumpDest' */
9250 fgReplacePred(b2->bbNext, b2, b1);
9252 fgRemoveRefPred(b2->bbJumpDest, b2);
9254 /* Update the block numbers and try again */
9266 // Update loop table
9267 fgUpdateLoopsAfterCompacting(b1, b2);
9272 printf("Folded %sboolean conditions of BB%02u and BB%02u to :\n", c2->OperIsLeaf() ? "" : "non-leaf ",
9273 b1->bbNum, b2->bbNum);
9282 fgDebugCheckBBlist();