1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
10 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
11 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
17 #pragma warning(disable : 4701)
20 /*****************************************************************************/
// Static counters for range-check-removal statistics: checks removed vs. total
// checks seen. NOTE(review): presumably only compiled/updated under a stats or
// debug build — the guarding #if is not visible in this chunk; confirm.
24 unsigned Compiler::optRangeChkRmv = 0;
26 unsigned Compiler::optRangeChkAll = 0;
29 /*****************************************************************************/
// optInit: reset the per-method optimizer state (loop marks, call counters,
// assertion-prop state, and CSE bookkeeping) before optimization begins.
31 void Compiler::optInit()
33 optLoopsMarked = false;
36 /* Initialize the # of tracked loops to 0 */
38 /* Keep track of the number of calls and indirect calls made by this method */
40 optIndirectCallCount = 0;
41 optNativeCallCount = 0;
42 optAssertionCount = 0;
43 optAssertionDep = nullptr;
// CSE bookkeeping: no candidates yet; UINT_MAX marks "CSE range not started".
45 optCSECandidateTotal = 0;
46 optCSEstart = UINT_MAX;
48 #endif // FEATURE_ANYCSE
// DataFlow constructor: just captures the owning Compiler instance for use by
// the data-flow utilities; no other initialization is visible here.
51 DataFlow::DataFlow(Compiler* pCompiler) : m_pCompiler(pCompiler)
55 /*****************************************************************************
// optSetBlockWeights: adjust bbWeight of basic blocks based on reachability and
// dominance. Blocks unreachable from fgFirstBB are marked run-rarely; when not
// using profile weights, a block that does not dominate every BBJ_RETURN block
// gets its weight halved. Requires dominator info (fgDomsComputed).
59 void Compiler::optSetBlockWeights()
61 noway_assert(!opts.MinOpts() && !opts.compDbgCode);
62 assert(fgDomsComputed);
68 bool firstBBdomsRets = true;
72 for (block = fgFirstBB; (block != nullptr); block = block->bbNext)
74 /* Blocks that can't be reached via the first block are rarely executed */
75 if (!fgReachable(fgFirstBB, block))
77 block->bbSetRunRarely();
80 if (block->bbWeight != BB_ZERO_WEIGHT)
82 // Calculate our bbWeight:
84 // o BB_UNITY_WEIGHT if we dominate all BBJ_RETURN blocks
85 // o otherwise BB_UNITY_WEIGHT / 2
87 bool domsRets = true; // Assume that we will dominate
89 for (BasicBlockList* retBlocks = fgReturnBlocks; retBlocks != nullptr; retBlocks = retBlocks->next)
91 if (!fgDominate(block, retBlocks->block))
98 if (block == fgFirstBB)
100 firstBBdomsRets = domsRets;
103 // If we are not using profile weight then we lower the weight
104 // of blocks that do not dominate a return block
// Only halve when the first block itself dominates the returns; otherwise the
// method's entry doesn't dominate returns and halving would be meaningless.
106 if (firstBBdomsRets && (fgIsUsingProfileWeights() == false) && (domsRets == false))
111 block->modifyBBWeight(block->bbWeight / 2);
// Halving must never drop a nonzero weight to zero.
112 noway_assert(block->bbWeight);
118 if (changed && verbose)
120 printf("\nAfter optSetBlockWeights:\n");
125 /* Check that the flowgraph data (bbNum, bbRefs, bbPreds) is up-to-date */
126 fgDebugCheckBBlist();
130 /*****************************************************************************
132 * Marks the blocks between 'begBlk' and 'endBlk' as part of a loop.
// optMarkLoopBlocks: scale up the bbWeight of every block in the lexical range
// [begBlk, endBlk] that is truly inside the loop (mutually reachable with
// begBlk). Blocks with real profile weights (BBF_PROF_WEIGHT) are left alone.
// If 'excludeEndBlk' is true, endBlk itself is not re-weighted.
135 void Compiler::optMarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk, bool excludeEndBlk)
137 /* Calculate the 'loopWeight',
138 this is the amount to increase each block in the loop
139 Our heuristic is that loops are weighted eight times more
140 than straight line code.
141 Thus we increase each block by 7 times the weight of
142 the loop header block,
143 if the loops are all properly formed gives us:
144 (assuming that BB_LOOP_WEIGHT is 8)
146 1 -- non loop basic block
147 8 -- single loop nesting
148 64 -- double loop nesting
149 512 -- triple loop nesting
153 noway_assert(begBlk->bbNum <= endBlk->bbNum);
154 noway_assert(begBlk->isLoopHead());
155 noway_assert(fgReachable(begBlk, endBlk));
160 printf("\nMarking loop L%02u", begBlk->bbLoopNum);
164 noway_assert(!opts.MinOpts());
166 /* Build list of backedges for block begBlk */
167 flowList* backedgeList = nullptr;
// A pred with bbNum >= begBlk->bbNum is a (lexically) backward edge into the
// loop head; collect all of them so each block can be tested against them.
169 for (flowList* pred = begBlk->bbPreds; pred != nullptr; pred = pred->flNext)
171 /* Is this a backedge? */
172 if (pred->flBlock->bbNum >= begBlk->bbNum)
174 flowList* flow = new (this, CMK_FlowList) flowList();
176 #if MEASURE_BLOCK_SIZE
178 genFlowNodeSize += sizeof(flowList);
179 #endif // MEASURE_BLOCK_SIZE
181 flow->flNext = backedgeList;
182 flow->flBlock = pred->flBlock;
187 /* At least one backedge must have been found (the one from endBlk) */
188 noway_assert(backedgeList);
190 BasicBlock* curBlk = begBlk;
194 noway_assert(curBlk);
196 // For curBlk to be part of a loop that starts at begBlk
197 // curBlk must be reachable from begBlk and (since this is a loop)
198 // likewise begBlk must be reachable from curBlk.
201 if (fgReachable(curBlk, begBlk) && fgReachable(begBlk, curBlk))
203 /* If this block reaches any of the backedge blocks we set reachable */
204 /* If this block dominates any of the backedge blocks we set dominates */
205 bool reachable = false;
206 bool dominates = false;
208 for (flowList* tmp = backedgeList; tmp != nullptr; tmp = tmp->flNext)
210 BasicBlock* backedge = tmp->flBlock;
// Run-rarely blocks keep their (zero) weight; no need to test them.
212 if (!curBlk->isRunRarely())
214 reachable |= fgReachable(curBlk, backedge);
215 dominates |= fgDominate(curBlk, backedge);
// Both properties established — no point scanning further backedges.
217 if (dominates && reachable)
226 noway_assert(curBlk->bbWeight > BB_ZERO_WEIGHT);
230 if ((curBlk->bbFlags & BBF_PROF_WEIGHT) != 0)
232 // We have real profile weights, so we aren't going to change this blocks weight
233 weight = curBlk->bbWeight;
// Full loop weight when the block dominates a backedge; half when it only
// reaches one (it may be conditionally executed inside the loop).
239 weight = curBlk->bbWeight * BB_LOOP_WEIGHT;
243 weight = curBlk->bbWeight * (BB_LOOP_WEIGHT / 2);
247 // The multiplication may have caused us to overflow
249 if (weight < curBlk->bbWeight)
251 // The multiplication caused us to overflow
252 weight = BB_MAX_WEIGHT;
255 // Set the new weight
257 curBlk->modifyBBWeight(weight);
262 printf("\n BB%02u(wt=%s)", curBlk->bbNum, refCntWtd2str(curBlk->getBBWeight(this)));
268 /* Stop if we've reached the last block in the loop */
270 if (curBlk == endBlk)
275 curBlk = curBlk->bbNext;
277 /* If we are excluding the endBlk then stop if we've reached endBlk */
279 if (excludeEndBlk && (curBlk == endBlk))
286 /*****************************************************************************
288 * Unmark the blocks between 'begBlk' and 'endBlk' as part of a loop.
// optUnmarkLoopBlocks: inverse of optMarkLoopBlocks — restore the bbWeight of
// blocks in [begBlk, endBlk] by dividing out BB_LOOP_WEIGHT, for a loop that
// we have decided no longer exists. Only performed when begBlk has exactly one
// BBJ_COND/BBJ_ALWAYS back edge; otherwise the weights are left as-is.
291 void Compiler::optUnmarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk)
293 /* A set of blocks that were previously marked as a loop are now
294 to be unmarked, since we have decided that for some reason this
295 loop no longer exists.
296 Basically we are just reseting the blocks bbWeight to their
300 noway_assert(begBlk->bbNum <= endBlk->bbNum);
301 noway_assert(begBlk->isLoopHead());
303 noway_assert(!opts.MinOpts());
306 unsigned backEdgeCount = 0;
// Count qualifying back edges into begBlk; more than one (or zero) means we
// cannot safely undo the marking, since other loops may still use these blocks.
308 for (flowList* pred = begBlk->bbPreds; pred != nullptr; pred = pred->flNext)
310 curBlk = pred->flBlock;
312 /* is this a backward edge? (from curBlk to begBlk) */
314 if (begBlk->bbNum > curBlk->bbNum)
319 /* We only consider back-edges that are BBJ_COND or BBJ_ALWAYS for loops */
321 if ((curBlk->bbJumpKind != BBJ_COND) && (curBlk->bbJumpKind != BBJ_ALWAYS))
329 /* Only unmark the loop blocks if we have exactly one loop back edge */
330 if (backEdgeCount != 1)
335 if (backEdgeCount > 0)
337 printf("\nNot removing loop L%02u, due to an additional back edge", begBlk->bbLoopNum);
339 else if (backEdgeCount == 0)
341 printf("\nNot removing loop L%02u, due to no back edge", begBlk->bbLoopNum);
347 noway_assert(backEdgeCount == 1);
348 noway_assert(fgReachable(begBlk, endBlk));
353 printf("\nUnmarking loop L%02u", begBlk->bbLoopNum);
360 noway_assert(curBlk);
362 // For curBlk to be part of a loop that starts at begBlk
363 // curBlk must be reachable from begBlk and (since this is a loop)
364 // likewise begBlk must be reachable from curBlk.
366 if (!curBlk->isRunRarely() && fgReachable(curBlk, begBlk) && fgReachable(begBlk, curBlk))
368 unsigned weight = curBlk->bbWeight;
370 // Don't unmark blocks that are set to BB_MAX_WEIGHT
371 // Don't unmark blocks when we are using profile weights
373 if (!curBlk->isMaxBBWeight() && ((curBlk->bbFlags & BBF_PROF_WEIGHT) == 0))
375 if (!fgDominate(curBlk, endBlk))
381 /* Merging of blocks can disturb the Dominates
382 information (see RAID #46649) */
383 if (weight < BB_LOOP_WEIGHT)
389 // We can overflow here so check for it
390 if (weight < curBlk->bbWeight)
392 weight = BB_MAX_WEIGHT;
395 assert(weight >= BB_LOOP_WEIGHT);
// Divide the loop multiplier back out to restore the pre-marking weight.
397 curBlk->modifyBBWeight(weight / BB_LOOP_WEIGHT);
403 printf("\n BB%02u(wt=%s)", curBlk->bbNum, refCntWtd2str(curBlk->getBBWeight(this)));
407 /* Stop if we've reached the last block in the loop */
409 if (curBlk == endBlk)
414 curBlk = curBlk->bbNext;
416 /* Stop if we go past the last block in the loop, as it may have been deleted */
417 if (curBlk->bbNum > endBlk->bbNum)
424 /*****************************************************************************************************
426 * Function called to update the loop table and bbWeight before removing a block
// optUpdateLoopsBeforeRemoveBlock: keep the loop table consistent when 'block'
// is about to be removed from the flowgraph. Marks loops LPFLG_REMOVED when the
// removed block was their entry/bottom (or the only path into their entry),
// clears exit info, patches lpHead, and optionally un-marks the loop's blocks.
429 void Compiler::optUpdateLoopsBeforeRemoveBlock(BasicBlock* block, bool skipUnmarkLoop)
436 noway_assert(!opts.MinOpts());
438 bool removeLoop = false;
440 /* If an unreachable block was part of a loop entry or bottom then the loop is unreachable */
441 /* Special case: the block was the head of a loop - or pointing to a loop entry */
443 for (unsigned loopNum = 0; loopNum < optLoopCount; loopNum++)
445 /* Some loops may have been already removed by
446 * loop unrolling or conditional folding */
448 if (optLoopTable[loopNum].lpFlags & LPFLG_REMOVED)
453 if (block == optLoopTable[loopNum].lpEntry || block == optLoopTable[loopNum].lpBottom)
455 optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED;
462 printf("\nUpdateLoopsBeforeRemoveBlock Before: ");
463 optPrintLoopInfo(loopNum);
467 /* If the loop is still in the table
468 * any block in the loop must be reachable !!! */
470 noway_assert(optLoopTable[loopNum].lpEntry != block);
471 noway_assert(optLoopTable[loopNum].lpBottom != block);
// If the removed block was the recorded (single) exit, the loop no longer
// has a known unique exit.
473 if (optLoopTable[loopNum].lpExit == block)
475 optLoopTable[loopNum].lpExit = nullptr;
476 optLoopTable[loopNum].lpFlags &= ~LPFLG_ONE_EXIT;
480 /* If this points to the actual entry in the loop
481 * then the whole loop may become unreachable */
// First: does the removed block jump/fall into the loop entry? Checked per
// jump kind (fall-through, direct jump, or any switch target).
483 switch (block->bbJumpKind)
486 BasicBlock** jumpTab;
490 if (block->bbNext == optLoopTable[loopNum].lpEntry)
495 if (block->bbJumpKind == BBJ_NONE)
503 noway_assert(block->bbJumpDest);
504 if (block->bbJumpDest == optLoopTable[loopNum].lpEntry)
511 jumpCnt = block->bbJumpSwt->bbsCount;
512 jumpTab = block->bbJumpSwt->bbsDstTab;
516 noway_assert(*jumpTab);
517 if ((*jumpTab) == optLoopTable[loopNum].lpEntry)
521 } while (++jumpTab, --jumpCnt);
530 /* Check if the entry has other predecessors outside the loop
531 * TODO: Replace this when predecessors are available */
// Second: scan every other block in the method for an edge into the entry
// from outside the loop's lexical range; if one exists, the loop survives.
533 BasicBlock* auxBlock;
534 for (auxBlock = fgFirstBB; auxBlock; auxBlock = auxBlock->bbNext)
536 /* Ignore blocks in the loop */
538 if (auxBlock->bbNum > optLoopTable[loopNum].lpHead->bbNum &&
539 auxBlock->bbNum <= optLoopTable[loopNum].lpBottom->bbNum)
544 switch (auxBlock->bbJumpKind)
547 BasicBlock** jumpTab;
551 if (auxBlock->bbNext == optLoopTable[loopNum].lpEntry)
556 if (auxBlock->bbJumpKind == BBJ_NONE)
564 noway_assert(auxBlock->bbJumpDest);
565 if (auxBlock->bbJumpDest == optLoopTable[loopNum].lpEntry)
572 jumpCnt = auxBlock->bbJumpSwt->bbsCount;
573 jumpTab = auxBlock->bbJumpSwt->bbsDstTab;
577 noway_assert(*jumpTab);
578 if ((*jumpTab) == optLoopTable[loopNum].lpEntry)
582 } while (++jumpTab, --jumpCnt);
592 optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED;
595 else if (optLoopTable[loopNum].lpHead == block)
597 /* The loop has a new head - Just update the loop table */
598 optLoopTable[loopNum].lpHead = block->bbPrev;
604 printf("\nUpdateLoopsBeforeRemoveBlock After: ");
605 optPrintLoopInfo(loopNum);
// Finally, if the removed block is itself the bottom of a marked loop (a
// backward BBJ_COND/BBJ_ALWAYS to a loop head) and dominator info is current,
// undo the loop's block-weight marking.
610 if ((skipUnmarkLoop == false) && ((block->bbJumpKind == BBJ_ALWAYS) || (block->bbJumpKind == BBJ_COND)) &&
611 (block->bbJumpDest->isLoopHead()) && (block->bbJumpDest->bbNum <= block->bbNum) && fgDomsComputed &&
612 (fgCurBBEpochSize == fgDomBBcount + 1) && fgReachable(block->bbJumpDest, block))
614 optUnmarkLoopBlocks(block->bbJumpDest, block);
620 /*****************************************************************************
622 * Given the beginBlock of the loop, return the index of this loop
// optFindLoopNumberFromBeginBlock: linear search of the loop table for the
// loop whose head's successor is 'begBlk'; asserts if no match is found.
626 unsigned Compiler::optFindLoopNumberFromBeginBlock(BasicBlock* begBlk)
630 for (lnum = 0; lnum < optLoopCount; lnum++)
// The begin block is the block after the loop head (head->bbNext).
632 if (optLoopTable[lnum].lpHead->bbNext == begBlk)
639 noway_assert(!"Loop number not found.");
644 /*****************************************************************************
646 * Print loop info in an uniform way.
// optPrintLoopInfo (explicit-field overload): dump one loop's structure
// (first/top/bottom/head/entry/exit blocks, exit count, parent) in a uniform
// format. Debug-only printing helper.
649 void Compiler::optPrintLoopInfo(unsigned loopInd,
654 BasicBlock* lpBottom,
655 unsigned char lpExitCnt,
659 noway_assert(lpHead);
662 // NOTE: we take "loopInd" as an argument instead of using the one
663 // stored in begBlk->bbLoopNum because sometimes begBlk->bbLoopNum
664 // has not be set correctly. For example, in optRecordLoop().
665 // However, in most of the cases, loops should have been recorded.
666 // Therefore the correct way is to call the Compiler::optPrintLoopInfo(unsigned lnum)
667 // version of this method.
669 printf("L%02u, from BB%02u", loopInd, lpFirst->bbNum);
// Only mention the top separately when it differs from the lexical first block.
670 if (lpTop != lpFirst)
672 printf(" (loop top is BB%02u)", lpTop->bbNum);
675 printf(" to BB%02u (Head=BB%02u, Entry=BB%02u, ExitCnt=%d", lpBottom->bbNum, lpHead->bbNum, lpEntry->bbNum,
680 printf(" at BB%02u", lpExit->bbNum);
683 if (parentLoop != BasicBlock::NOT_IN_LOOP)
685 printf(", parent loop = L%02u", parentLoop);
690 /*****************************************************************************
692 * Print loop information given the index of the loop in the loop table.
// optPrintLoopInfo (index overload): look the loop up in optLoopTable and
// forward all of its fields to the explicit-field overload above.
695 void Compiler::optPrintLoopInfo(unsigned lnum)
697 noway_assert(lnum < optLoopCount);
699 LoopDsc* ldsc = &optLoopTable[lnum]; // lnum is the INDEX to the loop table.
701 optPrintLoopInfo(lnum, ldsc->lpHead, ldsc->lpFirst, ldsc->lpTop, ldsc->lpEntry, ldsc->lpBottom, ldsc->lpExitCnt,
702 ldsc->lpExit, ldsc->lpParent);
707 //------------------------------------------------------------------------
708 // optPopulateInitInfo: Populate loop init info in the loop table.
711 // init - the tree that is supposed to initialize the loop iterator.
712 // iterVar - loop iteration variable.
715 // "false" if the loop table could not be populated with the loop iterVar init info.
718 // The 'init' tree is checked if its lhs is a local and rhs is either
719 // a const or a local.
// optPopulateInitInfo: record the loop-iterator initialization in the loop
// table. Accepts only "iterVar = <int const>" (LPFLG_CONST_INIT) or
// "iterVar = <lclVar>" (LPFLG_VAR_INIT); returns false otherwise.
721 bool Compiler::optPopulateInitInfo(unsigned loopInd, GenTreePtr init, unsigned iterVar)
723 // Operator should be =
724 if (init->gtOper != GT_ASG)
729 GenTreePtr lhs = init->gtOp.gtOp1;
730 GenTreePtr rhs = init->gtOp.gtOp2;
731 // LHS has to be local and should equal iterVar.
732 if (lhs->gtOper != GT_LCL_VAR || lhs->gtLclVarCommon.gtLclNum != iterVar)
737 // RHS can be constant or local var.
738 // TODO-CQ: CLONE: Add arr length for descending loops.
739 if (rhs->gtOper == GT_CNS_INT && rhs->TypeGet() == TYP_INT)
741 optLoopTable[loopInd].lpFlags |= LPFLG_CONST_INIT;
742 optLoopTable[loopInd].lpConstInit = (int)rhs->gtIntCon.gtIconVal;
744 else if (rhs->gtOper == GT_LCL_VAR)
746 optLoopTable[loopInd].lpFlags |= LPFLG_VAR_INIT;
747 optLoopTable[loopInd].lpVarInit = rhs->gtLclVarCommon.gtLclNum;
756 //----------------------------------------------------------------------------------
757 // optCheckIterInLoopTest: Check if iter var is used in loop test.
760 // test "jtrue" tree or an asg of the loop iter termination condition
761 // from/to blocks (beg, end) which are part of the loop.
762 // iterVar loop iteration variable.
763 // loopInd loop index.
766 // The test tree is parsed to check if "iterVar" matches the lhs of the condition
767 // and the rhs limit is extracted from the "test" tree. The limit information is
768 // added to the loop table.
771 // "false" if the loop table could not be populated with the loop test info or
772 // if the test condition doesn't involve iterVar.
// optCheckIterInLoopTest: verify the loop termination condition uses iterVar
// and classify/record the limit (constant, loop-invariant local, or array
// length) in the loop table. 'test' is either the JTRUE or the assignment
// whose RHS is the relop.
774 bool Compiler::optCheckIterInLoopTest(
775 unsigned loopInd, GenTreePtr test, BasicBlock* from, BasicBlock* to, unsigned iterVar)
777 // Obtain the relop from the "test" tree.
779 if (test->gtOper == GT_JTRUE)
781 relop = test->gtGetOp1();
785 assert(test->gtOper == GT_ASG);
786 relop = test->gtGetOp2();
789 noway_assert(relop->OperKind() & GTK_RELOP);
791 GenTreePtr opr1 = relop->gtOp.gtOp1;
792 GenTreePtr opr2 = relop->gtOp.gtOp2;
// iterVar may appear on either side of the comparison; the other side
// becomes the limit operand.
797 // Make sure op1 or op2 is the iterVar.
798 if (opr1->gtOper == GT_LCL_VAR && opr1->gtLclVarCommon.gtLclNum == iterVar)
803 else if (opr2->gtOper == GT_LCL_VAR && opr2->gtLclVarCommon.gtLclNum == iterVar)
// Only TYP_INT iterators are handled.
813 if (iterOp->gtType != TYP_INT)
818 // Mark the iterator node.
819 iterOp->gtFlags |= GTF_VAR_ITERATOR;
821 // Check what type of limit we have - constant, variable or arr-len.
822 if (limitOp->gtOper == GT_CNS_INT)
824 optLoopTable[loopInd].lpFlags |= LPFLG_CONST_LIMIT;
825 if ((limitOp->gtFlags & GTF_ICON_SIMD_COUNT) != 0)
827 optLoopTable[loopInd].lpFlags |= LPFLG_SIMD_LIMIT;
// A local-var limit only qualifies if it is not assigned anywhere in the loop.
830 else if (limitOp->gtOper == GT_LCL_VAR && !optIsVarAssigned(from, to, nullptr, limitOp->gtLclVarCommon.gtLclNum))
832 optLoopTable[loopInd].lpFlags |= LPFLG_VAR_LIMIT;
834 else if (limitOp->gtOper == GT_ARR_LENGTH)
836 optLoopTable[loopInd].lpFlags |= LPFLG_ARRLEN_LIMIT;
842 // Save the type of the comparison between the iterator and the limit.
843 optLoopTable[loopInd].lpTestTree = relop;
847 //----------------------------------------------------------------------------------
848 // optIsLoopIncrTree: Check if loop is a tree of form v += 1 or v = v + 1
851 // incr The incr tree to be checked. Whether incr tree is
852 // oper-equal(+=, -=...) type nodes or v=v+1 type ASG nodes.
855 // The test tree is parsed to check if "iterVar" matches the lhs of the condition
856 // and the rhs limit is extracted from the "test" tree. The limit information is
857 // added to the loop table.
860 // iterVar local num if the iterVar is found, otherwise BAD_VAR_NUM.
// optIsLoopIncrTree: return the local-var number if 'incr' is a recognized
// loop-increment tree (v += c / v = v op c with a TYP_INT constant), otherwise
// BAD_VAR_NUM.
862 unsigned Compiler::optIsLoopIncrTree(GenTreePtr incr)
865 genTreeOps updateOper;
// IsLclVarUpdateTree pattern-matches both op-assign and v = v op y forms,
// returning the updated local and filling in the RHS value and operator.
866 unsigned iterVar = incr->IsLclVarUpdateTree(&incrVal, &updateOper);
867 if (iterVar != BAD_VAR_NUM)
869 // We have v = v op y type asg node.
882 // Increment should be by a const int.
883 // TODO-CQ: CLONE: allow variable increments.
884 if ((incrVal->gtOper != GT_CNS_INT) || (incrVal->TypeGet() != TYP_INT))
893 //----------------------------------------------------------------------------------
894 // optComputeIterInfo: Check tree is loop increment of a lcl that is loop-invariant.
897 // from, to - are blocks (beg, end) which are part of the loop.
898 // incr - tree that increments the loop iterator. v+=1 or v=v+1.
899 // pIterVar - see return value.
902 // Returns true if iterVar "v" can be returned in "pIterVar", otherwise returns
906 // Check if the "incr" tree is a "v=v+1 or v+=1" type tree and make sure it is not
907 // assigned in the loop.
// optComputeIterInfo: returns true (and sets *pIterVar) if 'incr' is a valid
// loop-increment tree whose iterator local is not otherwise assigned anywhere
// in the loop blocks [from, to].
909 bool Compiler::optComputeIterInfo(GenTreePtr incr, BasicBlock* from, BasicBlock* to, unsigned* pIterVar)
912 unsigned iterVar = optIsLoopIncrTree(incr);
913 if (iterVar == BAD_VAR_NUM)
// 'incr' is excluded from the assignment scan since it necessarily assigns
// the iterator itself.
917 if (optIsVarAssigned(from, to, incr, iterVar))
919 JITDUMP("iterVar is assigned in loop\n");
927 //----------------------------------------------------------------------------------
928 // optIsLoopTestEvalIntoTemp:
929 // Pattern match if the test tree is computed into a tmp
930 // and the "tmp" is used as jump condition for loop termination.
933 // testStmt - is the JTRUE statement that is of the form: jmpTrue (Vtmp != 0)
934 // where Vtmp contains the actual loop test result.
935 // newStmt - contains the statement that is the actual test stmt involving
936 // the loop iterator.
939 // Returns true if a new test tree can be obtained.
942 // Scan if the current stmt is a jtrue with (Vtmp != 0) as condition
943 // Then returns the rhs for def of Vtmp as the "test" node.
946 // This method just retrieves what it thinks is the "test" node,
947 // the callers are expected to verify that "iterVar" is used in the test.
// optIsLoopTestEvalIntoTemp: pattern-match "Vtmp = <relop>; jmpTrue(Vtmp != 0)"
// and, on success, return (via *newTest) the statement that actually computes
// the loop test into Vtmp. Callers must still verify iterVar appears in it.
949 bool Compiler::optIsLoopTestEvalIntoTemp(GenTreePtr testStmt, GenTreePtr* newTest)
951 GenTreePtr test = testStmt->gtStmt.gtStmtExpr;
953 if (test->gtOper != GT_JTRUE)
958 GenTreePtr relop = test->gtGetOp1();
959 noway_assert(relop->OperIsCompare());
961 GenTreePtr opr1 = relop->gtOp.gtOp1;
962 GenTreePtr opr2 = relop->gtOp.gtOp2;
964 // Make sure we have jtrue (vtmp != 0)
965 if ((relop->OperGet() == GT_NE) && (opr1->OperGet() == GT_LCL_VAR) && (opr2->OperGet() == GT_CNS_INT) &&
966 opr2->IsIntegralConst(0))
968 // Get the previous statement to get the def (rhs) of Vtmp to see
969 // if the "test" is evaluated into Vtmp.
970 GenTreePtr prevStmt = testStmt->gtPrev;
971 if (prevStmt == nullptr)
976 GenTreePtr tree = prevStmt->gtStmt.gtStmtExpr;
977 if (tree->OperGet() == GT_ASG)
979 GenTreePtr lhs = tree->gtOp.gtOp1;
980 GenTreePtr rhs = tree->gtOp.gtOp2;
982 // Return as the new test node.
// The temp defined here must be the same local tested by the JTRUE.
983 if (lhs->gtOper == GT_LCL_VAR && lhs->AsLclVarCommon()->GetLclNum() == opr1->AsLclVarCommon()->GetLclNum())
985 if (rhs->OperIsCompare())
996 //----------------------------------------------------------------------------------
997 // optExtractInitTestIncr:
998 // Extract the "init", "test" and "incr" nodes of the loop.
1001 // head - Loop head block
1002 // bottom - Loop bottom block
1003 // top - Loop top block
1004 // ppInit - The init stmt of the loop if found.
1005 // ppTest - The test stmt of the loop if found.
1006 // ppIncr - The incr stmt of the loop if found.
1009 // The results are put in "ppInit", "ppTest" and "ppIncr" if the method
1010 // returns true. Returns false if the information can't be extracted.
1013 // Check if the "test" stmt is last stmt in the loop "bottom". If found good,
1014 // "test" stmt is found. Try to find the "incr" stmt. Check previous stmt of
1015 // "test" to get the "incr" stmt. If it is not found it could be a loop of the
1018 // +-------<-----------------<-----------+
1021 // BBinit(head) -> BBcond(top) -> BBLoopBody(bottom) ---^
1023 // Check if the "incr" tree is present in the loop "top" node as the last stmt.
1024 // Also check if the "test" tree is assigned to a tmp node and the tmp is used
1025 // in the jtrue condition.
1028 // This method just retrieves what it thinks is the "test" node,
1029 // the callers are expected to verify that "iterVar" is used in the test.
// optExtractInitTestIncr: locate the loop's "init", "test" and "incr"
// statements. The test is expected as the last stmt of 'bottom'; the incr is
// either the stmt before it or the last stmt of 'top'; the init is the last
// (non-compiler-added) stmt of 'head'. Returns false if any cannot be found.
1031 bool Compiler::optExtractInitTestIncr(
1032 BasicBlock* head, BasicBlock* bottom, BasicBlock* top, GenTreePtr* ppInit, GenTreePtr* ppTest, GenTreePtr* ppIncr)
1034 assert(ppInit != nullptr);
1035 assert(ppTest != nullptr);
1036 assert(ppIncr != nullptr);
1038 // Check if last two statements in the loop body are the increment of the iterator
1039 // and the loop termination test.
1040 noway_assert(bottom->bbTreeList != nullptr);
// bbTreeList->gtPrev is the last statement (the list is circular at the head).
1041 GenTreePtr test = bottom->bbTreeList->gtPrev;
1042 noway_assert(test != nullptr && test->gtNext == nullptr);
// The test may have been evaluated into a temp; if so use the defining stmt.
1045 if (optIsLoopTestEvalIntoTemp(test, &newTest))
1050 // Check if we have the incr tree before the test tree, if we don't,
1051 // check if incr is part of the loop "top".
1052 GenTreePtr incr = test->gtPrev;
1053 if (incr == nullptr || optIsLoopIncrTree(incr->gtStmt.gtStmtExpr) == BAD_VAR_NUM)
1055 if (top == nullptr || top->bbTreeList == nullptr || top->bbTreeList->gtPrev == nullptr)
1060 // If the prev stmt to loop test is not incr, then check if we have loop test evaluated into a tmp.
1061 GenTreePtr topLast = top->bbTreeList->gtPrev;
1062 if (optIsLoopIncrTree(topLast->gtStmt.gtStmtExpr) != BAD_VAR_NUM)
1072 assert(test != incr);
1074 // Find the last statement in the loop pre-header which we expect to be the initialization of
1075 // the loop iterator.
1076 GenTreePtr phdr = head->bbTreeList;
1077 if (phdr == nullptr)
1082 GenTreePtr init = phdr->gtPrev;
1083 noway_assert(init != nullptr && (init->gtNext == nullptr));
1085 // If it is a duplicated loop condition, skip it.
1086 if (init->gtFlags & GTF_STMT_CMPADD)
1088 bool doGetPrev = true;
1092 // Previous optimization passes may have inserted compiler-generated
1093 // statements other than duplicated loop conditions.
1094 doGetPrev = (init->gtPrev != nullptr);
1098 // Must be a duplicated loop condition.
1099 noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
1104 init = init->gtPrev;
1106 noway_assert(init != nullptr);
1109 noway_assert(init->gtOper == GT_STMT);
1110 noway_assert(test->gtOper == GT_STMT);
1111 noway_assert(incr->gtOper == GT_STMT);
// Hand back the underlying expressions, not the GT_STMT wrappers.
1113 *ppInit = init->gtStmt.gtStmtExpr;
1114 *ppTest = test->gtStmt.gtStmtExpr;
1115 *ppIncr = incr->gtStmt.gtStmtExpr;
1120 /*****************************************************************************
1122 * Record the loop in the loop table.
// optRecordLoop: add a natural loop (head/first/top/entry/bottom/exit blocks
// plus exit count) to optLoopTable, keeping the table sorted so that any
// contained loops follow their container. Then attempt to recognize a
// "for"-style iterator (init/test/incr) and set the LPFLG_* flags accordingly.
// Silently drops the loop if the table is full (MAX_LOOP_NUM).
1125 void Compiler::optRecordLoop(BasicBlock* head,
1131 unsigned char exitCnt)
1133 // Record this loop in the table, if there's room.
1135 assert(optLoopCount <= MAX_LOOP_NUM);
1136 if (optLoopCount == MAX_LOOP_NUM)
1139 loopOverflowThisMethod = true;
1144 // Assumed preconditions on the loop we're adding.
1145 assert(first->bbNum <= top->bbNum);
1146 assert(top->bbNum <= entry->bbNum);
1147 assert(entry->bbNum <= bottom->bbNum);
1148 assert(head->bbNum < top->bbNum || head->bbNum > bottom->bbNum);
1150 // If the new loop contains any existing ones, add it in the right place.
1151 unsigned char loopInd = optLoopCount;
1152 for (unsigned char prevPlus1 = optLoopCount; prevPlus1 > 0; prevPlus1--)
1154 unsigned char prev = prevPlus1 - 1;
1155 if (optLoopTable[prev].lpContainedBy(first, bottom))
1160 // Move up any loops if necessary.
1161 for (unsigned j = optLoopCount; j > loopInd; j--)
1163 optLoopTable[j] = optLoopTable[j - 1];
// Sanity (debug): every loop after the insertion point is either disjoint
// from, or entirely contained in, the new loop.
1167 for (unsigned i = loopInd + 1; i < optLoopCount; i++)
1169 // The loop is well-formed.
1170 assert(optLoopTable[i].lpWellFormed());
1171 // Check for disjoint.
1172 if (optLoopTable[i].lpDisjoint(first, bottom))
1176 // Otherwise, assert complete containment (of optLoopTable[i] in new loop).
1177 assert(optLoopTable[i].lpContainedBy(first, bottom));
1181 optLoopTable[loopInd].lpHead = head;
1182 optLoopTable[loopInd].lpFirst = first;
1183 optLoopTable[loopInd].lpTop = top;
1184 optLoopTable[loopInd].lpBottom = bottom;
1185 optLoopTable[loopInd].lpEntry = entry;
1186 optLoopTable[loopInd].lpExit = exit;
1187 optLoopTable[loopInd].lpExitCnt = exitCnt;
// Parent/child/sibling links are established later; start unlinked.
1189 optLoopTable[loopInd].lpParent = BasicBlock::NOT_IN_LOOP;
1190 optLoopTable[loopInd].lpChild = BasicBlock::NOT_IN_LOOP;
1191 optLoopTable[loopInd].lpSibling = BasicBlock::NOT_IN_LOOP;
1193 optLoopTable[loopInd].lpFlags = 0;
1195 // We haven't yet recorded any side effects.
1196 optLoopTable[loopInd].lpLoopHasHeapHavoc = false;
1197 optLoopTable[loopInd].lpFieldsModified = nullptr;
1198 optLoopTable[loopInd].lpArrayElemTypesModified = nullptr;
1200 // If DO-WHILE loop mark it as such.
1201 if (head->bbNext == entry)
1203 optLoopTable[loopInd].lpFlags |= LPFLG_DO_WHILE;
1206 // If single exit loop mark it as such.
1210 optLoopTable[loopInd].lpFlags |= LPFLG_ONE_EXIT;
1214 // Try to find loops that have an iterator (i.e. for-like loops) "for (init; test; incr){ ... }"
1215 // We have the following restrictions:
1216 // 1. The loop condition must be a simple one i.e. only one JTRUE node
1217 // 2. There must be a loop iterator (a local var) that is
1218 // incremented (decremented or lsh, rsh, mul) with a constant value
1219 // 3. The iterator is incremented exactly once
1220 // 4. The loop condition must use the iterator.
1222 if (bottom->bbJumpKind == BBJ_COND)
1227 if (!optExtractInitTestIncr(head, bottom, top, &init, &test, &incr))
1232 unsigned iterVar = BAD_VAR_NUM;
1233 if (!optComputeIterInfo(incr, head->bbNext, bottom, &iterVar))
1238 // Make sure the "iterVar" initialization is never skipped,
1239 // i.e. every pred of ENTRY other than HEAD is in the loop.
1240 for (flowList* predEdge = entry->bbPreds; predEdge; predEdge = predEdge->flNext)
1242 BasicBlock* predBlock = predEdge->flBlock;
1243 if ((predBlock != head) && !optLoopTable[loopInd].lpContains(predBlock))
1249 if (!optPopulateInitInfo(loopInd, init, iterVar))
1254 // Check that the iterator is used in the loop condition.
1255 if (!optCheckIterInLoopTest(loopInd, test, head->bbNext, bottom, iterVar))
1260 // We know the loop has an iterator at this point ->flag it as LPFLG_ITER
1261 // Record the iterator, the pointer to the test node
1262 // and the initial value of the iterator (constant or local var)
1263 optLoopTable[loopInd].lpFlags |= LPFLG_ITER;
1266 optLoopTable[loopInd].lpIterTree = incr;
1269 // Save the initial value of the iterator - can be lclVar or constant
1270 // Flag the loop accordingly.
1276 simpleTestLoopCount++;
1279 // Check if a constant iteration loop.
1280 if ((optLoopTable[loopInd].lpFlags & LPFLG_CONST_INIT) && (optLoopTable[loopInd].lpFlags & LPFLG_CONST_LIMIT))
1282 // This is a constant loop.
1283 optLoopTable[loopInd].lpFlags |= LPFLG_CONST;
1285 constIterLoopCount++;
// Debug dump: print the initializer and every stmt of the constant loop body
// (the incr stmt is skipped inside the walk).
1292 printf("\nConstant loop initializer:\n");
1295 printf("\nConstant loop body:\n");
1297 BasicBlock* block = head;
1300 block = block->bbNext;
1301 for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
1303 if (stmt->gtStmt.gtStmtExpr == incr)
1308 gtDispTree(stmt->gtStmt.gtStmtExpr);
1310 } while (block != bottom);
1316 DBEXEC(verbose, optPrintLoopRecording(loopInd));
1321 //------------------------------------------------------------------------
1322 // optPrintLoopRecording: Print a recording of the loop.
1325 // loopInd - loop index.
// optPrintLoopRecording: debug dump of a just-recorded loop — its block
// structure plus, for LPFLG_ITER loops, the iterator variable, its update
// operator/constant, the initial value, and the test operator/limit.
1327 void Compiler::optPrintLoopRecording(unsigned loopInd)
1329 printf("Recorded loop %s", (loopInd != optLoopCount ? "(extended) " : ""));
1330 optPrintLoopInfo(optLoopCount, // Not necessarily the loop index, but the number of loops that have been added.
1331 optLoopTable[loopInd].lpHead, optLoopTable[loopInd].lpFirst, optLoopTable[loopInd].lpTop,
1332 optLoopTable[loopInd].lpEntry, optLoopTable[loopInd].lpBottom, optLoopTable[loopInd].lpExitCnt,
1333 optLoopTable[loopInd].lpExit);
1335 // If an iterator loop print the iterator and the initialization.
1336 if (optLoopTable[loopInd].lpFlags & LPFLG_ITER)
1338 printf(" [over V%02u", optLoopTable[loopInd].lpIterVar());
1340 printf(GenTree::NodeName(optLoopTable[loopInd].lpIterOper()));
1342 printf("%d )", optLoopTable[loopInd].lpIterConst());
1344 if (optLoopTable[loopInd].lpFlags & LPFLG_CONST_INIT)
1346 printf(" from %d", optLoopTable[loopInd].lpConstInit);
1348 if (optLoopTable[loopInd].lpFlags & LPFLG_VAR_INIT)
1350 printf(" from V%02u", optLoopTable[loopInd].lpVarInit);
1353 // If a simple test condition print operator and the limits */
1354 printf(GenTree::NodeName(optLoopTable[loopInd].lpTestOper()));
1356 if (optLoopTable[loopInd].lpFlags & LPFLG_CONST_LIMIT)
1358 printf("%d ", optLoopTable[loopInd].lpConstLimit());
1361 if (optLoopTable[loopInd].lpFlags & LPFLG_VAR_LIMIT)
1363 printf("V%02u ", optLoopTable[loopInd].lpVarLimit());
// optCheckPreds: debug-only consistency check of the predecessor lists — every
// pred must be a block on the bbNext chain, and its jump kind must actually
// produce an edge to the block that lists it as a predecessor.
1372 void Compiler::optCheckPreds()
1375 BasicBlock* blockPred;
1378 for (block = fgFirstBB; block; block = block->bbNext)
1380 for (pred = block->bbPreds; pred; pred = pred->flNext)
1382 // make sure this pred is part of the BB list
1383 for (blockPred = fgFirstBB; blockPred; blockPred = blockPred->bbNext)
1385 if (blockPred == pred->flBlock)
1390 noway_assert(blockPred);
// Verify the pred's jump kind is consistent with an edge to 'block'.
1391 switch (blockPred->bbJumpKind)
1394 if (blockPred->bbJumpDest == block)
1400 noway_assert(blockPred->bbNext == block);
1402 case BBJ_EHFILTERRET:
1404 case BBJ_EHCATCHRET:
1405 noway_assert(blockPred->bbJumpDest == block);
1416 /*****************************************************************************
1417 * Find the natural loops, using dominators. Note that the test for
1418 * a loop is slightly different from the standard one, because we have
1419 * not done a depth first reordering of the basic blocks.
// optFindNaturalLoops: locate natural loops using dominator information and
// record each one in the loop table (via optRecordLoop). Afterwards, build the
// parent/child/sibling loop relations, label every block with its innermost
// enclosing loop (bbNatLoopNum), and canonicalize loops so each has a unique
// "top" block (optCanonicalizeLoopNest), updating the flow graph if needed.
// NOTE(review): portions of this function's body appear to be elided in this
// copy of the file (unbalanced braces); the comments below annotate only the
// lines that are visible.
1422 void Compiler::optFindNaturalLoops()
1427 printf("*************** In optFindNaturalLoops()\n");
1433 flowList* predEntry;
1435 noway_assert(fgDomsComputed);
1439 hasMethodLoops = false;
1440 loopsThisMethod = 0;
1441 loopOverflowThisMethod = false;
1444 /* We will use the following terminology:
1445 * HEAD - the basic block that flows into the loop ENTRY block (Currently MUST be lexically before entry).
1446 Not part of the looping of the loop.
1447 * FIRST - the lexically first basic block (in bbNext order) within this loop. (May be part of a nested loop,
1448 * but not the outer loop. ???)
1449 * TOP - the target of the backward edge from BOTTOM. In most cases FIRST and TOP are the same.
1450 * BOTTOM - the lexically last block in the loop (i.e. the block from which we jump to the top)
1451 * EXIT - the loop exit or the block right after the bottom
1452 * ENTRY - the entry in the loop (not necessarily the TOP), but there must be only one entry
1454 * We (currently) require the body of a loop to be a contiguous (in bbNext order) sequence of basic blocks.
1488 unsigned char exitCount;
// Outer scan: consider each block as a potential loop HEAD (the block just
// before the loop's lexical extent).
1490 for (head = fgFirstBB; head->bbNext; head = head->bbNext)
1496 // Blocks that are rarely run have a zero bbWeight and should
1497 // never be optimized here
1499 if (top->bbWeight == BB_ZERO_WEIGHT)
// Look through TOP's predecessors for a backward edge (BOTTOM -> TOP).
1504 for (pred = top->bbPreds; pred; pred = pred->flNext)
1506 /* Is this a loop candidate? - We look for "back edges", i.e. an edge from BOTTOM
1507 * to TOP (note that this is an abuse of notation since this is not necessarily a back edge
1508 * as the definition says, but merely an indication that we have a loop there).
1509 * Thus, we have to be very careful and after entry discovery check that it is indeed
1510 * the only place we enter the loop (especially for non-reducible flow graphs).
1513 bottom = pred->flBlock;
1516 if (top->bbNum <= bottom->bbNum) // is this a backward edge? (from BOTTOM to TOP)
1518 if ((bottom->bbJumpKind == BBJ_EHFINALLYRET) || (bottom->bbJumpKind == BBJ_EHFILTERRET) ||
1519 (bottom->bbJumpKind == BBJ_EHCATCHRET) || (bottom->bbJumpKind == BBJ_CALLFINALLY) ||
1520 (bottom->bbJumpKind == BBJ_SWITCH))
1522 /* BBJ_EHFINALLYRET, BBJ_EHFILTERRET, BBJ_EHCATCHRET, and BBJ_CALLFINALLY can never form a loop.
1523 * BBJ_SWITCH that has a backward jump appears only for labeled break. */
1527 BasicBlock* loopBlock;
1529 /* The presence of a "back edge" is an indication that a loop might be present here
1532 * 1. A collection of STRONGLY CONNECTED nodes i.e. there is a path from any
1533 * node in the loop to any other node in the loop (wholly within the loop)
1534 * 2. The loop has a unique ENTRY, i.e. there is only one way to reach a node
1535 * in the loop from outside the loop, and that is through the ENTRY
1538 /* Let's find the loop ENTRY */
1540 if (head->bbJumpKind == BBJ_ALWAYS)
1542 if (head->bbJumpDest->bbNum <= bottom->bbNum && head->bbJumpDest->bbNum >= top->bbNum)
1544 /* OK - we enter somewhere within the loop */
1545 entry = head->bbJumpDest;
1547 /* some useful asserts
1548 * Cannot enter at the top - should have been caught by redundant jumps */
1550 assert((entry != top) || (head->bbFlags & BBF_KEEP_BBJ_ALWAYS));
1554 /* special case - don't consider now */
1555 // assert (!"Loop entered in weird way!");
1559 // Can we fall through into the loop?
1560 else if (head->bbJumpKind == BBJ_NONE || head->bbJumpKind == BBJ_COND)
1562 /* The ENTRY is at the TOP (a do-while loop) */
1567 goto NO_LOOP; // head does not flow into the loop bail for now
1570 // Now we find the "first" block -- the earliest block reachable within the loop.
1571 // This is usually the same as "top", but can differ in rare cases where "top" is
1572 // the entry block of a nested loop, and that nested loop branches backwards to a
1573 // a block before "top". We find this by searching for such backwards branches
1574 // in the loop known so far.
1575 BasicBlock* first = top;
1576 BasicBlock* newFirst;
1577 bool blocksToSearch = true;
1578 BasicBlock* validatedAfter = bottom->bbNext;
// Fixed-point search: keep extending "first" while any block in the currently
// known loop range has a successor earlier than the current "first".
1579 while (blocksToSearch)
1581 blocksToSearch = false;
1583 blocksToSearch = false;
1584 for (loopBlock = first; loopBlock != validatedAfter; loopBlock = loopBlock->bbNext)
1586 unsigned nSucc = loopBlock->NumSucc();
1587 for (unsigned j = 0; j < nSucc; j++)
1589 BasicBlock* succ = loopBlock->GetSucc(j);
1590 if ((newFirst == nullptr && succ->bbNum < first->bbNum) ||
1591 (newFirst != nullptr && succ->bbNum < newFirst->bbNum))
1597 if (newFirst != nullptr)
1599 validatedAfter = first;
1601 blocksToSearch = true;
1605 // Is "head" still before "first"? If not, we don't have a valid loop...
1606 if (head->bbNum >= first->bbNum)
1609 "Extending loop [BB%02u..BB%02u] 'first' to BB%02u captures head BB%02u. Rejecting loop.\n",
1610 top->bbNum, bottom->bbNum, first->bbNum, head->bbNum);
1614 /* Make sure ENTRY dominates all blocks in the loop
1615 * This is necessary to ensure condition 2. above
1616 * At the same time check if the loop has a single exit
1617 * point - those loops are easier to optimize */
1619 for (loopBlock = top; loopBlock != bottom->bbNext; loopBlock = loopBlock->bbNext)
1621 if (!fgDominate(entry, loopBlock))
1626 if (loopBlock == bottom)
1628 if (bottom->bbJumpKind != BBJ_ALWAYS)
1630 /* there is an exit at the bottom */
1632 noway_assert(bottom->bbJumpDest == top);
1639 BasicBlock* exitPoint;
// Classify each loop block's jump kind to count/record the loop's exits.
1641 switch (loopBlock->bbJumpKind)
1644 case BBJ_CALLFINALLY:
1646 case BBJ_EHCATCHRET:
1647 assert(loopBlock->bbJumpDest);
1648 exitPoint = loopBlock->bbJumpDest;
1650 if (exitPoint->bbNum < top->bbNum || exitPoint->bbNum > bottom->bbNum)
1652 /* exit from a block other than BOTTOM */
1661 case BBJ_EHFINALLYRET:
1662 case BBJ_EHFILTERRET:
1663 /* The "try" associated with this "finally" must be in the
1664 * same loop, so the finally block will return control inside the loop */
1669 /* those are exits from the loop */
1677 jumpCnt = loopBlock->bbJumpSwt->bbsCount;
1678 BasicBlock** jumpTab;
1679 jumpTab = loopBlock->bbJumpSwt->bbsDstTab;
1683 noway_assert(*jumpTab);
1684 exitPoint = *jumpTab;
1686 if (exitPoint->bbNum < top->bbNum || exitPoint->bbNum > bottom->bbNum)
1691 } while (++jumpTab, --jumpCnt);
1695 noway_assert(!"Unexpected bbJumpKind");
1700 /* Make sure we can iterate the loop (i.e. there is a way back to ENTRY)
1701 * This is to ensure condition 1. above which prevents marking fake loops
1703 * Below is an example:
1711 * The example above is not a loop since we bail after the first iteration
1713 * The condition we have to check for is
1714 * 1. ENTRY must have at least one predecessor inside the loop. Since we know that that block is
1715 * reachable, it can only be reached through ENTRY, therefore we have a way back to ENTRY
1717 * 2. If we have a GOTO (BBJ_ALWAYS) outside of the loop and that block dominates the
1718 * loop bottom then we cannot iterate
1720 * NOTE that this doesn't entirely satisfy condition 1. since "break" statements are not
1721 * part of the loop nodes (as per definition they are loop exits executed only once),
1722 * but we have no choice but to include them because we consider all blocks within TOP-BOTTOM */
1724 for (loopBlock = top; loopBlock != bottom; loopBlock = loopBlock->bbNext)
1726 switch (loopBlock->bbJumpKind)
1731 if (fgDominate(loopBlock, bottom))
1740 bool canIterateLoop = false;
// ENTRY must have at least one in-loop predecessor (the way back around).
1742 for (predEntry = entry->bbPreds; predEntry; predEntry = predEntry->flNext)
1744 if (predEntry->flBlock->bbNum >= top->bbNum && predEntry->flBlock->bbNum <= bottom->bbNum)
1746 canIterateLoop = true;
1749 else if (predEntry->flBlock != head)
1751 // The entry block has multiple predecessors outside the loop; the 'head'
1752 // block isn't the only one. We only support a single 'head', so bail.
1757 if (!canIterateLoop)
1762 /* Double check - make sure that all loop blocks except ENTRY
1763 * have no predecessors outside the loop - this ensures only one loop entry and prevents
1764 * us from considering non-loops due to incorrectly assuming that we had a back edge
1767 * Loops of the form "while (a || b)" will be treated as 2 nested loops (with the same header)
1770 for (loopBlock = top; loopBlock != bottom->bbNext; loopBlock = loopBlock->bbNext)
1772 if (loopBlock == entry)
1777 for (predTop = loopBlock->bbPreds; predTop != nullptr; predTop = predTop->flNext)
1779 if (predTop->flBlock->bbNum < top->bbNum || predTop->flBlock->bbNum > bottom->bbNum)
1781 // noway_assert(!"Found loop with multiple entries");
1787 // Disqualify loops where the first block of the loop is less nested in EH than
1788 // the bottom block. That is, we don't want to handle loops where the back edge
1789 // goes from within an EH region to a first block that is outside that same EH
1790 // region. Note that we *do* handle loops where the first block is the *first*
1791 // block of a more nested EH region (since it is legal to branch to the first
1792 // block of an immediately more nested EH region). So, for example, disqualify
1799 // BB10 BBJ_COND => BB02
1803 // Here, BB10 is more nested than BB02.
1805 if (bottom->hasTryIndex() && !bbInTryRegions(bottom->getTryIndex(), first))
1807 JITDUMP("Loop 'first' BB%02u is in an outer EH region compared to loop 'bottom' BB%02u. Rejecting "
1809 first->bbNum, bottom->bbNum);
1813 #if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
1814 // Disqualify loops where the first block of the loop is a finally target.
1815 // The main problem is when multiple loops share a 'first' block that is a finally
1816 // target and we canonicalize the loops by adding a new loop head. In that case, we
1817 // need to update the blocks so the finally target bit is moved to the newly created
1818 // block, and removed from the old 'first' block. This is 'hard', so at this point
1819 // in the RyuJIT codebase (when we don't expect to keep the "old" ARM32 code generator
1820 // long-term), it's easier to disallow the loop than to update the flow graph to
1821 // support this case.
1823 if ((first->bbFlags & BBF_FINALLY_TARGET) != 0)
1825 JITDUMP("Loop 'first' BB%02u is a finally target. Rejecting loop.\n", first->bbNum);
1828 #endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
1830 /* At this point we have a loop - record it in the loop table
1831 * If we found only one exit, record it in the table too
1832 * (otherwise an exit = 0 in the loop table means multiple exits) */
1839 optRecordLoop(head, first, top, entry, bottom, exit, exitCount);
1842 if (!hasMethodLoops)
1844 /* mark the method as containing natural loops */
1846 hasMethodLoops = true;
1849 /* increment total number of loops found */
1853 /* keep track of the number of exits */
1854 loopExitCountTable.record(static_cast<unsigned>(exitCount));
1855 #endif // COUNT_LOOPS
1858 /* current predecessor not good for a loop - continue with another one, if any */
1864 loopCountTable.record(loopsThisMethod);
1865 if (maxLoopsPerMethod < loopsThisMethod)
1867 maxLoopsPerMethod = loopsThisMethod;
1869 if (loopOverflowThisMethod)
1871 totalLoopOverflows++;
1873 #endif // COUNT_LOOPS
1875 // Now the loop indices are stable. We can figure out parent/child relationships
1876 // (using table indices to name loops), and label blocks.
1877 for (unsigned char loopInd = 1; loopInd < optLoopCount; loopInd++)
1879 for (unsigned char possibleParent = loopInd; possibleParent > 0;)
1882 if (optLoopTable[possibleParent].lpContains(optLoopTable[loopInd]))
1884 optLoopTable[loopInd].lpParent = possibleParent;
1885 optLoopTable[loopInd].lpSibling = optLoopTable[possibleParent].lpChild;
1886 optLoopTable[possibleParent].lpChild = loopInd;
1892 // Now label the blocks with the innermost loop to which they belong. Since parents
1893 // precede children in the table, doing the labeling for each loop in order will achieve
1894 // this -- the innermost loop labeling will be done last.
1895 for (unsigned char loopInd = 0; loopInd < optLoopCount; loopInd++)
1897 BasicBlock* first = optLoopTable[loopInd].lpFirst;
1898 BasicBlock* bottom = optLoopTable[loopInd].lpBottom;
1899 for (BasicBlock* blk = first; blk != nullptr; blk = blk->bbNext)
1901 blk->bbNatLoopNum = loopInd;
1906 assert(blk->bbNext != nullptr); // We should never reach nullptr.
1910 // Make sure that loops are canonical: that every loop has a unique "top", by creating an empty "nop"
1911 // one, if necessary, for loops containing others that share a "top."
1913 for (unsigned char loopInd = 0; loopInd < optLoopCount; loopInd++)
1915 // Traverse the outermost loops as entries into the loop nest; so skip non-outermost.
1916 if (optLoopTable[loopInd].lpParent != BasicBlock::NOT_IN_LOOP)
1922 if (optCanonicalizeLoopNest(loopInd))
1929 fgUpdateChangedFlowGraph();
1933 if (verbose && optLoopCount > 0)
1935 printf("\nFinal natural loop table:\n");
1936 for (unsigned loopInd = 0; loopInd < optLoopCount; loopInd++)
1938 optPrintLoopInfo(loopInd);
// optRedirectBlock: retarget 'blk's jump destination(s) using 'redirectMap'.
// For each destination found as a key in the map, replace it with the mapped
// block. Switch targets are rewritten entry-by-entry, and the cached switch
// descriptor for 'blk' is invalidated if anything changed.
// NOTE(review): parts of this switch appear elided in this copy of the file
// (missing case labels/breaks); comments describe the visible lines only.
1945 void Compiler::optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap)
1947 BasicBlock* newJumpDest = nullptr;
1948 switch (blk->bbJumpKind)
1953 case BBJ_EHFILTERRET:
1954 case BBJ_EHFINALLYRET:
1955 case BBJ_EHCATCHRET:
1956 // These have no jump destination to update.
1961 case BBJ_CALLFINALLY:
1963 // All of these have a single jump destination to update.
1964 if (redirectMap->Lookup(blk->bbJumpDest, &newJumpDest))
1966 blk->bbJumpDest = newJumpDest;
1972 bool redirected = false;
1973 for (unsigned i = 0; i < blk->bbJumpSwt->bbsCount; i++)
1975 if (redirectMap->Lookup(blk->bbJumpSwt->bbsDstTab[i], &newJumpDest))
1977 blk->bbJumpSwt->bbsDstTab[i] = newJumpDest;
1981 // If any redirections happened, invalidate the switch table map for the switch.
1984 GetSwitchDescMap()->Remove(blk);
1994 // TODO-Cleanup: This should be a static member of the BasicBlock class.
// optCopyBlkDest: copy the jump destination(s) of 'from' onto 'to'. Both
// blocks must already share the same bbJumpKind (asserted). For switches, a
// fresh BBswtDesc and destination table are allocated for 'to' and the
// destination entries are copied element-wise, so the two blocks do not share
// a jump table.
1995 void Compiler::optCopyBlkDest(BasicBlock* from, BasicBlock* to)
1997 assert(from->bbJumpKind == to->bbJumpKind); // Precondition.
1999 // copy the jump destination(s) from "from" to "to".
2000 switch (to->bbJumpKind)
2004 case BBJ_CALLFINALLY:
2006 // All of these have a single jump destination to update.
2007 to->bbJumpDest = from->bbJumpDest;
2012 to->bbJumpSwt = new (this, CMK_BasicBlock) BBswtDesc();
2013 to->bbJumpSwt->bbsCount = from->bbJumpSwt->bbsCount;
2014 to->bbJumpSwt->bbsDstTab = new (this, CMK_BasicBlock) BasicBlock*[from->bbJumpSwt->bbsCount];
2016 for (unsigned i = 0; i < from->bbJumpSwt->bbsCount; i++)
2018 to->bbJumpSwt->bbsDstTab[i] = from->bbJumpSwt->bbsDstTab[i];
2028 // Canonicalize the loop nest rooted at parent loop 'loopInd'.
2029 // Returns 'true' if the flow graph is modified.
// optCanonicalizeLoopNest: canonicalize the loop 'loopInd' and, recursively,
// all of its child loops (walking the lpChild/lpSibling chain). A loop is
// canonicalized (given its own unique "top" block) when its lpTop is not
// uniquely labeled with this loop's number. Returns true if the flow graph
// was modified by any canonicalization in the nest.
2030 bool Compiler::optCanonicalizeLoopNest(unsigned char loopInd)
2032 bool modified = false;
2034 // Is the top of the current loop not in any nested loop?
2035 if (optLoopTable[loopInd].lpTop->bbNatLoopNum != loopInd)
2037 if (optCanonicalizeLoop(loopInd))
2043 for (unsigned char child = optLoopTable[loopInd].lpChild; child != BasicBlock::NOT_IN_LOOP;
2044 child = optLoopTable[child].lpSibling)
2046 if (optCanonicalizeLoopNest(child))
// optCanonicalizeLoop: ensure loop 'loopInd' has a unique "top" block. If the
// current top is shared with a nested loop, insert a new empty block ('newT')
// before the loop's first block, in the correct EH region, redirect the loop's
// bottom (and appropriate non-loop predecessors of the old top) to it, and
// update the loop table entries (top/first/entry/head) accordingly.
// NOTE(review): portions of this function appear elided in this copy of the
// file (unbalanced braces); comments annotate the visible logic only.
2055 bool Compiler::optCanonicalizeLoop(unsigned char loopInd)
2057 // Is the top uniquely part of the current loop?
2058 BasicBlock* t = optLoopTable[loopInd].lpTop;
2060 if (t->bbNatLoopNum == loopInd)
2065 JITDUMP("in optCanonicalizeLoop: L%02u has top BB%02u (bottom BB%02u) with natural loop number L%02u: need to "
2067 loopInd, t->bbNum, optLoopTable[loopInd].lpBottom->bbNum, t->bbNatLoopNum);
2069 // Otherwise, the top of this loop is also part of a nested loop.
2071 // Insert a new unique top for this loop. We must be careful to put this new
2072 // block in the correct EH region. Note that f->bbPrev might be in a different
2073 // EH region. For example:
2081 // In this case, first->bbPrev is BB07, which is in a different 'try' region.
2082 // On the other hand, the first block of multiple loops might be the first
2083 // block of a 'try' region that is completely contained in the multiple loops.
2088 // BB10 BBJ_ALWAYS => BB08
2090 // BB12 BBJ_ALWAYS => BB08
2092 // Here, we have two loops, both with BB08 as the "first" block. Block BB08
2093 // is a single-block "try" region. Neither loop "bottom" block is in the same
2094 // "try" region as BB08. This is legal because you can jump to the first block
2095 // of a try region. With EH normalization, no two "try" regions will share
2096 // this block. In this case, we need to insert a new block for the outer loop
2097 // in the same EH region as the branch from the "bottom":
2102 // BB10 BBJ_ALWAYS => BB08
2104 // BB12 BBJ_ALWAYS => BB30
2106 // Another possibility is that the "first" block of the loop nest can be the first block
2107 // of a "try" region that also has other predecessors than those in the loop, or even in
2108 // the "try" region (since blocks can target the first block of a "try" region). For example:
2112 // BB10 BBJ_ALWAYS => BB08
2114 // BB12 BBJ_ALWAYS => BB08
2117 // BB20 BBJ_ALWAYS => BB08
2119 // BB25 BBJ_ALWAYS => BB08
2121 // Here, BB08 has 4 flow graph predecessors: BB10, BB12, BB20, BB25. These are all potential loop
2122 // bottoms, for four possible nested loops. However, we require all the loop bottoms to be in the
2123 // same EH region. For loops BB08..BB10 and BB08..BB12, we need to add a new "top" block within
2124 // the try region, immediately before BB08. The bottom of the loop BB08..BB10 loop will target the
2125 // old BB08, and the bottom of the BB08..BB12 loop will target the new loop header. The other branches
2126 // (BB20, BB25) must target the new loop header, both for correctness, and to avoid the illegal
2127 // situation of branching to a non-first block of a 'try' region.
2129 // We can also have a loop nest where the "first" block is outside of a "try" region
2130 // and the back edges are inside a "try" region, for example:
2134 // BB09 try { BBJ_COND => BB02
2136 // BB15 BBJ_COND => BB02
2138 // BB21 } // end of "try"
2140 // In this case, both loop back edges were formed by "leave" instructions that were
2141 // imported into branches that were later made conditional. In this case, we don't
2142 // want to copy the EH region of the back edge, since that would create a block
2143 // outside of and disjoint with the "try" region of the back edge. However, to
2144 // simplify things, we disqualify this type of loop, so we should never see this here.
2146 BasicBlock* h = optLoopTable[loopInd].lpHead;
2147 BasicBlock* f = optLoopTable[loopInd].lpFirst;
2148 BasicBlock* b = optLoopTable[loopInd].lpBottom;
2150 // The loop must be entirely contained within a single handler region.
2151 assert(BasicBlock::sameHndRegion(f, b));
2153 // If the bottom block is in the same "try" region, then we extend the EH
2154 // region. Otherwise, we add the new block outside the "try" region.
2155 bool extendRegion = BasicBlock::sameTryRegion(f, b);
2156 BasicBlock* newT = fgNewBBbefore(BBJ_NONE, f, extendRegion);
2159 // We need to set the EH region manually. Set it to be the same
2160 // as the bottom block.
2161 newT->copyEHRegion(b);
// The new top is reachable from exactly the same blocks as the old top.
2164 BlockSetOps::Assign(this, newT->bbReach, t->bbReach);
2166 // Redirect the "bottom" of the current loop to "newT".
2167 BlockToBlockMap* blockMap = new (getAllocatorLoopHoist()) BlockToBlockMap(getAllocatorLoopHoist());
2168 blockMap->Set(t, newT);
2169 optRedirectBlock(b, blockMap);
2171 // Redirect non-loop preds of "t" to also go to "newT". Inner loops that also branch to "t" should continue
2172 // to do so. However, there maybe be other predecessors from outside the loop nest that need to be updated
2173 // to point to "newT". This normally wouldn't happen, since they too would be part of the loop nest. However,
2174 // they might have been prevented from participating in the loop nest due to different EH nesting, or some
2177 // Note that optRedirectBlock doesn't update the predecessors list. So, if the same 't' block is processed
2178 // multiple times while canonicalizing multiple loop nests, we'll attempt to redirect a predecessor multiple times.
2179 // This is ok, because after the first redirection, the topPredBlock branch target will no longer match the source
2180 // edge of the blockMap, so nothing will happen.
2181 for (flowList* topPred = t->bbPreds; topPred != nullptr; topPred = topPred->flNext)
2183 BasicBlock* topPredBlock = topPred->flBlock;
2185 // Skip if topPredBlock is in the loop.
2186 // Note that this uses block number to detect membership in the loop. We are adding blocks during
2187 // canonicalization, and those block numbers will be new, and larger than previous blocks. However, we work
2188 // outside-in, so we shouldn't encounter the new blocks at the loop boundaries, or in the predecessor lists.
2189 if (t->bbNum <= topPredBlock->bbNum && topPredBlock->bbNum <= b->bbNum)
2191 JITDUMP("in optCanonicalizeLoop: 'top' predecessor BB%02u is in the range of L%02u (BB%02u..BB%02u); not "
2192 "redirecting its bottom edge\n",
2193 topPredBlock->bbNum, loopInd, t->bbNum, b->bbNum)
2197 JITDUMP("in optCanonicalizeLoop: redirect top predecessor BB%02u to BB%02u\n", topPredBlock->bbNum,
2199 optRedirectBlock(topPredBlock, blockMap);
2202 assert(newT->bbNext == f);
// Give the new top an explicit jump to the old top and an empty (nop) body.
2205 newT->bbJumpKind = BBJ_ALWAYS;
2206 newT->bbJumpDest = t;
2207 newT->bbTreeList = nullptr;
2208 fgInsertStmtAtEnd(newT, fgNewStmtFromTree(gtNewOperNode(GT_NOP, TYP_VOID, nullptr)));
2211 // If it had been a do-while loop (top == entry), update entry, as well.
2212 BasicBlock* origE = optLoopTable[loopInd].lpEntry;
2213 if (optLoopTable[loopInd].lpTop == origE)
2215 optLoopTable[loopInd].lpEntry = newT;
2217 optLoopTable[loopInd].lpTop = newT;
2218 optLoopTable[loopInd].lpFirst = newT;
2220 newT->bbNatLoopNum = loopInd;
2222 JITDUMP("in optCanonicalizeLoop: made new block BB%02u [%p] the new unique top of loop %d.\n", newT->bbNum,
2223 dspPtr(newT), loopInd);
2225 // Make sure the head block still goes to the entry...
2226 if (h->bbJumpKind == BBJ_NONE && h->bbNext != optLoopTable[loopInd].lpEntry)
2228 h->bbJumpKind = BBJ_ALWAYS;
2229 h->bbJumpDest = optLoopTable[loopInd].lpEntry;
2231 else if (h->bbJumpKind == BBJ_COND && h->bbNext == newT && newT != optLoopTable[loopInd].lpEntry)
2233 BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, h, /*extendRegion*/ true);
2234 optLoopTable[loopInd].lpHead = h2;
2235 h2->bbJumpDest = optLoopTable[loopInd].lpEntry;
2236 h2->bbTreeList = nullptr;
2237 fgInsertStmtAtEnd(h2, fgNewStmtFromTree(gtNewOperNode(GT_NOP, TYP_VOID, nullptr)));
2240 // If any loops nested in "loopInd" have the same head and entry as "loopInd",
2241 // it must be the case that they were do-while's (since "h" fell through to the entry).
2242 // The new node "newT" becomes the head of such loops.
2243 for (unsigned char childLoop = optLoopTable[loopInd].lpChild; childLoop != BasicBlock::NOT_IN_LOOP;
2244 childLoop = optLoopTable[childLoop].lpSibling)
2246 if (optLoopTable[childLoop].lpEntry == origE && optLoopTable[childLoop].lpHead == h &&
2247 newT->bbJumpKind == BBJ_NONE && newT->bbNext == origE)
2249 optUpdateLoopHead(childLoop, h, newT);
// optLoopContains: return whether loop 'l1' contains loop 'l2' (by walking
// l2's lpParent chain; NOT_IN_LOOP terminates the recursion). 'l1' must be a
// valid loop index.
2255 bool Compiler::optLoopContains(unsigned l1, unsigned l2)
2257 assert(l1 != BasicBlock::NOT_IN_LOOP);
2262 else if (l2 == BasicBlock::NOT_IN_LOOP)
2268 return optLoopContains(l1, optLoopTable[l2].lpParent);
// optUpdateLoopHead: change the lpHead of loop 'loopInd' from 'from' to 'to',
// and recursively update any child loops whose head was also 'from'.
2272 void Compiler::optUpdateLoopHead(unsigned loopInd, BasicBlock* from, BasicBlock* to)
2274 assert(optLoopTable[loopInd].lpHead == from);
2275 optLoopTable[loopInd].lpHead = to;
2276 for (unsigned char childLoop = optLoopTable[loopInd].lpChild; childLoop != BasicBlock::NOT_IN_LOOP;
2277 childLoop = optLoopTable[childLoop].lpSibling)
2279 if (optLoopTable[childLoop].lpHead == from)
2281 optUpdateLoopHead(childLoop, from, to);
2286 /*****************************************************************************
2287 * If the "i += const" will cause an overflow exception for the small types.
// jitIterSmallOverflow: return whether the iterator value at loop exit
// ('iterAtExit') exceeds the maximum of the small integer type 'incrType'
// (byte/ubyte/short/ushort), i.e. whether "i += const" would wrap for that
// type. 32-bit types return false here; their overflow is checked separately.
2290 bool jitIterSmallOverflow(int iterAtExit, var_types incrType)
2297 type_MAX = SCHAR_MAX;
2300 type_MAX = UCHAR_MAX;
2303 type_MAX = SHRT_MAX;
2306 type_MAX = USHRT_MAX;
2309 case TYP_UINT: // Detected by checking for 32bit ....
2311 return false; // ... overflow same as done for TYP_INT
2317 if (iterAtExit > type_MAX)
2327 /*****************************************************************************
2328 * If the "i -= const" will cause an underflow exception for the small types
// jitIterSmallUnderflow: return whether the iterator value at loop exit
// ('iterAtExit') is below the minimum of the small signed type 'decrType',
// i.e. whether "i -= const" would wrap for that type. 32-bit types return
// false here; their underflow is checked separately.
2331 bool jitIterSmallUnderflow(int iterAtExit, var_types decrType)
2338 type_MIN = SCHAR_MIN;
2341 type_MIN = SHRT_MIN;
2350 case TYP_UINT: // Detected by checking for 32bit ....
2352 return false; // ... underflow same as done for TYP_INT
2358 if (iterAtExit < type_MIN)
2368 /*****************************************************************************
2370 * Helper for unroll loops - Computes the number of repetitions
2371 * in a constant loop. If it cannot prove the number is constant returns false
// optComputeLoopRep: helper for loop unrolling. Given a loop with a constant
// initializer, constant increment, and constant limit, compute the number of
// iterations into *iterCount and return true; return false when the trip
// count cannot be proven constant (infinite loops, wrap-around math, small-
// type overflow/underflow at exit, 32-bit overflow, etc.).
// NOTE(review): large portions of the per-test-operator switch appear elided
// in this copy of the file; comments annotate the visible lines only.
2374 bool Compiler::optComputeLoopRep(int constInit,
2377 genTreeOps iterOper,
2378 var_types iterOperType,
2379 genTreeOps testOper,
2382 unsigned* iterCount)
2384 noway_assert(genActualType(iterOperType) == TYP_INT);
2387 __int64 constLimitX;
// Widen the limit to 64 bits, zero- or sign-extending per unsignedness.
2392 // Using this, we can just do a signed comparison with other 32 bit values.
2395 constLimitX = (unsigned int)constLimit;
2399 constLimitX = (signed int)constLimit;
2402 switch (iterOperType)
2404 // For small types, the iteration operator will narrow these values if big
2406 #define INIT_ITER_BY_TYPE(type) \
2407 constInitX = (type)constInit; \
2408 iterInc = (type)iterInc;
2411 INIT_ITER_BY_TYPE(signed char);
2414 INIT_ITER_BY_TYPE(unsigned char);
2417 INIT_ITER_BY_TYPE(signed short);
2420 INIT_ITER_BY_TYPE(unsigned short);
2423 // For the big types, 32 bit arithmetic is performed
2429 constInitX = (unsigned int)constInit;
2433 constInitX = (signed int)constInit;
2438 noway_assert(!"Bad type");
2442 /* If iterInc is zero we have an infinite loop */
2448 /* Set iterSign to +1 for positive iterInc and -1 for negative iterInc */
2449 iterSign = (iterInc > 0) ? +1 : -1;
2451 /* Initialize loopCount to zero */
2454 // If dupCond is true then the loop head contains a test which skips
2455 // this loop, if the constInit does not pass the loop test
2456 // Such a loop can execute zero times.
2457 // If dupCond is false then we have a true do-while loop which we
2458 // always execute the loop once before performing the loop test
2462 constInitX += iterInc;
2465 // bail if count is based on wrap-around math
2468 if (constLimitX < constInitX)
2473 else if (constLimitX > constInitX)
2478 /* Compute the number of repetitions */
2482 __int64 iterAtExitX;
// GT_EQ test:
2485 /* something like "for (i=init; i == lim; i++)" doesn't make any sense */
// GT_NE test:
2489 /* "for (i=init; i != lim; i+=const)" - this is tricky since it may
2490 * have a constant number of iterations or loop forever -
2491 * we have to compute (lim-init) mod iterInc to see if it is zero.
2492 * If mod iterInc is not zero then the limit test will miss and a wrap will occur
2493 * which is probably not what the end user wanted, but it is legal.
2498 /* Stepping by one, i.e. Mod with 1 is always zero */
2501 if (((constLimitX - constInitX) % iterInc) != 0)
2509 noway_assert(iterInc < 0);
2510 /* Stepping by -1, i.e. Mod with 1 is always zero */
2513 if (((constInitX - constLimitX) % (-iterInc)) != 0)
2529 if (constInitX != constLimitX)
2531 loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1;
2534 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2538 iterAtExitX = (unsigned)iterAtExitX;
2541 // Check if iteration incr will cause overflow for small types
2542 if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
2547 // iterator with 32bit overflow. Bad for TYP_(U)INT
2548 if (iterAtExitX < constLimitX)
2553 *iterCount = loopCount;
2569 noway_assert(!"Unknown operator for loop iterator");
// GT_LT test ("i < lim"):
2583 if (constInitX < constLimitX)
2585 loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1;
2588 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2592 iterAtExitX = (unsigned)iterAtExitX;
2595 // Check if iteration incr will cause overflow for small types
2596 if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
2601 // iterator with 32bit overflow. Bad for TYP_(U)INT
2602 if (iterAtExitX < constLimitX)
2607 *iterCount = loopCount;
2623 noway_assert(!"Unknown operator for loop iterator");
// GT_LE test ("i <= lim"):
2637 if (constInitX <= constLimitX)
2639 loopCount += (unsigned)((constLimitX - constInitX) / iterInc) + 1;
2642 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2646 iterAtExitX = (unsigned)iterAtExitX;
2649 // Check if iteration incr will cause overflow for small types
2650 if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
2655 // iterator with 32bit overflow. Bad for TYP_(U)INT
2656 if (iterAtExitX <= constLimitX)
2661 *iterCount = loopCount;
2677 noway_assert(!"Unknown operator for loop iterator");
// GT_GT test ("i > lim"):
2691 if (constInitX > constLimitX)
2693 loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1;
2696 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2700 iterAtExitX = (unsigned)iterAtExitX;
2703 // Check if small types will underflow
2704 if (jitIterSmallUnderflow((int)iterAtExitX, iterOperType))
2709 // iterator with 32bit underflow. Bad for TYP_INT and unsigneds
2710 if (iterAtExitX > constLimitX)
2715 *iterCount = loopCount;
2731 noway_assert(!"Unknown operator for loop iterator");
// GT_GE test ("i >= lim"):
2745 if (constInitX >= constLimitX)
2747 loopCount += (unsigned)((constLimitX - constInitX) / iterInc) + 1;
2750 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2754 iterAtExitX = (unsigned)iterAtExitX;
2757 // Check if small types will underflow
2758 if (jitIterSmallUnderflow((int)iterAtExitX, iterOperType))
2763 // iterator with 32bit underflow. Bad for TYP_INT and unsigneds
2764 if (iterAtExitX >= constLimitX)
2769 *iterCount = loopCount;
2785 noway_assert(!"Unknown operator for loop iterator");
2790 noway_assert(!"Unknown operator for loop condition");
2796 /*****************************************************************************
2798 * Look for loop unrolling candidates and unroll them
2802 #pragma warning(push)
2803 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
2805 void Compiler::optUnrollLoops()
2807 if (compCodeOpt() == SMALL_CODE)
2812 if (optLoopCount == 0)
2818 if (JitConfig.JitNoUnroll())
2824 if (optCanCloneLoops())
2832 printf("*************** In optUnrollLoops()\n");
2835 /* Look for loop unrolling candidates */
2837 /* Double loop so that after unrolling an inner loop we set change to true
2838 * and we then go back over all of the loop candidates and try to unroll
2839 * the next outer loop, until we don't unroll any loops,
2840 * then change will be false and we are done.
2844 bool change = false;
2846 for (unsigned lnum = 0; lnum < optLoopCount; lnum++)
2860 int lbeg; // initial value for iterator
2861 int llim; // limit value for iterator
2862 unsigned lvar; // iterator lclVar #
2863 int iterInc; // value to increment the iterator
2864 genTreeOps iterOper; // type of iterator increment (i.e. ASG_ADD, ASG_SUB, etc.)
2865 var_types iterOperType; // type result of the oper (for overflow instrs)
2866 genTreeOps testOper; // type of loop test (i.e. GT_LE, GT_GE, etc.)
2867 bool unsTest; // Is the comparison u/int
2869 unsigned totalIter; // total number of iterations in the constant loop
2870 unsigned loopCostSz; // Cost is size of one iteration
2871 unsigned loopFlags; // actual lpFlags
2872 unsigned requiredFlags; // required lpFlags
2874 GenTree* loopList; // new stmt list of the unrolled loop
2877 static const int ITER_LIMIT[COUNT_OPT_CODE + 1] = {
2884 noway_assert(ITER_LIMIT[SMALL_CODE] == 0);
2885 noway_assert(ITER_LIMIT[COUNT_OPT_CODE] == 0);
2887 unsigned iterLimit = (unsigned)ITER_LIMIT[compCodeOpt()];
2890 if (compStressCompile(STRESS_UNROLL_LOOPS, 50))
2896 static const int UNROLL_LIMIT_SZ[COUNT_OPT_CODE + 1] = {
2903 noway_assert(UNROLL_LIMIT_SZ[SMALL_CODE] == 0);
2904 noway_assert(UNROLL_LIMIT_SZ[COUNT_OPT_CODE] == 0);
2906 int unrollLimitSz = (unsigned)UNROLL_LIMIT_SZ[compCodeOpt()];
2909 if (compStressCompile(STRESS_UNROLL_LOOPS, 50))
2911 unrollLimitSz *= 10;
2915 loopFlags = optLoopTable[lnum].lpFlags;
2916 requiredFlags = LPFLG_DO_WHILE | LPFLG_ONE_EXIT | LPFLG_CONST;
2918 /* Ignore the loop if we don't have a do-while with a single exit
2919 that has a constant number of iterations */
2921 if ((loopFlags & requiredFlags) != requiredFlags)
2926 /* ignore if removed or marked as not unrollable */
2928 if (optLoopTable[lnum].lpFlags & (LPFLG_DONT_UNROLL | LPFLG_REMOVED))
2933 head = optLoopTable[lnum].lpHead;
2935 bottom = optLoopTable[lnum].lpBottom;
2936 noway_assert(bottom);
2938 /* The single exit must be at the bottom of the loop */
2939 noway_assert(optLoopTable[lnum].lpExit);
2940 if (optLoopTable[lnum].lpExit != bottom)
2945 /* Unrolling loops with jumps in them is not worth the headache
2946 * Later we might consider unrolling loops after un-switching */
2951 block = block->bbNext;
2952 noway_assert(block);
2954 if (block->bbJumpKind != BBJ_NONE)
2956 if (block != bottom)
2961 } while (block != bottom);
2963 /* Get the loop data:
2967 - iterator increment
2968 - increment operation type (i.e. ASG_ADD, ASG_SUB, etc...)
2969 - loop test type (i.e. GT_GE, GT_LT, etc...)
2972 lbeg = optLoopTable[lnum].lpConstInit;
2973 llim = optLoopTable[lnum].lpConstLimit();
2974 testOper = optLoopTable[lnum].lpTestOper();
2976 lvar = optLoopTable[lnum].lpIterVar();
2977 iterInc = optLoopTable[lnum].lpIterConst();
2978 iterOper = optLoopTable[lnum].lpIterOper();
2980 iterOperType = optLoopTable[lnum].lpIterOperType();
2981 unsTest = (optLoopTable[lnum].lpTestTree->gtFlags & GTF_UNSIGNED) != 0;
2983 if (lvaTable[lvar].lvAddrExposed)
2984 { // If the loop iteration variable is address-exposed then bail
2987 if (lvaTable[lvar].lvIsStructField)
2988 { // If the loop iteration variable is a promoted field from a struct then
2993 /* Locate the pre-header and initialization and increment/test statements */
2995 phdr = head->bbTreeList;
2997 loop = bottom->bbTreeList;
3000 init = head->lastStmt();
3001 noway_assert(init && (init->gtNext == nullptr));
3002 test = bottom->lastStmt();
3003 noway_assert(test && (test->gtNext == nullptr));
3004 incr = test->gtPrev;
3007 if (init->gtFlags & GTF_STMT_CMPADD)
3009 /* Must be a duplicated loop condition */
3010 noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
3013 init = init->gtPrev;
3021 /* Find the number of iterations - the function returns false if not a constant number */
3023 if (!optComputeLoopRep(lbeg, llim, iterInc, iterOper, iterOperType, testOper, unsTest, dupCond, &totalIter))
3028 /* Forget it if there are too many repetitions or not a constant loop */
3030 if (totalIter > iterLimit)
3035 noway_assert(init->gtOper == GT_STMT);
3036 init = init->gtStmt.gtStmtExpr;
3037 noway_assert(test->gtOper == GT_STMT);
3038 test = test->gtStmt.gtStmtExpr;
3039 noway_assert(incr->gtOper == GT_STMT);
3040 incr = incr->gtStmt.gtStmtExpr;
3042 // Don't unroll loops we don't understand.
3043 if (incr->gtOper == GT_ASG)
3048 /* Make sure everything looks ok */
3049 if ((init->gtOper != GT_ASG) || (init->gtOp.gtOp1->gtOper != GT_LCL_VAR) ||
3050 (init->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) || (init->gtOp.gtOp2->gtOper != GT_CNS_INT) ||
3051 (init->gtOp.gtOp2->gtIntCon.gtIconVal != lbeg) ||
3053 !((incr->gtOper == GT_ASG_ADD) || (incr->gtOper == GT_ASG_SUB)) ||
3054 (incr->gtOp.gtOp1->gtOper != GT_LCL_VAR) || (incr->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) ||
3055 (incr->gtOp.gtOp2->gtOper != GT_CNS_INT) || (incr->gtOp.gtOp2->gtIntCon.gtIconVal != iterInc) ||
3057 (test->gtOper != GT_JTRUE))
3059 noway_assert(!"Bad precondition in Compiler::optUnrollLoops()");
3063 /* heuristic - Estimated cost in code size of the unrolled loop */
3071 block = block->bbNext;
3073 /* Visit all the statements in the block */
3075 for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
3077 /* Get the expression and stop if end reached */
3079 GenTreePtr expr = stmt->gtStmtExpr;
3085 /* Calculate gtCostSz */
3086 gtSetStmtInfo(stmt);
3088 /* Update loopCostSz */
3089 loopCostSz += stmt->gtCostSz;
3091 } while (block != bottom);
3093 /* Compute the estimated increase in code size for the unrolled loop */
3095 unsigned int fixedLoopCostSz;
3096 fixedLoopCostSz = 8;
3099 unrollCostSz = (loopCostSz * totalIter) - (loopCostSz + fixedLoopCostSz);
3101 /* Don't unroll if too much code duplication would result. */
3103 if (unrollCostSz > unrollLimitSz)
3105 /* prevent this loop from being revisited */
3106 optLoopTable[lnum].lpFlags |= LPFLG_DONT_UNROLL;
3110 /* Looks like a good idea to unroll this loop, let's do it! */
3111 CLANG_FORMAT_COMMENT_ANCHOR;
3116 printf("\nUnrolling loop BB%02u", head->bbNext->bbNum);
3117 if (head->bbNext->bbNum != bottom->bbNum)
3119 printf("..BB%02u", bottom->bbNum);
3121 printf(" over V%02u from %u to %u", lvar, lbeg, llim);
3122 printf(" unrollCostSz = %d\n", unrollCostSz);
3127 /* Create the unrolled loop statement list */
3129 loopList = loopLast = nullptr;
3131 for (lval = lbeg; totalIter; totalIter--)
3140 block = block->bbNext;
3141 noway_assert(block);
3143 /* Visit all the statements in the block */
3145 for (stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
3147 /* Stop if we've reached the end of the loop */
3149 if (stmt->gtStmtExpr == incr)
3154 /* Clone/substitute the expression */
3156 expr = gtCloneExpr(stmt, 0, lvar, lval);
3158 // cloneExpr doesn't handle everything
3162 optLoopTable[lnum].lpFlags |= LPFLG_DONT_UNROLL;
3166 /* Append the expression to our list */
3170 loopLast->gtNext = expr;
3177 expr->gtPrev = loopLast;
3180 } while (block != bottom);
3182 /* update the new value for the unrolled iterator */
3196 noway_assert(!"Unrolling not implemented for this loop iterator");
3200 noway_assert(!"Unknown operator for constant loop iterator");
3205 /* Finish the linked list */
3209 loopList->gtPrev = loopLast;
3210 loopLast->gtNext = nullptr;
3213 /* Replace the body with the unrolled one */
3219 block = block->bbNext;
3220 noway_assert(block);
3221 block->bbTreeList = nullptr;
3222 block->bbJumpKind = BBJ_NONE;
3223 block->bbFlags &= ~BBF_NEEDS_GCPOLL;
3224 } while (block != bottom);
3226 bottom->bbJumpKind = BBJ_NONE;
3227 bottom->bbTreeList = loopList;
3228 bottom->bbFlags &= ~BBF_NEEDS_GCPOLL;
3229 bottom->modifyBBWeight(bottom->bbWeight / BB_LOOP_WEIGHT);
3233 fgMorphStmts(bottom, &dummy, &dummy, &dummy);
3235 /* Update bbRefs and bbPreds */
3236 /* Here head->bbNext is bottom !!! - Replace it */
3238 fgRemoveRefPred(head->bbNext, bottom);
3240 /* Now change the initialization statement in the HEAD to "lvar = lval;"
3241 * (the last value of the iterator in the loop)
3242 * and drop the jump condition since the unrolled loop will always execute */
3244 init->gtOp.gtOp2->gtIntCon.gtIconVal = lval;
3246 /* if the HEAD is a BBJ_COND drop the condition (and make HEAD a BBJ_NONE block) */
3248 if (head->bbJumpKind == BBJ_COND)
3250 phdr = head->bbTreeList;
3252 test = phdr->gtPrev;
3254 noway_assert(test && (test->gtNext == nullptr));
3255 noway_assert(test->gtOper == GT_STMT);
3256 noway_assert(test->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
3258 init = test->gtPrev;
3259 noway_assert(init && (init->gtNext == test));
3260 noway_assert(init->gtOper == GT_STMT);
3262 init->gtNext = nullptr;
3263 phdr->gtPrev = init;
3264 head->bbJumpKind = BBJ_NONE;
3265 head->bbFlags &= ~BBF_NEEDS_GCPOLL;
3267 /* Update bbRefs and bbPreds */
3269 fgRemoveRefPred(head->bbJumpDest, head);
3273 /* the loop must execute */
3274 noway_assert(head->bbJumpKind == BBJ_NONE);
3280 printf("Whole unrolled loop:\n");
3282 GenTreePtr s = loopList;
3286 noway_assert(s->gtOper == GT_STMT);
3297 /* Remember that something has changed */
3301 /* Make sure to update loop table */
3303 /* Use the LPFLG_REMOVED flag and update the bbLoopMask accordingly
3304 * (also make head and bottom NULL - to hit an assert or GPF) */
3306 optLoopTable[lnum].lpFlags |= LPFLG_REMOVED;
3307 optLoopTable[lnum].lpHead = optLoopTable[lnum].lpBottom = nullptr;
3319 fgDebugCheckBBlist();
3323 #pragma warning(pop)
3326 /*****************************************************************************
3328 * Return non-zero if there is a code path from 'topBB' to 'botBB' that will
3329 * not execute a method call.
// Returns true if there may be a code path from 'topBB' down to 'botBB' that
// does not pass through a GC safe point; returns false only when a block that
// is reachable from 'topBB' AND dominates 'botBB' carries BBF_GC_SAFE_POINT.
// Conservative by design: when dominator info is unavailable it answers true.
// Precondition (asserted below): topBB precedes botBB in bbNum order.
3332 bool Compiler::optReachWithoutCall(BasicBlock* topBB, BasicBlock* botBB)
3334 // TODO-Cleanup: Currently BBF_GC_SAFE_POINT is not set for helper calls,
3335 // as some helper calls are neither interruptible nor hijackable.
3336 // When we can determine this, then we can set BBF_GC_SAFE_POINT for
3337 // those helpers too.
3339 noway_assert(topBB->bbNum <= botBB->bbNum);
3341 // We can always check topBB and botBB for any gc safe points and early out
3343 if ((topBB->bbFlags | botBB->bbFlags) & BBF_GC_SAFE_POINT)
3348 // Otherwise we will need to rely upon the dominator sets
3350 if (!fgDomsComputed)
3352 // return a conservative answer of true when we don't have the dominator sets
// Walk the lexical block range [topBB .. botBB] looking for a gc safe point
// that is guaranteed to execute on every path through the loop.
3356 BasicBlock* curBB = topBB;
3359 noway_assert(curBB);
3361 // If we added a loop pre-header block then we will
3362 // have a bbNum greater than fgLastBB, and we won't have
3363 // any dominator information about this block, so skip it.
3365 if (curBB->bbNum <= fgLastBB->bbNum)
3367 noway_assert(curBB->bbNum <= botBB->bbNum);
3369 // Does this block contain a gc safe point?
3371 if (curBB->bbFlags & BBF_GC_SAFE_POINT)
3373 // Will this block always execute on the way to botBB ?
3375 // Since we are checking every block in [topBB .. botBB] and we are using
3376 // a lexical definition of a loop.
3377 // (all that we know is that botBB is a back-edge to topBB)
3378 // Thus while walking blocks in this range we may encounter some blocks
3379 // that are not really part of the loop, and so we need to perform
3380 // some additional checks:
3382 // We will check that the current 'curBB' is reachable from 'topBB'
3383 // and that it dominates the block containing the back-edge 'botBB'
3384 // When both of these are true then we know that the gcsafe point in 'curBB'
3385 // will be encountered in the loop and we can return false
3387 if (fgDominate(curBB, botBB) && fgReachable(topBB, curBB))
3394 // If we've reached the destination block, then we're done
3403 curBB = curBB->bbNext;
3406 // If we didn't find any blocks that contained a gc safe point and
3407 // also met the fgDominate and fgReachable criteria then we must return true
3412 /*****************************************************************************
3414 * Find the loop termination test at the bottom of the loop
// Returns the loop termination test of 'bottom', i.e. the last statement in
// the block's statement list (bbTreeList). The first statement's gtPrev links
// to the list tail, so 'result' is fetched in O(1); the while-walk below only
// exists to assert that shortcut agrees with an explicit traversal to the end.
3417 static GenTreePtr optFindLoopTermTest(BasicBlock* bottom)
3419 GenTreePtr testt = bottom->bbTreeList;
3421 assert(testt && testt->gtOper == GT_STMT);
// gtPrev of the list head points at the last statement (circular prev-link).
3423 GenTreePtr result = testt->gtPrev;
// Debug-style verification: walking gtNext to the end must land on 'result'.
3426 while (testt->gtNext)
3428 testt = testt->gtNext;
3431 assert(testt == result);
3437 /*****************************************************************************
3438 * Optimize "jmp C; do{} C:while(cond);" loops to "if (cond){ do{}while(cond); }"
// Converts a "jmp test; body; test: jcc body" while-loop shape into a
// do-while by duplicating (a clone of) the loop condition at the end of
// 'block', so the loop is entered only when the condition initially holds.
// This is required by the loop hoisting logic (it wants do-while loops).
// The transformation is guarded by a code-size cost heuristic and, when
// profile data is present, by block-weight sanity checks; bTest's weight and
// outgoing edge weights are adjusted after the clone.
3441 void Compiler::fgOptWhileLoop(BasicBlock* block)
3443 noway_assert(!opts.MinOpts() && !opts.compDbgCode);
3444 noway_assert(compCodeOpt() != SMALL_CODE);
3447 Optimize while loops into do { } while loop
3448 Our loop hoisting logic requires do { } while loops.
3449 Specifically, we're looking for the following case:
3460 If we find this, and the condition is simple enough, we change
3461 the loop to the following:
3466 // else fall-through
// ---- Legality checks: bail out unless the CFG matches the expected shape.
3477 /* Does the BB end with an unconditional jump? */
3479 if (block->bbJumpKind != BBJ_ALWAYS || (block->bbFlags & BBF_KEEP_BBJ_ALWAYS))
3480 { // It can't be one of the ones we use for our exception magic
3484 // It has to be a forward jump
3485 // TODO-CQ: Check if we can also optimize the backwards jump as well.
3487 if (fgIsForwardBranch(block) == false)
3492 // Get hold of the jump target
3493 BasicBlock* bTest = block->bbJumpDest;
3495 // Does the block consist of 'jtrue(cond) block' ?
3496 if (bTest->bbJumpKind != BBJ_COND)
3501 // bTest must be a backwards jump to block->bbNext
3502 if (bTest->bbJumpDest != block->bbNext)
3507 // Since test is a BBJ_COND it will have a bbNext
3508 noway_assert(bTest->bbNext);
3510 // 'block' must be in the same try region as the condition, since we're going to insert
3511 // a duplicated condition in 'block', and the condition might include exception throwing code.
3512 if (!BasicBlock::sameTryRegion(block, bTest))
3517 // We're going to change 'block' to branch to bTest->bbNext, so that also better be in the
3518 // same try region (or no try region) to avoid generating illegal flow.
3519 BasicBlock* bTestNext = bTest->bbNext;
3520 if (bTestNext->hasTryIndex() && !BasicBlock::sameTryRegion(block, bTestNext))
3525 GenTreePtr condStmt = optFindLoopTermTest(bTest);
3527 // bTest must only contain only a jtrue with no other stmts, we will only clone
3528 // the conditional, so any other statements will not get cloned
3529 // TODO-CQ: consider cloning the whole bTest block as inserting it after block.
3531 if (bTest->bbTreeList != condStmt)
3536 /* Get to the condition node from the statement tree */
3538 noway_assert(condStmt->gtOper == GT_STMT);
3540 GenTreePtr condTree = condStmt->gtStmt.gtStmtExpr;
3541 noway_assert(condTree->gtOper == GT_JTRUE);
// Strip the JTRUE wrapper; condTree is now the relational comparison itself.
3543 condTree = condTree->gtOp.gtOp1;
3545 // The condTree has to be a RelOp comparison
3546 // TODO-CQ: Check if we can also optimize the backwards jump as well.
3548 if (condTree->OperIsCompare() == false)
// ---- Cost heuristic: estimate the size of the duplicated condition and the
// ---- expected iteration count, then decide whether duplication pays off.
3553 /* We call gtPrepareCost to measure the cost of duplicating this tree */
3555 gtPrepareCost(condTree);
3556 unsigned estDupCostSz = condTree->gtCostSz;
3558 double loopIterations = (double)BB_LOOP_WEIGHT;
3560 bool allProfileWeightsAreValid = false;
3561 BasicBlock::weight_t weightBlock = block->bbWeight;
3562 BasicBlock::weight_t weightTest = bTest->bbWeight;
3563 BasicBlock::weight_t weightNext = block->bbNext->bbWeight;
3565 // If we have profile data then we calculate the number of time
3566 // the loop will iterate into loopIterations
3567 if (fgIsUsingProfileWeights())
3569 // Only rely upon the profile weight when all three of these blocks
3570 // have good profile weights
3571 if ((block->bbFlags & BBF_PROF_WEIGHT) && (bTest->bbFlags & BBF_PROF_WEIGHT) &&
3572 (block->bbNext->bbFlags & BBF_PROF_WEIGHT))
3574 allProfileWeightsAreValid = true;
3576 // If this while loop never iterates then don't bother transforming
3577 if (weightNext == 0)
3582 // with (weightNext > 0) we should also have (weightTest >= weightBlock)
3583 // if the profile weights are all valid.
3585 // weightNext is the number of time this loop iterates
3586 // weightBlock is the number of times that we enter the while loop
3587 // loopIterations is the average number of times that this loop iterates
3589 if (weightTest >= weightBlock)
3591 loopIterations = (double)block->bbNext->bbWeight / (double)block->bbWeight;
3596 unsigned maxDupCostSz = 32;
3598 // optFastCodeOrBlendedLoop(bTest->bbWeight) does not work here as we have not
3599 // set loop weights yet
3600 if ((compCodeOpt() == FAST_CODE) || compStressCompile(STRESS_DO_WHILE_LOOPS, 30))
3605 // If this loop iterates a lot then raise the maxDupCost
3606 if (loopIterations >= 12.0)
3610 if (loopIterations >= 96.0)
3615 // If the loop condition has a shared static helper, we really want this loop converted
3616 // as not converting the loop will disable loop hoisting, meaning the shared helper will
3617 // be executed on every loop iteration.
3618 int countOfHelpers = 0;
3619 fgWalkTreePre(&condTree, CountSharedStaticHelper, &countOfHelpers);
3621 if (countOfHelpers > 0 && compCodeOpt() != SMALL_CODE)
3623 maxDupCostSz += 24 * min(countOfHelpers, (int)(loopIterations + 1.5));
3626 // If the compare has too high cost then we don't want to dup
3628 bool costIsTooHigh = (estDupCostSz > maxDupCostSz);
3633 printf("\nDuplication of loop condition [%06u] is %s, because the cost of duplication (%i) is %s than %i,"
3634 "\n loopIterations = %7.3f, countOfHelpers = %d, validProfileWeights = %s\n",
3635 condTree->gtTreeID, costIsTooHigh ? "not done" : "performed", estDupCostSz,
3636 costIsTooHigh ? "greater" : "less or equal", maxDupCostSz, loopIterations, countOfHelpers,
3637 allProfileWeightsAreValid ? "true" : "false");
// ---- Transformation: clone and reverse the condition, append it to 'block'.
3646 /* Looks good - duplicate the condition test */
3648 condTree->gtFlags |= GTF_RELOP_ZTT;
3650 condTree = gtCloneExpr(condTree);
// Reverse the sense of the clone: 'block' must branch PAST the loop
// (to bTest->bbNext) when the original loop condition is false.
3651 gtReverseCond(condTree);
3653 // Make sure clone expr copied the flag
3654 assert(condTree->gtFlags & GTF_RELOP_ZTT);
3656 condTree = gtNewOperNode(GT_JTRUE, TYP_VOID, condTree);
3658 /* Create a statement entry out of the condition and
3659 append the condition test at the end of 'block' */
3661 GenTreePtr copyOfCondStmt = fgInsertStmtAtEnd(block, condTree);
// GTF_STMT_CMPADD marks this as a compiler-added (duplicated) condition.
3663 copyOfCondStmt->gtFlags |= GTF_STMT_CMPADD;
3665 if (opts.compDbgInfo)
3667 copyOfCondStmt->gtStmt.gtStmtILoffsx = condStmt->gtStmt.gtStmtILoffsx;
3670 // Flag the block that received the copy as potentially having an array/vtable
3671 // reference if the block copied from did; this is a conservative guess.
3672 if (auto copyFlags = bTest->bbFlags & (BBF_HAS_VTABREF | BBF_HAS_IDX_LEN))
3674 block->bbFlags |= copyFlags;
3677 // If we have profile data for all blocks and we know that we are cloning the
3678 // bTest block into block and thus changing the control flow from block so
3679 // that it no longer goes directly to bTest anymore, we have to adjust the
3680 // weight of bTest by subtracting out the weight of block.
3682 if (allProfileWeightsAreValid)
3685 // Some additional sanity checks before adjusting the weight of bTest
3687 if ((weightNext > 0) && (weightTest >= weightBlock) && (weightTest != BB_MAX_WEIGHT))
3689 // Get the two edge that flow out of bTest
3690 flowList* edgeToNext = fgGetPredForBlock(bTest->bbNext, bTest);
3691 flowList* edgeToJump = fgGetPredForBlock(bTest->bbJumpDest, bTest);
3693 // Calculate the new weight for block bTest
3695 BasicBlock::weight_t newWeightTest =
3696 (weightTest > weightBlock) ? (weightTest - weightBlock) : BB_ZERO_WEIGHT;
3697 bTest->bbWeight = newWeightTest;
3699 if (newWeightTest == BB_ZERO_WEIGHT)
3701 bTest->bbFlags |= BBF_RUN_RARELY;
3702 // All out edge weights are set to zero
3703 edgeToNext->flEdgeWeightMin = BB_ZERO_WEIGHT;
3704 edgeToNext->flEdgeWeightMax = BB_ZERO_WEIGHT;
3705 edgeToJump->flEdgeWeightMin = BB_ZERO_WEIGHT;
3706 edgeToJump->flEdgeWeightMax = BB_ZERO_WEIGHT;
3710 // Update the our edge weights
3711 edgeToNext->flEdgeWeightMin = BB_ZERO_WEIGHT;
3712 edgeToNext->flEdgeWeightMax = min(edgeToNext->flEdgeWeightMax, newWeightTest);
3713 edgeToJump->flEdgeWeightMin = BB_ZERO_WEIGHT;
3714 edgeToJump->flEdgeWeightMax = min(edgeToJump->flEdgeWeightMax, newWeightTest);
// ---- Re-wire the CFG: 'block' now conditionally skips the loop entirely.
3719 /* Change the block to end with a conditional jump */
3721 block->bbJumpKind = BBJ_COND;
3722 block->bbJumpDest = bTest->bbNext;
3724 /* Mark the jump dest block as being a jump target */
3725 block->bbJumpDest->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
3727 /* Update bbRefs and bbPreds for 'block->bbNext' 'bTest' and 'bTest->bbNext' */
3729 fgAddRefPred(block->bbNext, block);
3731 fgRemoveRefPred(bTest, block);
3732 fgAddRefPred(bTest->bbNext, block);
3737 printf("\nDuplicating loop condition in BB%02u for loop (BB%02u - BB%02u)", block->bbNum, block->bbNext->bbNum,
3739 printf("\nEstimated code size expansion is %d\n ", estDupCostSz);
3741 gtDispTree(copyOfCondStmt);
3747 /*****************************************************************************
3749 * Optimize the BasicBlock layout of the method
// Optimizes the BasicBlock layout of the method: runs fgOptWhileLoop over
// every block (converting while-loops to do-while form, except under
// SMALL_CODE), then recomputes edge weights if the flowgraph was modified
// and finally updates/reorders the flowgraph via fgUpdateFlowGraph.
3752 void Compiler::optOptimizeLayout()
3754 noway_assert(!opts.MinOpts() && !opts.compDbgCode);
3759 printf("*************** In optOptimizeLayout()\n");
3763 /* Check that the flowgraph data (bbNum, bbRefs, bbPreds) is up-to-date */
3764 fgDebugCheckBBlist();
3767 noway_assert(fgModified == false);
3769 for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
3771 /* Make sure the appropriate fields are initialized */
3773 if (block->bbWeight == BB_ZERO_WEIGHT)
3775 /* Zero weighted block can't have a LOOP_HEAD flag */
3776 noway_assert(block->isLoopHead() == false);
// bbLoopNum is assigned later (in optOptimizeLoops); it must still be clear.
3780 assert(block->bbLoopNum == 0);
3782 if (compCodeOpt() != SMALL_CODE)
3784 /* Optimize "while(cond){}" loops to "cond; do{}while(cond);" */
3786 fgOptWhileLoop(block);
3792 // Recompute the edge weight if we have modified the flow graph in fgOptWhileLoop
3793 fgComputeEdgeWeights();
3796 fgUpdateFlowGraph(true);
3798 fgUpdateFlowGraph();
3801 /*****************************************************************************
3803 * Perform loop inversion, find and classify natural loops
// Performs loop recognition and marking: sets block weights, finds natural
// loops (optFindNaturalLoops), then scans the flowgraph for back-edges to
// identify additional (lexical) loops, numbering each loop header via
// bbLoopNum and weighting its blocks via optMarkLoopBlocks. Sets
// optLoopsMarked when done. At most 255 loops are tracked (bbLoopNum is a
// byte); overflow is counted in totalUnnatLoopOverflows.
3806 void Compiler::optOptimizeLoops()
3808 noway_assert(!opts.MinOpts() && !opts.compDbgCode);
3813 printf("*************** In optOptimizeLoops()\n");
3817 optSetBlockWeights();
3819 /* Were there any loops in the flow graph? */
3823 /* now that we have dominator information we can find loops */
3825 optFindNaturalLoops();
3827 unsigned loopNum = 0;
3829 /* Iterate over the flow graph, marking all loops */
3831 /* We will use the following terminology:
3832 * top - the first basic block in the loop (i.e. the head of the backward edge)
3833 * bottom - the last block in the loop (i.e. the block from which we jump to the top)
3834 * lastBottom - used when we have multiple back-edges to the same top
3841 for (top = fgFirstBB; top; top = top->bbNext)
3843 BasicBlock* foundBottom = nullptr;
3845 for (pred = top->bbPreds; pred; pred = pred->flNext)
3847 /* Is this a loop candidate? - We look for "back edges" */
3849 BasicBlock* bottom = pred->flBlock;
3851 /* is this a backward edge? (from BOTTOM to TOP) */
3853 if (top->bbNum > bottom->bbNum)
3858 /* 'top' also must have the BBF_LOOP_HEAD flag set */
3860 if (top->isLoopHead() == false)
3865 /* We only consider back-edges that are BBJ_COND or BBJ_ALWAYS for loops */
3867 if ((bottom->bbJumpKind != BBJ_COND) && (bottom->bbJumpKind != BBJ_ALWAYS))
3872 /* the top block must be able to reach the bottom block */
3873 if (!fgReachable(top, bottom))
3878 /* Found a new loop, record the longest backedge in foundBottom */
3880 if ((foundBottom == nullptr) || (bottom->bbNum > foundBottom->bbNum))
3882 foundBottom = bottom;
3890 /* Mark the loop header as such */
3891 assert(FitsIn<unsigned char>(loopNum));
3892 top->bbLoopNum = (unsigned char)loopNum;
3895 /* Mark all blocks between 'top' and 'bottom' */
3897 optMarkLoopBlocks(top, foundBottom, false);
3900 // We track at most 255 loops
3904 totalUnnatLoopOverflows++;
3911 totalUnnatLoopCount += loopNum;
3919 printf("\nFound a total of %d loops.", loopNum);
3920 printf("\nAfter loop weight marking:\n");
3921 fgDispBasicBlocks();
3926 optLoopsMarked = true;
3930 //------------------------------------------------------------------------
3931 // optDeriveLoopCloningConditions: Derive loop cloning conditions.
3934 // loopNum - the current loop index for which conditions are derived.
3935 // context - data structure where all loop cloning info is kept.
3938 // "false" if conditions cannot be obtained. "true" otherwise.
3939 // The cloning conditions are updated in the "conditions"[loopNum] field
3940 // of the "context" parameter.
3943 // Inspect the loop cloning optimization candidates and populate the conditions necessary
3944 // for each optimization candidate. Checks if the loop stride is "> 0" if the loop
3945 // condition is "less than". If the initializer is "var" init then adds condition
3946 // "var >= 0", and if the loop is var limit then, "var >= 0" and "var <= a.len"
3947 // are added to "context". These conditions are checked in the pre-header block
3948 // and the cloning choice is made.
3951 // Callers should assume AND operation is used i.e., if all conditions are
3952 // true, then take the fast path.
//------------------------------------------------------------------------
// optDeriveLoopCloningConditions: Derive the conditions guarding the fast
// path of a cloned loop and push them into context->EnsureConditions(loopNum).
// For a "< limit" loop: validates stride > 0 and init >= 0 (const init) or
// pushes "initVar >= 0" (var init); validates/forms the limit identifier
// (const, var, or array-length) pushing "limit >= 0" or a deref requirement;
// then for every optimization candidate pushes "limit <= arr.len" style
// conditions. Returns false when any piece cannot be derived.
3954 bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext* context)
3956 JITDUMP("------------------------------------------------------------\n");
3957 JITDUMP("Deriving cloning conditions for L%02u\n", loopNum);
3959 LoopDsc* loop = &optLoopTable[loopNum];
3960 ExpandArrayStack<LcOptInfo*>* optInfos = context->GetLoopOptInfo(loopNum);
// Only "iterVar < limit" loops are handled; other test opers bail out.
3962 if (loop->lpTestOper() == GT_LT)
3964 // Stride conditions
3965 if (loop->lpIterConst() <= 0)
3967 JITDUMP("> Stride %d is invalid\n", loop->lpIterConst());
// ---- Initializer conditions: constant init must be >= 0; variable init
// ---- gets a runtime "initVar >= 0" condition pushed instead.
3972 if (loop->lpFlags & LPFLG_CONST_INIT)
3974 // Only allowing const init at this time.
3975 if (loop->lpConstInit < 0)
3977 JITDUMP("> Init %d is invalid\n", loop->lpConstInit);
3981 else if (loop->lpFlags & LPFLG_VAR_INIT)
// "initVar >= 0" — checked at runtime in the cloning pre-header.
3984 LC_Condition geZero(GT_GE, LC_Expr(LC_Ident(loop->lpVarInit, LC_Ident::Var)),
3985 LC_Expr(LC_Ident(0, LC_Ident::Const)));
3986 context->EnsureConditions(loopNum)->Push(geZero);
3990 JITDUMP("> Not variable init\n");
// ---- Limit conditions: build 'ident' representing the loop limit.
3996 if (loop->lpFlags & LPFLG_CONST_LIMIT)
3998 int limit = loop->lpConstLimit();
4001 JITDUMP("> limit %d is invalid\n", limit);
4004 ident = LC_Ident(limit, LC_Ident::Const);
4006 else if (loop->lpFlags & LPFLG_VAR_LIMIT)
4008 unsigned limitLcl = loop->lpVarLimit();
4009 ident = LC_Ident(limitLcl, LC_Ident::Var);
// Variable limit also needs a runtime "limitVar >= 0" condition.
4011 LC_Condition geZero(GT_GE, LC_Expr(ident), LC_Expr(LC_Ident(0, LC_Ident::Const)));
4013 context->EnsureConditions(loopNum)->Push(geZero);
4015 else if (loop->lpFlags & LPFLG_ARRLEN_LIMIT)
4017 ArrIndex* index = new (getAllocator()) ArrIndex(getAllocator());
4018 if (!loop->lpArrLenLimit(this, index))
4020 JITDUMP("> ArrLen not matching");
4023 ident = LC_Ident(LC_Array(LC_Array::Jagged, index, LC_Array::ArrLen));
4025 // Ensure that this array must be dereference-able, before executing the actual condition.
4026 LC_Array array(LC_Array::Jagged, index, LC_Array::None);
4027 context->EnsureDerefs(loopNum)->Push(array);
4031 JITDUMP("> Undetected limit\n");
// ---- Per-candidate conditions: "limit <= candidate array length".
4035 for (unsigned i = 0; i < optInfos->Size(); ++i)
4037 LcOptInfo* optInfo = optInfos->GetRef(i);
4038 switch (optInfo->GetOptType())
4040 case LcOptInfo::LcJaggedArray:
4043 LcJaggedArrayOptInfo* arrIndexInfo = optInfo->AsLcJaggedArrayOptInfo();
4044 LC_Array arrLen(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::ArrLen);
4045 LC_Ident arrLenIdent = LC_Ident(arrLen);
4047 LC_Condition cond(GT_LE, LC_Expr(ident), LC_Expr(arrLenIdent));
4048 context->EnsureConditions(loopNum)->Push(cond);
4050 // Ensure that this array must be dereference-able, before executing the actual condition.
4051 LC_Array array(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::None);
4052 context->EnsureDerefs(loopNum)->Push(array);
4055 case LcOptInfo::LcMdArray:
4057 // limit <= mdArrLen
4058 LcMdArrayOptInfo* mdArrInfo = optInfo->AsLcMdArrayOptInfo();
4059 LC_Condition cond(GT_LE, LC_Expr(ident),
4060 LC_Expr(LC_Ident(LC_Array(LC_Array::MdArray,
4061 mdArrInfo->GetArrIndexForDim(getAllocator()),
4062 mdArrInfo->dim, LC_Array::None))));
4063 context->EnsureConditions(loopNum)->Push(cond);
4068 JITDUMP("Unknown opt\n");
4072 JITDUMP("Conditions: (");
4073 DBEXEC(verbose, context->PrintConditions(loopNum));
4080 //------------------------------------------------------------------------------------
4081 // optComputeDerefConditions: Derive loop cloning conditions for dereferencing arrays.
4084 // loopNum - the current loop index for which conditions are derived.
4085 // context - data structure where all loop cloning info is kept.
4088 // "false" if conditions cannot be obtained. "true" otherwise.
4089 // The deref conditions are updated in the "derefConditions"[loopNum] field
4090 // of the "context" parameter.
4092 // Definition of Deref Conditions:
4093 // To be able to check for the loop cloning condition that (limitVar <= a.len)
4094 // we should first be able to dereference "a". i.e., "a" is non-null.
4100 // for (k in 0..n) // Inner most loop is being cloned. Cloning needs to check if
4101 // // (n <= a[i][j].len) and other safer conditions to take the fast path
4104 // Now, we want to deref a[i][j] to invoke length operator on it to perform the cloning fast path check.
4105 // This involves deref of (a), (a[i]), (a[i][j]), therefore, the following should first
4106 // be true to do the deref.
4108 // (a != null) && (i < a.len) && (a[i] != null) && (j < a[i].len) && (a[i][j] != null) --> (1)
4110 // Note the short circuiting AND. Implication: these conditions should be performed in separate
4111 // blocks each of which will branch to slow path if the condition evaluates to false.
4113 // Now, imagine a situation where we have
4114 // a[x][y][k] = 20 and a[i][j][k] = 0
4115 // also in the inner most loop where x, y are parameters, then our conditions will have
4119 // in addition to the above conditions (1) to get rid of bounds check on index 'k'
4121 // But these conditions can be checked together with conditions
4122 // (i < a.len) without a need for a separate block. In summary, the conditions will be:
4125 // ((i < a.len) & (x < a.len)) && <-- Note the bitwise AND here.
4126 // (a[i] != null & a[x] != null) && <-- Note the bitwise AND here.
4127 // (j < a[i].len & y < a[x].len) && <-- Note the bitwise AND here.
4128 // (a[i][j] != null & a[x][y] != null) <-- Note the bitwise AND here.
4130 // This naturally yields a tree style pattern, where the nodes of the tree are
4131 // the array and indices respectively.
4147 // Notice that the variables in the same levels can have their conditions combined in the
4148 // same block with a bitwise AND. Whereas, the conditions in consecutive levels will be
4149 // combined with a short-circuiting AND (i.e., different basic blocks).
4152 // Construct a tree of array indices and the array which will generate the optimal
4153 // conditions for loop cloning.
4155 // a[i][j][k], b[i] and a[i][y][k] are the occurrences in the loop. Then, the tree should be:
4170 // In this method, we will construct such a tree by descending depth first into the array
4171 // index operation and forming a tree structure as we encounter the array or the index variables.
4173 // This tree structure will then be used to generate conditions like below:
4174 // (a != null) & (b != null) && // from the first level of the tree.
4176 // (i < a.len) & (i < b.len) && // from the second level of the tree. Levels can be combined.
4177 // (a[i] != null) & (b[i] != null) && // from the second level of the tree.
4179 // (j < a[i].len) & (y < a[i].len) && // from the third level.
4180 // (a[i][j] != null) & (a[i][y] != null) && // from the third level.
//------------------------------------------------------------------------------------
// optComputeDerefConditions: Build a tree of array-base variables and their
// index variables (edge u-v when u[v] occurs) from the loop's deref list,
// then derive per-level null-check / bounds conditions into the block
// conditions of 'context' (levelCond[level][conds]). Conditions at the same
// tree level can be combined with bitwise AND in one block; consecutive
// levels require short-circuiting AND (separate blocks). Returns false when
// the heuristic rejects the loop (too many condition blocks).
4185 bool Compiler::optComputeDerefConditions(unsigned loopNum, LoopCloneContext* context)
4187 ExpandArrayStack<LC_Deref*> nodes(getAllocator());
4190 // Get the dereference-able arrays.
4191 ExpandArrayStack<LC_Array>* deref = context->EnsureDerefs(loopNum);
4193 // For each array in the dereference list, construct a tree,
4194 // where the nodes are array and index variables and an edge 'u-v'
4195 // exists if a node 'v' indexes node 'u' directly as in u[v] or an edge
4196 // 'u-v-w' transitively if u[v][w] occurs.
4197 for (unsigned i = 0; i < deref->Size(); ++i)
4199 LC_Array& array = (*deref)[i];
4201 // First populate the array base variable.
4202 LC_Deref* node = LC_Deref::Find(&nodes, array.arrIndex->arrLcl);
4203 if (node == nullptr)
// New root node at level 0 for this array base local.
4205 node = new (getAllocator()) LC_Deref(array, 0 /*level*/);
4209 // For each dimension (level) for the array, populate the tree with the variable
4210 // from that dimension.
4211 unsigned rank = (unsigned)array.GetDimRank();
4212 for (unsigned i = 0; i < rank; ++i)
4214 node->EnsureChildren(getAllocator());
4215 LC_Deref* tmp = node->Find(array.arrIndex->indLcls[i]);
// Index variable not seen at this level yet: add it as a child node.
4218 tmp = new (getAllocator()) LC_Deref(array, node->level + 1);
4219 node->children->Push(tmp);
4222 // Descend one level down.
4226 // Keep the maxRank of all array dereferences.
4227 maxRank = max((int)rank, maxRank);
4233 for (unsigned i = 0; i < nodes.Size(); ++i)
4250 // First level will always yield the null-check, since it is made of the array base variables.
4251 // All other levels (dimensions) will yield two conditions ex: (i < a.length && a[i] != null)
4252 // So add 1 after rank * 2.
4253 unsigned condBlocks = (unsigned)maxRank * 2 + 1;
4255 // Heuristic to not create too many blocks;
4261 // Derive conditions into an 'array of level x array of conditions' i.e., levelCond[levels][conds]
4262 ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* levelCond = context->EnsureBlockConditions(loopNum, condBlocks);
4263 for (unsigned i = 0; i < nodes.Size(); ++i)
4265 nodes[i]->DeriveLevelConditions(levelCond);
4268 DBEXEC(verbose, context->PrintBlockConditions(loopNum));
4273 //----------------------------------------------------------------------------
4274 // optDebugLogLoopCloning: Insert a call to jithelper that prints a message.
4277 // block - the block in which the helper call needs to be inserted.
4278 // insertBefore - the tree before which the helper call will be inserted.
4280 void Compiler::optDebugLogLoopCloning(BasicBlock* block, GenTreePtr insertBefore)
// No-op unless the COMPlus_JitDebugLogLoopCloning config switch is enabled.
4282 if (JitConfig.JitDebugLogLoopCloning() == 0)
// Build a void call to the debug-logging JIT helper, wrap it in a statement,
// insert it ahead of "insertBefore", and morph it so it is ready for codegen.
4286 GenTreePtr logCall = gtNewHelperCallNode(CORINFO_HELP_DEBUG_LOG_LOOP_CLONING, TYP_VOID);
4287 GenTreePtr stmt = fgNewStmtFromTree(logCall);
4288 fgInsertStmtBefore(block, insertBefore, stmt);
4289 fgMorphBlockStmt(block, stmt DEBUGARG("Debug log loop cloning"));
4293 //------------------------------------------------------------------------
4294 // optPerformStaticOptimizations: Perform the optimizations for the optimization
4295 // candidates gathered during the cloning phase.
4298 // loopNum - the current loop index for which the optimizations are performed.
4299 // context - data structure where all loop cloning info is kept.
4300 // dynamicPath - If true, the optimization is performed in the fast path among the
4301 // cloned loops. If false, it means this is the only path (i.e.,
4302 // there is no slow path.)
4305 // Perform the optimizations on the fast path i.e., the path in which the
4306 // optimization candidates were collected at the time of identifying them.
4307 // The candidates store all the information necessary (the tree/stmt/block
4308 // they are from) to perform the optimization.
4311 // The unoptimized path is either already cloned when this method is called or
4312 // there is no unoptimized path (got eliminated statically.) So this method
4313 // performs the optimizations assuming that the path in which the candidates
4314 // were collected is the fast path in which the optimizations will be performed.
4316 void Compiler::optPerformStaticOptimizations(unsigned loopNum, LoopCloneContext* context DEBUGARG(bool dynamicPath))
// Walk every optimization candidate recorded for this loop and apply it.
4318 ExpandArrayStack<LcOptInfo*>* optInfos = context->GetLoopOptInfo(loopNum);
4319 for (unsigned i = 0; i < optInfos->Size(); ++i)
4321 LcOptInfo* optInfo = optInfos->GetRef(i);
4322 switch (optInfo->GetOptType())
4324 case LcOptInfo::LcJaggedArray:
// Jagged-array candidate: the conditions proven by cloning make the
// recorded bounds check redundant, so remove it from its statement.
4326 LcJaggedArrayOptInfo* arrIndexInfo = optInfo->AsLcJaggedArrayOptInfo();
// optRemoveRangeCheck (and its morphing) relies on compCurBB being the
// block that contains the tree being modified.
4327 compCurBB = arrIndexInfo->arrIndex.useBlock;
4328 optRemoveRangeCheck(arrIndexInfo->arrIndex.bndsChks[arrIndexInfo->dim], arrIndexInfo->stmt, true,
4330 DBEXEC(dynamicPath, optDebugLogLoopCloning(arrIndexInfo->arrIndex.useBlock, arrIndexInfo->stmt));
4333 case LcOptInfo::LcMdArray:
4334 // TODO-CQ: CLONE: Implement.
4342 //----------------------------------------------------------------------------
4343 // optCanCloneLoops: Use the environment flag to determine whether loop
4344 // cloning is allowed to be performed.
4347 // Returns true in debug builds if COMPlus_JitCloneLoops flag is set.
4348 // Disabled for retail for now.
4350 bool Compiler::optCanCloneLoops()
4352 // Enabled for retail builds now.
4353 unsigned cloneLoopsFlag = 1;
// In builds where JitConfig is available this reads the COMPlus_JitCloneLoops
// switch, which can override the default-on behavior.
4355 cloneLoopsFlag = JitConfig.JitCloneLoops();
4357 return (cloneLoopsFlag != 0);
4360 //----------------------------------------------------------------------------
4361 // optIsLoopClonable: Determine whether this loop can be cloned.
4364 // loopInd loop index which needs to be checked if it can be cloned.
4367 // Returns true if the loop can be cloned. If it returns false
4368 // prints a message in debug as why the loop can't be cloned.
4370 bool Compiler::optIsLoopClonable(unsigned loopInd)
4372 // First, for now, make sure the loop doesn't have any embedded exception handling -- I don't want to tackle
4373 // inserting new EH regions in the exception table yet.
4374 BasicBlock* stopAt = optLoopTable[loopInd].lpBottom->bbNext;
4375 unsigned loopRetCount = 0;
// Scan the loop body: count return blocks (for the epilog limit below) and
// reject if any block starts a try region.
4376 for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != stopAt; blk = blk->bbNext)
4378 if (blk->bbJumpKind == BBJ_RETURN)
4382 if (bbIsTryBeg(blk))
4384 JITDUMP("Loop cloning: rejecting loop %d in %s, because it has a try begin.\n", loopInd, info.compFullName);
4389 // Is the entry block a handler or filter start? If so, then if we cloned, we could create a jump
4390 // into the middle of a handler (to go to the cloned copy.) Reject.
4391 if (bbIsHandlerBeg(optLoopTable[loopInd].lpEntry))
4393 JITDUMP("Loop cloning: rejecting loop because entry block is a handler start.\n");
4397 // If the head and entry are in different EH regions, reject.
4398 if (!BasicBlock::sameEHRegion(optLoopTable[loopInd].lpHead, optLoopTable[loopInd].lpEntry))
4400 JITDUMP("Loop cloning: rejecting loop because head and entry blocks are in different EH regions.\n");
4404 // Is the first block after the last block of the loop a handler or filter start?
4405 // Usually, we create a dummy block after the original loop, to skip over the loop clone
4406 // and go to where the original loop did. That raises problems when we don't actually go to
4407 // that block; this is one of those cases. This could be fixed fairly easily; for example,
4408 // we could add a dummy nop block after the (cloned) loop bottom, in the same handler scope as the
4409 // loop. This is just a corner to cut to get this working faster.
4410 BasicBlock* bbAfterLoop = optLoopTable[loopInd].lpBottom->bbNext;
4411 if (bbAfterLoop != nullptr && bbIsHandlerBeg(bbAfterLoop))
4413 JITDUMP("Loop cloning: rejecting loop because next block after bottom is a handler start.\n");
4417 // We've previously made a decision whether to have separate return epilogs, or branch to one.
4418 // There's a GCInfo limitation in the x86 case, so that there can be no more than 4 separate epilogs.
4419 // (I thought this was x86-specific, but it's not if-d. On other architectures, the decision should be made as a
4420 // heuristic tradeoff; perhaps we're just choosing to live with 4 as the limit.)
4421 if (fgReturnCount + loopRetCount > 4)
4423 JITDUMP("Loop cloning: rejecting loop because it has %d returns; if added to previously-existing %d returns, "
4424 "would exceed the limit of 4.\n",
4425 loopRetCount, fgReturnCount);
4429 // Otherwise, we're going to add those return blocks.
// Cloning duplicates the loop's return blocks, so account for them now.
4430 fgReturnCount += loopRetCount;
4435 /*****************************************************************************
4437 * Identify loop cloning opportunities, derive loop cloning conditions,
4438 * perform loop cloning, use the derived conditions to choose which
4441 void Compiler::optCloneLoops()
4443 JITDUMP("\n*************** In optCloneLoops()\n");
// Nothing to do if there are no loops or cloning is disabled by config.
4444 if (optLoopCount == 0 || !optCanCloneLoops())
4452 printf("Blocks/Trees at start of phase\n");
4453 fgDispBasicBlocks(true);
4457 LoopCloneContext context(optLoopCount, getAllocator());
4459 // Obtain array optimization candidates in the context.
4460 optObtainLoopCloningOpts(&context);
4462 // For each loop, derive cloning conditions for the optimization candidates.
4463 for (unsigned i = 0; i < optLoopCount; ++i)
4465 ExpandArrayStack<LcOptInfo*>* optInfos = context.GetLoopOptInfo(i);
4466 if (optInfos == nullptr)
// If either the cloning conditions or the dereference conditions cannot
// be derived, give up on this loop.
4471 if (!optDeriveLoopCloningConditions(i, &context) || !optComputeDerefConditions(i, &context))
4473 JITDUMP("> Conditions could not be obtained\n");
4474 context.CancelLoopOptInfo(i);
// Statically evaluate the conditions where possible: if any is provably
// false, cloning is pointless; if all are provably true, no slow path is
// needed and the optimizations can be applied in place.
4478 bool allTrue = false;
4479 bool anyFalse = false;
4480 context.EvaluateConditions(i, &allTrue, &anyFalse DEBUGARG(verbose));
4483 context.CancelLoopOptInfo(i);
4487 // Perform static optimizations on the fast path since we always
4488 // have to take the cloned path.
4489 optPerformStaticOptimizations(i, &context DEBUGARG(false));
4491 // No need to clone.
4492 context.CancelLoopOptInfo(i);
4498 // The code in this #if has been useful in debugging loop cloning issues, by
4499 // enabling selective enablement of the loop cloning optimization according to
// Debug-only gating: restrict cloning to methods whose hash falls in the
// [loopclonehashlo, loopclonehashhi] range read from the environment.
4502 unsigned methHash = info.compMethodHash();
4503 char* lostr = getenv("loopclonehashlo");
4504 unsigned methHashLo = 0;
4507 sscanf_s(lostr, "%x", &methHashLo);
4508 // methHashLo = (unsigned(atoi(lostr)) << 2); // So we don't have to use negative numbers.
4510 char* histr = getenv("loopclonehashhi");
4511 unsigned methHashHi = UINT32_MAX;
4514 sscanf_s(histr, "%x", &methHashHi);
4515 // methHashHi = (unsigned(atoi(histr)) << 2); // So we don't have to use negative numbers.
4517 if (methHash < methHashLo || methHash > methHashHi)
// Finally, clone every loop that still has live optimization candidates,
// after optimizing/compacting its condition sets.
4522 for (unsigned i = 0; i < optLoopCount; ++i)
4524 if (context.GetLoopOptInfo(i) != nullptr)
4527 context.OptimizeConditions(i DEBUGARG(verbose));
4528 context.OptimizeBlockConditions(i DEBUGARG(verbose));
4529 optCloneLoop(i, &context);
4536 printf("\nAfter loop cloning:\n");
4537 fgDispBasicBlocks(/*dumpTrees*/ true);
4542 void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context)
4544 assert(loopInd < optLoopCount);
4546 JITDUMP("\nCloning loop %d: [h: %d, f: %d, t: %d, e: %d, b: %d].\n", loopInd, optLoopTable[loopInd].lpHead->bbNum,
4547 optLoopTable[loopInd].lpFirst->bbNum, optLoopTable[loopInd].lpTop->bbNum,
4548 optLoopTable[loopInd].lpEntry->bbNum, optLoopTable[loopInd].lpBottom->bbNum);
4550 // Determine the depth of the loop, so we can properly weight blocks added (outside the cloned loop blocks).
4551 unsigned depth = optLoopDepth(loopInd);
4552 unsigned ambientWeight = 1;
// ambientWeight = BB_LOOP_WEIGHT ^ depth, saturating at BB_MAX_WEIGHT.
4553 for (unsigned j = 0; j < depth; j++)
4555 unsigned lastWeight = ambientWeight;
4556 ambientWeight *= BB_LOOP_WEIGHT;
4557 // If the multiplication overflowed, stick at max.
4558 // (Strictly speaking, a multiplication could overflow and still have a result
4559 // that is >= lastWeight...but if so, the original weight must be pretty large,
4560 // and it got bigger, so that's OK.)
4561 if (ambientWeight < lastWeight)
4563 ambientWeight = BB_MAX_WEIGHT;
4568 // If we're in a non-natural loop, the ambient weight might be higher than we computed above.
4569 // Be safe by taking the max with the head block's weight.
4570 ambientWeight = max(ambientWeight, optLoopTable[loopInd].lpHead->bbWeight);
4572 // This is the containing loop, if any -- to label any blocks we create that are outside
4573 // the loop being cloned.
4574 unsigned char ambientLoop = optLoopTable[loopInd].lpParent;
4576 // First, make sure that the loop has a unique header block, creating an empty one if necessary.
4577 optEnsureUniqueHead(loopInd, ambientWeight);
4579 // We're going to make
4591 // H2--> E (Optional; if E == T == F, let H fall through to F/T/E)
4603 BasicBlock* h = optLoopTable[loopInd].lpHead;
4604 if (h->bbJumpKind != BBJ_NONE && h->bbJumpKind != BBJ_ALWAYS)
4606 // Make a new block to be the unique entry to the loop.
4607 assert(h->bbJumpKind == BBJ_COND && h->bbNext == optLoopTable[loopInd].lpEntry);
4608 BasicBlock* newH = fgNewBBafter(BBJ_NONE, h,
4609 /*extendRegion*/ true);
4610 newH->bbWeight = (newH->isRunRarely() ? 0 : ambientWeight);
4611 BlockSetOps::Assign(this, newH->bbReach, h->bbReach);
4612 // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
4613 newH->bbNatLoopNum = ambientLoop;
4615 optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h);
4618 // First, make X2 after B, if necessary. (Not necessary if b is a BBJ_ALWAYS.)
4619 // "newPred" will be the predecessor of the blocks of the cloned loop.
4620 BasicBlock* b = optLoopTable[loopInd].lpBottom;
4621 BasicBlock* newPred = b;
4622 if (b->bbJumpKind != BBJ_ALWAYS)
4624 BasicBlock* x = b->bbNext;
4627 BasicBlock* x2 = fgNewBBafter(BBJ_ALWAYS, b, /*extendRegion*/ true);
4628 x2->bbWeight = (x2->isRunRarely() ? 0 : ambientWeight);
4630 // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
4631 x2->bbNatLoopNum = ambientLoop;
4634 BlockSetOps::Assign(this, x2->bbReach, h->bbReach);
4639 // Now we'll make "h2", after "h" to go to "e" -- unless the loop is a do-while,
4640 // so that "h" already falls through to "e" (e == t == f).
4641 BasicBlock* h2 = nullptr;
4642 if (optLoopTable[loopInd].lpHead->bbNext != optLoopTable[loopInd].lpEntry)
// NOTE(review): this inner declaration shadows the outer "h2" above, which
// therefore stays nullptr; the assert below comparing h->bbNext against "h2"
// uses the outer (nullptr) variable. Looks like a shadowing bug -- confirm
// against the repository history before changing.
4644 BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, optLoopTable[loopInd].lpHead,
4645 /*extendRegion*/ true);
4646 h2->bbWeight = (h2->isRunRarely() ? 0 : ambientWeight);
4648 // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
4649 h2->bbNatLoopNum = ambientLoop;
4651 h2->bbJumpDest = optLoopTable[loopInd].lpEntry;
4652 optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h2);
4655 // Now we'll clone the blocks of the loop body.
4656 BasicBlock* newFirst = nullptr;
4657 BasicBlock* newBot = nullptr;
4659 BlockToBlockMap* blockMap = new (getAllocator()) BlockToBlockMap(getAllocator());
4660 for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != optLoopTable[loopInd].lpBottom->bbNext;
4663 BasicBlock* newBlk = fgNewBBafter(blk->bbJumpKind, newPred,
4664 /*extendRegion*/ true);
4666 // Call CloneBlockState to make a copy of the block's statements (and attributes), and assert that it
4667 // has a return value indicating success, because optCanOptimizeByLoopCloningVisitor has already
4668 // checked them to guarantee they are clonable.
4669 bool cloneOk = BasicBlock::CloneBlockState(this, newBlk, blk);
4670 noway_assert(cloneOk);
4671 // TODO-Cleanup: The above clones the bbNatLoopNum, which is incorrect. Eventually, we should probably insert
4672 // the cloned loop in the loop table. For now, however, we'll just make these blocks be part of the surrounding
4673 // loop, if one exists -- the parent of the loop we're cloning.
4674 newBlk->bbNatLoopNum = optLoopTable[loopInd].lpParent;
4676 if (newFirst == nullptr)
4680 newBot = newBlk; // Continually overwrite to make sure we get the last one.
4682 blockMap->Set(blk, newBlk);
4685 // Perform the static optimizations on the fast path.
4686 optPerformStaticOptimizations(loopInd, context DEBUGARG(true));
4688 // Now go through the new blocks, remapping their jump targets within the loop.
4689 for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != optLoopTable[loopInd].lpBottom->bbNext;
4693 BasicBlock* newblk = nullptr;
4694 bool b = blockMap->Lookup(blk, &newblk);
4695 assert(b && newblk != nullptr);
4697 assert(blk->bbJumpKind == newblk->bbJumpKind);
4699 // First copy the jump destination(s) from "blk".
4700 optCopyBlkDest(blk, newblk);
4702 // Now redirect the new block according to "blockMap".
4703 optRedirectBlock(newblk, blockMap);
4706 assert((h->bbJumpKind == BBJ_NONE && (h->bbNext == h2 || h->bbNext == optLoopTable[loopInd].lpEntry)) ||
4707 (h->bbJumpKind == BBJ_ALWAYS));
4709 // If all the conditions are true, go to E2.
4710 BasicBlock* e2 = nullptr;
4711 bool foundIt = blockMap->Lookup(optLoopTable[loopInd].lpEntry, &e2);
// "h" becomes the first condition block of the fast/slow-path choice.
4713 h->bbJumpKind = BBJ_COND;
4715 // We will create the following structure
4717 // cond0 (in h) -?> cond1
4718 // slow --> e2 (slow) always
4725 // We should always have block conditions, at the minimum, the array should be deref-able
4726 assert(context->HasBlockConditions(loopInd));
4728 // Create a unique header for the slow path.
4729 BasicBlock* slowHead = fgNewBBafter(BBJ_ALWAYS, h, true);
4730 slowHead->bbWeight = (h->isRunRarely() ? 0 : ambientWeight);
4731 slowHead->bbNatLoopNum = ambientLoop;
4732 slowHead->bbJumpDest = e2;
4734 BasicBlock* condLast = optInsertLoopChoiceConditions(context, loopInd, h, slowHead);
// A failed final condition jumps to the slow-path header.
4735 condLast->bbJumpDest = slowHead;
4737 // If h2 is present it is already the head or replace 'h' by 'condLast'.
4740 optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, condLast);
4742 assert(foundIt && e2 != nullptr);
// The flow graph changed shape; recompute derived flow info.
4744 fgUpdateChangedFlowGraph();
4747 //--------------------------------------------------------------------------------------------------
4748 // optInsertLoopChoiceConditions - Insert the loop conditions for a loop between loop head and entry
4751 // context loop cloning context variable
4752 // loopNum the loop index
4753 // head loop head for "loopNum"
4754 // slowHead the slow path loop head
4760 // Create the following structure.
4762 // Note below that the cond0 is inverted in head i.e., if true jump to cond1. This is because
4763 // condn cannot jtrue to loop head h2. It has to be from a direct pred block.
4765 // cond0 (in h) -?> cond1
4766 // slowHead --> e2 (slowHead) always
4767 // !cond1 -?> slowHead
4768 // !cond2 -?> slowHead
4770 // !condn -?> slowHead
4773 // Insert condition 0 in 'h' and create other condition blocks and insert conditions in them.
// Returns the last condition block created (the caller wires its jump target).
4775 BasicBlock* Compiler::optInsertLoopChoiceConditions(LoopCloneContext* context,
4778 BasicBlock* slowHead)
4780 JITDUMP("Inserting loop cloning conditions\n");
4781 assert(context->HasBlockConditions(loopNum));
4783 BasicBlock* curCond = head;
4784 ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* levelCond = context->GetBlockConditions(loopNum);
// One condition block per dereference level; conditions for a level are
// emitted together into the current block.
4785 for (unsigned i = 0; i < levelCond->Size(); ++i)
4787 bool isHeaderBlock = (curCond == head);
4789 // Flip the condition if header block.
4790 context->CondToStmtInBlock(this, *((*levelCond)[i]), curCond, isHeaderBlock);
4792 // Create each condition block ensuring wiring between them.
4793 BasicBlock* tmp = fgNewBBafter(BBJ_COND, isHeaderBlock ? slowHead : curCond, true);
// Header jumps (on success) to the next condition block; non-header blocks
// jump (on failure) to the slow-path head.
4794 curCond->bbJumpDest = isHeaderBlock ? tmp : slowHead;
4797 curCond->inheritWeight(head);
4798 curCond->bbNatLoopNum = head->bbNatLoopNum;
4799 JITDUMP("Created new block %02d for new level\n", curCond->bbNum);
4802 // Finally insert cloning conditions after all deref conditions have been inserted.
4803 context->CondToStmtInBlock(this, *(context->GetConditions(loopNum)), curCond, false);
// Ensure loop "loopInd" has a unique head block that dominates its entry,
// creating an empty BBJ_ALWAYS/fall-through block "h2" just before the top
// and redirecting all non-loop predecessors of the entry to it if necessary.
4807 void Compiler::optEnsureUniqueHead(unsigned loopInd, unsigned ambientWeight)
4809 BasicBlock* h = optLoopTable[loopInd].lpHead;
4810 BasicBlock* t = optLoopTable[loopInd].lpTop;
4811 BasicBlock* e = optLoopTable[loopInd].lpEntry;
4812 BasicBlock* b = optLoopTable[loopInd].lpBottom;
4814 // If "h" dominates the entry block, then it is the unique header.
4815 if (fgDominate(h, e))
4820 // Otherwise, create a new empty header block, make it the pred of the entry block,
4821 // and redirect the preds of the entry block to go to this.
4823 BasicBlock* beforeTop = t->bbPrev;
4824 // Make sure that the new block is in the same region as the loop.
4825 // (We will only create loops that are entirely within a region.)
4826 BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, beforeTop, true);
4827 // This is in the containing loop.
4828 h2->bbNatLoopNum = optLoopTable[loopInd].lpParent;
4829 h2->bbWeight = (h2->isRunRarely() ? 0 : ambientWeight);
4831 // We don't care where it was put; splice it between beforeTop and top.
4832 if (beforeTop->bbNext != h2)
4834 h2->bbPrev->setNext(h2->bbNext); // Splice h2 out.
4835 beforeTop->setNext(h2); // Splice h2 in, between beforeTop and t.
// If h2 does not fall through to the entry, it must jump there explicitly.
4839 if (h2->bbNext != e)
4841 h2->bbJumpKind = BBJ_ALWAYS;
// h2 reaches exactly what the entry reached.
4844 BlockSetOps::Assign(this, h2->bbReach, e->bbReach);
4846 // Redirect paths from preds of "e" to go to "h2" instead of "e".
4847 BlockToBlockMap* blockMap = new (getAllocator()) BlockToBlockMap(getAllocator());
4848 blockMap->Set(e, h2);
4850 for (flowList* predEntry = e->bbPreds; predEntry; predEntry = predEntry->flNext)
4852 BasicBlock* predBlock = predEntry->flBlock;
4854 // Skip if predBlock is in the loop.
// In-loop predecessors (back edges) keep targeting the entry; bbNum range
// check is used as the "inside the loop" test here.
4855 if (t->bbNum <= predBlock->bbNum && predBlock->bbNum <= b->bbNum)
4859 optRedirectBlock(predBlock, blockMap);
4862 optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h2);
4865 /*****************************************************************************
4867 * Determine the kind of interference for the call.
// Classifies which indirections a call may interfere with: non-helper calls
// kill everything; specific "set field / copy" helpers kill ref and/or scalar
// indirections; all other helpers kill nothing.
4870 /* static */ inline Compiler::callInterf Compiler::optCallInterf(GenTreePtr call)
4872 assert(call->gtOper == GT_CALL);
4874 // if not a helper, kills everything
4875 if (call->gtCall.gtCallType != CT_HELPER)
4880 // setfield and array address store kill all indirections
4881 switch (eeGetHelperNum(call->gtCall.gtCallMethHnd))
4883 case CORINFO_HELP_ASSIGN_REF: // Not strictly needed as we don't make a GT_CALL with this
4884 case CORINFO_HELP_CHECKED_ASSIGN_REF: // Not strictly needed as we don't make a GT_CALL with this
4885 case CORINFO_HELP_ASSIGN_BYREF: // Not strictly needed as we don't make a GT_CALL with this
4886 case CORINFO_HELP_SETFIELDOBJ:
4887 case CORINFO_HELP_ARRADDR_ST:
4889 return CALLINT_REF_INDIRS;
4891 case CORINFO_HELP_SETFIELDFLOAT:
4892 case CORINFO_HELP_SETFIELDDOUBLE:
4893 case CORINFO_HELP_SETFIELD8:
4894 case CORINFO_HELP_SETFIELD16:
4895 case CORINFO_HELP_SETFIELD32:
4896 case CORINFO_HELP_SETFIELD64:
4898 return CALLINT_SCL_INDIRS;
4900 case CORINFO_HELP_ASSIGN_STRUCT: // Not strictly needed as we don't use this in Jit32
4901 case CORINFO_HELP_MEMSET: // Not strictly needed as we don't make a GT_CALL with this
4902 case CORINFO_HELP_MEMCPY: // Not strictly needed as we don't make a GT_CALL with this
4903 case CORINFO_HELP_SETFIELDSTRUCT:
4905 return CALLINT_ALL_INDIRS;
4911 // other helpers kill nothing
4912 return CALLINT_NONE;
4915 /*****************************************************************************
4917 * See if the given tree can be computed in the given precision (which must
4918 * be smaller than the type of the tree for this to make sense). If 'doit'
4919 * is false, we merely check to see whether narrowing is possible; if we
4920 * get called with 'doit' being true, we actually perform the narrowing.
// Arguments: srct/dstt are the source/destination integral types; vnpNarrow
// is the value-number pair to install on narrowed nodes; doit selects
// check-only vs. mutate mode. Returns whether narrowing is (or was) possible.
4923 bool Compiler::optNarrowTree(GenTreePtr tree, var_types srct, var_types dstt, ValueNumPair vnpNarrow, bool doit)
4929 noway_assert(genActualType(tree->gtType) == genActualType(srct));
4931 /* Assume we're only handling integer types */
4932 noway_assert(varTypeIsIntegral(srct));
4933 noway_assert(varTypeIsIntegral(dstt));
4935 unsigned srcSize = genTypeSize(srct);
4936 unsigned dstSize = genTypeSize(dstt);
4938 /* dstt must be smaller than srct to narrow */
4939 if (dstSize >= srcSize)
4944 /* Figure out what kind of a node we have */
4945 oper = tree->OperGet();
4946 kind = tree->OperKind();
// Assignment operators are never narrowed (and must not be asked to mutate).
4948 if (kind & GTK_ASGOP)
4950 noway_assert(doit == false);
4954 ValueNumPair NoVNPair = ValueNumPair();
4956 if (kind & GTK_LEAF)
4960 /* Constants can usually be narrowed by changing their value */
4961 CLANG_FORMAT_COMMENT_ANCHOR;
4963 #ifndef _TARGET_64BIT_
// 32-bit targets: a long constant can be rewritten as an int constant if
// its value fits under the destination-type mask.
4968 lval = tree->gtIntConCommon.LngValue();
4997 if ((lval & lmask) != lval)
5002 tree->ChangeOperConst(GT_CNS_INT);
5003 tree->gtType = TYP_INT;
5004 tree->gtIntCon.gtIconVal = (int)lval;
5005 if (vnStore != nullptr)
5007 fgValueNumberTreeConst(tree);
// Int constants: narrowable when the value fits under the destination mask.
5017 ival = tree->gtIntCon.gtIconVal;
5036 #ifdef _TARGET_64BIT_
5043 #endif // _TARGET_64BIT_
5048 if ((ival & imask) != ival)
5053 #ifdef _TARGET_64BIT_
5056 tree->gtType = TYP_INT;
5057 tree->gtIntCon.gtIconVal = (int)ival;
5058 if (vnStore != nullptr)
5060 fgValueNumberTreeConst(tree);
5063 #endif // _TARGET_64BIT_
5067 /* Operands that are in memory can usually be narrowed
5068 simply by changing their gtType */
5071 /* We only allow narrowing long -> int for a GT_LCL_VAR */
5072 if (dstSize == sizeof(int))
5085 noway_assert(doit == false);
5089 if (kind & (GTK_BINOP | GTK_UNOP))
5092 op1 = tree->gtOp.gtOp1;
5094 op2 = tree->gtOp.gtOp2;
5096 switch (tree->gtOper)
5099 noway_assert(genActualType(tree->gtType) == genActualType(op2->gtType));
5101 // Is op2 a small constant that can be narrowed into dstt?
5102 // if so the result of the GT_AND will also fit into 'dstt' and can be narrowed
5103 if ((op2->gtOper == GT_CNS_INT) && optNarrowTree(op2, srct, dstt, NoVNPair, false))
5105 // We will change the type of the tree and narrow op2
5109 tree->gtType = genActualType(dstt);
5110 tree->SetVNs(vnpNarrow);
5112 optNarrowTree(op2, srct, dstt, NoVNPair, true);
5113 // We may also need to cast away the upper bits of op1
5116 assert(tree->gtType == TYP_INT);
5117 op1 = gtNewCastNode(TYP_INT, op1, TYP_INT);
5119 op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
5121 tree->gtOp.gtOp1 = op1;
// Overflow-checked ops and small destination types can't be narrowed here.
5132 if (tree->gtOverflow() || varTypeIsSmall(dstt))
5134 noway_assert(doit == false);
5142 noway_assert(genActualType(tree->gtType) == genActualType(op1->gtType));
5143 noway_assert(genActualType(tree->gtType) == genActualType(op2->gtType));
// Both operands must narrow (and not be live CSE candidates) for the
// binary node itself to narrow.
5145 if (gtIsActiveCSE_Candidate(op1) || gtIsActiveCSE_Candidate(op2) ||
5146 !optNarrowTree(op1, srct, dstt, NoVNPair, doit) || !optNarrowTree(op2, srct, dstt, NoVNPair, doit))
5148 noway_assert(doit == false);
5152 /* Simply change the type of the tree */
5156 if (tree->gtOper == GT_MUL && (tree->gtFlags & GTF_MUL_64RSLT))
5158 tree->gtFlags &= ~GTF_MUL_64RSLT;
5161 tree->gtType = genActualType(dstt);
5162 tree->SetVNs(vnpNarrow);
5170 /* Simply change the type of the tree */
5172 if (doit && (dstSize <= genTypeSize(tree->gtType)))
5174 tree->gtType = genSignedType(dstt);
5175 tree->SetVNs(vnpNarrow);
5177 /* Make sure we don't mess up the variable type */
5178 if ((oper == GT_LCL_VAR) || (oper == GT_LCL_FLD))
5180 tree->gtFlags |= GTF_VAR_CAST;
5193 /* These can always be narrowed since they only represent 0 or 1 */
// GT_CAST: examine the cast's target and operand types.
5198 var_types cast = tree->CastToType();
5199 var_types oprt = op1->TypeGet();
5200 unsigned oprSize = genTypeSize(oprt);
5207 if (varTypeIsIntegralOrI(dstt) != varTypeIsIntegralOrI(oprt))
5212 if (tree->gtOverflow())
5217 /* Is this a cast from the type we're narrowing to or a smaller one? */
5219 if (oprSize <= dstSize)
5221 /* Bash the target type of the cast */
5225 dstt = genSignedType(dstt);
5227 if (oprSize == dstSize)
5229 // Same size: change the CAST into a NOP
5230 tree->ChangeOper(GT_NOP);
5231 tree->gtType = dstt;
5232 tree->gtOp.gtOp2 = nullptr;
5233 tree->gtVNPair = op1->gtVNPair; // Set to op1's ValueNumber
5237 // oprSize is smaller
5238 assert(oprSize < dstSize);
5240 // Change the CastToType in the GT_CAST node
5241 tree->CastToType() = dstt;
5243 // The result type of a GT_CAST is never a small type.
5244 // Use genActualType to widen dstt when it is a small type.
5245 tree->gtType = genActualType(dstt);
5246 tree->SetVNs(vnpNarrow);
// COMMA-like case: narrowing depends only on the value operand (op2).
5256 if (!gtIsActiveCSE_Candidate(op2) && optNarrowTree(op2, srct, dstt, vnpNarrow, doit))
5258 /* Simply change the type of the tree */
5262 tree->gtType = genActualType(dstt);
5263 tree->SetVNs(vnpNarrow);
5270 noway_assert(doit == false);
5278 /*****************************************************************************
5280 * The following logic figures out whether the given variable is assigned
5281 * somewhere in a list of basic blocks (or in an entire loop).
// Tree-walk callback: records into the isVarAssgDsc (passed via
// pCallbackData) which locals are assigned, which indirection kinds are
// written, and what call interference occurs.
5284 Compiler::fgWalkResult Compiler::optIsVarAssgCB(GenTreePtr* pTree, fgWalkData* data)
5286 GenTreePtr tree = *pTree;
5288 if (tree->OperKind() & GTK_ASGOP)
5290 GenTreePtr dest = tree->gtOp.gtOp1;
5291 genTreeOps destOper = dest->OperGet();
5293 isVarAssgDsc* desc = (isVarAssgDsc*)data->pCallbackData;
5294 assert(desc && desc->ivaSelf == desc);
5296 if (destOper == GT_LCL_VAR)
5298 unsigned tvar = dest->gtLclVarCommon.gtLclNum;
// Track the assigned local in the mask when it fits; otherwise remember
// that the mask is incomplete.
5299 if (tvar < lclMAX_ALLSET_TRACKED)
5301 AllVarSetOps::AddElemD(data->compiler, desc->ivaMaskVal, tvar);
5305 desc->ivaMaskIncomplete = true;
// Is this the specific variable the caller asked about (ignoring the
// one tree the caller told us to skip)?
5308 if (tvar == desc->ivaVar)
5310 if (tree != desc->ivaSkip)
5316 else if (destOper == GT_LCL_FLD)
5318 /* We can't track every field of every var. Moreover, indirections
5319 may access different parts of the var as different (but
5320 overlapping) fields. So just treat them as indirect accesses */
5322 // unsigned lclNum = dest->gtLclFld.gtLclNum;
5323 // noway_assert(lvaTable[lclNum].lvAddrTaken);
5325 varRefKinds refs = varTypeIsGC(tree->TypeGet()) ? VR_IND_REF : VR_IND_SCL;
5326 desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | refs);
5328 else if (destOper == GT_CLS_VAR)
5330 desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | VR_GLB_VAR);
5332 else if (destOper == GT_IND)
5334 /* Set the proper indirection bits */
5336 varRefKinds refs = varTypeIsGC(tree->TypeGet()) ? VR_IND_REF : VR_IND_SCL;
5337 desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | refs);
5340 else if (tree->gtOper == GT_CALL)
5342 isVarAssgDsc* desc = (isVarAssgDsc*)data->pCallbackData;
5343 assert(desc && desc->ivaSelf == desc);
// Record the strongest call interference seen so far.
5345 desc->ivaMaskCall = optCallInterf(tree);
5348 return WALK_CONTINUE;
5351 /*****************************************************************************/
// Walks the statements of blocks [beg, end], using optIsVarAssgCB, to see
// whether local "var" is assigned anywhere in the range, ignoring the single
// tree "skip" (typically the assignment under consideration itself).
5353 bool Compiler::optIsVarAssigned(BasicBlock* beg, BasicBlock* end, GenTreePtr skip, unsigned var)
5358 desc.ivaSkip = skip;
// Self-pointer used by the callback as a sanity check on pCallbackData.
5360 desc.ivaSelf = &desc;
5363 desc.ivaMaskCall = CALLINT_NONE;
5364 AllVarSetOps::AssignNoCopy(this, desc.ivaMaskVal, AllVarSetOps::MakeEmpty(this));
5370 for (GenTreeStmt* stmt = beg->firstStmt(); stmt; stmt = stmt->gtNextStmt)
5372 noway_assert(stmt->gtOper == GT_STMT);
// The walk aborts (returns non-zero) when the callback sees "var" assigned.
5373 if (fgWalkTreePre(&stmt->gtStmtExpr, optIsVarAssgCB, &desc))
5395 /*****************************************************************************/
// Checks whether loop "lnum" assigns any of the variables in "vars" or any
// indirection of kind "inds"; the per-loop assignment summary is computed
// lazily and cached in the loop descriptor (LPFLG_ASGVARS_YES).
5396 int Compiler::optIsSetAssgLoop(unsigned lnum, ALLVARSET_VALARG_TP vars, varRefKinds inds)
5400 /* Get hold of the loop descriptor */
5402 noway_assert(lnum < optLoopCount);
5403 loop = optLoopTable + lnum;
5405 /* Do we already know what variables are assigned within this loop? */
5407 if (!(loop->lpFlags & LPFLG_ASGVARS_YES))
5414 /* Prepare the descriptor used by the tree walker call-back */
// ivaVar = -1: we are summarizing all assignments, not tracking one local.
5416 desc.ivaVar = (unsigned)-1;
5417 desc.ivaSkip = nullptr;
5419 desc.ivaSelf = &desc;
5421 AllVarSetOps::AssignNoCopy(this, desc.ivaMaskVal, AllVarSetOps::MakeEmpty(this));
5422 desc.ivaMaskInd = VR_NONE;
5423 desc.ivaMaskCall = CALLINT_NONE;
5424 desc.ivaMaskIncomplete = false;
5426 /* Now walk all the statements of the loop */
5428 beg = loop->lpHead->bbNext;
5429 end = loop->lpBottom;
5431 for (/**/; /**/; beg = beg->bbNext)
5435 for (GenTreeStmt* stmt = beg->FirstNonPhiDef(); stmt; stmt = stmt->gtNextStmt)
5437 noway_assert(stmt->gtOper == GT_STMT);
5438 fgWalkTreePre(&stmt->gtStmtExpr, optIsVarAssgCB, &desc);
// If some assigned local couldn't be tracked in the mask, remember that
// the cached summary is incomplete.
5440 if (desc.ivaMaskIncomplete)
5442 loop->lpFlags |= LPFLG_ASGVARS_INC;
// Cache the computed summary in the loop descriptor.
5452 AllVarSetOps::Assign(this, loop->lpAsgVars, desc.ivaMaskVal);
5453 loop->lpAsgInds = desc.ivaMaskInd;
5454 loop->lpAsgCall = desc.ivaMaskCall;
5456 /* Now we know what variables are assigned in the loop */
5458 loop->lpFlags |= LPFLG_ASGVARS_YES;
5461 /* Now we can finally test the caller's mask against the loop's */
5462 if (!AllVarSetOps::IsEmptyIntersection(this, loop->lpAsgVars, vars) || (loop->lpAsgInds & inds))
// No direct hit: check whether a call inside the loop could interfere
// with the indirection kinds the caller cares about.
5467 switch (loop->lpAsgCall)
5471 /* Can't hoist if the call might have side effect on an indirection. */
5473 if (loop->lpAsgInds != VR_NONE)
5480 case CALLINT_REF_INDIRS:
5482 /* Can't hoist if the call might have side effect on an ref indirection. */
5484 if (loop->lpAsgInds & VR_IND_REF)
5491 case CALLINT_SCL_INDIRS:
5493 /* Can't hoist if the call might have side effect on an non-ref indirection. */
5495 if (loop->lpAsgInds & VR_IND_SCL)
5502 case CALLINT_ALL_INDIRS:
5504 /* Can't hoist if the call might have side effect on any indirection. */
5506 if (loop->lpAsgInds & (VR_IND_REF | VR_IND_SCL))
5515 /* Other helpers kill nothing */
5520 noway_assert(!"Unexpected lpAsgCall value");
// optPerformHoistExpr: hoist a copy of 'origExpr' out of loop 'lnum' into that
// loop's pre-header block.
//
// The original tree stays in the loop; a clone marked with GTF_MAKE_CSE is
// appended as the last statement of the pre-header (created on demand via
// fgCreateLoopPreHeader), so the later CSE phase can commonize the two copies.
// The loop must already have been marked LPFLG_HOISTABLE by optHoistThisLoop.
//
// NOTE(listing): this is an elided listing — braces and some guard lines
// (e.g. the DEBUG/verbose guards around the printf dumps) are not shown.
5526 void Compiler::optPerformHoistExpr(GenTreePtr origExpr, unsigned lnum)
// Verbose dump of the tree being hoisted (guard lines elided in this listing).
5531 printf("\nHoisting a copy of ");
5532 printTreeID(origExpr);
5533 printf(" into PreHeader for loop L%02u <BB%02u..BB%02u>:\n", lnum, optLoopTable[lnum].lpFirst->bbNum,
5534 optLoopTable[lnum].lpBottom->bbNum);
5535 gtDispTree(origExpr);
5540 // This loop has to be in a form that is approved for hoisting.
5541 assert(optLoopTable[lnum].lpFlags & LPFLG_HOISTABLE);
5543 // Create a copy of the expression and mark it for CSE's.
5544 GenTreePtr hoistExpr = gtCloneExpr(origExpr, GTF_MAKE_CSE);
5546 // At this point we should have a cloned expression, marked with the GTF_MAKE_CSE flag
5547 assert(hoistExpr != origExpr);
5548 assert(hoistExpr->gtFlags & GTF_MAKE_CSE);
5550 GenTreePtr hoist = hoistExpr;
5551 // The value of the expression isn't used (unless it's an assignment).
5552 if (hoistExpr->OperGet() != GT_ASG)
5554 hoist = gtUnusedValNode(hoistExpr);
5557 /* Put the statement in the preheader */
5559 fgCreateLoopPreHeader(lnum);
// After fgCreateLoopPreHeader, lpHead is the (BBJ_NONE) pre-header block.
5561 BasicBlock* preHead = optLoopTable[lnum].lpHead;
5562 assert(preHead->bbJumpKind == BBJ_NONE);
5564 // fgMorphTree and lvaRecursiveIncRefCounts requires that compCurBB be the block that contains
5565 // (or in this case, will contain) the expression.
5566 compCurBB = preHead;
5568 // Increment the ref counts of any local vars appearing in "hoist".
5569 // Note that we need to do this before fgMorphTree() as fgMorph() could constant
5570 // fold away some of the lcl vars referenced by "hoist".
5571 lvaRecursiveIncRefCounts(hoist);
5573 hoist = fgMorphTree(hoist);
5575 GenTreePtr hoistStmt = gtNewStmt(hoist);
5576 hoistStmt->gtFlags |= GTF_STMT_CMPADD;
5578 /* simply append the statement at the end of the preHead's list */
5580 GenTreePtr treeList = preHead->bbTreeList;
// Non-empty pre-header: splice the new statement after the current last
// statement (bbTreeList->gtPrev points at the last statement).
5584 /* append after last statement */
5586 GenTreePtr last = treeList->gtPrev;
5587 assert(last->gtNext == nullptr);
5589 last->gtNext = hoistStmt;
5590 hoistStmt->gtPrev = last;
5591 treeList->gtPrev = hoistStmt;
5595 /* Empty pre-header - store the single statement in the block */
5597 preHead->bbTreeList = hoistStmt;
// A single-statement list is circular in gtPrev: the statement is its own tail.
5598 hoistStmt->gtPrev = hoistStmt;
5601 hoistStmt->gtNext = nullptr;
5606 printf("This hoisted copy placed in PreHeader (BB%02u):\n", preHead->bbNum);
// If statement lists are already threaded, cost and sequence the new statement.
5611 if (fgStmtListThreaded)
5613 gtSetStmtInfo(hoistStmt);
5614 fgSetStmtSeq(hoistStmt);
// Test-data bookkeeping: verify TL_LoopHoist annotations on 'origExpr'
// (depth expectations, "do not hoist" markers) and re-annotate the clone so a
// later phase can check it is actually CSE'd.
5618 if (m_nodeTestData != nullptr)
5621 // What is the depth of the loop "lnum"?
5623 unsigned lnumIter = lnum;
5624 while (optLoopTable[lnumIter].lpParent != BasicBlock::NOT_IN_LOOP)
5627 lnumIter = optLoopTable[lnumIter].lpParent;
5630 NodeToTestDataMap* testData = GetNodeTestData();
5632 TestLabelAndNum tlAndN;
5633 if (testData->Lookup(origExpr, &tlAndN) && tlAndN.m_tl == TL_LoopHoist)
// m_num == -1 means the node was annotated "do not hoist" — hoisting it is an error.
5635 if (tlAndN.m_num == -1)
5638 printTreeID(origExpr);
5639 printf(" was declared 'do not hoist', but is being hoisted.\n");
// Otherwise m_num records the expected nesting depth to hoist from.
5642 else if (tlAndN.m_num != depth)
5645 printTreeID(origExpr);
5646 printf(" was declared as hoistable from loop at nesting depth %d; actually hoisted from loop at depth "
5648 tlAndN.m_num, depth);
5653 // We've correctly hoisted this, so remove the annotation. Later, we'll check for any remaining "must
5654 // hoist" annotations.
5655 testData->Remove(origExpr);
5656 // Now we insert an annotation to make sure that "hoistExpr" is actually CSE'd.
5657 tlAndN.m_tl = TL_CSE_Def;
5658 tlAndN.m_num = m_loopHoistCSEClass++;
5659 testData->Set(hoistExpr, tlAndN);
// Hoisting statistics: count each loop that hoisted at least one expression,
// and the total number of hoisted expressions.
5665 #if LOOP_HOIST_STATS
5666 if (!m_curLoopHasHoistedExpression)
5668 m_loopsWithHoistedExpressions++;
5669 m_curLoopHasHoistedExpression = true;
5671 m_totalHoistedExpressions++;
5672 #endif // LOOP_HOIST_STATS
// optHoistLoopCode: phase driver for loop-invariant code hoisting.
//
// Visits every outermost loop in optLoopTable and recursively hoists invariant
// expressions via optHoistLoopNest, outer-to-inner so each expression is
// hoisted out of the largest loop in which it is invariant. Also contains a
// (normally-disabled) env-var method-hash filter used as a debugging aid, and
// a checked-build pass that verifies every "must hoist" test annotation was
// honored.
//
// NOTE(listing): elided listing — braces, early returns and #if/#ifdef guard
// lines between the visible statements are not shown.
5675 void Compiler::optHoistLoopCode()
5677 // If we don't have any loops in the method then take an early out now.
5678 if (optLoopCount == 0)
// JitNoHoist config knob — presumably disables the phase when nonzero; the
// code consuming this value is elided from this listing (TODO confirm).
5684 unsigned jitNoHoist = JitConfig.JitNoHoist();
// Debug-only scaffolding (inside a disabled '#if', per the '#endif' below):
// restrict hoisting to methods whose hash falls in [loophoisthashlo,
// loophoisthashhi], read as hex from environment variables.
5692 // The code in this #if has been useful in debugging loop cloning issues, by
5693 // enabling selective enablement of the loop cloning optimization according to
5696 unsigned methHash = info.compMethodHash();
5697 char* lostr = getenv("loophoisthashlo");
5698 unsigned methHashLo = 0;
5701 sscanf_s(lostr, "%x", &methHashLo);
5702 // methHashLo = (unsigned(atoi(lostr)) << 2); // So we don't have to use negative numbers.
5704 char* histr = getenv("loophoisthashhi");
5705 unsigned methHashHi = UINT32_MAX;
5708 sscanf_s(histr, "%x", &methHashHi);
5709 // methHashHi = (unsigned(atoi(histr)) << 2); // So we don't have to use negative numbers.
5711 if (methHash < methHashLo || methHash > methHashHi)
5713 printf("Doing loop hoisting in %s (0x%x).\n", info.compFullName, methHash);
5715 #endif // 0 -- debugging loop cloning issues
5720 printf("\n*************** In optHoistLoopCode()\n");
5721 printf("Blocks/Trees before phase\n");
5722 fgDispBasicBlocks(true);
5727 // Consider all the loop nests, in outer-to-inner order (thus hoisting expressions outside the largest loop in which
5728 // they are invariant.)
5729 LoopHoistContext hoistCtxt(this);
5730 for (unsigned lnum = 0; lnum < optLoopCount; lnum++)
// Skip loops that earlier phases removed from the table.
5732 if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED)
// Only start recursion at outermost loops; nested loops are reached via
// optHoistLoopNest's own child traversal.
5737 if (optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP)
5739 optHoistLoopNest(lnum, &hoistCtxt);
5748 printf("Blocks/Trees after optHoistLoopCode() modified flowgraph\n");
5749 fgDispBasicBlocks(true);
5753 // Make sure that the predecessor lists are accurate
5754 fgDebugCheckBBlist();
// Checked-build verification: any remaining TL_LoopHoist annotation with a
// non-negative m_num means a "must hoist" expression was never hoisted.
5759 // Test Data stuff..
5760 // If we have no test data, early out.
5761 if (m_nodeTestData == nullptr)
5765 NodeToTestDataMap* testData = GetNodeTestData();
5766 for (NodeToTestDataMap::KeyIterator ki = testData->Begin(); !ki.Equal(testData->End()); ++ki)
5768 TestLabelAndNum tlAndN;
5769 GenTreePtr node = ki.Get();
5770 bool b = testData->Lookup(node, &tlAndN);
5772 if (tlAndN.m_tl != TL_LoopHoist)
5776 // Otherwise, it is a loop hoist annotation.
5777 assert(tlAndN.m_num < 100); // >= 100 indicates nested static field address, should already have been moved.
5778 if (tlAndN.m_num >= 0)
5782 printf(" was declared 'must hoist', but has not been hoisted.\n");
// optHoistLoopNest: hoist from loop 'lnum', then recursively from all of its
// nested (child) loops.
//
// The VNs hoisted in 'lnum' are temporarily added to
// hoistCtxt->m_hoistedInParentLoops while the children are processed (so a
// child does not re-hoist an expression its parent already hoisted), and are
// removed again afterwards.
5789 void Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt)
5791 // Do this loop, then recursively do all nested loops.
5792 CLANG_FORMAT_COMMENT_ANCHOR;
// Per-loop statistics: reset the "hoisted something" flag and count this loop.
5794 #if LOOP_HOIST_STATS
5796 m_curLoopHasHoistedExpression = false;
5797 m_loopsConsidered++;
5798 #endif // LOOP_HOIST_STATS
5800 optHoistThisLoop(lnum, hoistCtxt);
// Take ownership of the set of VNs hoisted in this loop (may be nullptr).
5802 VNSet* hoistedInCurLoop = hoistCtxt->ExtractHoistedInCurLoop();
5804 if (optLoopTable[lnum].lpChild != BasicBlock::NOT_IN_LOOP)
5806 // Add the ones hoisted in "lnum" to "hoistedInParents" for any nested loops.
5807 // TODO-Cleanup: we should have a set abstraction for loops.
5808 if (hoistedInCurLoop != nullptr)
5810 for (VNSet::KeyIterator keys = hoistedInCurLoop->Begin(); !keys.Equal(hoistedInCurLoop->End()); ++keys)
// Each VN must not already be in the parent set — guaranteed by this assert,
// which also justifies the unconditional Remove below.
5814 assert(!hoistCtxt->m_hoistedInParentLoops.Lookup(keys.Get(), &b));
5816 hoistCtxt->m_hoistedInParentLoops.Set(keys.Get(), true);
// Recurse over the sibling-linked list of child loops.
5820 for (unsigned child = optLoopTable[lnum].lpChild; child != BasicBlock::NOT_IN_LOOP;
5821 child = optLoopTable[child].lpSibling)
5823 optHoistLoopNest(child, hoistCtxt);
5827 // TODO-Cleanup: we should have a set abstraction for loops.
5828 if (hoistedInCurLoop != nullptr)
5830 for (VNSet::KeyIterator keys = hoistedInCurLoop->Begin(); !keys.Equal(hoistedInCurLoop->End()); ++keys)
5832 // Note that we asserted when we added these that they hadn't been members, so removing is appropriate.
5833 hoistCtxt->m_hoistedInParentLoops.Remove(keys.Get());
// optHoistThisLoop: attempt hoisting for a single loop 'lnum'.
//
// First applies legality screens (loop not removed, is a do-while loop, head
// dominates entry, head/entry in the same try region, not inside a catch).
// Then computes the register-pressure inputs used by
// optIsProfitableToHoistableTree (in/out and use/def variable counts, split
// into integer vs floating-point, with TYP_LONG counted twice on 32-bit
// targets). Finally walks the loop's "definitely executed" blocks, hoisting
// candidates via optHoistLoopExprsForBlock.
//
// NOTE(listing): elided listing — braces, early returns and DEBUG guards
// between the visible statements are not shown.
5839 void Compiler::optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt)
5841 LoopDsc* pLoopDsc = &optLoopTable[lnum];
5843 /* If loop was removed continue */
5845 if (pLoopDsc->lpFlags & LPFLG_REMOVED)
5850 /* Get the head and tail of the loop */
5852 BasicBlock* head = pLoopDsc->lpHead;
5853 BasicBlock* tail = pLoopDsc->lpBottom;
5854 BasicBlock* lbeg = pLoopDsc->lpEntry;
5857 // We must have a do-while loop
5858 if ((pLoopDsc->lpFlags & LPFLG_DO_WHILE) == 0)
5863 // The loop-head must dominate the loop-entry.
5864 // TODO-CQ: Couldn't we make this true if it's not?
5865 if (!fgDominate(head, lbeg))
5870 // if lbeg is the start of a new try block then we won't be able to hoist
5871 if (!BasicBlock::sameTryRegion(head, lbeg))
5876 // We don't bother hoisting when inside of a catch block
5877 if ((lbeg->bbCatchTyp != BBCT_NONE) && (lbeg->bbCatchTyp != BBCT_FINALLY))
// All screens passed: mark the loop as an approved hoisting target
// (optPerformHoistExpr asserts this flag).
5882 pLoopDsc->lpFlags |= LPFLG_HOISTABLE;
5884 unsigned begn = lbeg->bbNum;
5885 unsigned endn = tail->bbNum;
5887 // Ensure the per-loop sets/tables are empty.
5888 hoistCtxt->m_curLoopVnInvariantCache.RemoveAll();
5893 printf("optHoistLoopCode for loop L%02u <BB%02u..BB%02u>:\n", lnum, begn, endn);
5894 printf("  Loop body %s a call\n", pLoopDsc->lpContainsCall ? "contains" : "does not contain");
// loopVars = variables both live across the loop boundary (in/out) and
// used or defined inside the loop.
5898 VARSET_TP VARSET_INIT_NOCOPY(loopVars, VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, pLoopDsc->lpVarUseDef));
5900 pLoopDsc->lpVarInOutCount = VarSetOps::Count(this, pLoopDsc->lpVarInOut);
5901 pLoopDsc->lpLoopVarCount = VarSetOps::Count(this, loopVars);
5902 pLoopDsc->lpHoistedExprCount = 0;
5904 #ifndef _TARGET_64BIT_
5905 unsigned longVarsCount = VarSetOps::Count(this, lvaLongVars);
5907 if (longVarsCount > 0)
5909 // Since 64-bit variables take up two registers on 32-bit targets, we increase
5910 // the Counts such that each TYP_LONG variable counts twice.
5912 VARSET_TP VARSET_INIT_NOCOPY(loopLongVars, VarSetOps::Intersection(this, loopVars, lvaLongVars));
5913 VARSET_TP VARSET_INIT_NOCOPY(inOutLongVars, VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, lvaLongVars));
5918 printf("\n  LONGVARS(%d)=", VarSetOps::Count(this, lvaLongVars));
5919 lvaDispVarSet(lvaLongVars);
5922 pLoopDsc->lpLoopVarCount += VarSetOps::Count(this, loopLongVars);
5923 pLoopDsc->lpVarInOutCount += VarSetOps::Count(this, inOutLongVars);
5925 #endif // !_TARGET_64BIT_
5930 printf("\n  USEDEF  (%d)=", VarSetOps::Count(this, pLoopDsc->lpVarUseDef));
5931 lvaDispVarSet(pLoopDsc->lpVarUseDef);
5933 printf("\n  INOUT   (%d)=", pLoopDsc->lpVarInOutCount);
5934 lvaDispVarSet(pLoopDsc->lpVarInOut);
5936 printf("\n  LOOPVARS(%d)=", pLoopDsc->lpLoopVarCount);
5937 lvaDispVarSet(loopVars);
// Split the counts into integer vs floating-point populations; the FP counts
// are subtracted out of the integer counts below.
5942 unsigned floatVarsCount = VarSetOps::Count(this, lvaFloatVars);
5944 if (floatVarsCount > 0)
5946 VARSET_TP VARSET_INIT_NOCOPY(loopFPVars, VarSetOps::Intersection(this, loopVars, lvaFloatVars));
5947 VARSET_TP VARSET_INIT_NOCOPY(inOutFPVars, VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, lvaFloatVars));
5949 pLoopDsc->lpLoopVarFPCount = VarSetOps::Count(this, loopFPVars);
5950 pLoopDsc->lpVarInOutFPCount = VarSetOps::Count(this, inOutFPVars);
5951 pLoopDsc->lpHoistedFPExprCount = 0;
5953 pLoopDsc->lpLoopVarCount -= pLoopDsc->lpLoopVarFPCount;
5954 pLoopDsc->lpVarInOutCount -= pLoopDsc->lpVarInOutFPCount;
5959 printf("  INOUT-FP(%d)=", pLoopDsc->lpVarInOutFPCount);
5960 lvaDispVarSet(inOutFPVars);
5962 printf("\n  LOOPV-FP(%d)=", pLoopDsc->lpLoopVarFPCount);
5963 lvaDispVarSet(loopFPVars);
5967 else // (floatVarsCount == 0)
5969 pLoopDsc->lpLoopVarFPCount = 0;
5970 pLoopDsc->lpVarInOutFPCount = 0;
5971 pLoopDsc->lpHoistedFPExprCount = 0;
5974 // Find the set of definitely-executed blocks.
5975 // Ideally, the definitely-executed blocks are the ones that post-dominate the entry block.
5976 // Until we have post-dominators, we'll special-case for single-exit blocks.
5977 ExpandArrayStack<BasicBlock*> defExec(getAllocatorLoopHoist());
5978 if (pLoopDsc->lpFlags & LPFLG_ONE_EXIT)
5980 assert(pLoopDsc->lpExit != nullptr);
5981 BasicBlock* cur = pLoopDsc->lpExit;
5982 // Push dominators, until we reach "entry" or exit the loop.
5983 while (cur != nullptr && pLoopDsc->lpContains(cur) && cur != pLoopDsc->lpEntry)
5988 // If we didn't reach the entry block, give up and *just* push the entry block.
5989 if (cur != pLoopDsc->lpEntry)
5993 defExec.Push(pLoopDsc->lpEntry);
5995 else // More than one exit
5997 // We'll assume that only the entry block is definitely executed.
5998 // We could in the future do better.
5999 defExec.Push(pLoopDsc->lpEntry);
// Stack pops reverse the dominator-chain push order above, so each block is
// processed before the blocks it dominates.
6002 while (defExec.Size() > 0)
6004 // Consider in reverse order: dominator before dominatee.
6005 BasicBlock* blk = defExec.Pop();
6006 optHoistLoopExprsForBlock(blk, lnum, hoistCtxt);
6010 // Hoist any expressions in "blk" that are invariant in loop "lnum" outside of "blk" and into a PreHead for loop "lnum".
//
// Skips blocks whose weight is below BB_UNITY_WEIGHT/10 (hoisting from rarely
// executed blocks doesn't pay). For each statement (phi definitions excluded)
// it recursively analyzes the tree via optHoistLoopExprsForTree, which hoists
// hoistable subtrees; a top-level tree flagged hoistable is then offered to
// optHoistCandidate.
6011 void Compiler::optHoistLoopExprsForBlock(BasicBlock* blk, unsigned lnum, LoopHoistContext* hoistCtxt)
6013 LoopDsc* pLoopDsc = &optLoopTable[lnum];
// True only while processing the loop entry block and before the first
// potential global side effect; optHoistLoopExprsForTree clears it.
6014 bool firstBlockAndBeforeSideEffect = (blk == pLoopDsc->lpEntry);
6015 unsigned blkWeight = blk->getBBWeight(this);
6020 printf("    optHoistLoopExprsForBlock BB%02u (weight=%6s) of loop L%02u <BB%02u..BB%02u>, firstBlock is %s\n",
6021 blk->bbNum, refCntWtd2str(blkWeight), lnum, pLoopDsc->lpFirst->bbNum, pLoopDsc->lpBottom->bbNum,
6022 firstBlockAndBeforeSideEffect ? "true" : "false");
6023 if (blkWeight < (BB_UNITY_WEIGHT / 10))
6025 printf("      block weight is too small to perform hoisting.\n");
// Same threshold repeated outside the (elided) DEBUG dump: bail out of
// low-weight blocks entirely.
6030 if (blkWeight < (BB_UNITY_WEIGHT / 10))
6032 // Block weight is too small to perform hoisting.
6036 for (GenTreeStmt* stmt = blk->FirstNonPhiDef(); stmt; stmt = stmt->gtNextStmt)
6038 GenTreePtr stmtTree = stmt->gtStmtExpr;
// Recursive walk hoists hoistable subtrees and reports (via 'hoistable',
// declared on an elided line) whether the whole statement tree is hoistable.
6040 (void)optHoistLoopExprsForTree(stmtTree, lnum, hoistCtxt, &firstBlockAndBeforeSideEffect, &hoistable);
6043 // we will try to hoist the top-level stmtTree
6044 optHoistCandidate(stmtTree, lnum, hoistCtxt);
// optIsProfitableToHoistableTree: register-pressure heuristic deciding whether
// hoisting 'tree' out of loop 'lnum' is worthwhile.
//
// Estimates the registers available to hold a hoisted value (callee-saved,
// plus callee-trash when the loop contains no call; FP vs integer populations
// are handled separately, and TYP_LONG halves the count on 32-bit targets),
// then compares that against the loop's variable counts computed in
// optHoistThisLoop. Under high pressure, only trees whose execution cost
// clears the stated gtCostEx thresholds are approved.
6049 bool Compiler::optIsProfitableToHoistableTree(GenTreePtr tree, unsigned lnum)
6051 LoopDsc* pLoopDsc = &optLoopTable[lnum];
6053 bool loopContainsCall = pLoopDsc->lpContainsCall;
6056 int hoistedExprCount;
// Select the FP or integer population based on the tree's type.
6060 if (varTypeIsFloating(tree->TypeGet()))
6062 hoistedExprCount = pLoopDsc->lpHoistedFPExprCount;
6063 loopVarCount = pLoopDsc->lpLoopVarFPCount;
6064 varInOutCount = pLoopDsc->lpVarInOutFPCount;
6066 availRegCount = CNT_CALLEE_SAVED_FLOAT;
6067 if (!loopContainsCall)
6069 availRegCount += CNT_CALLEE_TRASH_FLOAT - 1;
6072 // For ARM each double takes two FP registers
6073 // For now on ARM we won't track singles/doubles
6074 // and instead just assume that we always have doubles.
// Integer case: one callee-saved register is held back (the "- 1").
6081 hoistedExprCount = pLoopDsc->lpHoistedExprCount;
6082 loopVarCount = pLoopDsc->lpLoopVarCount;
6083 varInOutCount = pLoopDsc->lpVarInOutCount;
6085 availRegCount = CNT_CALLEE_SAVED - 1;
6086 if (!loopContainsCall)
6088 availRegCount += CNT_CALLEE_TRASH - 1;
6090 #ifndef _TARGET_64BIT_
6091 // For our 32-bit targets Long types take two registers.
6092 if (varTypeIsLong(tree->TypeGet()))
6094 availRegCount = (availRegCount + 1) / 2;
6099 // decrement the availRegCount by the count of expression that we have already hoisted.
6100 availRegCount -= hoistedExprCount;
6102 // the variables that are read/written inside the loop should
6103 // always be a subset of the InOut variables for the loop
6104 assert(loopVarCount <= varInOutCount);
6106 // When loopVarCount >= availRegCount we believe that all of the
6107 // available registers will get used to hold LclVars inside the loop.
6108 // This pessimistically assumes that each loopVar has a conflicting
6109 // lifetime with every other loopVar.
6110 // For this case we will hoist the expression only if is profitable
6111 // to place it in a stack home location (gtCostEx >= 2*IND_COST_EX)
6112 // as we believe it will be placed in the stack or one of the other
6113 // loopVars will be spilled into the stack
6115 if (loopVarCount >= availRegCount)
6117 // Don't hoist expressions that are not heavy: tree->gtCostEx < (2*IND_COST_EX)
6118 if (tree->gtCostEx < (2 * IND_COST_EX))
6124 // When varInOutCount < availRegCount we are know that there are
6125 // some available register(s) when we enter the loop body.
6126 // When varInOutCount == availRegCount there often will be a register
6127 // available when we enter the loop body, since a loop often defines a
6128 // LclVar on exit or there is often at least one LclVar that is worth
6129 // spilling to the stack to make way for this hoisted expression.
6130 // So we are willing hoist an expression with gtCostEx == MIN_CSE_COST
6132 if (varInOutCount > availRegCount)
6134 // Don't hoist expressions that barely meet CSE cost requirements: tree->gtCostEx == MIN_CSE_COST
6135 if (tree->gtCostEx <= MIN_CSE_COST + 1)
6145 // This function returns true if 'tree' is a loop invariant expression.
6146 // It also sets '*pHoistable' to true if 'tree' can be hoisted into a loop PreHeader block
//
// Recursive post-order walk: children are processed first, and their
// hoistability is recorded so that if 'tree' itself turns out not to be
// hoistable, its hoistable children can be hoisted individually at the end.
// Also maintains '*pFirstBlockAndBeforeSideEffect', clearing it at the first
// potential global memory side effect (non-helper/heap-mutating calls, stores
// through GTF_GLOB_REF locations, copy-block ops with global targets).
6148 bool Compiler::optHoistLoopExprsForTree(
6149 GenTreePtr tree, unsigned lnum, LoopHoistContext* hoistCtxt, bool* pFirstBlockAndBeforeSideEffect, bool* pHoistable)
6151 // First do the children.
6152 // We must keep track of whether each child node was hoistable or not
6154 unsigned nChildren = tree->NumChildren();
6155 bool childrenHoistable[GenTree::MAX_CHILDREN];
6157 // Initialize the array elements for childrenHoistable[] to false
6158 for (unsigned i = 0; i < nChildren; i++)
6160 childrenHoistable[i] = false;
6163 bool treeIsInvariant = true;
6164 for (unsigned childNum = 0; childNum < nChildren; childNum++)
6166 if (!optHoistLoopExprsForTree(tree->GetChild(childNum), lnum, hoistCtxt, pFirstBlockAndBeforeSideEffect,
6167 &childrenHoistable[childNum]))
6169 treeIsInvariant = false;
6173 // If all the children of "tree" are hoistable, then "tree" itself can be hoisted
6175 bool treeIsHoistable = treeIsInvariant;
6177 // But we must see if anything else prevents "tree" from being hoisted.
6179 if (treeIsInvariant)
6181 // Tree must be a suitable CSE candidate for us to be able to hoist it.
6182 treeIsHoistable = optIsCSEcandidate(tree);
6184 // If it's a call, it must be a helper call, and be pure.
6185 // Further, if it may run a cctor, it must be labeled as "Hoistable"
6186 // (meaning it won't run a cctor because the class is not precise-init).
6187 if (treeIsHoistable && tree->OperGet() == GT_CALL)
6189 GenTreeCall* call = tree->AsCall();
6190 if (call->gtCallType != CT_HELPER)
6192 treeIsHoistable = false;
6196 CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
6197 if (!s_helperCallProperties.IsPure(helpFunc))
6199 treeIsHoistable = false;
6201 else if (s_helperCallProperties.MayRunCctor(helpFunc) && (call->gtFlags & GTF_CALL_HOISTABLE) == 0)
6203 treeIsHoistable = false;
6208 if (treeIsHoistable)
6210 if (!(*pFirstBlockAndBeforeSideEffect))
6212 // For now, we give up on an expression that might raise an exception if it is after the
6213 // first possible global side effect (and we assume we're after that if we're not in the first block).
6214 // TODO-CQ: this is when we might do loop cloning.
6216 if ((tree->gtFlags & GTF_EXCEPT) != 0)
6218 treeIsHoistable = false;
6221 // Currently we must give up on reads from static variables (even if we are in the first block).
6223 if (tree->OperGet() == GT_CLS_VAR)
6225 // TODO-CQ: test that fails if we hoist GT_CLS_VAR: JIT\Directed\Languages\ComponentPascal\pi_r.exe
6227 treeIsHoistable = false;
// VN-level invariance check on the liberal VN; the receiving assignment of
// this result is elided from the listing (presumably into 'treeIsInvariant').
6231 // Is the value of the whole tree loop invariant?
6233 optVNIsLoopInvariant(tree->gtVNPair.GetLiberal(), lnum, &hoistCtxt->m_curLoopVnInvariantCache);
6235 // Is the value of the whole tree loop invariant?
6236 if (!treeIsInvariant)
6238 treeIsHoistable = false;
6242 // Check if we need to set '*pFirstBlockAndBeforeSideEffect' to false.
6243 // If we encounter a tree with a call in it
6244 // or if we see an assignment to global we set it to false.
6246 // If we are already set to false then we can skip these checks
6248 if (*pFirstBlockAndBeforeSideEffect)
6250 // For this purpose, we only care about memory side effects.  We assume that expressions will
6251 // be hoisted so that they are evaluated in the same order as they would have been in the loop,
6252 // and therefore throw exceptions in the same order.  (So we don't use GTF_GLOBALLY_VISIBLE_SIDE_EFFECTS
6253 // here, since that includes exceptions.)
6256 // If it's a call, it must be a helper call that does not mutate the heap.
6257 // Further, if it may run a cctor, it must be labeled as "Hoistable"
6258 // (meaning it won't run a cctor because the class is not precise-init).
6259 GenTreeCall* call = tree->AsCall();
6260 if (call->gtCallType != CT_HELPER)
6262 *pFirstBlockAndBeforeSideEffect = false;
6266 CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
6267 if (s_helperCallProperties.MutatesHeap(helpFunc))
6269 *pFirstBlockAndBeforeSideEffect = false;
6271 else if (s_helperCallProperties.MayRunCctor(helpFunc) && (call->gtFlags & GTF_CALL_HOISTABLE) == 0)
6273 *pFirstBlockAndBeforeSideEffect = false;
6277 else if (tree->OperIsAssignment())
6279 // If the LHS of the assignment has a global reference, then assume it's a global side effect.
6280 GenTreePtr lhs = tree->gtOp.gtOp1;
6281 if (lhs->gtFlags & GTF_GLOB_REF)
6283 *pFirstBlockAndBeforeSideEffect = false;
6286 else if (tree->OperIsCopyBlkOp())
6288 GenTreePtr args = tree->gtOp.gtOp1;
6289 assert(args->OperGet() == GT_LIST);
6290 if (args->gtOp.gtOp1->gtFlags & GTF_GLOB_REF)
6292 *pFirstBlockAndBeforeSideEffect = false;
6297 // If this 'tree' is hoistable then we return and the caller will
6298 // decide to hoist it as part of larger hoistable expression.
6300 if (!treeIsHoistable)
6302 // We are not hoistable so we will now hoist any hoistable children.
6304 for (unsigned childNum = 0; childNum < nChildren; childNum++)
6306 if (childrenHoistable[childNum])
6308 // We can't hoist the LHS of an assignment, isn't a real use.
6309 if (childNum == 0 && (tree->OperIsAssignment()))
6314 GenTreePtr child = tree->GetChild(childNum);
6316 // We try to hoist this 'child' tree
6317 optHoistCandidate(child, lnum, hoistCtxt);
6322 *pHoistable = treeIsHoistable;
6323 return treeIsInvariant;
// optHoistCandidate: final gatekeeper for hoisting 'tree' out of loop 'lnum'.
//
// Applies the remaining screens — valid loop index, loop marked
// LPFLG_HOISTABLE, tree valid at the loop head (SSA defs outside the loop),
// profitability, and duplicate suppression by liberal VN against both the
// parent-loop and current-loop hoisted sets — then performs the hoist and
// updates the per-loop hoisted-expression counters.
6326 void Compiler::optHoistCandidate(GenTreePtr tree, unsigned lnum, LoopHoistContext* hoistCtxt)
6328 if (lnum == BasicBlock::NOT_IN_LOOP)
6330 // The hoisted expression isn't valid at any loop head so don't hoist this expression.
6334 // The outer loop also must be suitable for hoisting...
6335 if ((optLoopTable[lnum].lpFlags & LPFLG_HOISTABLE) == 0)
6340 // If the hoisted expression isn't valid at this loop head then break
6341 if (!optTreeIsValidAtLoopHead(tree, lnum))
6346 // It must pass the hoistable profitablity tests for this loop level
6347 if (!optIsProfitableToHoistableTree(tree, lnum))
// Duplicate suppression is keyed on the tree's liberal value number.
6353 if (hoistCtxt->m_hoistedInParentLoops.Lookup(tree->gtVNPair.GetLiberal(), &b))
6355 // already hoisted in a parent loop, so don't hoist this expression.
6359 if (hoistCtxt->GetHoistedInCurLoop(this)->Lookup(tree->gtVNPair.GetLiberal(), &b))
6361 // already hoisted this expression in the current loop, so don't hoist this expression.
6365 // Expression can be hoisted
6366 optPerformHoistExpr(tree, lnum);
6368 // Increment lpHoistedExprCount or lpHoistedFPExprCount
6369 if (!varTypeIsFloating(tree->TypeGet()))
6371 optLoopTable[lnum].lpHoistedExprCount++;
6372 #ifndef _TARGET_64BIT_
6373 // For our 32-bit targets Long types take two registers.
6374 if (varTypeIsLong(tree->TypeGet()))
6376 optLoopTable[lnum].lpHoistedExprCount++;
6380 else // Floating point expr hoisted
6382 optLoopTable[lnum].lpHoistedFPExprCount++;
6385 // Record the hoisted expression in hoistCtxt
6386 hoistCtxt->GetHoistedInCurLoop(this)->Set(tree->gtVNPair.GetLiberal(), true);
// optVNIsLoopInvariant: is the value number 'vn' invariant with respect to
// loop 'lnum'?
//
// Constants are trivially invariant. Phi definitions are invariant iff the
// defining block lies outside the loop; other VN functions are invariant iff
// all their (VN) arguments are. Results are memoized in
// 'loopVnInvariantCache' to bound the recursion cost.
//
// NOTE(listing): elided listing — the declarations of 'res'/'funcApp', early
// returns and braces between the visible statements are not shown.
6389 bool Compiler::optVNIsLoopInvariant(ValueNum vn, unsigned lnum, VNToBoolMap* loopVnInvariantCache)
6391 // If it is not a VN, is not loop-invariant.
6392 if (vn == ValueNumStore::NoVN)
6397 // We'll always short-circuit constants.
6398 if (vnStore->IsVNConstant(vn) || vn == vnStore->VNForVoid())
6403 // If we've done this query previously, don't repeat.
6404 bool previousRes = false;
6405 if (loopVnInvariantCache->Lookup(vn, &previousRes))
6412 if (vnStore->GetVNFunc(vn, &funcApp))
6414 if (funcApp.m_func == VNF_PhiDef)
6416 // First, make sure it's a "proper" phi -- the definition is a Phi application.
6417 VNFuncApp phiDefValFuncApp;
6418 if (!vnStore->GetVNFunc(funcApp.m_args[2], &phiDefValFuncApp) || phiDefValFuncApp.m_func != VNF_Phi)
6420 // It's not *really* a definition, rather a pass-through of some other VN.
6421 // (This could occur, say if both sides of an if-then-else diamond made the
6422 // same assignment to a variable.)
6423 res = optVNIsLoopInvariant(funcApp.m_args[2], lnum, loopVnInvariantCache);
6427 // Is the definition within the loop?  If so, is not loop-invariant.
// PhiDef args encode (lclNum, ssaNum, value) — recover the SSA def's block.
6428 unsigned lclNum = funcApp.m_args[0];
6429 unsigned ssaNum = funcApp.m_args[1];
6430 LclSsaVarDsc* ssaDef = lvaTable[lclNum].GetPerSsaData(ssaNum);
6431 res = !optLoopContains(lnum, ssaDef->m_defLoc.m_blk->bbNatLoopNum);
6434 else if (funcApp.m_func == VNF_PhiHeapDef)
// Heap phi defs carry the defining block as a constant-encoded pointer.
6436 BasicBlock* defnBlk = reinterpret_cast<BasicBlock*>(vnStore->ConstantValue<ssize_t>(funcApp.m_args[0]));
6437 res = !optLoopContains(lnum, defnBlk->bbNatLoopNum);
// General VN function: invariant only if every argument VN is invariant.
6441 for (unsigned i = 0; i < funcApp.m_arity; i++)
6443 // TODO-CQ: We need to either make sure that *all* VN functions
6444 // always take VN args, or else have a list of arg positions to exempt, as implicitly
6446 if (!optVNIsLoopInvariant(funcApp.m_args[i], lnum, loopVnInvariantCache))
6456 // Non-function "new, unique" VN's may be annotated with the loop nest where
6457 // their definition occurs.
6458 BasicBlock::loopNumber vnLoopNum = vnStore->LoopOfVN(vn);
// MAX_LOOP_NUM means no loop annotation is recorded for this VN.
6460 if (vnLoopNum == MAX_LOOP_NUM)
6466 res = !optLoopContains(lnum, vnLoopNum);
// Memoize before returning so repeated queries on this VN are O(1).
6470 loopVnInvariantCache->Set(vn, res);
// optTreeIsValidAtLoopHead: would 'tree' compute the same value if evaluated
// at the head of loop 'lnum'?
//
// Locals qualify when they are SSA-tracked and their SSA definition lies
// outside the loop; constants always qualify; any other node qualifies only
// if all of its children do (checked recursively).
6474 bool Compiler::optTreeIsValidAtLoopHead(GenTreePtr tree, unsigned lnum)
6476 if (tree->OperIsLocal())
6478 GenTreeLclVarCommon* lclVar = tree->AsLclVarCommon();
6479 unsigned lclNum = lclVar->gtLclNum;
6481 // The lvlVar must be have an Ssa tracked lifetime
6482 if (fgExcludeFromSsa(lclNum))
6487 // If the loop does not contains the SSA def we can hoist it.
6488 if (!optLoopTable[lnum].lpContains(lvaTable[lclNum].GetPerSsaData(lclVar->GetSsaNum())->m_defLoc.m_blk))
6493 else if (tree->OperIsConst())
6497 else // If every one of the children nodes are valid at this Loop's Head.
6499 unsigned nChildren = tree->NumChildren();
6500 for (unsigned childNum = 0; childNum < nChildren; childNum++)
6502 if (!optTreeIsValidAtLoopHead(tree->GetChild(childNum), lnum))
6512 /*****************************************************************************
6514  * Creates a pre-header block for the given loop - a preheader is a BBJ_NONE
6515  * header. The pre-header will replace the current lpHead in the loop table.
6516  * The loop has to be a do-while loop. Thus, all blocks dominated by lpHead
6517  * will also be dominated by the loop-top, lpHead->bbNext.
 *
 * After creation: lpHead points to the new pre-header (LPFLG_HAS_PREHEAD is
 * set), every former predecessor of 'top' not dominated by 'top' is rewired
 * to target the pre-header, phi args that named 'head' as their pred now name
 * the pre-header, the EH region is extended over it, and its block weight is
 * estimated (from edge weights when valid profile data is available). Nested
 * do-while loops sharing the same head/entry are updated to share this
 * pre-header.
 *
 * NOTE(listing): elided listing - braces, returns, case labels and DEBUG
 * guards between the visible statements are not shown.
 */
6521 void Compiler::fgCreateLoopPreHeader(unsigned lnum)
6523 LoopDsc* pLoopDsc = &optLoopTable[lnum];
6525 /* This loop has to be a "do-while" loop */
6527 assert(pLoopDsc->lpFlags & LPFLG_DO_WHILE);
6529 /* Have we already created a loop-preheader block? */
6531 if (pLoopDsc->lpFlags & LPFLG_HAS_PREHEAD)
6536 BasicBlock* head = pLoopDsc->lpHead;
6537 BasicBlock* top = pLoopDsc->lpTop;
6538 BasicBlock* entry = pLoopDsc->lpEntry;
6540 // if 'entry' and 'head' are in different try regions then we won't be able to hoist
6541 if (!BasicBlock::sameTryRegion(head, entry))
6546 // Ensure that lpHead always dominates lpEntry
6548 noway_assert(fgDominate(head, entry));
6550 /* Get hold of the first block of the loop body */
6552 assert(top == entry);
6554 /* Allocate a new basic block */
6556 BasicBlock* preHead = bbNewBasicBlock(BBJ_NONE);
6557 preHead->bbFlags |= BBF_INTERNAL | BBF_LOOP_PREHEADER;
6559 // Must set IL code offset
6560 preHead->bbCodeOffs = top->bbCodeOffs;
6562 // Set the default value of the preHead weight in case we don't have
6563 // valid profile data and since this blocks weight is just an estimate
6564 // we clear any BBF_PROF_WEIGHT flag that we may have picked up from head.
6566 preHead->inheritWeight(head);
6567 preHead->bbFlags &= ~BBF_PROF_WEIGHT;
6572 printf("\nCreated PreHeader (BB%02u) for loop L%02u (BB%02u - BB%02u), with weight = %s\n", preHead->bbNum,
6573 lnum, top->bbNum, pLoopDsc->lpBottom->bbNum, refCntWtd2str(preHead->getBBWeight(this)));
6577 // The preheader block is part of the containing loop (if any).
6578 preHead->bbNatLoopNum = pLoopDsc->lpParent;
// With valid profile data and a conditional head, refine the pre-header
// weight from the taken/not-taken split of 'head'.
6580 if (fgIsUsingProfileWeights() && (head->bbJumpKind == BBJ_COND))
6582 if ((head->bbWeight == 0) || (head->bbNext->bbWeight == 0))
6584 preHead->bbWeight = 0;
6585 preHead->bbFlags |= BBF_RUN_RARELY;
6589 bool allValidProfileWeights = ((head->bbFlags & BBF_PROF_WEIGHT) != 0) &&
6590 ((head->bbJumpDest->bbFlags & BBF_PROF_WEIGHT) != 0) &&
6591 ((head->bbNext->bbFlags & BBF_PROF_WEIGHT) != 0);
6593 if (allValidProfileWeights)
6595 double loopEnteredCount;
6596 double loopSkippedCount;
// Prefer edge weights (midpoint of min/max) over raw block weights.
6598 if (fgHaveValidEdgeWeights)
6600 flowList* edgeToNext = fgGetPredForBlock(head->bbNext, head);
6601 flowList* edgeToJump = fgGetPredForBlock(head->bbJumpDest, head);
6602 noway_assert(edgeToNext != nullptr);
6603 noway_assert(edgeToJump != nullptr);
6606 ((double)edgeToNext->flEdgeWeightMin + (double)edgeToNext->flEdgeWeightMax) / 2.0;
6608 ((double)edgeToJump->flEdgeWeightMin + (double)edgeToJump->flEdgeWeightMax) / 2.0;
6612 loopEnteredCount = (double)head->bbNext->bbWeight;
6613 loopSkippedCount = (double)head->bbJumpDest->bbWeight;
6616 double loopTakenRatio = loopEnteredCount / (loopEnteredCount + loopSkippedCount);
6618 // Calculate a good approximation of the preHead's block weight
6619 unsigned preHeadWeight = (unsigned)(((double)head->bbWeight * loopTakenRatio) + 0.5);
6620 preHead->setBBWeight(max(preHeadWeight, 1));
6621 noway_assert(!preHead->isRunRarely());
6626 // Link in the preHead block.
6627 fgInsertBBbefore(top, preHead);
6629 // Ideally we would re-run SSA and VN if we optimized by doing loop hoisting.
6630 // However, that is too expensive at this point. Instead, we update the phi
6631 // node block references, if we created pre-header block due to hoisting.
6632 // This is sufficient because any definition participating in SSA that flowed
6633 // into the phi via the loop header block will now flow through the preheader
6634 // block from the header block.
6636 for (GenTreePtr stmt = top->bbTreeList; stmt; stmt = stmt->gtNext)
6638 GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
6639 if (tree->OperGet() != GT_ASG)
6643 GenTreePtr op2 = tree->gtGetOp2();
6644 if (op2->OperGet() != GT_PHI)
6648 GenTreeArgList* args = op2->gtGetOp1()->AsArgList();
6649 while (args != nullptr)
6651 GenTreePhiArg* phiArg = args->Current()->AsPhiArg();
// Retarget phi args whose predecessor was 'head' to the new pre-header.
6652 if (phiArg->gtPredBB == head)
6654 phiArg->gtPredBB = preHead;
6656 args = args->Rest();
6660 // The handler can't begin at the top of the loop.  If it did, it would be incorrect
6661 // to set the handler index on the pre header without updating the exception table.
6662 noway_assert(!top->hasHndIndex() || fgFirstBlockOfHandler(top) != top);
6664 // Update the EH table to make the hoisted block part of the loop's EH block.
6665 fgExtendEHRegionBefore(top);
6667 // TODO-CQ: set dominators for this block, to allow loop optimizations requiring them
6668 //     (e.g: hoisting expression in a loop with the same 'head' as this one)
6670 /* Update the loop entry */
6672 pLoopDsc->lpHead = preHead;
6673 pLoopDsc->lpFlags |= LPFLG_HAS_PREHEAD;
6675 /* The new block becomes the 'head' of the loop - update bbRefs and bbPreds
6676 All predecessors of 'beg', (which is the entry in the loop)
6677 now have to jump to 'preHead', unless they are dominated by 'head' */
6679 preHead->bbRefs = 0;
6680 fgAddRefPred(preHead, head);
6681 bool checkNestedLoops = false;
// Rewire each predecessor of 'top' according to its jump kind.
6683 for (flowList* pred = top->bbPreds; pred; pred = pred->flNext)
6685 BasicBlock* predBlock = pred->flBlock;
// Back-edges (preds dominated by 'top') keep targeting 'top'; a back-edge
// from a block other than this loop's bottom signals a nested loop sharing
// this entry — handled after the rewiring below.
6687 if (fgDominate(top, predBlock))
6689 // note: if 'top' dominates predBlock, 'head' dominates predBlock too
6690 //       (we know that 'head' dominates 'top'), but using 'top' instead of
6691 //       'head' in the test allows us to not enter here if 'predBlock == head'
6693 if (predBlock != pLoopDsc->lpBottom)
6695 noway_assert(predBlock != head);
6696 checkNestedLoops = true;
6701 switch (predBlock->bbJumpKind)
6704 noway_assert(predBlock == head);
6708 if (predBlock == head)
6710 noway_assert(predBlock->bbJumpDest != top);
6716 case BBJ_EHCATCHRET:
6717 noway_assert(predBlock->bbJumpDest == top);
6718 predBlock->bbJumpDest = preHead;
6719 preHead->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
6721 if (predBlock == head)
6723 // This is essentially the same case of predBlock being a BBJ_NONE. We may not be
6724 // able to make this a BBJ_NONE if it's an internal block (for example, a leave).
6725 // Just break, pred will be removed after switch.
6729 fgRemoveRefPred(top, predBlock);
6730 fgAddRefPred(preHead, predBlock);
// Switch predecessor: patch every jump-table entry that targeted 'top'.
6736 jumpCnt = predBlock->bbJumpSwt->bbsCount;
6737 BasicBlock** jumpTab;
6738 jumpTab = predBlock->bbJumpSwt->bbsDstTab;
6743 if ((*jumpTab) == top)
6745 (*jumpTab) = preHead;
6747 fgRemoveRefPred(top, predBlock);
6748 fgAddRefPred(preHead, predBlock);
6749 preHead->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
6751 } while (++jumpTab, --jumpCnt);
6754 noway_assert(!"Unexpected bbJumpKind");
// Finally, 'top' is entered from the pre-header (fall-through), not 'head'.
6759 noway_assert(!fgGetPredForBlock(top, preHead));
6760 fgRemoveRefPred(top, head);
6761 fgAddRefPred(top, preHead);
6764 If we found at least one back-edge in the flowgraph pointing to the top/entry of the loop
6765 (other than the back-edge of the loop we are considering) then we likely have nested
6766 do-while loops with the same entry block and inserting the preheader block changes the head
6767 of all the nested loops. Now we will update this piece of information in the loop table, and
6768 mark all nested loops as having a preheader (the preheader block can be shared among all nested
6769 do-while loops with the same entry block).
6771 if (checkNestedLoops)
6773 for (unsigned l = 0; l < optLoopCount; l++)
6775 if (optLoopTable[l].lpHead == head)
6777 noway_assert(l != lnum); // pLoopDsc->lpHead was already changed from 'head' to 'preHead'
6778 noway_assert(optLoopTable[l].lpEntry == top);
6779 optUpdateLoopHead(l, optLoopTable[l].lpHead, preHead);
6780 optLoopTable[l].lpFlags |= LPFLG_HAS_PREHEAD;
6784 printf("Same PreHeader (BB%02u) can be used for loop L%02u (BB%02u - BB%02u)\n\n", preHead->bbNum,
6785 l, top->bbNum, optLoopTable[l].lpBottom->bbNum);
// optBlockIsLoopEntry: Check whether 'blk' is the entry block of some loop in
// the loop table, walking outward from the block's innermost natural loop
// (bbNatLoopNum) along the lpParent links and skipping removed loops.
// NOTE(review): the lines that write *pLnum and produce the return values are
// elided in this view of the file — presumably *pLnum receives the matching
// loop index when lpEntry == blk; confirm against the full source.
bool Compiler::optBlockIsLoopEntry(BasicBlock* blk, unsigned* pLnum)
    for (unsigned lnum = blk->bbNatLoopNum; lnum != BasicBlock::NOT_IN_LOOP; lnum = optLoopTable[lnum].lpParent)
        // A removed loop no longer has a meaningful entry block.
        if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED)
        if (optLoopTable[lnum].lpEntry == blk)
// optComputeLoopSideEffects: Compute side-effect summaries for all loops in the
// loop table. First resets each loop's lpVarInOut/lpVarUseDef sets and its
// lpContainsCall flag, then computes effects for each outermost, non-removed
// loop nest via optComputeLoopNestSideEffects. Finally builds lvaFloatVars
// (and, on 32-bit targets, lvaLongVars) from the tracked-variable table.
// NOTE(review): some structural lines (braces, 'continue', '#endif') are
// elided in this view of the file.
void Compiler::optComputeLoopSideEffects()
    // Pass 1: reset per-loop summary state.
    for (lnum = 0; lnum < optLoopCount; lnum++)
        VarSetOps::AssignNoCopy(this, optLoopTable[lnum].lpVarInOut, VarSetOps::MakeEmpty(this));
        VarSetOps::AssignNoCopy(this, optLoopTable[lnum].lpVarUseDef, VarSetOps::MakeEmpty(this));
        optLoopTable[lnum].lpContainsCall = false;
    // Pass 2: only outermost loops are walked here; nested loops are covered
    // transitively because the nest walk visits every block in the outer loop.
    for (lnum = 0; lnum < optLoopCount; lnum++)
        if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED)
        if (optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP)
        { // Is outermost...
            optComputeLoopNestSideEffects(lnum);
    // Collect the tracked floating-point (and, on 32-bit, long) variables.
    VarSetOps::AssignNoCopy(this, lvaFloatVars, VarSetOps::MakeEmpty(this));
#ifndef _TARGET_64BIT_
    VarSetOps::AssignNoCopy(this, lvaLongVars, VarSetOps::MakeEmpty(this));
    for (unsigned i = 0; i < lvaCount; i++)
        LclVarDsc* varDsc = &lvaTable[i];
        if (varDsc->lvTracked)
            if (varTypeIsFloating(varDsc->lvType))
                VarSetOps::AddElemD(this, lvaFloatVars, varDsc->lvVarIndex);
#ifndef _TARGET_64BIT_
            else if (varTypeIsLong(varDsc->lvType))
                VarSetOps::AddElemD(this, lvaLongVars, varDsc->lvVarIndex);
6857 void Compiler::optComputeLoopNestSideEffects(unsigned lnum)
6859 assert(optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP); // Requires: lnum is outermost.
6860 BasicBlock* botNext = optLoopTable[lnum].lpBottom->bbNext;
6861 for (BasicBlock* bbInLoop = optLoopTable[lnum].lpFirst; bbInLoop != botNext; bbInLoop = bbInLoop->bbNext)
6863 optComputeLoopSideEffectsOfBlock(bbInLoop);
// optComputeLoopSideEffectsOfBlock: Accumulate the side effects of all trees in
// 'blk' into the descriptor of its innermost natural loop and every enclosing
// loop: variable liveness, contains-call, modified fields, modified array
// element types, and finally heap-havoc when a call or store with arbitrary
// heap effects is found.
// NOTE(review): several structural lines (braces, 'continue', some 'case'
// labels, and declarations such as 'arrInfo'/'argVN'/'funcApp'/'isEntire')
// are elided in this view of the file.
void Compiler::optComputeLoopSideEffectsOfBlock(BasicBlock* blk)
    unsigned mostNestedLoop = blk->bbNatLoopNum;
    assert(mostNestedLoop != BasicBlock::NOT_IN_LOOP);
    // Liveness first: every containing loop sees this block's in/out/use/def.
    AddVariableLivenessAllContainingLoops(mostNestedLoop, blk);
    bool heapHavoc = false; // True ==> there's a call or a memory store that has arbitrary heap effects.
    // Now iterate over the remaining statements, and their trees.
    for (GenTreePtr stmts = blk->FirstNonPhiDef(); (stmts != nullptr); stmts = stmts->gtNext)
        for (GenTreePtr tree = stmts->gtStmt.gtStmtList; (tree != nullptr); tree = tree->gtNext)
            genTreeOps oper = tree->OperGet();
            // Even after we set heapHavoc we still may want to know if a loop contains calls
            if (oper == GT_CALL)
                // Record that this loop contains a call
                AddContainsCallAllContainingLoops(mostNestedLoop);
            // If we just set lpContainsCall or it was previously set
            if (optLoopTable[mostNestedLoop].lpContainsCall)
                // We can early exit after both heapHavoc and lpContainsCall are both set to true.
            // We are just looking for GT_CALL nodes after heapHavoc was set.
            // otherwise heapHavoc is not set
            // This body is a distillation of the heap-side effect code of value numbering.
            // We also do a very limited analysis of byref PtrTo values, to cover some cases
            // that the compiler creates.
            if (GenTree::OperIsAssignment(oper))
                GenTreePtr lhs = tree->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true);
                if (lhs->OperGet() == GT_IND)
                    GenTreePtr arg = lhs->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true);
                    FieldSeqNode* fldSeqArrElem = nullptr;
                    // Volatile stores are treated conservatively (handler elided here).
                    if ((tree->gtFlags & GTF_IND_VOLATILE) != 0)
                    if (arg->TypeGet() == TYP_BYREF && arg->OperGet() == GT_LCL_VAR)
                        // If it's a local byref for which we recorded a value number, use that...
                        GenTreeLclVar* argLcl = arg->AsLclVar();
                        if (!fgExcludeFromSsa(argLcl->GetLclNum()))
                            lvaTable[argLcl->GetLclNum()].GetPerSsaData(argLcl->GetSsaNum())->m_vnPair.GetLiberal();
                            if (argVN != ValueNumStore::NoVN && vnStore->GetVNFunc(argVN, &funcApp) &&
                                funcApp.m_func == VNF_PtrToArrElem)
                                assert(vnStore->IsVNHandle(funcApp.m_args[0]));
                                CORINFO_CLASS_HANDLE elemType =
                                    CORINFO_CLASS_HANDLE(vnStore->ConstantValue<size_t>(funcApp.m_args[0]));
                                AddModifiedElemTypeAllContainingLoops(mostNestedLoop, elemType);
                                // Don't set heapHavoc below.
                    // Is the LHS an array index expression?
                    else if (lhs->ParseArrayElemForm(this, &arrInfo, &fldSeqArrElem))
                        // We actually ignore "fldSeq" -- any modification to an S[], at any
                        // field of "S", will lose all information about the array type.
                        CORINFO_CLASS_HANDLE elemTypeEq = EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType);
                        AddModifiedElemTypeAllContainingLoops(mostNestedLoop, elemTypeEq);
                        // We are only interested in IsFieldAddr()'s fldSeq out parameter.
                        GenTreePtr obj = nullptr; // unused
                        GenTreePtr staticOffset = nullptr; // unused
                        FieldSeqNode* fldSeq = nullptr;
                        if (arg->IsFieldAddr(this, &obj, &staticOffset, &fldSeq) &&
                            (fldSeq != FieldSeqStore::NotAField()))
                            // Get the first (object) field from field seq. Heap[field] will yield the "field map".
                            assert(fldSeq != nullptr);
                            if (fldSeq->IsFirstElemFieldSeq())
                                fldSeq = fldSeq->m_next;
                                assert(fldSeq != nullptr);
                            AddModifiedFieldAllContainingLoops(mostNestedLoop, fldSeq->m_fieldHnd);
                else if (lhs->OperIsBlk())
                    GenTreeLclVarCommon* lclVarTree;
                    if (!tree->DefinesLocal(this, &lclVarTree, &isEntire))
                        // For now, assume arbitrary side effects on the heap...
                else if (lhs->OperGet() == GT_CLS_VAR)
                    // A static-field store: record the field as modified in all containing loops.
                    AddModifiedFieldAllContainingLoops(mostNestedLoop, lhs->gtClsVar.gtClsVarHnd);
                // Otherwise, must be local lhs form. I should assert that.
                else if (lhs->OperGet() == GT_LCL_VAR)
                    GenTreeLclVar* lhsLcl = lhs->AsLclVar();
                    GenTreePtr rhs = tree->gtOp.gtOp2;
                    ValueNum rhsVN = rhs->gtVNPair.GetLiberal();
                    // If we gave the RHS a value number, propagate it.
                    if (rhsVN != ValueNumStore::NoVN)
                        rhsVN = vnStore->VNNormVal(rhsVN);
                        if (!fgExcludeFromSsa(lhsLcl->GetLclNum()))
                            lvaTable[lhsLcl->GetLclNum()]
                                .GetPerSsaData(lhsLcl->GetSsaNum())
                                ->m_vnPair.SetLiberal(rhsVN);
            else // not GenTree::OperIsAssignment(oper)
                    tree->gtVNPair = tree->gtOp.gtOp2->gtVNPair;
                    // Is it an addr of a array index expression?
                        GenTreePtr addrArg = tree->gtOp.gtOp1;
                        if (addrArg->OperGet() == GT_IND)
                            // Is the LHS an array index expression?
                            if (addrArg->gtFlags & GTF_IND_ARR_INDEX)
                                bool b = GetArrayInfoMap()->Lookup(addrArg, &arrInfo);
                                CORINFO_CLASS_HANDLE elemType =
                                    EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType);
                                // Give the address a PtrToArrElem VN so later stores through it
                                // can be attributed to this element type.
                                tree->gtVNPair.SetBoth(
                                    vnStore->VNForFunc(TYP_BYREF, VNF_PtrToArrElem,
                                                       vnStore->VNForHandle(ssize_t(elemType), GTF_ICON_CLASS_HDL),
                                                       // The rest are dummy arguments.
                                                       vnStore->VNForNull(), vnStore->VNForNull(),
                                                       vnStore->VNForNull()));
                // Interlocked operations mutate the heap arbitrarily.
                case GT_LOCKADD: // Binop
                case GT_XADD:    // Binop
                case GT_XCHG:    // Binop
                case GT_CMPXCHG: // Specialop
                    GenTreeCall* call = tree->AsCall();
                    // Record that this loop contains a call
                    AddContainsCallAllContainingLoops(mostNestedLoop);
                    if (call->gtCallType == CT_HELPER)
                        CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
                        if (s_helperCallProperties.MutatesHeap(helpFunc))
                        else if (s_helperCallProperties.MayRunCctor(helpFunc))
                            // If the call is labeled as "Hoistable", then we've checked the
                            // class that would be constructed, and it is not precise-init, so
                            // the cctor will not be run by this call. Otherwise, it might be,
                            // and might have arbitrary side effects.
                            if ((tree->gtFlags & GTF_CALL_HOISTABLE) == 0)
                    // All other gtOper node kinds, leave 'heapHavoc' unchanged (i.e. false)
    // Record that all loops containing this block have heap havoc effects.
    unsigned lnum = mostNestedLoop;
    while (lnum != BasicBlock::NOT_IN_LOOP)
        optLoopTable[lnum].lpLoopHasHeapHavoc = true;
        lnum = optLoopTable[lnum].lpParent;
7113 // Marks the containsCall information to "lnum" and any parent loops.
7114 void Compiler::AddContainsCallAllContainingLoops(unsigned lnum)
7116 assert(0 <= lnum && lnum < optLoopCount);
7117 while (lnum != BasicBlock::NOT_IN_LOOP)
7119 optLoopTable[lnum].lpContainsCall = true;
7120 lnum = optLoopTable[lnum].lpParent;
7124 // Adds the variable liveness information for 'blk' to 'this' LoopDsc
7125 void Compiler::LoopDsc::AddVariableLiveness(Compiler* comp, BasicBlock* blk)
7127 VarSetOps::UnionD(comp, this->lpVarInOut, blk->bbLiveIn);
7128 VarSetOps::UnionD(comp, this->lpVarInOut, blk->bbLiveOut);
7130 VarSetOps::UnionD(comp, this->lpVarUseDef, blk->bbVarUse);
7131 VarSetOps::UnionD(comp, this->lpVarUseDef, blk->bbVarDef);
7134 // Adds the variable liveness information for 'blk' to "lnum" and any parent loops.
7135 void Compiler::AddVariableLivenessAllContainingLoops(unsigned lnum, BasicBlock* blk)
7137 assert(0 <= lnum && lnum < optLoopCount);
7138 while (lnum != BasicBlock::NOT_IN_LOOP)
7140 optLoopTable[lnum].AddVariableLiveness(this, blk);
7141 lnum = optLoopTable[lnum].lpParent;
7145 // Adds "fldHnd" to the set of modified fields of "lnum" and any parent loops.
7146 void Compiler::AddModifiedFieldAllContainingLoops(unsigned lnum, CORINFO_FIELD_HANDLE fldHnd)
7148 assert(0 <= lnum && lnum < optLoopCount);
7149 while (lnum != BasicBlock::NOT_IN_LOOP)
7151 optLoopTable[lnum].AddModifiedField(this, fldHnd);
7152 lnum = optLoopTable[lnum].lpParent;
7156 // Adds "elemType" to the set of modified array element types of "lnum" and any parent loops.
7157 void Compiler::AddModifiedElemTypeAllContainingLoops(unsigned lnum, CORINFO_CLASS_HANDLE elemClsHnd)
7159 assert(0 <= lnum && lnum < optLoopCount);
7160 while (lnum != BasicBlock::NOT_IN_LOOP)
7162 optLoopTable[lnum].AddModifiedElemType(this, elemClsHnd);
7163 lnum = optLoopTable[lnum].lpParent;
7167 /*****************************************************************************
7169 * Helper passed to Compiler::fgWalkAllTreesPre() to decrement the LclVar usage counts
* The 'keepList' is either a single tree or a list of trees that are formed by
7171 * one or more GT_COMMA nodes. It is the kept side-effects as returned by the
7172 * gtExtractSideEffList method.
// optRemoveTreeVisitor: fgWalkTreePre callback that decrements LclVar ref
// counts for locals under a tree being removed, skipping (WALK_SKIP_SUBTREES)
// any subtree that appears in the kept side-effect list passed via
// data->pCallbackData.
// NOTE(review): some structural lines (braces, the keepList null-check and the
// op1/op2 comparison branches, local declarations 'lclNum'/'varDsc') are
// elided in this view of the file.
Compiler::fgWalkResult Compiler::optRemoveTreeVisitor(GenTreePtr* pTree, fgWalkData* data)
    GenTreePtr tree = *pTree;
    Compiler* comp = data->compiler;
    GenTreePtr keepList = (GenTreePtr)(data->pCallbackData);
    // We may have a non-NULL side effect list that is being kept
        GenTreePtr keptTree = keepList;
        while (keptTree->OperGet() == GT_COMMA)
            assert(keptTree->OperKind() & GTK_SMPOP);
            GenTreePtr op1 = keptTree->gtOp.gtOp1;
            GenTreePtr op2 = keptTree->gtGetOp2();
            // For the GT_COMMA case the op1 is part of the original CSE tree
            // that is being kept because it contains some side-effect
                // This tree and all of its sub trees are being kept.
                return WALK_SKIP_SUBTREES;
            // For the GT_COMMA case the op2 are the remaining side-effects of the original CSE tree
            // which can again be another GT_COMMA or the final side-effect part
        if (tree == keptTree)
            // This tree and all of its sub trees are being kept.
            return WALK_SKIP_SUBTREES;
    // This node is being removed from the graph of GenTreePtr
    // Look for any local variable references
    if (tree->gtOper == GT_LCL_VAR && comp->lvaLocalVarRefCounted)
        /* This variable ref is going away, decrease its ref counts */
        lclNum = tree->gtLclVarCommon.gtLclNum;
        assert(lclNum < comp->lvaCount);
        varDsc = comp->lvaTable + lclNum;
        // make sure it's been initialized
        assert(comp->compCurBB != nullptr);
        assert(comp->compCurBB->bbWeight <= BB_MAX_WEIGHT);
        /* Decrement its lvRefCnt and lvRefCntWtd */
        // Use getBBWeight to determine the proper block weight.
        // This impacts the block weights when we have IBC data.
        varDsc->decRefCnts(comp->compCurBB->getBBWeight(comp), comp);
    return WALK_CONTINUE;
7243 /*****************************************************************************
7245 * Routine called to decrement the LclVar ref counts when removing a tree
7246 * during the remove RangeCheck phase.
7247 * This method will decrement the refcounts for any LclVars used below 'deadTree',
7248 * unless the node is found in the 'keepList' (which are saved side effects)
7249 * The keepList is communicated using the walkData.pCallbackData field
7250 * Also the compCurBB must be set to the current BasicBlock which contains
7251 * 'deadTree' as we need to fetch the block weight when decrementing the ref counts.
7254 void Compiler::optRemoveTree(GenTreePtr deadTree, GenTreePtr keepList)
7256 // We communicate this value using the walkData.pCallbackData field
7258 fgWalkTreePre(&deadTree, optRemoveTreeVisitor, (void*)keepList);
7261 /*****************************************************************************
7263 * Given an array index node, mark it as not needing a range check.
// optRemoveRangeCheck: Given a GT_COMMA whose op1 is a bounds check, remove
// the check: extract any side effects from the check (per 'sideEffFlags'),
// decrement ref counts for the deleted locals, and replace op1 with either the
// side-effect list or a NOP. Re-costs and (if threaded) re-threads the stmt.
// NOTE(review): several preprocessor-conditional regions (#if REARRANGE_ADDS,
// DEBUG dump blocks, CSE-count updating) are elided in this view of the file;
// the bare noway_assert below presumably sits under '#if !REARRANGE_ADDS'.
void Compiler::optRemoveRangeCheck(
    GenTreePtr tree, GenTreePtr stmt, bool updateCSEcounts, unsigned sideEffFlags, bool forceRemove)
    noway_assert(!"can't remove range checks without REARRANGE_ADDS right now");
    noway_assert(stmt->gtOper == GT_STMT);
    noway_assert(tree->gtOper == GT_COMMA);
    noway_assert(tree->gtOp.gtOp1->OperIsBoundsCheck());
    noway_assert(forceRemove || optIsRangeCheckRemovable(tree->gtOp.gtOp1));
    GenTreeBoundsChk* bndsChk = tree->gtOp.gtOp1->AsBoundsChk();
        printf("Before optRemoveRangeCheck:\n");
    GenTreePtr sideEffList = nullptr;
        gtExtractSideEffList(tree->gtOp.gtOp1, &sideEffList, sideEffFlags);
    // Decrement the ref counts for any LclVars that are being deleted
    optRemoveTree(tree->gtOp.gtOp1, sideEffList);
    // Just replace the bndsChk with a NOP as an operand to the GT_COMMA, if there are no side effects.
    tree->gtOp.gtOp1 = (sideEffList != nullptr) ? sideEffList : gtNewNothingNode();
    // TODO-CQ: We should also remove the GT_COMMA, but in any case we can no longer CSE the GT_COMMA.
    tree->gtFlags |= GTF_DONT_CSE;
    /* Recalculate the gtCostSz, etc... */
    gtSetStmtInfo(stmt);
    /* Re-thread the nodes if necessary */
    if (fgStmtListThreaded)
        printf("After optRemoveRangeCheck:\n");
7335 /*****************************************************************************
7336 * Return the scale in an array reference, given a pointer to the
7337 * multiplication node.
// optGetArrayRefScaleAndIndex: Given the multiplication (GT_MUL or GT_LSH by a
// constant) node of an array reference, return the byte scale and report the
// index subtree through 'pIndex'. Folds cascaded constant multiplications
// (e.g. "* 5 * 4" from a morphed "* 20") into a single scale.
// NOTE(review): the trailing '*pIndex = index; return scale;' lines are elided
// in this view of the file — confirm against the full source.
ssize_t Compiler::optGetArrayRefScaleAndIndex(GenTreePtr mul, GenTreePtr* pIndex DEBUGARG(bool bRngChk))
    assert(mul->gtOper == GT_MUL || mul->gtOper == GT_LSH);
    assert(mul->gtOp.gtOp2->IsCnsIntOrI());
    ssize_t scale = mul->gtOp.gtOp2->gtIntConCommon.IconValue();
    // A left-shift encodes the scale as a power of two.
    if (mul->gtOper == GT_LSH)
        scale = ((ssize_t)1) << scale;
    GenTreePtr index = mul->gtOp.gtOp1;
    if (index->gtOper == GT_MUL && index->gtOp.gtOp2->IsCnsIntOrI())
        // case of two cascading multiplications for constant int (e.g. * 20 morphed to * 5 * 4):
        // When index->gtOper is GT_MUL and index->gtOp.gtOp2->gtOper is GT_CNS_INT (i.e. * 5),
        // we can bump up the scale from 4 to 5*4, and then change index to index->gtOp.gtOp1.
        // Otherwise, we cannot optimize it. We will simply keep the original scale and index.
        scale *= index->gtOp.gtOp2->gtIntConCommon.IconValue();
        index = index->gtOp.gtOp1;
    assert(!bRngChk || index->gtOper != GT_COMMA);
7375 /*****************************************************************************
* Find the last assignment to the local variable in the block. Return
7377 * RHS or NULL. If any local variable in the RHS has been killed in
7378 * intervening code, return NULL. If the variable being searched for is killed
7379 * in the intervening code, return NULL.
// optFindLocalInit: Scan 'block' backwards for the last unconditional '='
// assignment to the searched local and return its RHS; locals assigned in the
// intervening code are accumulated into *pKilledInOut. Returns nullptr (and
// may set *pLhsRhsKilledAfterInit) when the local is killed by an 'op=' or
// conditional assignment, or when the RHS uses a killed local / indirection.
// NOTE(review): the 'GenTreePtr local' parameter line, the 'do {' opener and
// several 'return nullptr'/'break' lines are elided in this view of the file.
GenTreePtr Compiler::optFindLocalInit(BasicBlock* block,
                                      VARSET_TP* pKilledInOut,
                                      bool* pLhsRhsKilledAfterInit)
    assert(pKilledInOut);
    assert(pLhsRhsKilledAfterInit);
    *pLhsRhsKilledAfterInit = false;
    unsigned LclNum = local->gtLclVarCommon.gtLclNum;
    GenTreePtr list = block->bbTreeList;
    if (list == nullptr)
    GenTreePtr rhs = nullptr;
    GenTreePtr stmt = list;
        // Walk statements backwards: bbTreeList's gtPrev links form the reverse order.
        stmt = stmt->gtPrev;
        if (stmt == nullptr)
        GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
        // If we encounter an assignment to a local variable,
        if ((tree->OperKind() & GTK_ASGOP) && tree->gtOp.gtOp1->gtOper == GT_LCL_VAR)
            // And the assigned variable equals the input local,
            if (tree->gtOp.gtOp1->gtLclVarCommon.gtLclNum == LclNum)
                // If the assignment is '=' and it is not a conditional, then return rhs.
                if (tree->gtOper == GT_ASG && !(tree->gtFlags & GTF_COLON_COND))
                    rhs = tree->gtOp.gtOp2;
                // If the assignment is 'op=' or a conditional equal, then the search ends here,
                // as we found a kill to the input local.
                    *pLhsRhsKilledAfterInit = true;
                    assert(rhs == nullptr);
            // Track any other assigned (tracked) local as killed in this range.
            LclVarDsc* varDsc = optIsTrackedLocal(tree->gtOp.gtOp1);
            if (varDsc == nullptr)
            VarSetOps::AddElemD(this, *pKilledInOut, varDsc->lvVarIndex);
    } while (stmt != list);
    // If any local in the RHS is killed in intervening code, or RHS has an indirection, return NULL.
    varRefKinds rhsRefs = VR_NONE;
    VARSET_TP VARSET_INIT_NOCOPY(rhsLocals, VarSetOps::UninitVal());
    bool b = lvaLclVarRefs(rhs, nullptr, &rhsRefs, &rhsLocals);
    if (!b || !VarSetOps::IsEmptyIntersection(this, rhsLocals, *pKilledInOut) || (rhsRefs != VR_NONE))
        // If RHS has been indirectly referenced, consider it a write and a kill.
        *pLhsRhsKilledAfterInit = true;
7463 /*****************************************************************************
* Return true if "op1" is guaranteed to be less than or equal to "op2".
// optIsNoMore: Return true if "op1" (plus 'add1') is guaranteed to be less
// than or equal to "op2" (plus 'add2'). Handles the constant/constant case and
// the "same local variable +/- constant" case; the caller guarantees the
// local has a single definition.
// NOTE(review): the 'return false' bodies of the guard conditions below are
// elided in this view of the file.
bool Compiler::optIsNoMore(GenTreePtr op1, GenTreePtr op2, int add1, int add2)
    // Both constants: fold them into the addends and compare directly.
    if (op1->gtOper == GT_CNS_INT && op2->gtOper == GT_CNS_INT)
        add1 += op1->gtIntCon.gtIconVal;
        add2 += op2->gtIntCon.gtIconVal;
    /* Check for +/- constant on either operand */
    if (op1->gtOper == GT_ADD && op1->gtOp.gtOp2->gtOper == GT_CNS_INT)
        add1 += op1->gtOp.gtOp2->gtIntCon.gtIconVal;
        op1 = op1->gtOp.gtOp1;
    if (op2->gtOper == GT_ADD && op2->gtOp.gtOp2->gtOper == GT_CNS_INT)
        add2 += op2->gtOp.gtOp2->gtIntCon.gtIconVal;
        op2 = op2->gtOp.gtOp1;
    /* We only allow local variable references */
    if (op1->gtOper != GT_LCL_VAR)
    if (op2->gtOper != GT_LCL_VAR)
    if (op1->gtLclVarCommon.gtLclNum != op2->gtLclVarCommon.gtLclNum)
    /* NOTE: Caller ensures that this variable has only one def */
    // printf("limit [%d]:\n", add1); gtDispTree(op1);
    // printf("size [%d]:\n", add2); gtDispTree(op2);
    // Same local on both sides: the comparison reduces to the addends.
    return (bool)(add1 <= add2);
7514 //------------------------------------------------------------------------------
7515 // optObtainLoopCloningOpts: Identify optimization candidates and update
7516 // the "context" for array optimizations.
7519 // context - data structure where all loop cloning info is kept. The
7520 // optInfo fields of the context are updated with the
7521 // identified optimization candidates.
7523 void Compiler::optObtainLoopCloningOpts(LoopCloneContext* context)
7525 for (unsigned i = 0; i < optLoopCount; i++)
7527 JITDUMP("Considering loop %d to clone for optimizations.\n", i);
7528 if (optIsLoopClonable(i))
7530 if (!(optLoopTable[i].lpFlags & LPFLG_REMOVED))
7532 optIdentifyLoopOptInfo(i, context);
7535 JITDUMP("------------------------------------------------------------\n");
7540 //------------------------------------------------------------------------
7541 // optIdentifyLoopOptInfo: Identify loop optimization candidates an also
7542 // check if the loop is suitable for the optimizations performed.
7545 // loopNum - the current loop index for which conditions are derived.
7546 // context - data structure where all loop cloning candidates will be
7550 // If the loop is not suitable for the optimizations, return false - context
7551 // should not contain any optimization candidate for the loop if false.
7552 // Else return true.
7555 // Check if the loop is well formed for this optimization and identify the
7556 // optimization candidates and update the "context" parameter with all the
7557 // contextual information necessary to perform the optimization later.
// optIdentifyLoopOptInfo: Check that loop 'loopNum' is well formed for loop
// cloning (simple +1/-1 iterator, known limit kind, test direction agreeing
// with the iteration direction, loop-inversion zero-trip test present) and, if
// so, walk its blocks collecting candidates into 'context' via
// optCanOptimizeByLoopCloningVisitor.
// NOTE(review): the 'return false' bodies after each JITDUMP rejection, and
// the final 'return true', are elided in this view of the file.
bool Compiler::optIdentifyLoopOptInfo(unsigned loopNum, LoopCloneContext* context)
    noway_assert(loopNum < optLoopCount);
    LoopDsc* pLoop = &optLoopTable[loopNum];
    if (!(pLoop->lpFlags & LPFLG_ITER))
        JITDUMP("> No iter flag on loop %d.\n", loopNum);
    unsigned ivLclNum = pLoop->lpIterVar();
    if (lvaVarAddrExposed(ivLclNum))
        JITDUMP("> Rejected V%02u as iter var because is address-exposed.\n", ivLclNum);
    BasicBlock* head = pLoop->lpHead;
    BasicBlock* end = pLoop->lpBottom;
    BasicBlock* beg = head->bbNext;
    if (end->bbJumpKind != BBJ_COND)
        JITDUMP("> Couldn't find termination test.\n");
    if (end->bbJumpDest != beg)
        JITDUMP("> Branch at loop 'end' not looping to 'begin'.\n");
    // TODO-CQ: CLONE: Mark increasing or decreasing loops.
    if ((pLoop->lpIterOper() != GT_ASG_ADD && pLoop->lpIterOper() != GT_ADD) || (pLoop->lpIterConst() != 1))
        JITDUMP("> Loop iteration operator not matching\n");
    if ((pLoop->lpFlags & LPFLG_CONST_LIMIT) == 0 && (pLoop->lpFlags & LPFLG_VAR_LIMIT) == 0 &&
        (pLoop->lpFlags & LPFLG_ARRLEN_LIMIT) == 0)
        JITDUMP("> Loop limit is neither constant, variable or array length\n");
    // The relational test must agree with the iteration direction (< / <= with
    // add, > / >= with sub).
    if (!(((pLoop->lpTestOper() == GT_LT || pLoop->lpTestOper() == GT_LE) &&
           (pLoop->lpIterOper() == GT_ADD || pLoop->lpIterOper() == GT_ASG_ADD)) ||
          ((pLoop->lpTestOper() == GT_GT || pLoop->lpTestOper() == GT_GE) &&
           (pLoop->lpIterOper() == GT_SUB || pLoop->lpIterOper() == GT_ASG_SUB))))
        JITDUMP("> Loop test (%s) doesn't agree with the direction (%s) of the pLoop->\n",
                GenTree::NodeName(pLoop->lpTestOper()), GenTree::NodeName(pLoop->lpIterOper()));
    if (!(pLoop->lpTestTree->OperKind() & GTK_RELOP) || !(pLoop->lpTestTree->gtFlags & GTF_RELOP_ZTT))
        JITDUMP("> Loop inversion NOT present, loop test [%06u] may not protect entry from head.\n",
                pLoop->lpTestTree->gtTreeID);
    GenTreePtr op1 = pLoop->lpIterator();
    noway_assert((op1->gtOper == GT_LCL_VAR) && (op1->gtLclVarCommon.gtLclNum == ivLclNum));
    JITDUMP("Checking blocks BB%02d..BB%02d for optimization candidates\n", beg->bbNum,
            end->bbNext ? end->bbNext->bbNum : 0);
    // Walk every statement of every block in the loop looking for candidates.
    LoopCloneVisitorInfo info(context, loopNum, nullptr);
    for (BasicBlock* block = beg; block != end->bbNext; block = block->bbNext)
        for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
            fgWalkTreePre(&stmt->gtStmt.gtStmtExpr, optCanOptimizeByLoopCloningVisitor, &info, false, false);
7647 //---------------------------------------------------------------------------------------------------------------
7648 // optExtractArrIndex: Try to extract the array index from "tree".
7651 // tree the tree to be checked if it is the array [] operation.
7652 // result the extracted GT_INDEX information is updated in result.
7653 // lhsNum for the root level (function is recursive) callers should be BAD_VAR_NUM.
7656 // Returns true if array index can be extracted, else, return false. See assumption about
7657 // what will be extracted. The "result" variable's rank parameter is advanced for every
7658 // dimension of [] encountered.
7661 // Given a "tree" extract the GT_INDEX node in "result" as ArrIndex. In FlowGraph morph
7662 // we have converted a GT_INDEX tree into a scaled index base offset expression. We need
7663 // to reconstruct this to be able to know if this is an array access.
7666 // The method extracts only if the array base and indices are GT_LCL_VAR.
7668 // TODO-CQ: CLONE: After morph make sure this method extracts values before morph.
7670 // [000000001AF828D8] ---XG------- indir int
7671 // [000000001AF872C8] ------------ const long 16 Fseq[#FirstElem]
7672 // [000000001AF87340] ------------ + byref
7673 // [000000001AF87160] -------N---- const long 2
7674 // [000000001AF871D8] ------------ << long
7675 // [000000001AF870C0] ------------ cast long <- int
7676 // [000000001AF86F30] i----------- lclVar int V04 loc0
7677 // [000000001AF87250] ------------ + byref
7678 // [000000001AF86EB8] ------------ lclVar ref V01 arg1
7679 // [000000001AF87468] ---XG------- comma int
7680 // [000000001AF87020] ---X-------- arrBndsChk void
7681 // [000000001AF86FA8] ---X-------- arrLen int
7682 // [000000001AF827E8] ------------ lclVar ref V01 arg1
7683 // [000000001AF82860] ------------ lclVar int V04 loc0
7684 // [000000001AF829F0] -A-XG------- = int
7685 // [000000001AF82978] D------N---- lclVar int V06 tmp0
// optExtractArrIndex: Try to extract a single-dimension array index from
// 'tree', which must be a GT_COMMA of an arrBndsChk and a scaled-index-base
// indirection; both the array base and the index must be GT_LCL_VARs. On
// success the array local, index local, bounds-check tree and use block are
// pushed onto 'result'. 'lhsNum' (BAD_VAR_NUM at the root) constrains which
// local must serve as the array base for inner dimensions.
// NOTE(review): the 'return false' bodies of the guard conditions, the '#else'
// between the two 'indexVar' declarations, the rank update and the final
// 'return true' are elided in this view of the file.
bool Compiler::optExtractArrIndex(GenTreePtr tree, ArrIndex* result, unsigned lhsNum)
    if (tree->gtOper != GT_COMMA)
    GenTreePtr before = tree->gtGetOp1();
    if (before->gtOper != GT_ARR_BOUNDS_CHECK)
    GenTreeBoundsChk* arrBndsChk = before->AsBoundsChk();
    if (arrBndsChk->gtArrLen->gtGetOp1()->gtOper != GT_LCL_VAR)
    if (arrBndsChk->gtIndex->gtOper != GT_LCL_VAR)
    unsigned arrLcl = arrBndsChk->gtArrLen->gtGetOp1()->gtLclVarCommon.gtLclNum;
    if (lhsNum != BAD_VAR_NUM && arrLcl != lhsNum)
    unsigned indLcl = arrBndsChk->gtIndex->gtLclVarCommon.gtLclNum;
    GenTreePtr after = tree->gtGetOp2();
    if (after->gtOper != GT_IND)
    // It used to be the case that arrBndsChks for struct types would fail the previous check because
    // after->gtOper was an address (for a block op). In order to avoid asmDiffs we will for now
    // return false if the type of 'after' is a struct type. (This was causing us to clone loops
    // that we were not previously cloning.)
    // TODO-1stClassStructs: Remove this check to enable optimization of array bounds checks for struct
    if (varTypeIsStruct(after))
    // Match the scaled-index-base-offset shape: (base + (index << scale)) + ofs.
    GenTreePtr sibo = after->gtGetOp1();
    if (sibo->gtOper != GT_ADD)
    GenTreePtr sib = sibo->gtGetOp1();
    GenTreePtr ofs = sibo->gtGetOp2();
    if (ofs->gtOper != GT_CNS_INT)
    if (sib->gtOper != GT_ADD)
    GenTreePtr si = sib->gtGetOp2();
    GenTreePtr base = sib->gtGetOp1();
    if (si->gtOper != GT_LSH)
    if (base->OperGet() != GT_LCL_VAR || base->gtLclVarCommon.gtLclNum != arrLcl)
    GenTreePtr scale = si->gtGetOp2();
    GenTreePtr index = si->gtGetOp1();
    if (scale->gtOper != GT_CNS_INT)
#ifdef _TARGET_AMD64_
    // On AMD64 the 32-bit index appears under a cast to long.
    if (index->gtOper != GT_CAST)
    GenTreePtr indexVar = index->gtGetOp1();
    GenTreePtr indexVar = index;
    if (indexVar->gtOper != GT_LCL_VAR || indexVar->gtLclVarCommon.gtLclNum != indLcl)
    // Root-level call: record the array base local.
    if (lhsNum == BAD_VAR_NUM)
    result->arrLcl = arrLcl;
    result->indLcls.Push(indLcl);
    result->bndsChks.Push(tree);
    result->useBlock = compCurBB;
7788 //---------------------------------------------------------------------------------------------------------------
7789 // optReconstructArrIndex: Reconstruct array index.
7792 // tree the tree to be checked if it is an array [][][] operation.
7793 // result the extracted GT_INDEX information.
7794 // lhsNum for the root level (function is recursive) callers should be BAD_VAR_NUM.
7797 // Returns true if array index can be extracted, else, return false. "rank" field in
7798 // "result" contains the array access depth. The "indLcls" fields contain the indices.
7801 // Recursively look for a list of array indices. In the example below, we encounter,
7802 // V03 = ((V05 = V00[V01]), (V05[V02])) which corresponds to access of V00[V01][V02]
7803 // The return value would then be:
7804 // ArrIndex result { arrLcl: V00, indLcls: [V01, V02], rank: 2 }
7806 // V00[V01][V02] would be morphed as:
7808 // [000000001B366848] ---XG------- indir int
7809 // [000000001B36BC50] ------------ V05 + (V02 << 2) + 16
7810 // [000000001B36C200] ---XG------- comma int
7811 // [000000001B36BDB8] ---X-------- arrBndsChk(V05, V02)
7812 // [000000001B36C278] -A-XG------- comma int
7813 // [000000001B366730] R--XG------- indir ref
7814 // [000000001B36C2F0] ------------ V00 + (V01 << 3) + 24
7815 // [000000001B36C818] ---XG------- comma ref
7816 // [000000001B36C458] ---X-------- arrBndsChk(V00, V01)
7817 // [000000001B36BB60] -A-XG------- = ref
7818 // [000000001B36BAE8] D------N---- lclVar ref V05 tmp2
7819 // [000000001B36A668] -A-XG------- = int
7820 // [000000001B36A5F0] D------N---- lclVar int V03 tmp0
7823 // The method extracts only if the array base and indices are GT_LCL_VAR.
// optReconstructArrIndex: Recursively reconstruct a (possibly multi-dim
// jagged) array access V00[V01][V02]... from its morphed comma form, filling
// 'result' with the array local and index locals. Inner levels are matched by
// requiring the temp assigned in "before" to be the array base of "after".
// NOTE(review): the 'return true'/'return false' bodies and the final return
// are elided in this view of the file. The inner 'unsigned lhsNum' declaration
// deliberately shadows the parameter with the newly-assigned temp's number.
bool Compiler::optReconstructArrIndex(GenTreePtr tree, ArrIndex* result, unsigned lhsNum)
    // If we can extract "tree" (which is a top level comma) return.
    if (optExtractArrIndex(tree, result, lhsNum))
    // We have a comma (check if array base expr is computed in "before"), descend further.
    else if (tree->OperGet() == GT_COMMA)
        GenTreePtr before = tree->gtGetOp1();
        // "before" should evaluate an array base for the "after" indexing.
        if (before->OperGet() != GT_ASG)
        GenTreePtr lhs = before->gtGetOp1();
        GenTreePtr rhs = before->gtGetOp2();
        // "rhs" should contain an GT_INDEX
        if (!lhs->IsLocal() || !optReconstructArrIndex(rhs, result, lhsNum))
        unsigned lhsNum = lhs->gtLclVarCommon.gtLclNum;
        GenTreePtr after = tree->gtGetOp2();
        // Pass the "lhsNum", so we can verify if indeed it is used as the array base.
        return optExtractArrIndex(after, result, lhsNum);
7858 Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloningVisitor(GenTreePtr* pTree, Compiler::fgWalkData* data)
7860 return data->compiler->optCanOptimizeByLoopCloning(*pTree, (LoopCloneVisitorInfo*)data->pCallbackData);
7863 //-------------------------------------------------------------------------
7864 // optIsStackLocalInvariant: Is stack local invariant in loop.
7867 // loopNum The loop in which the variable is tested for invariance.
7868 // lclNum The local that is tested for invariance in the loop.
7871 // Returns true if the variable is loop invariant in loopNum.
// optIsStackLocalInvariant: Return true if local 'lclNum' is invariant within
// loop 'loopNum' — i.e. it is not address-exposed and not assigned in the loop.
// NOTE(review): the 'return false' bodies and the final 'return true' are
// elided in this view of the file.
bool Compiler::optIsStackLocalInvariant(unsigned loopNum, unsigned lclNum)
    // An address-exposed local may be modified through aliases.
    if (lvaVarAddrExposed(lclNum))
    if (optIsVarAssgLoop(loopNum, lclNum))
7886 //----------------------------------------------------------------------------------------------
7887 // optCanOptimizeByLoopCloning: Check if the tree can be optimized by loop cloning and if so,
7888 // identify as potential candidate and update the loop context.
7891 // tree The tree encountered during the tree walk.
7892 // info Supplies information about the current block or stmt in which the tree is.
7893 // Also supplies the "context" pointer for updating with loop cloning
7894 // candidates. Also supplies loopNum.
7897 // If array index can be reconstructed, check if the iter var of the loop matches the
7898 // array index var in some dim. Also ensure other index vars before the identified
7899 // dim are loop invariant.
7902 // Skip sub trees if the optimization candidate is identified or else continue walking
7904 Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTreePtr tree, LoopCloneVisitorInfo* info)
// See the header comment above: identifies array-index trees whose index var
// matches the loop's iteration variable and records them as cloning candidates.
7906 ArrIndex arrIndex(getAllocator());
7908 // Check if array index can be optimized.
7909 if (optReconstructArrIndex(tree, &arrIndex, BAD_VAR_NUM))
7911 assert(tree->gtOper == GT_COMMA);
7915 JITDUMP("Found ArrIndex at tree ");
7917 printf(" which is equivalent to: ");
// The array base itself must be invariant, otherwise the candidate is rejected.
7922 if (!optIsStackLocalInvariant(info->loopNum, arrIndex.arrLcl))
7924 return WALK_SKIP_SUBTREES;
7927 // Walk the dimensions and see if iterVar of the loop is used as index.
7928 for (unsigned dim = 0; dim < arrIndex.rank; ++dim)
7930 // Is index variable also used as the loop iter var.
7931 if (arrIndex.indLcls[dim] == optLoopTable[info->loopNum].lpIterVar())
7933 // Check the previous indices are all loop invariant.
7934 for (unsigned dim2 = 0; dim2 < dim; ++dim2)
7936 if (optIsVarAssgLoop(info->loopNum, arrIndex.indLcls[dim2]))
7938 JITDUMP("V%02d is assigned in loop\n", arrIndex.indLcls[dim2]);
7939 return WALK_SKIP_SUBTREES;
7945 JITDUMP("Loop %d can be cloned for ArrIndex ", info->loopNum);
7947 JITDUMP(" on dim %d\n", dim);
7950 // Update the loop context.
7951 info->context->EnsureLoopOptInfo(info->loopNum)
7952 ->Push(new (this, CMK_LoopOpt) LcJaggedArrayOptInfo(arrIndex, dim, info->stmt));
7956 JITDUMP("Induction V%02d is not used as index on dim %d\n", optLoopTable[info->loopNum].lpIterVar(),
// A candidate (or rejection) was determined; no need to walk into this subtree.
7960 return WALK_SKIP_SUBTREES;
7962 else if (tree->gtOper == GT_ARR_ELEM)
7964 // TODO-CQ: CLONE: Implement.
7965 return WALK_SKIP_SUBTREES;
7967 return WALK_CONTINUE;
7970 struct optRangeCheckDsc
// Callback payload for optValidRangeCheckIndex: carries the compiler instance
// plus a validity flag ("bValidIndex", cleared by the walk on a bad index node;
// the flag's declaration is elided in this view).
7972 Compiler* pCompiler;
7976 Walk to make sure that only locals and constants are contained in the index
7979 Compiler::fgWalkResult Compiler::optValidRangeCheckIndex(GenTreePtr* pTree, fgWalkData* data)
// Pre-order walk callback: marks the index as invalid if it contains any
// indirection / class-var / field / local-field node, or an address-exposed
// local — anything whose value could change between check and use.
7981 GenTreePtr tree = *pTree;
7982 optRangeCheckDsc* pData = (optRangeCheckDsc*)data->pCallbackData;
7984 if (tree->gtOper == GT_IND || tree->gtOper == GT_CLS_VAR || tree->gtOper == GT_FIELD || tree->gtOper == GT_LCL_FLD)
7986 pData->bValidIndex = false;
7990 if (tree->gtOper == GT_LCL_VAR)
// Address-exposed locals can be written through aliases, so they don't count
// as a stable index either.
7992 if (pData->pCompiler->lvaTable[tree->gtLclVarCommon.gtLclNum].lvAddrExposed)
7994 pData->bValidIndex = false;
7999 return WALK_CONTINUE;
8003 returns true if a range check can legally be removed (for the moment it checks
8004 that the array is a local array (not subject to racing conditions) and that the
8005 index is either a constant or a local
8007 bool Compiler::optIsRangeCheckRemovable(GenTreePtr tree)
// Returns true when the GT_ARR_BOUNDS_CHECK "tree" can legally be removed:
// the array reference (if any) must be a non-address-exposed local, and the
// index may contain only locals and constants (see optValidRangeCheckIndex).
8009 noway_assert(tree->gtOper == GT_ARR_BOUNDS_CHECK);
8010 GenTreeBoundsChk* bndsChk = tree->AsBoundsChk();
8011 GenTreePtr pArray = bndsChk->GetArray();
// With no array node, the length must at least be a constant to reason about.
8012 if (pArray == nullptr && !bndsChk->gtArrLen->IsCnsIntOrI())
8016 GenTreePtr pIndex = bndsChk->gtIndex;
8018 // The length must be a constant (the pArray == NULL case) or the array reference must be a local.
8019 // Otherwise we can be targeted by malicious race-conditions.
8020 if (pArray != nullptr)
8022 if (pArray->gtOper != GT_LCL_VAR)
8028 printf("Can't remove range check if the array isn't referenced with a local\n");
8036 noway_assert(pArray->gtType == TYP_REF);
8037 noway_assert(pArray->gtLclVarCommon.gtLclNum < lvaCount);
8039 if (lvaTable[pArray->gtLclVarCommon.gtLclNum].lvAddrExposed)
8041 // If the array address has been taken, don't do the optimization
8042 // (this restriction can be lowered a bit, but i don't think it's worth it)
8043 CLANG_FORMAT_COMMENT_ANCHOR;
8047 printf("Can't remove range check if the array has its address taken\n");
// Validate the index expression with a tree walk over pIndex.
8056 optRangeCheckDsc Data;
8057 Data.pCompiler = this;
8058 Data.bValidIndex = true;
8060 fgWalkTreePre(&pIndex, optValidRangeCheckIndex, &Data);
8062 if (!Data.bValidIndex)
8067 printf("Can't remove range check with this index");
8078 /******************************************************************************
8080 * Replace x==null with (x|x)==0 if x is a GC-type.
8081 * This will stress code-gen and the emitter to make sure they support such trees.
8086 void Compiler::optOptimizeBoolsGcStress(BasicBlock* condBlock)
// Stress mode (see header comment above): rewrites "x==null" into "(x|x)==0"
// for a GC-typed comparand to exercise code-gen/emitter on such trees.
8088 if (!compStressCompile(STRESS_OPT_BOOLS_GC, 20))
8093 noway_assert(condBlock->bbJumpKind == BBJ_COND);
8094 GenTreePtr condStmt = condBlock->bbTreeList->gtPrev->gtStmt.gtStmtExpr;
8096 noway_assert(condStmt->gtOper == GT_JTRUE);
8101 GenTreePtr comparand = optIsBoolCond(condStmt, &relop, &isBool);
// Only GC-typed comparands are interesting for this stress transformation.
8103 if (comparand == nullptr || !varTypeIsGC(comparand->TypeGet()))
// The comparand is duplicated below, so it must be side-effect free.
8108 if (comparand->gtFlags & (GTF_ASG | GTF_CALL | GTF_ORDER_SIDEEFF))
8113 GenTreePtr comparandClone = gtCloneExpr(comparand);
8115 // Bump up the ref-counts of any variables in 'comparandClone'
8116 compCurBB = condBlock;
8117 fgWalkTreePre(&comparandClone, Compiler::lvaIncRefCntsCB, (void*)this, true);
8119 noway_assert(relop->gtOp.gtOp1 == comparand);
// Randomly (50% under stress) pick OR vs AND; x|x and x&x both equal x.
8120 genTreeOps oper = compStressCompile(STRESS_OPT_BOOLS_GC, 50) ? GT_OR : GT_AND;
8121 relop->gtOp.gtOp1 = gtNewOperNode(oper, TYP_I_IMPL, comparand, comparandClone);
8123 // Comparand type is already checked, and we have const int, there is no harm
8124 // morphing it into a TYP_I_IMPL.
8125 noway_assert(relop->gtOp.gtOp2->gtOper == GT_CNS_INT);
8126 relop->gtOp.gtOp2->gtType = TYP_I_IMPL;
8131 /******************************************************************************
8132 * Function used by folding of boolean conditionals
8133 * Given a GT_JTRUE node, checks that it is a boolean comparison of the form
8134 * "if (boolVal ==/!= 0/1)". This is translated into a GT_EQ node with "op1"
8135 * being a boolean lclVar and "op2" the const 0/1.
8136 * On success, the comparand (ie. boolVal) is returned. Else NULL.
8137 * compPtr returns the compare node (i.e. GT_EQ or GT_NE node)
8138 * boolPtr returns whether the comparand is a boolean value (must be 0 or 1).
8139 * When the returned boolPtr is true, if the comparison was against a 1 (i.e. true)
8140 * value then we morph the tree by reversing the GT_EQ/GT_NE and change the 1 to 0.
8143 GenTree* Compiler::optIsBoolCond(GenTree* condBranch, GenTree** compPtr, bool* boolPtr)
// See the header comment above for the full contract: recognizes
// "if (boolVal ==/!= 0/1)" under a GT_JTRUE and returns the comparand.
8145 bool isBool = false;
8147 noway_assert(condBranch->gtOper == GT_JTRUE);
8148 GenTree* cond = condBranch->gtOp.gtOp1;
8150 /* The condition must be "!= 0" or "== 0" */
8152 if ((cond->gtOper != GT_EQ) && (cond->gtOper != GT_NE))
8157 /* Return the compare node to the caller */
8161 /* Get hold of the comparands */
8163 GenTree* opr1 = cond->gtOp.gtOp1;
8164 GenTree* opr2 = cond->gtOp.gtOp2;
// The right-hand side must be an integer constant 0 or 1.
8166 if (opr2->gtOper != GT_CNS_INT)
8171 if (!opr2->IsIntegralConst(0) && !opr2->IsIntegralConst(1))
8176 ssize_t ival2 = opr2->gtIntCon.gtIconVal;
8178 /* Is the value a boolean?
8179 * We can either have a boolean expression (marked GTF_BOOLEAN) or
8180 * a local variable that is marked as being boolean (lvIsBoolean) */
8182 if (opr1->gtFlags & GTF_BOOLEAN)
8186 else if ((opr1->gtOper == GT_CNS_INT) && (opr1->IsIntegralConst(0) || opr1->IsIntegralConst(1)))
8190 else if (opr1->gtOper == GT_LCL_VAR)
8192 /* is it a boolean local variable */
8194 unsigned lclNum = opr1->gtLclVarCommon.gtLclNum;
8195 noway_assert(lclNum < lvaCount);
8197 if (lvaTable[lclNum].lvIsBoolean)
8203 /* Was our comparison against the constant 1 (i.e. true) */
8206 // If this is a boolean expression tree we can reverse the relop
8207 // and change the true to false.
// Normalize "==/!= 1" into the reversed "==/!= 0" form.
8210 gtReverseCond(cond);
8211 opr2->gtIntCon.gtIconVal = 0;
8223 void Compiler::optOptimizeBools()
// Folds pairs of adjacent BBJ_COND blocks whose conditions are boolean tests
// into a single conditional using OR/AND, then removes the second block.
// NOTE(review): this view elides many interior lines (braces, continues, the
// retry loop); the function also continues past the end of this chunk.
8228 printf("*************** In optOptimizeBools()\n");
8231 printf("Blocks/Trees before phase\n");
8232 fgDispBasicBlocks(true);
8242 for (BasicBlock* b1 = fgFirstBB; b1; b1 = b1->bbNext)
8244 /* We're only interested in conditional jumps here */
8246 if (b1->bbJumpKind != BBJ_COND)
8251 /* If there is no next block, we're done */
8253 BasicBlock* b2 = b1->bbNext;
8259 /* The next block must not be marked as BBF_DONT_REMOVE */
8260 if (b2->bbFlags & BBF_DONT_REMOVE)
8265 /* The next block also needs to be a condition */
8267 if (b2->bbJumpKind != BBJ_COND)
8270 optOptimizeBoolsGcStress(b1);
8275 bool sameTarget; // Do b1 and b2 have the same bbJumpDest?
8277 if (b1->bbJumpDest == b2->bbJumpDest)
8279 /* Given the following sequence of blocks :
8283 we will try to fold it to :
8284 B1: brtrue(t1|t2, BX)
8290 else if (b1->bbJumpDest == b2->bbNext) /*b1->bbJumpDest->bbNum == n1+2*/
8292 /* Given the following sequence of blocks :
8296 we will try to fold it to :
8297 B1: brtrue((!t1)&&t2, B3)
8308 /* The second block must contain a single statement */
8310 GenTreePtr s2 = b2->bbTreeList;
8311 if (s2->gtPrev != s2)
8316 noway_assert(s2->gtOper == GT_STMT);
8317 GenTreePtr t2 = s2->gtStmt.gtStmtExpr;
8318 noway_assert(t2->gtOper == GT_JTRUE);
8320 /* Find the condition for the first block */
8322 GenTreePtr s1 = b1->bbTreeList->gtPrev;
8324 noway_assert(s1->gtOper == GT_STMT);
8325 GenTreePtr t1 = s1->gtStmt.gtStmtExpr;
8326 noway_assert(t1->gtOper == GT_JTRUE);
// b2 must be reachable only from b1, otherwise it cannot be merged away.
8328 if (b2->countOfInEdges() > 1)
8333 /* Find the branch conditions of b1 and b2 */
8337 GenTreePtr c1 = optIsBoolCond(t1, &t1, &bool1);
8343 GenTreePtr c2 = optIsBoolCond(t2, &t2, &bool2);
// NOTE(review): '&&' binds tighter than '||' in the next two asserts, so the
// gtOp1 check only applies on the GT_NE side — possibly unintended; confirm.
8349 noway_assert(t1->gtOper == GT_EQ || t1->gtOper == GT_NE && t1->gtOp.gtOp1 == c1);
8350 noway_assert(t2->gtOper == GT_EQ || t2->gtOper == GT_NE && t2->gtOp.gtOp1 == c2);
8352 // Leave out floats where the bit-representation is more complicated
8353 // - there are two representations for 0.
8355 if (varTypeIsFloating(c1->TypeGet()) || varTypeIsFloating(c2->TypeGet()))
8360 // Make sure the types involved are of the same sizes
8361 if (genTypeSize(c1->TypeGet()) != genTypeSize(c2->TypeGet()))
8365 if (genTypeSize(t1->TypeGet()) != genTypeSize(t2->TypeGet()))
8369 #ifdef _TARGET_ARMARCH_
8370 // Skip the small operand which we cannot encode.
8371 if (varTypeIsSmall(c1->TypeGet()))
8374 /* The second condition must not contain side effects */
8376 if (c2->gtFlags & GTF_GLOB_EFFECT)
8381 /* The second condition must not be too expensive */
8385 if (c2->gtCostEx > 12)
// GC-typed comparands are folded using the native-int type.
8392 var_types foldType = c1->TypeGet();
8393 if (varTypeIsGC(foldType))
8395 foldType = TYP_I_IMPL;
8400 /* Both conditions must be the same */
8402 if (t1->gtOper != t2->gtOper)
8407 if (t1->gtOper == GT_EQ)
8409 /* t1:c1==0 t2:c2==0 ==> Branch to BX if either value is 0
8410 So we will branch to BX if (c1&c2)==0 */
8417 /* t1:c1!=0 t2:c2!=0 ==> Branch to BX if either value is non-0
8418 So we will branch to BX if (c1|c2)!=0 */
8426 /* The b1 condition must be the reverse of the b2 condition */
8428 if (t1->gtOper == t2->gtOper)
8433 if (t1->gtOper == GT_EQ)
8435 /* t1:c1==0 t2:c2!=0 ==> Branch to BX if both values are non-0
8436 So we will branch to BX if (c1&c2)!=0 */
8443 /* t1:c1!=0 t2:c2==0 ==> Branch to BX if both values are 0
8444 So we will branch to BX if (c1|c2)==0 */
8451 // Anding requires both values to be 0 or 1
8453 if ((foldOp == GT_AND) && (!bool1 || !bool2))
8459 // Now update the trees
8461 GenTreePtr cmpOp1 = gtNewOperNode(foldOp, foldType, c1, c2);
8464 /* When we 'OR'/'AND' two booleans, the result is boolean as well */
8465 cmpOp1->gtFlags |= GTF_BOOLEAN;
8469 t1->gtOp.gtOp1 = cmpOp1;
8470 t1->gtOp.gtOp2->gtType = foldType; // Could have been varTypeIsGC()
8472 #if FEATURE_SET_FLAGS
8473 // For comparisons against zero we will have the GTF_SET_FLAGS set
8474 // and this can cause an assert to fire in fgMoveOpsLeft(GenTreePtr tree)
8475 // during the CSE phase.
8477 // So make sure to clear any GTF_SET_FLAGS bit on these operations
8478 // as they are no longer feeding directly into a comparisons against zero
8480 // Make sure that the GTF_SET_FLAGS bit is cleared.
8481 // Fix 388436 ARM JitStress WP7
8482 c1->gtFlags &= ~GTF_SET_FLAGS;
8483 c2->gtFlags &= ~GTF_SET_FLAGS;
8485 // The new top level node that we just created does feed directly into
8486 // a comparison against zero, so set the GTF_SET_FLAGS bit so that
8487 // we generate an instruction that sets the flags, which allows us
8488 // to omit the cmp with zero instruction.
8490 // Request that the codegen for cmpOp1 sets the condition flags
8491 // when it generates the code for cmpOp1.
8493 cmpOp1->gtRequestSetFlags();
8496 flowList* edge1 = fgGetPredForBlock(b1->bbJumpDest, b1);
8499 /* Modify the target of the conditional jump and update bbRefs and bbPreds */
8503 edge2 = fgGetPredForBlock(b2->bbJumpDest, b2);
8507 edge2 = fgGetPredForBlock(b2->bbNext, b2);
8509 fgRemoveRefPred(b1->bbJumpDest, b1);
8511 b1->bbJumpDest = b2->bbJumpDest;
8513 fgAddRefPred(b2->bbJumpDest, b1);
8516 noway_assert(edge1 != nullptr);
8517 noway_assert(edge2 != nullptr);
// Merge b2's edge weights into b1's edge, guarding against overflow; on
// overflow fall back to the widest possible weight range.
8519 BasicBlock::weight_t edgeSumMin = edge1->flEdgeWeightMin + edge2->flEdgeWeightMin;
8520 BasicBlock::weight_t edgeSumMax = edge1->flEdgeWeightMax + edge2->flEdgeWeightMax;
8521 if ((edgeSumMax >= edge1->flEdgeWeightMax) && (edgeSumMax >= edge2->flEdgeWeightMax))
8523 edge1->flEdgeWeightMin = edgeSumMin;
8524 edge1->flEdgeWeightMax = edgeSumMax;
8528 edge1->flEdgeWeightMin = BB_ZERO_WEIGHT;
8529 edge1->flEdgeWeightMax = BB_MAX_WEIGHT;
8532 /* Get rid of the second block (which is a BBJ_COND) */
8534 noway_assert(b1->bbJumpKind == BBJ_COND);
8535 noway_assert(b2->bbJumpKind == BBJ_COND);
8536 noway_assert(b1->bbJumpDest == b2->bbJumpDest);
8537 noway_assert(b1->bbNext == b2);
8538 noway_assert(b2->bbNext);
8541 b2->bbFlags |= BBF_REMOVED;
8543 // If b2 was the last block of a try or handler, update the EH table.
8545 ehUpdateForDeletedBlock(b2);
8547 /* Update bbRefs and bbPreds */
8549 /* Replace pred 'b2' for 'b2->bbNext' with 'b1'
8550 * Remove pred 'b2' for 'b2->bbJumpDest' */
8552 fgReplacePred(b2->bbNext, b2, b1);
8554 fgRemoveRefPred(b2->bbJumpDest, b2);
8556 /* Update the block numbers and try again */
8568 // Update loop table
8569 fgUpdateLoopsAfterCompacting(b1, b2);
8574 printf("Folded %sboolean conditions of BB%02u and BB%02u to :\n", c2->OperIsLeaf() ? "" : "non-leaf ",
8575 b1->bbNum, b2->bbNum);
8584 fgDebugCheckBBlist();