1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
10 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
11 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
17 #pragma warning(disable : 4701)
20 /*****************************************************************************/
24 unsigned Compiler::optRangeChkRmv = 0;
26 unsigned Compiler::optRangeChkAll = 0;
29 /*****************************************************************************/
31 void Compiler::optInit()
33 optLoopsMarked = false;
36 /* Initialize the # of tracked loops to 0 */
38 /* Keep track of the number of calls and indirect calls made by this method */
40 optIndirectCallCount = 0;
41 optNativeCallCount = 0;
42 optAssertionCount = 0;
43 optAssertionDep = nullptr;
45 optCSECandidateTotal = 0;
46 optCSEstart = UINT_MAX;
48 #endif // FEATURE_ANYCSE
51 DataFlow::DataFlow(Compiler* pCompiler) : m_pCompiler(pCompiler)
55 /*****************************************************************************
59 void Compiler::optSetBlockWeights()
61 noway_assert(!opts.MinOpts() && !opts.compDbgCode);
62 assert(fgDomsComputed);
68 bool firstBBdomsRets = true;
72 for (block = fgFirstBB; (block != nullptr); block = block->bbNext)
74 /* Blocks that can't be reached via the first block are rarely executed */
75 if (!fgReachable(fgFirstBB, block))
77 block->bbSetRunRarely();
80 if (block->bbWeight != BB_ZERO_WEIGHT)
82 // Calculate our bbWeight:
84 // o BB_UNITY_WEIGHT if we dominate all BBJ_RETURN blocks
85 // o otherwise BB_UNITY_WEIGHT / 2
87 bool domsRets = true; // Assume that we will dominate
89 for (BasicBlockList* retBlocks = fgReturnBlocks; retBlocks != nullptr; retBlocks = retBlocks->next)
91 if (!fgDominate(block, retBlocks->block))
98 if (block == fgFirstBB)
100 firstBBdomsRets = domsRets;
103 // If we are not using profile weight then we lower the weight
104 // of blocks that do not dominate a return block
106 if (firstBBdomsRets && (fgIsUsingProfileWeights() == false) && (domsRets == false))
111 block->modifyBBWeight(block->bbWeight / 2);
112 noway_assert(block->bbWeight);
118 if (changed && verbose)
120 printf("\nAfter optSetBlockWeights:\n");
125 /* Check that the flowgraph data (bbNum, bbRefs, bbPreds) is up-to-date */
126 fgDebugCheckBBlist();
130 /*****************************************************************************
132 * Marks the blocks between 'begBlk' and 'endBlk' as part of a loop.
135 void Compiler::optMarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk, bool excludeEndBlk)
137 /* Calculate the 'loopWeight',
138 this is the amount to increase each block in the loop
139 Our heuristic is that loops are weighted eight times more
140 than straight line code.
141 Thus we increase each block by 7 times the weight of
142 the loop header block,
143 if the loops are all properly formed gives us:
144 (assuming that BB_LOOP_WEIGHT is 8)
146 1 -- non loop basic block
147 8 -- single loop nesting
148 64 -- double loop nesting
149 512 -- triple loop nesting
153 noway_assert(begBlk->bbNum <= endBlk->bbNum);
154 noway_assert(begBlk->isLoopHead());
155 noway_assert(fgReachable(begBlk, endBlk));
160 printf("\nMarking loop L%02u", begBlk->bbLoopNum);
164 noway_assert(!opts.MinOpts());
166 /* Build list of backedges for block begBlk */
167 flowList* backedgeList = nullptr;
169 for (flowList* pred = begBlk->bbPreds; pred != nullptr; pred = pred->flNext)
171 /* Is this a backedge? */
172 if (pred->flBlock->bbNum >= begBlk->bbNum)
174 flowList* flow = new (this, CMK_FlowList) flowList();
176 #if MEASURE_BLOCK_SIZE
178 genFlowNodeSize += sizeof(flowList);
179 #endif // MEASURE_BLOCK_SIZE
181 flow->flNext = backedgeList;
182 flow->flBlock = pred->flBlock;
187 /* At least one backedge must have been found (the one from endBlk) */
188 noway_assert(backedgeList);
190 BasicBlock* curBlk = begBlk;
194 noway_assert(curBlk);
196 // For curBlk to be part of a loop that starts at begBlk
197 // curBlk must be reachable from begBlk and (since this is a loop)
198 // likewise begBlk must be reachable from curBlk.
201 if (fgReachable(curBlk, begBlk) && fgReachable(begBlk, curBlk))
203 /* If this block reaches any of the backedge blocks we set reachable */
204 /* If this block dominates any of the backedge blocks we set dominates */
205 bool reachable = false;
206 bool dominates = false;
208 for (flowList* tmp = backedgeList; tmp != nullptr; tmp = tmp->flNext)
210 BasicBlock* backedge = tmp->flBlock;
212 if (!curBlk->isRunRarely())
214 reachable |= fgReachable(curBlk, backedge);
215 dominates |= fgDominate(curBlk, backedge);
217 if (dominates && reachable)
226 noway_assert(curBlk->bbWeight > BB_ZERO_WEIGHT);
230 if ((curBlk->bbFlags & BBF_PROF_WEIGHT) != 0)
232 // We have real profile weights, so we aren't going to change this blocks weight
233 weight = curBlk->bbWeight;
239 weight = curBlk->bbWeight * BB_LOOP_WEIGHT;
243 weight = curBlk->bbWeight * (BB_LOOP_WEIGHT / 2);
247 // The multiplication may have caused us to overflow
249 if (weight < curBlk->bbWeight)
251 // The multiplication caused us to overflow
252 weight = BB_MAX_WEIGHT;
255 // Set the new weight
257 curBlk->modifyBBWeight(weight);
262 printf("\n BB%02u(wt=%s)", curBlk->bbNum, refCntWtd2str(curBlk->getBBWeight(this)));
268 /* Stop if we've reached the last block in the loop */
270 if (curBlk == endBlk)
275 curBlk = curBlk->bbNext;
277 /* If we are excluding the endBlk then stop if we've reached endBlk */
279 if (excludeEndBlk && (curBlk == endBlk))
286 /*****************************************************************************
288 * Unmark the blocks between 'begBlk' and 'endBlk' as part of a loop.
291 void Compiler::optUnmarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk)
293 /* A set of blocks that were previously marked as a loop are now
294 to be unmarked, since we have decided that for some reason this
295 loop no longer exists.
296 Basically we are just reseting the blocks bbWeight to their
300 noway_assert(begBlk->bbNum <= endBlk->bbNum);
301 noway_assert(begBlk->isLoopHead());
303 noway_assert(!opts.MinOpts());
306 unsigned backEdgeCount = 0;
308 for (flowList* pred = begBlk->bbPreds; pred != nullptr; pred = pred->flNext)
310 curBlk = pred->flBlock;
312 /* is this a backward edge? (from curBlk to begBlk) */
314 if (begBlk->bbNum > curBlk->bbNum)
319 /* We only consider back-edges that are BBJ_COND or BBJ_ALWAYS for loops */
321 if ((curBlk->bbJumpKind != BBJ_COND) && (curBlk->bbJumpKind != BBJ_ALWAYS))
329 /* Only unmark the loop blocks if we have exactly one loop back edge */
330 if (backEdgeCount != 1)
335 if (backEdgeCount > 0)
337 printf("\nNot removing loop L%02u, due to an additional back edge", begBlk->bbLoopNum);
339 else if (backEdgeCount == 0)
341 printf("\nNot removing loop L%02u, due to no back edge", begBlk->bbLoopNum);
347 noway_assert(backEdgeCount == 1);
348 noway_assert(fgReachable(begBlk, endBlk));
353 printf("\nUnmarking loop L%02u", begBlk->bbLoopNum);
360 noway_assert(curBlk);
362 // For curBlk to be part of a loop that starts at begBlk
363 // curBlk must be reachable from begBlk and (since this is a loop)
364 // likewise begBlk must be reachable from curBlk.
366 if (!curBlk->isRunRarely() && fgReachable(curBlk, begBlk) && fgReachable(begBlk, curBlk))
368 unsigned weight = curBlk->bbWeight;
370 // Don't unmark blocks that are set to BB_MAX_WEIGHT
371 // Don't unmark blocks when we are using profile weights
373 if (!curBlk->isMaxBBWeight() && ((curBlk->bbFlags & BBF_PROF_WEIGHT) == 0))
375 if (!fgDominate(curBlk, endBlk))
381 /* Merging of blocks can disturb the Dominates
382 information (see RAID #46649) */
383 if (weight < BB_LOOP_WEIGHT)
389 // We can overflow here so check for it
390 if (weight < curBlk->bbWeight)
392 weight = BB_MAX_WEIGHT;
395 assert(weight >= BB_LOOP_WEIGHT);
397 curBlk->modifyBBWeight(weight / BB_LOOP_WEIGHT);
403 printf("\n BB%02u(wt=%s)", curBlk->bbNum, refCntWtd2str(curBlk->getBBWeight(this)));
407 /* Stop if we've reached the last block in the loop */
409 if (curBlk == endBlk)
414 curBlk = curBlk->bbNext;
416 /* Stop if we go past the last block in the loop, as it may have been deleted */
417 if (curBlk->bbNum > endBlk->bbNum)
424 /*****************************************************************************************************
426 * Function called to update the loop table and bbWeight before removing a block
429 void Compiler::optUpdateLoopsBeforeRemoveBlock(BasicBlock* block, bool skipUnmarkLoop)
436 noway_assert(!opts.MinOpts());
438 bool removeLoop = false;
440 /* If an unreachable block was part of a loop entry or bottom then the loop is unreachable */
441 /* Special case: the block was the head of a loop - or pointing to a loop entry */
443 for (unsigned loopNum = 0; loopNum < optLoopCount; loopNum++)
445 /* Some loops may have been already removed by
446 * loop unrolling or conditional folding */
448 if (optLoopTable[loopNum].lpFlags & LPFLG_REMOVED)
453 if (block == optLoopTable[loopNum].lpEntry || block == optLoopTable[loopNum].lpBottom)
455 optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED;
462 printf("\nUpdateLoopsBeforeRemoveBlock Before: ");
463 optPrintLoopInfo(loopNum);
467 /* If the loop is still in the table
468 * any block in the loop must be reachable !!! */
470 noway_assert(optLoopTable[loopNum].lpEntry != block);
471 noway_assert(optLoopTable[loopNum].lpBottom != block);
473 if (optLoopTable[loopNum].lpExit == block)
475 optLoopTable[loopNum].lpExit = nullptr;
476 optLoopTable[loopNum].lpFlags &= ~LPFLG_ONE_EXIT;
480 /* If this points to the actual entry in the loop
481 * then the whole loop may become unreachable */
483 switch (block->bbJumpKind)
486 BasicBlock** jumpTab;
490 if (block->bbNext == optLoopTable[loopNum].lpEntry)
495 if (block->bbJumpKind == BBJ_NONE)
503 noway_assert(block->bbJumpDest);
504 if (block->bbJumpDest == optLoopTable[loopNum].lpEntry)
511 jumpCnt = block->bbJumpSwt->bbsCount;
512 jumpTab = block->bbJumpSwt->bbsDstTab;
516 noway_assert(*jumpTab);
517 if ((*jumpTab) == optLoopTable[loopNum].lpEntry)
521 } while (++jumpTab, --jumpCnt);
530 /* Check if the entry has other predecessors outside the loop
531 * TODO: Replace this when predecessors are available */
533 BasicBlock* auxBlock;
534 for (auxBlock = fgFirstBB; auxBlock; auxBlock = auxBlock->bbNext)
536 /* Ignore blocks in the loop */
538 if (auxBlock->bbNum > optLoopTable[loopNum].lpHead->bbNum &&
539 auxBlock->bbNum <= optLoopTable[loopNum].lpBottom->bbNum)
544 switch (auxBlock->bbJumpKind)
547 BasicBlock** jumpTab;
551 if (auxBlock->bbNext == optLoopTable[loopNum].lpEntry)
556 if (auxBlock->bbJumpKind == BBJ_NONE)
564 noway_assert(auxBlock->bbJumpDest);
565 if (auxBlock->bbJumpDest == optLoopTable[loopNum].lpEntry)
572 jumpCnt = auxBlock->bbJumpSwt->bbsCount;
573 jumpTab = auxBlock->bbJumpSwt->bbsDstTab;
577 noway_assert(*jumpTab);
578 if ((*jumpTab) == optLoopTable[loopNum].lpEntry)
582 } while (++jumpTab, --jumpCnt);
592 optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED;
595 else if (optLoopTable[loopNum].lpHead == block)
597 /* The loop has a new head - Just update the loop table */
598 optLoopTable[loopNum].lpHead = block->bbPrev;
604 printf("\nUpdateLoopsBeforeRemoveBlock After: ");
605 optPrintLoopInfo(loopNum);
610 if ((skipUnmarkLoop == false) && ((block->bbJumpKind == BBJ_ALWAYS) || (block->bbJumpKind == BBJ_COND)) &&
611 (block->bbJumpDest->isLoopHead()) && (block->bbJumpDest->bbNum <= block->bbNum) && fgDomsComputed &&
612 (fgCurBBEpochSize == fgDomBBcount + 1) && fgReachable(block->bbJumpDest, block))
614 optUnmarkLoopBlocks(block->bbJumpDest, block);
620 /*****************************************************************************
622 * Given the beginBlock of the loop, return the index of this loop
626 unsigned Compiler::optFindLoopNumberFromBeginBlock(BasicBlock* begBlk)
630 for (lnum = 0; lnum < optLoopCount; lnum++)
632 if (optLoopTable[lnum].lpHead->bbNext == begBlk)
639 noway_assert(!"Loop number not found.");
644 /*****************************************************************************
646 * Print loop info in an uniform way.
649 void Compiler::optPrintLoopInfo(unsigned loopInd,
654 BasicBlock* lpBottom,
655 unsigned char lpExitCnt,
659 noway_assert(lpHead);
662 // NOTE: we take "loopInd" as an argument instead of using the one
663 // stored in begBlk->bbLoopNum because sometimes begBlk->bbLoopNum
664 // has not be set correctly. For example, in optRecordLoop().
665 // However, in most of the cases, loops should have been recorded.
666 // Therefore the correct way is to call the Compiler::optPrintLoopInfo(unsigned lnum)
667 // version of this method.
669 printf("L%02u, from BB%02u", loopInd, lpFirst->bbNum);
670 if (lpTop != lpFirst)
672 printf(" (loop top is BB%02u)", lpTop->bbNum);
675 printf(" to BB%02u (Head=BB%02u, Entry=BB%02u, ExitCnt=%d", lpBottom->bbNum, lpHead->bbNum, lpEntry->bbNum,
680 printf(" at BB%02u", lpExit->bbNum);
683 if (parentLoop != BasicBlock::NOT_IN_LOOP)
685 printf(", parent loop = L%02u", parentLoop);
690 /*****************************************************************************
692 * Print loop information given the index of the loop in the loop table.
695 void Compiler::optPrintLoopInfo(unsigned lnum)
697 noway_assert(lnum < optLoopCount);
699 LoopDsc* ldsc = &optLoopTable[lnum]; // lnum is the INDEX to the loop table.
701 optPrintLoopInfo(lnum, ldsc->lpHead, ldsc->lpFirst, ldsc->lpTop, ldsc->lpEntry, ldsc->lpBottom, ldsc->lpExitCnt,
702 ldsc->lpExit, ldsc->lpParent);
707 //------------------------------------------------------------------------
708 // optPopulateInitInfo: Populate loop init info in the loop table.
711 // init - the tree that is supposed to initialize the loop iterator.
712 // iterVar - loop iteration variable.
715 // "false" if the loop table could not be populated with the loop iterVar init info.
718 // The 'init' tree is checked if its lhs is a local and rhs is either
719 // a const or a local.
721 bool Compiler::optPopulateInitInfo(unsigned loopInd, GenTreePtr init, unsigned iterVar)
723 // Operator should be =
724 if (init->gtOper != GT_ASG)
729 GenTreePtr lhs = init->gtOp.gtOp1;
730 GenTreePtr rhs = init->gtOp.gtOp2;
731 // LHS has to be local and should equal iterVar.
732 if (lhs->gtOper != GT_LCL_VAR || lhs->gtLclVarCommon.gtLclNum != iterVar)
737 // RHS can be constant or local var.
738 // TODO-CQ: CLONE: Add arr length for descending loops.
739 if (rhs->gtOper == GT_CNS_INT && rhs->TypeGet() == TYP_INT)
741 optLoopTable[loopInd].lpFlags |= LPFLG_CONST_INIT;
742 optLoopTable[loopInd].lpConstInit = (int)rhs->gtIntCon.gtIconVal;
744 else if (rhs->gtOper == GT_LCL_VAR)
746 optLoopTable[loopInd].lpFlags |= LPFLG_VAR_INIT;
747 optLoopTable[loopInd].lpVarInit = rhs->gtLclVarCommon.gtLclNum;
756 //----------------------------------------------------------------------------------
757 // optCheckIterInLoopTest: Check if iter var is used in loop test.
760 // test "jtrue" tree or an asg of the loop iter termination condition
761 // from/to blocks (beg, end) which are part of the loop.
762 // iterVar loop iteration variable.
763 // loopInd loop index.
766 // The test tree is parsed to check if "iterVar" matches the lhs of the condition
767 // and the rhs limit is extracted from the "test" tree. The limit information is
768 // added to the loop table.
771 // "false" if the loop table could not be populated with the loop test info or
772 // if the test condition doesn't involve iterVar.
774 bool Compiler::optCheckIterInLoopTest(
775 unsigned loopInd, GenTreePtr test, BasicBlock* from, BasicBlock* to, unsigned iterVar)
777 // Obtain the relop from the "test" tree.
779 if (test->gtOper == GT_JTRUE)
781 relop = test->gtGetOp1();
785 assert(test->gtOper == GT_ASG);
786 relop = test->gtGetOp2();
789 noway_assert(relop->OperKind() & GTK_RELOP);
791 GenTreePtr opr1 = relop->gtOp.gtOp1;
792 GenTreePtr opr2 = relop->gtOp.gtOp2;
797 // Make sure op1 or op2 is the iterVar.
798 if (opr1->gtOper == GT_LCL_VAR && opr1->gtLclVarCommon.gtLclNum == iterVar)
803 else if (opr2->gtOper == GT_LCL_VAR && opr2->gtLclVarCommon.gtLclNum == iterVar)
813 if (iterOp->gtType != TYP_INT)
818 // Mark the iterator node.
819 iterOp->gtFlags |= GTF_VAR_ITERATOR;
821 // Check what type of limit we have - constant, variable or arr-len.
822 if (limitOp->gtOper == GT_CNS_INT)
824 optLoopTable[loopInd].lpFlags |= LPFLG_CONST_LIMIT;
825 if ((limitOp->gtFlags & GTF_ICON_SIMD_COUNT) != 0)
827 optLoopTable[loopInd].lpFlags |= LPFLG_SIMD_LIMIT;
830 else if (limitOp->gtOper == GT_LCL_VAR && !optIsVarAssigned(from, to, nullptr, limitOp->gtLclVarCommon.gtLclNum))
832 optLoopTable[loopInd].lpFlags |= LPFLG_VAR_LIMIT;
834 else if (limitOp->gtOper == GT_ARR_LENGTH)
836 optLoopTable[loopInd].lpFlags |= LPFLG_ARRLEN_LIMIT;
842 // Save the type of the comparison between the iterator and the limit.
843 optLoopTable[loopInd].lpTestTree = relop;
847 //----------------------------------------------------------------------------------
848 // optIsLoopIncrTree: Check if loop is a tree of form v += 1 or v = v + 1
851 // incr The incr tree to be checked. Whether incr tree is
852 // oper-equal(+=, -=...) type nodes or v=v+1 type ASG nodes.
855 // The test tree is parsed to check if "iterVar" matches the lhs of the condition
856 // and the rhs limit is extracted from the "test" tree. The limit information is
857 // added to the loop table.
860 // iterVar local num if the iterVar is found, otherwise BAD_VAR_NUM.
862 unsigned Compiler::optIsLoopIncrTree(GenTreePtr incr)
865 genTreeOps updateOper;
866 unsigned iterVar = incr->IsLclVarUpdateTree(&incrVal, &updateOper);
867 if (iterVar != BAD_VAR_NUM)
869 // We have v = v op y type asg node.
882 // Increment should be by a const int.
883 // TODO-CQ: CLONE: allow variable increments.
884 if ((incrVal->gtOper != GT_CNS_INT) || (incrVal->TypeGet() != TYP_INT))
893 //----------------------------------------------------------------------------------
894 // optComputeIterInfo: Check tree is loop increment of a lcl that is loop-invariant.
897 // from, to - are blocks (beg, end) which are part of the loop.
898 // incr - tree that increments the loop iterator. v+=1 or v=v+1.
899 // pIterVar - see return value.
902 // Returns true if iterVar "v" can be returned in "pIterVar", otherwise returns
906 // Check if the "incr" tree is a "v=v+1 or v+=1" type tree and make sure it is not
907 // assigned in the loop.
909 bool Compiler::optComputeIterInfo(GenTreePtr incr, BasicBlock* from, BasicBlock* to, unsigned* pIterVar)
912 unsigned iterVar = optIsLoopIncrTree(incr);
913 if (iterVar == BAD_VAR_NUM)
917 if (optIsVarAssigned(from, to, incr, iterVar))
919 JITDUMP("iterVar is assigned in loop\n");
927 //----------------------------------------------------------------------------------
928 // optIsLoopTestEvalIntoTemp:
929 // Pattern match if the test tree is computed into a tmp
930 // and the "tmp" is used as jump condition for loop termination.
933 // testStmt - is the JTRUE statement that is of the form: jmpTrue (Vtmp != 0)
934 // where Vtmp contains the actual loop test result.
935 // newStmt - contains the statement that is the actual test stmt involving
936 // the loop iterator.
939 // Returns true if a new test tree can be obtained.
942 // Scan if the current stmt is a jtrue with (Vtmp != 0) as condition
943 // Then returns the rhs for def of Vtmp as the "test" node.
946 // This method just retrieves what it thinks is the "test" node,
947 // the callers are expected to verify that "iterVar" is used in the test.
949 bool Compiler::optIsLoopTestEvalIntoTemp(GenTreePtr testStmt, GenTreePtr* newTest)
951 GenTreePtr test = testStmt->gtStmt.gtStmtExpr;
953 if (test->gtOper != GT_JTRUE)
958 GenTreePtr relop = test->gtGetOp1();
959 noway_assert(relop->OperIsCompare());
961 GenTreePtr opr1 = relop->gtOp.gtOp1;
962 GenTreePtr opr2 = relop->gtOp.gtOp2;
964 // Make sure we have jtrue (vtmp != 0)
965 if ((relop->OperGet() == GT_NE) && (opr1->OperGet() == GT_LCL_VAR) && (opr2->OperGet() == GT_CNS_INT) &&
966 opr2->IsIntegralConst(0))
968 // Get the previous statement to get the def (rhs) of Vtmp to see
969 // if the "test" is evaluated into Vtmp.
970 GenTreePtr prevStmt = testStmt->gtPrev;
971 if (prevStmt == nullptr)
976 GenTreePtr tree = prevStmt->gtStmt.gtStmtExpr;
977 if (tree->OperGet() == GT_ASG)
979 GenTreePtr lhs = tree->gtOp.gtOp1;
980 GenTreePtr rhs = tree->gtOp.gtOp2;
982 // Return as the new test node.
983 if (lhs->gtOper == GT_LCL_VAR && lhs->AsLclVarCommon()->GetLclNum() == opr1->AsLclVarCommon()->GetLclNum())
985 if (rhs->OperIsCompare())
996 //----------------------------------------------------------------------------------
997 // optExtractInitTestIncr:
998 // Extract the "init", "test" and "incr" nodes of the loop.
1001 // head - Loop head block
1002 // bottom - Loop bottom block
1003 // top - Loop top block
1004 // ppInit - The init stmt of the loop if found.
1005 // ppTest - The test stmt of the loop if found.
1006 // ppIncr - The incr stmt of the loop if found.
1009 // The results are put in "ppInit", "ppTest" and "ppIncr" if the method
1010 // returns true. Returns false if the information can't be extracted.
1013 // Check if the "test" stmt is last stmt in the loop "bottom". If found good,
1014 // "test" stmt is found. Try to find the "incr" stmt. Check previous stmt of
1015 // "test" to get the "incr" stmt. If it is not found it could be a loop of the
1018 // +-------<-----------------<-----------+
1021 // BBinit(head) -> BBcond(top) -> BBLoopBody(bottom) ---^
1023 // Check if the "incr" tree is present in the loop "top" node as the last stmt.
1024 // Also check if the "test" tree is assigned to a tmp node and the tmp is used
1025 // in the jtrue condition.
1028 // This method just retrieves what it thinks is the "test" node,
1029 // the callers are expected to verify that "iterVar" is used in the test.
1031 bool Compiler::optExtractInitTestIncr(
1032 BasicBlock* head, BasicBlock* bottom, BasicBlock* top, GenTreePtr* ppInit, GenTreePtr* ppTest, GenTreePtr* ppIncr)
1034 assert(ppInit != nullptr);
1035 assert(ppTest != nullptr);
1036 assert(ppIncr != nullptr);
1038 // Check if last two statements in the loop body are the increment of the iterator
1039 // and the loop termination test.
1040 noway_assert(bottom->bbTreeList != nullptr);
1041 GenTreePtr test = bottom->bbTreeList->gtPrev;
1042 noway_assert(test != nullptr && test->gtNext == nullptr);
1045 if (optIsLoopTestEvalIntoTemp(test, &newTest))
1050 // Check if we have the incr tree before the test tree, if we don't,
1051 // check if incr is part of the loop "top".
1052 GenTreePtr incr = test->gtPrev;
1053 if (incr == nullptr || optIsLoopIncrTree(incr->gtStmt.gtStmtExpr) == BAD_VAR_NUM)
1055 if (top == nullptr || top->bbTreeList == nullptr || top->bbTreeList->gtPrev == nullptr)
1060 // If the prev stmt to loop test is not incr, then check if we have loop test evaluated into a tmp.
1061 GenTreePtr topLast = top->bbTreeList->gtPrev;
1062 if (optIsLoopIncrTree(topLast->gtStmt.gtStmtExpr) != BAD_VAR_NUM)
1072 assert(test != incr);
1074 // Find the last statement in the loop pre-header which we expect to be the initialization of
1075 // the loop iterator.
1076 GenTreePtr phdr = head->bbTreeList;
1077 if (phdr == nullptr)
1082 GenTreePtr init = phdr->gtPrev;
1083 noway_assert(init != nullptr && (init->gtNext == nullptr));
1085 // If it is a duplicated loop condition, skip it.
1086 if (init->gtFlags & GTF_STMT_CMPADD)
1088 // Must be a duplicated loop condition.
1089 noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
1090 init = init->gtPrev;
1091 noway_assert(init != nullptr);
1094 noway_assert(init->gtOper == GT_STMT);
1095 noway_assert(test->gtOper == GT_STMT);
1096 noway_assert(incr->gtOper == GT_STMT);
1098 *ppInit = init->gtStmt.gtStmtExpr;
1099 *ppTest = test->gtStmt.gtStmtExpr;
1100 *ppIncr = incr->gtStmt.gtStmtExpr;
1105 /*****************************************************************************
1107 * Record the loop in the loop table.
1110 void Compiler::optRecordLoop(BasicBlock* head,
1116 unsigned char exitCnt)
1118 // Record this loop in the table, if there's room.
1120 assert(optLoopCount <= MAX_LOOP_NUM);
1121 if (optLoopCount == MAX_LOOP_NUM)
1124 loopOverflowThisMethod = true;
1129 // Assumed preconditions on the loop we're adding.
1130 assert(first->bbNum <= top->bbNum);
1131 assert(top->bbNum <= entry->bbNum);
1132 assert(entry->bbNum <= bottom->bbNum);
1133 assert(head->bbNum < top->bbNum || head->bbNum > bottom->bbNum);
1135 // If the new loop contains any existing ones, add it in the right place.
1136 unsigned char loopInd = optLoopCount;
1137 for (unsigned char prevPlus1 = optLoopCount; prevPlus1 > 0; prevPlus1--)
1139 unsigned char prev = prevPlus1 - 1;
1140 if (optLoopTable[prev].lpContainedBy(first, bottom))
1145 // Move up any loops if necessary.
1146 for (unsigned j = optLoopCount; j > loopInd; j--)
1148 optLoopTable[j] = optLoopTable[j - 1];
1152 for (unsigned i = loopInd + 1; i < optLoopCount; i++)
1154 // The loop is well-formed.
1155 assert(optLoopTable[i].lpWellFormed());
1156 // Check for disjoint.
1157 if (optLoopTable[i].lpDisjoint(first, bottom))
1161 // Otherwise, assert complete containment (of optLoopTable[i] in new loop).
1162 assert(optLoopTable[i].lpContainedBy(first, bottom));
1166 optLoopTable[loopInd].lpHead = head;
1167 optLoopTable[loopInd].lpFirst = first;
1168 optLoopTable[loopInd].lpTop = top;
1169 optLoopTable[loopInd].lpBottom = bottom;
1170 optLoopTable[loopInd].lpEntry = entry;
1171 optLoopTable[loopInd].lpExit = exit;
1172 optLoopTable[loopInd].lpExitCnt = exitCnt;
1174 optLoopTable[loopInd].lpParent = BasicBlock::NOT_IN_LOOP;
1175 optLoopTable[loopInd].lpChild = BasicBlock::NOT_IN_LOOP;
1176 optLoopTable[loopInd].lpSibling = BasicBlock::NOT_IN_LOOP;
1178 optLoopTable[loopInd].lpFlags = 0;
1180 // We haven't yet recorded any side effects.
1181 optLoopTable[loopInd].lpLoopHasHeapHavoc = false;
1182 optLoopTable[loopInd].lpFieldsModified = nullptr;
1183 optLoopTable[loopInd].lpArrayElemTypesModified = nullptr;
1185 // If DO-WHILE loop mark it as such.
1186 if (head->bbNext == entry)
1188 optLoopTable[loopInd].lpFlags |= LPFLG_DO_WHILE;
1191 // If single exit loop mark it as such.
1195 optLoopTable[loopInd].lpFlags |= LPFLG_ONE_EXIT;
1199 // Try to find loops that have an iterator (i.e. for-like loops) "for (init; test; incr){ ... }"
1200 // We have the following restrictions:
1201 // 1. The loop condition must be a simple one i.e. only one JTRUE node
1202 // 2. There must be a loop iterator (a local var) that is
1203 // incremented (decremented or lsh, rsh, mul) with a constant value
1204 // 3. The iterator is incremented exactly once
1205 // 4. The loop condition must use the iterator.
1207 if (bottom->bbJumpKind == BBJ_COND)
1212 if (!optExtractInitTestIncr(head, bottom, top, &init, &test, &incr))
1217 unsigned iterVar = BAD_VAR_NUM;
1218 if (!optComputeIterInfo(incr, head->bbNext, bottom, &iterVar))
1223 // Make sure the "iterVar" initialization is never skipped,
1224 // i.e. HEAD dominates the ENTRY.
1225 if (!fgDominate(head, entry))
1230 if (!optPopulateInitInfo(loopInd, init, iterVar))
1235 // Check that the iterator is used in the loop condition.
1236 if (!optCheckIterInLoopTest(loopInd, test, head->bbNext, bottom, iterVar))
1241 // We know the loop has an iterator at this point ->flag it as LPFLG_ITER
1242 // Record the iterator, the pointer to the test node
1243 // and the initial value of the iterator (constant or local var)
1244 optLoopTable[loopInd].lpFlags |= LPFLG_ITER;
1247 optLoopTable[loopInd].lpIterTree = incr;
1250 // Save the initial value of the iterator - can be lclVar or constant
1251 // Flag the loop accordingly.
1257 simpleTestLoopCount++;
1260 // Check if a constant iteration loop.
1261 if ((optLoopTable[loopInd].lpFlags & LPFLG_CONST_INIT) && (optLoopTable[loopInd].lpFlags & LPFLG_CONST_LIMIT))
1263 // This is a constant loop.
1264 optLoopTable[loopInd].lpFlags |= LPFLG_CONST;
1266 constIterLoopCount++;
1273 printf("\nConstant loop initializer:\n");
1276 printf("\nConstant loop body:\n");
1278 BasicBlock* block = head;
1281 block = block->bbNext;
1282 for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
1284 if (stmt->gtStmt.gtStmtExpr == incr)
1289 gtDispTree(stmt->gtStmt.gtStmtExpr);
1291 } while (block != bottom);
1297 DBEXEC(verbose, optPrintLoopRecording(loopInd));
1302 //------------------------------------------------------------------------
1303 // optPrintLoopRecording: Print a recording of the loop.
1306 // loopInd - loop index.
1308 void Compiler::optPrintLoopRecording(unsigned loopInd)
1310 printf("Recorded loop %s", (loopInd != optLoopCount ? "(extended) " : ""));
1311 optPrintLoopInfo(optLoopCount, // Not necessarily the loop index, but the number of loops that have been added.
1312 optLoopTable[loopInd].lpHead, optLoopTable[loopInd].lpFirst, optLoopTable[loopInd].lpTop,
1313 optLoopTable[loopInd].lpEntry, optLoopTable[loopInd].lpBottom, optLoopTable[loopInd].lpExitCnt,
1314 optLoopTable[loopInd].lpExit);
1316 // If an iterator loop print the iterator and the initialization.
1317 if (optLoopTable[loopInd].lpFlags & LPFLG_ITER)
1319 printf(" [over V%02u", optLoopTable[loopInd].lpIterVar());
1321 printf(GenTree::NodeName(optLoopTable[loopInd].lpIterOper()));
1323 printf("%d )", optLoopTable[loopInd].lpIterConst());
1325 if (optLoopTable[loopInd].lpFlags & LPFLG_CONST_INIT)
1327 printf(" from %d", optLoopTable[loopInd].lpConstInit);
1329 if (optLoopTable[loopInd].lpFlags & LPFLG_VAR_INIT)
1331 printf(" from V%02u", optLoopTable[loopInd].lpVarInit);
1334 // If a simple test condition print operator and the limits */
1335 printf(GenTree::NodeName(optLoopTable[loopInd].lpTestOper()));
1337 if (optLoopTable[loopInd].lpFlags & LPFLG_CONST_LIMIT)
1339 printf("%d ", optLoopTable[loopInd].lpConstLimit());
1342 if (optLoopTable[loopInd].lpFlags & LPFLG_VAR_LIMIT)
1344 printf("V%02u ", optLoopTable[loopInd].lpVarLimit());
1353 void Compiler::optCheckPreds()
1356 BasicBlock* blockPred;
1359 for (block = fgFirstBB; block; block = block->bbNext)
1361 for (pred = block->bbPreds; pred; pred = pred->flNext)
1363 // make sure this pred is part of the BB list
1364 for (blockPred = fgFirstBB; blockPred; blockPred = blockPred->bbNext)
1366 if (blockPred == pred->flBlock)
1371 noway_assert(blockPred);
1372 switch (blockPred->bbJumpKind)
1375 if (blockPred->bbJumpDest == block)
1381 noway_assert(blockPred->bbNext == block);
1383 case BBJ_EHFILTERRET:
1385 case BBJ_EHCATCHRET:
1386 noway_assert(blockPred->bbJumpDest == block);
1397 /*****************************************************************************
1398 * Find the natural loops, using dominators. Note that the test for
1399 * a loop is slightly different from the standard one, because we have
1400 * not done a depth first reordering of the basic blocks.
1403 void Compiler::optFindNaturalLoops()
1408 printf("*************** In optFindNaturalLoops()\n");
1414 flowList* predEntry;
1416 noway_assert(fgDomsComputed);
1420 hasMethodLoops = false;
1421 loopsThisMethod = 0;
1422 loopOverflowThisMethod = false;
1425 /* We will use the following terminology:
1426 * HEAD - the basic block that flows into the loop ENTRY block (Currently MUST be lexically before entry).
1427 Not part of the looping of the loop.
1428 * FIRST - the lexically first basic block (in bbNext order) within this loop. (May be part of a nested loop,
1429 * but not the outer loop. ???)
1430 * TOP - the target of the backward edge from BOTTOM. In most cases FIRST and TOP are the same.
1431 * BOTTOM - the lexically last block in the loop (i.e. the block from which we jump to the top)
1432 * EXIT - the loop exit or the block right after the bottom
1433 * ENTRY - the entry in the loop (not necessarly the TOP), but there must be only one entry
1435 * We (currently) require the body of a loop to be a contiguous (in bbNext order) sequence of basic blocks.
1469 unsigned char exitCount;
1471 for (head = fgFirstBB; head->bbNext; head = head->bbNext)
1477 // Blocks that are rarely run have a zero bbWeight and should
1478 // never be optimized here
1480 if (top->bbWeight == BB_ZERO_WEIGHT)
1485 for (pred = top->bbPreds; pred; pred = pred->flNext)
1487 /* Is this a loop candidate? - We look for "back edges", i.e. an edge from BOTTOM
1488 * to TOP (note that this is an abuse of notation since this is not necessarily a back edge
1489 * as the definition says, but merely an indication that we have a loop there).
1490 * Thus, we have to be very careful and after entry discovery check that it is indeed
1491 * the only place we enter the loop (especially for non-reducible flow graphs).
1494 bottom = pred->flBlock;
1497 if (top->bbNum <= bottom->bbNum) // is this a backward edge? (from BOTTOM to TOP)
1499 if ((bottom->bbJumpKind == BBJ_EHFINALLYRET) || (bottom->bbJumpKind == BBJ_EHFILTERRET) ||
1500 (bottom->bbJumpKind == BBJ_EHCATCHRET) || (bottom->bbJumpKind == BBJ_CALLFINALLY) ||
1501 (bottom->bbJumpKind == BBJ_SWITCH))
1503 /* BBJ_EHFINALLYRET, BBJ_EHFILTERRET, BBJ_EHCATCHRET, and BBJ_CALLFINALLY can never form a loop.
1504 * BBJ_SWITCH that has a backward jump appears only for labeled break. */
1508 BasicBlock* loopBlock;
1510 /* The presence of a "back edge" is an indication that a loop might be present here
1513 * 1. A collection of STRONGLY CONNECTED nodes i.e. there is a path from any
1514 * node in the loop to any other node in the loop (wholly within the loop)
1515 * 2. The loop has a unique ENTRY, i.e. there is only one way to reach a node
1516 * in the loop from outside the loop, and that is through the ENTRY
1519 /* Let's find the loop ENTRY */
1521 if (head->bbJumpKind == BBJ_ALWAYS)
1523 if (head->bbJumpDest->bbNum <= bottom->bbNum && head->bbJumpDest->bbNum >= top->bbNum)
1525 /* OK - we enter somewhere within the loop */
1526 entry = head->bbJumpDest;
1528 /* some useful asserts
1529 * Cannot enter at the top - should have being caught by redundant jumps */
1531 assert((entry != top) || (head->bbFlags & BBF_KEEP_BBJ_ALWAYS));
1535 /* special case - don't consider now */
1536 // assert (!"Loop entered in weird way!");
1540 // Can we fall through into the loop?
1541 else if (head->bbJumpKind == BBJ_NONE || head->bbJumpKind == BBJ_COND)
1543 /* The ENTRY is at the TOP (a do-while loop) */
1548 goto NO_LOOP; // head does not flow into the loop bail for now
1551 // Now we find the "first" block -- the earliest block reachable within the loop.
1552 // This is usually the same as "top", but can differ in rare cases where "top" is
1553 // the entry block of a nested loop, and that nested loop branches backwards to a
1554 // a block before "top". We find this by searching for such backwards branches
1555 // in the loop known so far.
1556 BasicBlock* first = top;
1557 BasicBlock* newFirst;
1558 bool blocksToSearch = true;
1559 BasicBlock* validatedAfter = bottom->bbNext;
1560 while (blocksToSearch)
1562 blocksToSearch = false;
1564 blocksToSearch = false;
1565 for (loopBlock = first; loopBlock != validatedAfter; loopBlock = loopBlock->bbNext)
1567 unsigned nSucc = loopBlock->NumSucc();
1568 for (unsigned j = 0; j < nSucc; j++)
1570 BasicBlock* succ = loopBlock->GetSucc(j);
1571 if ((newFirst == nullptr && succ->bbNum < first->bbNum) ||
1572 (newFirst != nullptr && succ->bbNum < newFirst->bbNum))
1578 if (newFirst != nullptr)
1580 validatedAfter = first;
1582 blocksToSearch = true;
1586 // Is "head" still before "first"? If not, we don't have a valid loop...
1587 if (head->bbNum >= first->bbNum)
1590 "Extending loop [BB%02u..BB%02u] 'first' to BB%02u captures head BB%02u. Rejecting loop.\n",
1591 top->bbNum, bottom->bbNum, first->bbNum, head->bbNum);
1595 /* Make sure ENTRY dominates all blocks in the loop
1596 * This is necessary to ensure condition 2. above
1597 * At the same time check if the loop has a single exit
1598 * point - those loops are easier to optimize */
1600 for (loopBlock = top; loopBlock != bottom->bbNext; loopBlock = loopBlock->bbNext)
1602 if (!fgDominate(entry, loopBlock))
1607 if (loopBlock == bottom)
1609 if (bottom->bbJumpKind != BBJ_ALWAYS)
1611 /* there is an exit at the bottom */
1613 noway_assert(bottom->bbJumpDest == top);
1620 BasicBlock* exitPoint;
1622 switch (loopBlock->bbJumpKind)
1625 case BBJ_CALLFINALLY:
1627 case BBJ_EHCATCHRET:
1628 assert(loopBlock->bbJumpDest);
1629 exitPoint = loopBlock->bbJumpDest;
1631 if (exitPoint->bbNum < top->bbNum || exitPoint->bbNum > bottom->bbNum)
1633 /* exit from a block other than BOTTOM */
1642 case BBJ_EHFINALLYRET:
1643 case BBJ_EHFILTERRET:
1644 /* The "try" associated with this "finally" must be in the
1645 * same loop, so the finally block will return control inside the loop */
1650 /* those are exits from the loop */
1658 jumpCnt = loopBlock->bbJumpSwt->bbsCount;
1659 BasicBlock** jumpTab;
1660 jumpTab = loopBlock->bbJumpSwt->bbsDstTab;
1664 noway_assert(*jumpTab);
1665 exitPoint = *jumpTab;
1667 if (exitPoint->bbNum < top->bbNum || exitPoint->bbNum > bottom->bbNum)
1672 } while (++jumpTab, --jumpCnt);
1676 noway_assert(!"Unexpected bbJumpKind");
1681 /* Make sure we can iterate the loop (i.e. there is a way back to ENTRY)
1682 * This is to ensure condition 1. above which prevents marking fake loops
1684 * Below is an example:
1692 * The example above is not a loop since we bail after the first iteration
1694 * The condition we have to check for is
1695 * 1. ENTRY must have at least one predecessor inside the loop. Since we know that that block is
1696 * reachable, it can only be reached through ENTRY, therefore we have a way back to ENTRY
1698 * 2. If we have a GOTO (BBJ_ALWAYS) outside of the loop and that block dominates the
1699 * loop bottom then we cannot iterate
1701 * NOTE that this doesn't entirely satisfy condition 1. since "break" statements are not
1702 * part of the loop nodes (as per definition they are loop exits executed only once),
1703 * but we have no choice but to include them because we consider all blocks within TOP-BOTTOM */
1705 for (loopBlock = top; loopBlock != bottom; loopBlock = loopBlock->bbNext)
1707 switch (loopBlock->bbJumpKind)
1712 if (fgDominate(loopBlock, bottom))
1721 bool canIterateLoop = false;
1723 for (predEntry = entry->bbPreds; predEntry; predEntry = predEntry->flNext)
1725 if (predEntry->flBlock->bbNum >= top->bbNum && predEntry->flBlock->bbNum <= bottom->bbNum)
1727 canIterateLoop = true;
1730 else if (predEntry->flBlock != head)
1732 // The entry block has multiple predecessors outside the loop; the 'head'
1733 // block isn't the only one. We only support a single 'head', so bail.
1738 if (!canIterateLoop)
1743 /* Double check - make sure that all loop blocks except ENTRY
1744 * have no predecessors outside the loop - this ensures only one loop entry and prevents
1745 * us from considering non-loops due to incorrectly assuming that we had a back edge
1748 * Loops of the form "while (a || b)" will be treated as 2 nested loops (with the same header)
1751 for (loopBlock = top; loopBlock != bottom->bbNext; loopBlock = loopBlock->bbNext)
1753 if (loopBlock == entry)
1758 for (predTop = loopBlock->bbPreds; predTop != nullptr; predTop = predTop->flNext)
1760 if (predTop->flBlock->bbNum < top->bbNum || predTop->flBlock->bbNum > bottom->bbNum)
1762 // noway_assert(!"Found loop with multiple entries");
1768 // Disqualify loops where the first block of the loop is less nested in EH than
1769 // the bottom block. That is, we don't want to handle loops where the back edge
1770 // goes from within an EH region to a first block that is outside that same EH
1771 // region. Note that we *do* handle loops where the first block is the *first*
1772 // block of a more nested EH region (since it is legal to branch to the first
1773 // block of an immediately more nested EH region). So, for example, disqualify
1780 // BB10 BBJ_COND => BB02
1784 // Here, BB10 is more nested than BB02.
1786 if (bottom->hasTryIndex() && !bbInTryRegions(bottom->getTryIndex(), first))
1788 JITDUMP("Loop 'first' BB%02u is in an outer EH region compared to loop 'bottom' BB%02u. Rejecting "
1790 first->bbNum, bottom->bbNum);
1794 #if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
1795 // Disqualify loops where the first block of the loop is a finally target.
1796 // The main problem is when multiple loops share a 'first' block that is a finally
1797 // target and we canonicalize the loops by adding a new loop head. In that case, we
1798 // need to update the blocks so the finally target bit is moved to the newly created
1799 // block, and removed from the old 'first' block. This is 'hard', so at this point
1800 // in the RyuJIT codebase (when we don't expect to keep the "old" ARM32 code generator
1801 // long-term), it's easier to disallow the loop than to update the flow graph to
1802 // support this case.
1804 if ((first->bbFlags & BBF_FINALLY_TARGET) != 0)
1806 JITDUMP("Loop 'first' BB%02u is a finally target. Rejecting loop.\n", first->bbNum);
1809 #endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
1811 /* At this point we have a loop - record it in the loop table
1812 * If we found only one exit, record it in the table too
1813 * (otherwise an exit = 0 in the loop table means multiple exits) */
1820 optRecordLoop(head, first, top, entry, bottom, exit, exitCount);
1823 if (!hasMethodLoops)
1825 /* mark the method as containing natural loops */
1827 hasMethodLoops = true;
1830 /* increment total number of loops found */
1834 /* keep track of the number of exits */
1835 loopExitCountTable.record(static_cast<unsigned>(exitCount));
1836 #endif // COUNT_LOOPS
1839 /* current predecessor not good for a loop - continue with another one, if any */
1845 loopCountTable.record(loopsThisMethod);
1846 if (maxLoopsPerMethod < loopsThisMethod)
1848 maxLoopsPerMethod = loopsThisMethod;
1850 if (loopOverflowThisMethod)
1852 totalLoopOverflows++;
1854 #endif // COUNT_LOOPS
1856 // Now the loop indices are stable. We can figure out parent/child relationships
1857 // (using table indices to name loops), and label blocks.
1858 for (unsigned char loopInd = 1; loopInd < optLoopCount; loopInd++)
1860 for (unsigned char possibleParent = loopInd; possibleParent > 0;)
1863 if (optLoopTable[possibleParent].lpContains(optLoopTable[loopInd]))
1865 optLoopTable[loopInd].lpParent = possibleParent;
1866 optLoopTable[loopInd].lpSibling = optLoopTable[possibleParent].lpChild;
1867 optLoopTable[possibleParent].lpChild = loopInd;
1873 // Now label the blocks with the innermost loop to which they belong. Since parents
1874 // precede children in the table, doing the labeling for each loop in order will achieve
1875 // this -- the innermost loop labeling will be done last.
1876 for (unsigned char loopInd = 0; loopInd < optLoopCount; loopInd++)
1878 BasicBlock* first = optLoopTable[loopInd].lpFirst;
1879 BasicBlock* bottom = optLoopTable[loopInd].lpBottom;
1880 for (BasicBlock* blk = first; blk != nullptr; blk = blk->bbNext)
1882 blk->bbNatLoopNum = loopInd;
1887 assert(blk->bbNext != nullptr); // We should never reach nullptr.
1891 // Make sure that loops are canonical: that every loop has a unique "top", by creating an empty "nop"
1892 // one, if necessary, for loops containing others that share a "top."
1894 for (unsigned char loopInd = 0; loopInd < optLoopCount; loopInd++)
1896 // Traverse the outermost loops as entries into the loop nest; so skip non-outermost.
1897 if (optLoopTable[loopInd].lpParent != BasicBlock::NOT_IN_LOOP)
1903 if (optCanonicalizeLoopNest(loopInd))
1910 fgUpdateChangedFlowGraph();
1914 if (verbose && optLoopCount > 0)
1916 printf("\nFinal natural loop table:\n");
1917 for (unsigned loopInd = 0; loopInd < optLoopCount; loopInd++)
1919 optPrintLoopInfo(loopInd);
1926 void Compiler::optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap)
1928 BasicBlock* newJumpDest = nullptr;
1929 switch (blk->bbJumpKind)
1934 case BBJ_EHFILTERRET:
1935 case BBJ_EHFINALLYRET:
1936 case BBJ_EHCATCHRET:
1937 // These have no jump destination to update.
1942 case BBJ_CALLFINALLY:
1944 // All of these have a single jump destination to update.
1945 if (redirectMap->Lookup(blk->bbJumpDest, &newJumpDest))
1947 blk->bbJumpDest = newJumpDest;
1953 bool redirected = false;
1954 for (unsigned i = 0; i < blk->bbJumpSwt->bbsCount; i++)
1956 if (redirectMap->Lookup(blk->bbJumpSwt->bbsDstTab[i], &newJumpDest))
1958 blk->bbJumpSwt->bbsDstTab[i] = newJumpDest;
1962 // If any redirections happend, invalidate the switch table map for the switch.
1965 GetSwitchDescMap()->Remove(blk);
1975 // TODO-Cleanup: This should be a static member of the BasicBlock class.
1976 void Compiler::optCopyBlkDest(BasicBlock* from, BasicBlock* to)
1978 assert(from->bbJumpKind == to->bbJumpKind); // Precondition.
1980 // copy the jump destination(s) from "from" to "to".
1981 switch (to->bbJumpKind)
1985 case BBJ_CALLFINALLY:
1987 // All of these have a single jump destination to update.
1988 to->bbJumpDest = from->bbJumpDest;
1993 to->bbJumpSwt = new (this, CMK_BasicBlock) BBswtDesc();
1994 to->bbJumpSwt->bbsCount = from->bbJumpSwt->bbsCount;
1995 to->bbJumpSwt->bbsDstTab = new (this, CMK_BasicBlock) BasicBlock*[from->bbJumpSwt->bbsCount];
1997 for (unsigned i = 0; i < from->bbJumpSwt->bbsCount; i++)
1999 to->bbJumpSwt->bbsDstTab[i] = from->bbJumpSwt->bbsDstTab[i];
2009 // Canonicalize the loop nest rooted at parent loop 'loopInd'.
2010 // Returns 'true' if the flow graph is modified.
2011 bool Compiler::optCanonicalizeLoopNest(unsigned char loopInd)
2013 bool modified = false;
2015 // Is the top of the current loop not in any nested loop?
2016 if (optLoopTable[loopInd].lpTop->bbNatLoopNum != loopInd)
2018 if (optCanonicalizeLoop(loopInd))
2024 for (unsigned char child = optLoopTable[loopInd].lpChild; child != BasicBlock::NOT_IN_LOOP;
2025 child = optLoopTable[child].lpSibling)
2027 if (optCanonicalizeLoopNest(child))
2036 bool Compiler::optCanonicalizeLoop(unsigned char loopInd)
2038 // Is the top uniquely part of the current loop?
2039 BasicBlock* t = optLoopTable[loopInd].lpTop;
2041 if (t->bbNatLoopNum == loopInd)
2046 JITDUMP("in optCanonicalizeLoop: L%02u has top BB%02u (bottom BB%02u) with natural loop number L%02u: need to "
2048 loopInd, t->bbNum, optLoopTable[loopInd].lpBottom->bbNum, t->bbNatLoopNum);
2050 // Otherwise, the top of this loop is also part of a nested loop.
2052 // Insert a new unique top for this loop. We must be careful to put this new
2053 // block in the correct EH region. Note that f->bbPrev might be in a different
2054 // EH region. For example:
2062 // In this case, first->bbPrev is BB07, which is in a different 'try' region.
2063 // On the other hand, the first block of multiple loops might be the first
2064 // block of a 'try' region that is completely contained in the multiple loops.
2069 // BB10 BBJ_ALWAYS => BB08
2071 // BB12 BBJ_ALWAYS => BB08
2073 // Here, we have two loops, both with BB08 as the "first" block. Block BB08
2074 // is a single-block "try" region. Neither loop "bottom" block is in the same
2075 // "try" region as BB08. This is legal because you can jump to the first block
2076 // of a try region. With EH normalization, no two "try" regions will share
2077 // this block. In this case, we need to insert a new block for the outer loop
2078 // in the same EH region as the branch from the "bottom":
2083 // BB10 BBJ_ALWAYS => BB08
2085 // BB12 BBJ_ALWAYS => BB30
2087 // Another possibility is that the "first" block of the loop nest can be the first block
2088 // of a "try" region that also has other predecessors than those in the loop, or even in
2089 // the "try" region (since blocks can target the first block of a "try" region). For example:
2093 // BB10 BBJ_ALWAYS => BB08
2095 // BB12 BBJ_ALWAYS => BB08
2098 // BB20 BBJ_ALWAYS => BB08
2100 // BB25 BBJ_ALWAYS => BB08
2102 // Here, BB08 has 4 flow graph predecessors: BB10, BB12, BB20, BB25. These are all potential loop
2103 // bottoms, for four possible nested loops. However, we require all the loop bottoms to be in the
2104 // same EH region. For loops BB08..BB10 and BB08..BB12, we need to add a new "top" block within
2105 // the try region, immediately before BB08. The bottom of the loop BB08..BB10 loop will target the
2106 // old BB08, and the bottom of the BB08..BB12 loop will target the new loop header. The other branches
2107 // (BB20, BB25) must target the new loop header, both for correctness, and to avoid the illegal
2108 // situation of branching to a non-first block of a 'try' region.
2110 // We can also have a loop nest where the "first" block is outside of a "try" region
2111 // and the back edges are inside a "try" region, for example:
2115 // BB09 try { BBJ_COND => BB02
2117 // BB15 BBJ_COND => BB02
2119 // BB21 } // end of "try"
2121 // In this case, both loop back edges were formed by "leave" instructions that were
2122 // imported into branches that were later made conditional. In this case, we don't
2123 // want to copy the EH region of the back edge, since that would create a block
2124 // outside of and disjoint with the "try" region of the back edge. However, to
2125 // simplify things, we disqualify this type of loop, so we should never see this here.
2127 BasicBlock* h = optLoopTable[loopInd].lpHead;
2128 BasicBlock* f = optLoopTable[loopInd].lpFirst;
2129 BasicBlock* b = optLoopTable[loopInd].lpBottom;
2131 // The loop must be entirely contained within a single handler region.
2132 assert(BasicBlock::sameHndRegion(f, b));
2134 // If the bottom block is in the same "try" region, then we extend the EH
2135 // region. Otherwise, we add the new block outside the "try" region.
2136 bool extendRegion = BasicBlock::sameTryRegion(f, b);
2137 BasicBlock* newT = fgNewBBbefore(BBJ_NONE, f, extendRegion);
2140 // We need to set the EH region manually. Set it to be the same
2141 // as the bottom block.
2142 newT->copyEHRegion(b);
2145 BlockSetOps::Assign(this, newT->bbReach, t->bbReach);
2147 // Redirect the "bottom" of the current loop to "newT".
2148 BlockToBlockMap* blockMap = new (getAllocatorLoopHoist()) BlockToBlockMap(getAllocatorLoopHoist());
2149 blockMap->Set(t, newT);
2150 optRedirectBlock(b, blockMap);
2152 // Redirect non-loop preds of "t" to also go to "newT". Inner loops that also branch to "t" should continue
2153 // to do so. However, there maybe be other predecessors from outside the loop nest that need to be updated
2154 // to point to "newT". This normally wouldn't happen, since they too would be part of the loop nest. However,
2155 // they might have been prevented from participating in the loop nest due to different EH nesting, or some
2158 // Note that optRedirectBlock doesn't update the predecessors list. So, if the same 't' block is processed
2159 // multiple times while canonicalizing multiple loop nests, we'll attempt to redirect a predecessor multiple times.
2160 // This is ok, because after the first redirection, the topPredBlock branch target will no longer match the source
2161 // edge of the blockMap, so nothing will happen.
2162 for (flowList* topPred = t->bbPreds; topPred != nullptr; topPred = topPred->flNext)
2164 BasicBlock* topPredBlock = topPred->flBlock;
2166 // Skip if topPredBlock is in the loop.
2167 // Note that this uses block number to detect membership in the loop. We are adding blocks during
2168 // canonicalization, and those block numbers will be new, and larger than previous blocks. However, we work
2169 // outside-in, so we shouldn't encounter the new blocks at the loop boundaries, or in the predecessor lists.
2170 if (t->bbNum <= topPredBlock->bbNum && topPredBlock->bbNum <= b->bbNum)
2172 JITDUMP("in optCanonicalizeLoop: 'top' predecessor BB%02u is in the range of L%02u (BB%02u..BB%02u); not "
2173 "redirecting its bottom edge\n",
2174 topPredBlock->bbNum, loopInd, t->bbNum, b->bbNum);
2178 JITDUMP("in optCanonicalizeLoop: redirect top predecessor BB%02u to BB%02u\n", topPredBlock->bbNum,
2180 optRedirectBlock(topPredBlock, blockMap);
2183 assert(newT->bbNext == f);
2186 newT->bbJumpKind = BBJ_ALWAYS;
2187 newT->bbJumpDest = t;
2188 newT->bbTreeList = nullptr;
2189 fgInsertStmtAtEnd(newT, fgNewStmtFromTree(gtNewOperNode(GT_NOP, TYP_VOID, nullptr)));
2192 // If it had been a do-while loop (top == entry), update entry, as well.
2193 BasicBlock* origE = optLoopTable[loopInd].lpEntry;
2194 if (optLoopTable[loopInd].lpTop == origE)
2196 optLoopTable[loopInd].lpEntry = newT;
2198 optLoopTable[loopInd].lpTop = newT;
2199 optLoopTable[loopInd].lpFirst = newT;
2201 newT->bbNatLoopNum = loopInd;
2203 JITDUMP("in optCanonicalizeLoop: made new block BB%02u [%p] the new unique top of loop %d.\n", newT->bbNum,
2204 dspPtr(newT), loopInd);
2206 // Make sure the head block still goes to the entry...
2207 if (h->bbJumpKind == BBJ_NONE && h->bbNext != optLoopTable[loopInd].lpEntry)
2209 h->bbJumpKind = BBJ_ALWAYS;
2210 h->bbJumpDest = optLoopTable[loopInd].lpEntry;
2212 else if (h->bbJumpKind == BBJ_COND && h->bbNext == newT && newT != optLoopTable[loopInd].lpEntry)
2214 BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, h, /*extendRegion*/ true);
2215 optLoopTable[loopInd].lpHead = h2;
2216 h2->bbJumpDest = optLoopTable[loopInd].lpEntry;
2217 h2->bbTreeList = nullptr;
2218 fgInsertStmtAtEnd(h2, fgNewStmtFromTree(gtNewOperNode(GT_NOP, TYP_VOID, nullptr)));
2221 // If any loops nested in "loopInd" have the same head and entry as "loopInd",
2222 // it must be the case that they were do-while's (since "h" fell through to the entry).
2223 // The new node "newT" becomes the head of such loops.
2224 for (unsigned char childLoop = optLoopTable[loopInd].lpChild; childLoop != BasicBlock::NOT_IN_LOOP;
2225 childLoop = optLoopTable[childLoop].lpSibling)
2227 if (optLoopTable[childLoop].lpEntry == origE && optLoopTable[childLoop].lpHead == h &&
2228 newT->bbJumpKind == BBJ_NONE && newT->bbNext == origE)
2230 optUpdateLoopHead(childLoop, h, newT);
2236 bool Compiler::optLoopContains(unsigned l1, unsigned l2)
2238 assert(l1 != BasicBlock::NOT_IN_LOOP);
2243 else if (l2 == BasicBlock::NOT_IN_LOOP)
2249 return optLoopContains(l1, optLoopTable[l2].lpParent);
2253 void Compiler::optUpdateLoopHead(unsigned loopInd, BasicBlock* from, BasicBlock* to)
2255 assert(optLoopTable[loopInd].lpHead == from);
2256 optLoopTable[loopInd].lpHead = to;
2257 for (unsigned char childLoop = optLoopTable[loopInd].lpChild; childLoop != BasicBlock::NOT_IN_LOOP;
2258 childLoop = optLoopTable[childLoop].lpSibling)
2260 if (optLoopTable[childLoop].lpHead == from)
2262 optUpdateLoopHead(childLoop, from, to);
2267 /*****************************************************************************
2268 * If the : i += const" will cause an overflow exception for the small types.
2271 bool jitIterSmallOverflow(int iterAtExit, var_types incrType)
2278 type_MAX = SCHAR_MAX;
2281 type_MAX = UCHAR_MAX;
2284 type_MAX = SHRT_MAX;
2287 type_MAX = USHRT_MAX;
2290 case TYP_UINT: // Detected by checking for 32bit ....
2292 return false; // ... overflow same as done for TYP_INT
2298 if (iterAtExit > type_MAX)
2308 /*****************************************************************************
2309 * If the "i -= const" will cause an underflow exception for the small types
2312 bool jitIterSmallUnderflow(int iterAtExit, var_types decrType)
2319 type_MIN = SCHAR_MIN;
2322 type_MIN = SHRT_MIN;
2331 case TYP_UINT: // Detected by checking for 32bit ....
2333 return false; // ... underflow same as done for TYP_INT
2339 if (iterAtExit < type_MIN)
2349 /*****************************************************************************
2351 * Helper for unroll loops - Computes the number of repetitions
2352 * in a constant loop. If it cannot prove the number is constant returns false
2355 bool Compiler::optComputeLoopRep(int constInit,
2358 genTreeOps iterOper,
2359 var_types iterOperType,
2360 genTreeOps testOper,
2363 unsigned* iterCount)
2365 noway_assert(genActualType(iterOperType) == TYP_INT);
2368 __int64 constLimitX;
2373 // Using this, we can just do a signed comparison with other 32 bit values.
2376 constLimitX = (unsigned int)constLimit;
2380 constLimitX = (signed int)constLimit;
2383 switch (iterOperType)
2385 // For small types, the iteration operator will narrow these values if big
2387 #define INIT_ITER_BY_TYPE(type) \
2388 constInitX = (type)constInit; \
2389 iterInc = (type)iterInc;
2392 INIT_ITER_BY_TYPE(signed char);
2395 INIT_ITER_BY_TYPE(unsigned char);
2398 INIT_ITER_BY_TYPE(signed short);
2401 INIT_ITER_BY_TYPE(unsigned short);
2404 // For the big types, 32 bit arithmetic is performed
2410 constInitX = (unsigned int)constInit;
2414 constInitX = (signed int)constInit;
2419 noway_assert(!"Bad type");
2423 /* If iterInc is zero we have an infinite loop */
2429 /* Set iterSign to +1 for positive iterInc and -1 for negative iterInc */
2430 iterSign = (iterInc > 0) ? +1 : -1;
2432 /* Initialize loopCount to zero */
2435 // If dupCond is true then the loop head contains a test which skips
2436 // this loop, if the constInit does not pass the loop test
2437 // Such a loop can execute zero times.
2438 // If dupCond is false then we have a true do-while loop which we
2439 // always execute the loop once before performing the loop test
2443 constInitX += iterInc;
2446 // bail if count is based on wrap-around math
2449 if (constLimitX < constInitX)
2454 else if (constLimitX > constInitX)
2459 /* Compute the number of repetitions */
2463 __int64 iterAtExitX;
2466 /* something like "for (i=init; i == lim; i++)" doesn't make any sense */
2470 /* "for (i=init; i != lim; i+=const)" - this is tricky since it may
2471 * have a constant number of iterations or loop forever -
2472 * we have to compute (lim-init) mod iterInc to see if it is zero.
2473 * If mod iterInc is not zero then the limit test will miss an a wrap will occur
2474 * which is probably not what the end user wanted, but it is legal.
2479 /* Stepping by one, i.e. Mod with 1 is always zero */
2482 if (((constLimitX - constInitX) % iterInc) != 0)
2490 noway_assert(iterInc < 0);
2491 /* Stepping by -1, i.e. Mod with 1 is always zero */
2494 if (((constInitX - constLimitX) % (-iterInc)) != 0)
2510 if (constInitX != constLimitX)
2512 loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1;
2515 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2519 iterAtExitX = (unsigned)iterAtExitX;
2522 // Check if iteration incr will cause overflow for small types
2523 if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
2528 // iterator with 32bit overflow. Bad for TYP_(U)INT
2529 if (iterAtExitX < constLimitX)
2534 *iterCount = loopCount;
2550 noway_assert(!"Unknown operator for loop iterator");
2564 if (constInitX < constLimitX)
2566 loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1;
2569 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2573 iterAtExitX = (unsigned)iterAtExitX;
2576 // Check if iteration incr will cause overflow for small types
2577 if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
2582 // iterator with 32bit overflow. Bad for TYP_(U)INT
2583 if (iterAtExitX < constLimitX)
2588 *iterCount = loopCount;
2604 noway_assert(!"Unknown operator for loop iterator");
2618 if (constInitX <= constLimitX)
2620 loopCount += (unsigned)((constLimitX - constInitX) / iterInc) + 1;
2623 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2627 iterAtExitX = (unsigned)iterAtExitX;
2630 // Check if iteration incr will cause overflow for small types
2631 if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
2636 // iterator with 32bit overflow. Bad for TYP_(U)INT
2637 if (iterAtExitX <= constLimitX)
2642 *iterCount = loopCount;
2658 noway_assert(!"Unknown operator for loop iterator");
2672 if (constInitX > constLimitX)
2674 loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1;
2677 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2681 iterAtExitX = (unsigned)iterAtExitX;
2684 // Check if small types will underflow
2685 if (jitIterSmallUnderflow((int)iterAtExitX, iterOperType))
2690 // iterator with 32bit underflow. Bad for TYP_INT and unsigneds
2691 if (iterAtExitX > constLimitX)
2696 *iterCount = loopCount;
2712 noway_assert(!"Unknown operator for loop iterator");
2726 if (constInitX >= constLimitX)
2728 loopCount += (unsigned)((constLimitX - constInitX) / iterInc) + 1;
2731 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2735 iterAtExitX = (unsigned)iterAtExitX;
2738 // Check if small types will underflow
2739 if (jitIterSmallUnderflow((int)iterAtExitX, iterOperType))
2744 // iterator with 32bit underflow. Bad for TYP_INT and unsigneds
2745 if (iterAtExitX >= constLimitX)
2750 *iterCount = loopCount;
2766 noway_assert(!"Unknown operator for loop iterator");
2771 noway_assert(!"Unknown operator for loop condition");
2777 /*****************************************************************************
2779 * Look for loop unrolling candidates and unroll them
2783 #pragma warning(push)
2784 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
2786 void Compiler::optUnrollLoops()
2788 if (compCodeOpt() == SMALL_CODE)
2793 if (optLoopCount == 0)
2799 if (JitConfig.JitNoUnroll())
2805 if (optCanCloneLoops())
2813 printf("*************** In optUnrollLoops()\n");
2816 /* Look for loop unrolling candidates */
2818 /* Double loop so that after unrolling an inner loop we set change to true
2819 * and we then go back over all of the loop candidates and try to unroll
2820 * the next outer loop, until we don't unroll any loops,
2821 * then change will be false and we are done.
2825 bool change = false;
2827 for (unsigned lnum = 0; lnum < optLoopCount; lnum++)
2841 int lbeg; // initial value for iterator
2842 int llim; // limit value for iterator
2843 unsigned lvar; // iterator lclVar #
2844 int iterInc; // value to increment the iterator
2845 genTreeOps iterOper; // type of iterator increment (i.e. ASG_ADD, ASG_SUB, etc.)
2846 var_types iterOperType; // type result of the oper (for overflow instrs)
2847 genTreeOps testOper; // type of loop test (i.e. GT_LE, GT_GE, etc.)
2848 bool unsTest; // Is the comparison u/int
2850 unsigned totalIter; // total number of iterations in the constant loop
2851 unsigned loopCostSz; // Cost is size of one iteration
2852 unsigned loopFlags; // actual lpFlags
2853 unsigned requiredFlags; // required lpFlags
2855 GenTree* loopList; // new stmt list of the unrolled loop
2858 static const int ITER_LIMIT[COUNT_OPT_CODE + 1] = {
2865 noway_assert(ITER_LIMIT[SMALL_CODE] == 0);
2866 noway_assert(ITER_LIMIT[COUNT_OPT_CODE] == 0);
2868 unsigned iterLimit = (unsigned)ITER_LIMIT[compCodeOpt()];
2871 if (compStressCompile(STRESS_UNROLL_LOOPS, 50))
2877 static const int UNROLL_LIMIT_SZ[COUNT_OPT_CODE + 1] = {
2884 noway_assert(UNROLL_LIMIT_SZ[SMALL_CODE] == 0);
2885 noway_assert(UNROLL_LIMIT_SZ[COUNT_OPT_CODE] == 0);
2887 int unrollLimitSz = (unsigned)UNROLL_LIMIT_SZ[compCodeOpt()];
2890 if (compStressCompile(STRESS_UNROLL_LOOPS, 50))
2892 unrollLimitSz *= 10;
2896 loopFlags = optLoopTable[lnum].lpFlags;
2897 requiredFlags = LPFLG_DO_WHILE | LPFLG_ONE_EXIT | LPFLG_CONST;
2899 /* Ignore the loop if we don't have a do-while with a single exit
2900 that has a constant number of iterations */
2902 if ((loopFlags & requiredFlags) != requiredFlags)
2907 /* ignore if removed or marked as not unrollable */
2909 if (optLoopTable[lnum].lpFlags & (LPFLG_DONT_UNROLL | LPFLG_REMOVED))
2914 head = optLoopTable[lnum].lpHead;
2916 bottom = optLoopTable[lnum].lpBottom;
2917 noway_assert(bottom);
2919 /* The single exit must be at the bottom of the loop */
2920 noway_assert(optLoopTable[lnum].lpExit);
2921 if (optLoopTable[lnum].lpExit != bottom)
2926 /* Unrolling loops with jumps in them is not worth the headache
2927 * Later we might consider unrolling loops after un-switching */
2932 block = block->bbNext;
2933 noway_assert(block);
2935 if (block->bbJumpKind != BBJ_NONE)
2937 if (block != bottom)
2942 } while (block != bottom);
2944 /* Get the loop data:
2948 - iterator increment
2949 - increment operation type (i.e. ASG_ADD, ASG_SUB, etc...)
2950 - loop test type (i.e. GT_GE, GT_LT, etc...)
2953 lbeg = optLoopTable[lnum].lpConstInit;
2954 llim = optLoopTable[lnum].lpConstLimit();
2955 testOper = optLoopTable[lnum].lpTestOper();
2957 lvar = optLoopTable[lnum].lpIterVar();
2958 iterInc = optLoopTable[lnum].lpIterConst();
2959 iterOper = optLoopTable[lnum].lpIterOper();
2961 iterOperType = optLoopTable[lnum].lpIterOperType();
2962 unsTest = (optLoopTable[lnum].lpTestTree->gtFlags & GTF_UNSIGNED) != 0;
2964 if (lvaTable[lvar].lvAddrExposed)
2965 { // If the loop iteration variable is address-exposed then bail
2968 if (lvaTable[lvar].lvIsStructField)
2969 { // If the loop iteration variable is a promoted field from a struct then
2974 /* Locate the pre-header and initialization and increment/test statements */
2976 phdr = head->bbTreeList;
2978 loop = bottom->bbTreeList;
2981 init = head->lastStmt();
2982 noway_assert(init && (init->gtNext == nullptr));
2983 test = bottom->lastStmt();
2984 noway_assert(test && (test->gtNext == nullptr));
2985 incr = test->gtPrev;
2988 if (init->gtFlags & GTF_STMT_CMPADD)
2990 /* Must be a duplicated loop condition */
2991 noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
2994 init = init->gtPrev;
3002 /* Find the number of iterations - the function returns false if not a constant number */
3004 if (!optComputeLoopRep(lbeg, llim, iterInc, iterOper, iterOperType, testOper, unsTest, dupCond, &totalIter))
3009 /* Forget it if there are too many repetitions or not a constant loop */
3011 if (totalIter > iterLimit)
3016 noway_assert(init->gtOper == GT_STMT);
3017 init = init->gtStmt.gtStmtExpr;
3018 noway_assert(test->gtOper == GT_STMT);
3019 test = test->gtStmt.gtStmtExpr;
3020 noway_assert(incr->gtOper == GT_STMT);
3021 incr = incr->gtStmt.gtStmtExpr;
3023 // Don't unroll loops we don't understand.
3024 if (incr->gtOper == GT_ASG)
3029 /* Make sure everything looks ok */
3030 if ((init->gtOper != GT_ASG) || (init->gtOp.gtOp1->gtOper != GT_LCL_VAR) ||
3031 (init->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) || (init->gtOp.gtOp2->gtOper != GT_CNS_INT) ||
3032 (init->gtOp.gtOp2->gtIntCon.gtIconVal != lbeg) ||
3034 !((incr->gtOper == GT_ASG_ADD) || (incr->gtOper == GT_ASG_SUB)) ||
3035 (incr->gtOp.gtOp1->gtOper != GT_LCL_VAR) || (incr->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) ||
3036 (incr->gtOp.gtOp2->gtOper != GT_CNS_INT) || (incr->gtOp.gtOp2->gtIntCon.gtIconVal != iterInc) ||
3038 (test->gtOper != GT_JTRUE))
3040 noway_assert(!"Bad precondition in Compiler::optUnrollLoops()");
3044 /* heuristic - Estimated cost in code size of the unrolled loop */
3052 block = block->bbNext;
3054 /* Visit all the statements in the block */
3056 for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
3058 /* Get the expression and stop if end reached */
3060 GenTreePtr expr = stmt->gtStmtExpr;
3066 /* Calculate gtCostSz */
3067 gtSetStmtInfo(stmt);
3069 /* Update loopCostSz */
3070 loopCostSz += stmt->gtCostSz;
3072 } while (block != bottom);
3074 /* Compute the estimated increase in code size for the unrolled loop */
3076 unsigned int fixedLoopCostSz;
3077 fixedLoopCostSz = 8;
3080 unrollCostSz = (loopCostSz * totalIter) - (loopCostSz + fixedLoopCostSz);
3082 /* Don't unroll if too much code duplication would result. */
3084 if (unrollCostSz > unrollLimitSz)
3086 /* prevent this loop from being revisited */
3087 optLoopTable[lnum].lpFlags |= LPFLG_DONT_UNROLL;
3091 /* Looks like a good idea to unroll this loop, let's do it! */
3092 CLANG_FORMAT_COMMENT_ANCHOR;
3097 printf("\nUnrolling loop BB%02u", head->bbNext->bbNum);
3098 if (head->bbNext->bbNum != bottom->bbNum)
3100 printf("..BB%02u", bottom->bbNum);
3102 printf(" over V%02u from %u to %u", lvar, lbeg, llim);
3103 printf(" unrollCostSz = %d\n", unrollCostSz);
3108 /* Create the unrolled loop statement list */
3110 loopList = loopLast = nullptr;
3112 for (lval = lbeg; totalIter; totalIter--)
3121 block = block->bbNext;
3122 noway_assert(block);
3124 /* Visit all the statements in the block */
3126 for (stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
3128 /* Stop if we've reached the end of the loop */
3130 if (stmt->gtStmtExpr == incr)
3135 /* Clone/substitute the expression */
3137 expr = gtCloneExpr(stmt, 0, lvar, lval);
3139 // cloneExpr doesn't handle everything
3143 optLoopTable[lnum].lpFlags |= LPFLG_DONT_UNROLL;
3147 /* Append the expression to our list */
3151 loopLast->gtNext = expr;
3158 expr->gtPrev = loopLast;
3161 } while (block != bottom);
3163 /* update the new value for the unrolled iterator */
3177 noway_assert(!"Unrolling not implemented for this loop iterator");
3181 noway_assert(!"Unknown operator for constant loop iterator");
3186 /* Finish the linked list */
3190 loopList->gtPrev = loopLast;
3191 loopLast->gtNext = nullptr;
3194 /* Replace the body with the unrolled one */
3200 block = block->bbNext;
3201 noway_assert(block);
3202 block->bbTreeList = nullptr;
3203 block->bbJumpKind = BBJ_NONE;
3204 block->bbFlags &= ~BBF_NEEDS_GCPOLL;
3205 } while (block != bottom);
3207 bottom->bbJumpKind = BBJ_NONE;
3208 bottom->bbTreeList = loopList;
3209 bottom->bbFlags &= ~BBF_NEEDS_GCPOLL;
3210 bottom->modifyBBWeight(bottom->bbWeight / BB_LOOP_WEIGHT);
3214 fgMorphStmts(bottom, &dummy, &dummy, &dummy);
3216 /* Update bbRefs and bbPreds */
3217 /* Here head->bbNext is bottom !!! - Replace it */
3219 fgRemoveRefPred(head->bbNext, bottom);
3221 /* Now change the initialization statement in the HEAD to "lvar = lval;"
3222 * (the last value of the iterator in the loop)
3223 * and drop the jump condition since the unrolled loop will always execute */
3225 init->gtOp.gtOp2->gtIntCon.gtIconVal = lval;
3227 /* if the HEAD is a BBJ_COND drop the condition (and make HEAD a BBJ_NONE block) */
3229 if (head->bbJumpKind == BBJ_COND)
3231 phdr = head->bbTreeList;
3233 test = phdr->gtPrev;
3235 noway_assert(test && (test->gtNext == nullptr));
3236 noway_assert(test->gtOper == GT_STMT);
3237 noway_assert(test->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
3239 init = test->gtPrev;
3240 noway_assert(init && (init->gtNext == test));
3241 noway_assert(init->gtOper == GT_STMT);
3243 init->gtNext = nullptr;
3244 phdr->gtPrev = init;
3245 head->bbJumpKind = BBJ_NONE;
3246 head->bbFlags &= ~BBF_NEEDS_GCPOLL;
3248 /* Update bbRefs and bbPreds */
3250 fgRemoveRefPred(head->bbJumpDest, head);
3254 /* the loop must execute */
3255 noway_assert(head->bbJumpKind == BBJ_NONE);
3261 printf("Whole unrolled loop:\n");
3263 GenTreePtr s = loopList;
3267 noway_assert(s->gtOper == GT_STMT);
3278 /* Remember that something has changed */
3282 /* Make sure to update loop table */
3284 /* Use the LPFLG_REMOVED flag and update the bbLoopMask acordingly
3285 * (also make head and bottom NULL - to hit an assert or GPF) */
3287 optLoopTable[lnum].lpFlags |= LPFLG_REMOVED;
3288 optLoopTable[lnum].lpHead = optLoopTable[lnum].lpBottom = nullptr;
3300 fgDebugCheckBBlist();
3304 #pragma warning(pop)
3307 /*****************************************************************************
3309 * Return non-zero if there is a code path from 'topBB' to 'botBB' that will
3310 * not execute a method call.
3313 bool Compiler::optReachWithoutCall(BasicBlock* topBB, BasicBlock* botBB)
3315 // TODO-Cleanup: Currently BBF_GC_SAFE_POINT is not set for helper calls,
3316 // as some helper calls are neither interruptible nor hijackable.
3317 // When we can determine this, then we can set BBF_GC_SAFE_POINT for
3318 // those helpers too.
3320 noway_assert(topBB->bbNum <= botBB->bbNum);
3322 // We can always check topBB and botBB for any gc safe points and early out
3324 if ((topBB->bbFlags | botBB->bbFlags) & BBF_GC_SAFE_POINT)
3329 // Otherwise we will need to rely upon the dominator sets
3331 if (!fgDomsComputed)
3333 // return a conservative answer of true when we don't have the dominator sets
3337 BasicBlock* curBB = topBB;
3340 noway_assert(curBB);
3342 // If we added a loop pre-header block then we will
3343 // have a bbNum greater than fgLastBB, and we won't have
3344 // any dominator information about this block, so skip it.
3346 if (curBB->bbNum <= fgLastBB->bbNum)
3348 noway_assert(curBB->bbNum <= botBB->bbNum);
3350 // Does this block contain a gc safe point?
3352 if (curBB->bbFlags & BBF_GC_SAFE_POINT)
3354 // Will this block always execute on the way to botBB ?
3356 // Since we are checking every block in [topBB .. botBB] and we are using
3357 // a lexical definition of a loop.
3358 // (all that we know is that is that botBB is a back-edge to topBB)
3359 // Thus while walking blocks in this range we may encounter some blocks
3360 // that are not really part of the loop, and so we need to perform
3361 // some additional checks:
3363 // We will check that the current 'curBB' is reachable from 'topBB'
3364 // and that it dominates the block containing the back-edge 'botBB'
3365 // When both of these are true then we know that the gcsafe point in 'curBB'
3366 // will be encountered in the loop and we can return false
3368 if (fgDominate(curBB, botBB) && fgReachable(topBB, curBB))
3375 // If we've reached the destination block, then we're done
3384 curBB = curBB->bbNext;
3387 // If we didn't find any blocks that contained a gc safe point and
3388 // also met the fgDominate and fgReachable criteria then we must return true
3393 /*****************************************************************************
3395 * Find the loop termination test at the bottom of the loop
3398 static GenTreePtr optFindLoopTermTest(BasicBlock* bottom)
3400 GenTreePtr testt = bottom->bbTreeList;
3402 assert(testt && testt->gtOper == GT_STMT);
3404 GenTreePtr result = testt->gtPrev;
3407 while (testt->gtNext)
3409 testt = testt->gtNext;
3412 assert(testt == result);
3418 /*****************************************************************************
3419 * Optimize "jmp C; do{} C:while(cond);" loops to "if (cond){ do{}while(cond}; }"
3422 void Compiler::fgOptWhileLoop(BasicBlock* block)
3424 noway_assert(!opts.MinOpts() && !opts.compDbgCode);
3425 noway_assert(compCodeOpt() != SMALL_CODE);
3428 Optimize while loops into do { } while loop
3429 Our loop hoisting logic requires do { } while loops.
3430 Specifically, we're looking for the following case:
3441 If we find this, and the condition is simple enough, we change
3442 the loop to the following:
3447 // else fall-through
3458 /* Does the BB end with an unconditional jump? */
3460 if (block->bbJumpKind != BBJ_ALWAYS || (block->bbFlags & BBF_KEEP_BBJ_ALWAYS))
3461 { // It can't be one of the ones we use for our exception magic
3465 // It has to be a forward jump
3466 // TODO-CQ: Check if we can also optimize the backwards jump as well.
3468 if (fgIsForwardBranch(block) == false)
3473 // Get hold of the jump target
3474 BasicBlock* bTest = block->bbJumpDest;
3476 // Does the block consist of 'jtrue(cond) block' ?
3477 if (bTest->bbJumpKind != BBJ_COND)
3482 // bTest must be a backwards jump to block->bbNext
3483 if (bTest->bbJumpDest != block->bbNext)
3488 // Since test is a BBJ_COND it will have a bbNext
3489 noway_assert(bTest->bbNext);
3491 // 'block' must be in the same try region as the condition, since we're going to insert
3492 // a duplicated condition in 'block', and the condition might include exception throwing code.
3493 if (!BasicBlock::sameTryRegion(block, bTest))
3498 // We're going to change 'block' to branch to bTest->bbNext, so that also better be in the
3499 // same try region (or no try region) to avoid generating illegal flow.
3500 BasicBlock* bTestNext = bTest->bbNext;
3501 if (bTestNext->hasTryIndex() && !BasicBlock::sameTryRegion(block, bTestNext))
3506 GenTreePtr condStmt = optFindLoopTermTest(bTest);
3508 // bTest must only contain only a jtrue with no other stmts, we will only clone
3509 // the conditional, so any other statements will not get cloned
3510 // TODO-CQ: consider cloning the whole bTest block as inserting it after block.
3512 if (bTest->bbTreeList != condStmt)
3517 /* Get to the condition node from the statement tree */
3519 noway_assert(condStmt->gtOper == GT_STMT);
3521 GenTreePtr condTree = condStmt->gtStmt.gtStmtExpr;
3522 noway_assert(condTree->gtOper == GT_JTRUE);
3524 condTree = condTree->gtOp.gtOp1;
3526 // The condTree has to be a RelOp comparison
3527 // TODO-CQ: Check if we can also optimize the backwards jump as well.
3529 if (condTree->OperIsCompare() == false)
3534 /* We call gtPrepareCost to measure the cost of duplicating this tree */
3536 gtPrepareCost(condTree);
3537 unsigned estDupCostSz = condTree->gtCostSz;
3539 double loopIterations = (double)BB_LOOP_WEIGHT;
3541 bool allProfileWeightsAreValid = false;
3542 BasicBlock::weight_t weightBlock = block->bbWeight;
3543 BasicBlock::weight_t weightTest = bTest->bbWeight;
3544 BasicBlock::weight_t weightNext = block->bbNext->bbWeight;
3546 // If we have profile data then we calculate the number of time
3547 // the loop will iterate into loopIterations
3548 if (fgIsUsingProfileWeights())
3550 // Only rely upon the profile weight when all three of these blocks
3551 // have good profile weights
3552 if ((block->bbFlags & BBF_PROF_WEIGHT) && (bTest->bbFlags & BBF_PROF_WEIGHT) &&
3553 (block->bbNext->bbFlags & BBF_PROF_WEIGHT))
3555 allProfileWeightsAreValid = true;
3557 // If this while loop never iterates then don't bother transforming
3558 if (weightNext == 0)
3563 // with (weighNext > 0) we should also have (weightTest >= weightBlock)
3564 // if the profile weights are all valid.
3566 // weightNext is the number of time this loop iterates
3567 // weightBlock is the number of times that we enter the while loop
3568 // loopIterations is the average number of times that this loop iterates
3570 if (weightTest >= weightBlock)
3572 loopIterations = (double)block->bbNext->bbWeight / (double)block->bbWeight;
3577 unsigned maxDupCostSz = 32;
3579 // optFastCodeOrBlendedLoop(bTest->bbWeight) does not work here as we have not
3580 // set loop weights yet
3581 if ((compCodeOpt() == FAST_CODE) || compStressCompile(STRESS_DO_WHILE_LOOPS, 30))
3586 // If this loop iterates a lot then raise the maxDupCost
3587 if (loopIterations >= 12.0)
3591 if (loopIterations >= 96.0)
3596 // If the loop condition has a shared static helper, we really want this loop converted
3597 // as not converting the loop will disable loop hoisting, meaning the shared helper will
3598 // be executed on every loop iteration.
3599 int countOfHelpers = 0;
3600 fgWalkTreePre(&condTree, CountSharedStaticHelper, &countOfHelpers);
3602 if (countOfHelpers > 0 && compCodeOpt() != SMALL_CODE)
3604 maxDupCostSz += 24 * min(countOfHelpers, (int)(loopIterations + 1.5));
3607 // If the compare has too high cost then we don't want to dup
3609 bool costIsTooHigh = (estDupCostSz > maxDupCostSz);
3614 printf("\nDuplication of loop condition [%06u] is %s, because the cost of duplication (%i) is %s than %i,"
3615 "\n loopIterations = %7.3f, countOfHelpers = %d, validProfileWeights = %s\n",
3616 condTree->gtTreeID, costIsTooHigh ? "not done" : "performed", estDupCostSz,
3617 costIsTooHigh ? "greater" : "less or equal", maxDupCostSz, loopIterations, countOfHelpers,
3618 allProfileWeightsAreValid ? "true" : "false");
3627 /* Looks good - duplicate the condition test */
3629 condTree->gtFlags |= GTF_RELOP_ZTT;
3631 condTree = gtCloneExpr(condTree);
3632 gtReverseCond(condTree);
3634 // Make sure clone expr copied the flag
3635 assert(condTree->gtFlags & GTF_RELOP_ZTT);
3637 condTree = gtNewOperNode(GT_JTRUE, TYP_VOID, condTree);
3639 /* Create a statement entry out of the condition and
3640 append the condition test at the end of 'block' */
3642 GenTreePtr copyOfCondStmt = fgInsertStmtAtEnd(block, condTree);
3644 copyOfCondStmt->gtFlags |= GTF_STMT_CMPADD;
3646 if (opts.compDbgInfo)
3648 copyOfCondStmt->gtStmt.gtStmtILoffsx = condStmt->gtStmt.gtStmtILoffsx;
3651 // Flag the block that received the copy as potentially having an array/vtable
3652 // reference if the block copied from did; this is a conservative guess.
3653 if (auto copyFlags = bTest->bbFlags & (BBF_HAS_VTABREF | BBF_HAS_IDX_LEN))
3655 block->bbFlags |= copyFlags;
3658 // If we have profile data for all blocks and we know that we are cloning the
3659 // bTest block into block and thus changing the control flow from block so
3660 // that it no longer goes directly to bTest anymore, we have to adjust the
3661 // weight of bTest by subtracting out the weight of block.
3663 if (allProfileWeightsAreValid)
3666 // Some additional sanity checks before adjusting the weight of bTest
3668 if ((weightNext > 0) && (weightTest >= weightBlock) && (weightTest != BB_MAX_WEIGHT))
3670 // Get the two edge that flow out of bTest
3671 flowList* edgeToNext = fgGetPredForBlock(bTest->bbNext, bTest);
3672 flowList* edgeToJump = fgGetPredForBlock(bTest->bbJumpDest, bTest);
3674 // Calculate the new weight for block bTest
3676 BasicBlock::weight_t newWeightTest =
3677 (weightTest > weightBlock) ? (weightTest - weightBlock) : BB_ZERO_WEIGHT;
3678 bTest->bbWeight = newWeightTest;
3680 if (newWeightTest == BB_ZERO_WEIGHT)
3682 bTest->bbFlags |= BBF_RUN_RARELY;
3683 // All out edge weights are set to zero
3684 edgeToNext->flEdgeWeightMin = BB_ZERO_WEIGHT;
3685 edgeToNext->flEdgeWeightMax = BB_ZERO_WEIGHT;
3686 edgeToJump->flEdgeWeightMin = BB_ZERO_WEIGHT;
3687 edgeToJump->flEdgeWeightMax = BB_ZERO_WEIGHT;
3691 // Update the our edge weights
3692 edgeToNext->flEdgeWeightMin = BB_ZERO_WEIGHT;
3693 edgeToNext->flEdgeWeightMax = min(edgeToNext->flEdgeWeightMax, newWeightTest);
3694 edgeToJump->flEdgeWeightMin = BB_ZERO_WEIGHT;
3695 edgeToJump->flEdgeWeightMax = min(edgeToJump->flEdgeWeightMax, newWeightTest);
3700 /* Change the block to end with a conditional jump */
3702 block->bbJumpKind = BBJ_COND;
3703 block->bbJumpDest = bTest->bbNext;
3705 /* Mark the jump dest block as being a jump target */
3706 block->bbJumpDest->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
3708 /* Update bbRefs and bbPreds for 'block->bbNext' 'bTest' and 'bTest->bbNext' */
3710 fgAddRefPred(block->bbNext, block);
3712 fgRemoveRefPred(bTest, block);
3713 fgAddRefPred(bTest->bbNext, block);
3718 printf("\nDuplicating loop condition in BB%02u for loop (BB%02u - BB%02u)", block->bbNum, block->bbNext->bbNum,
3720 printf("\nEstimated code size expansion is %d\n ", estDupCostSz);
3722 gtDispTree(copyOfCondStmt);
3728 /*****************************************************************************
3730 * Optimize the BasicBlock layout of the method
3733 void Compiler::optOptimizeLayout()
3735 noway_assert(!opts.MinOpts() && !opts.compDbgCode);
3740 printf("*************** In optOptimizeLayout()\n");
3744 /* Check that the flowgraph data (bbNum, bbRefs, bbPreds) is up-to-date */
3745 fgDebugCheckBBlist();
3748 noway_assert(fgModified == false);
3750 for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
3752 /* Make sure the appropriate fields are initialized */
3754 if (block->bbWeight == BB_ZERO_WEIGHT)
3756 /* Zero weighted block can't have a LOOP_HEAD flag */
3757 noway_assert(block->isLoopHead() == false);
3761 assert(block->bbLoopNum == 0);
3763 if (compCodeOpt() != SMALL_CODE)
3765 /* Optimize "while(cond){}" loops to "cond; do{}while(cond);" */
3767 fgOptWhileLoop(block);
3773 // Recompute the edge weight if we have modified the flow graph in fgOptWhileLoop
3774 fgComputeEdgeWeights();
3777 fgUpdateFlowGraph(true);
3779 fgUpdateFlowGraph();
3782 /*****************************************************************************
3784 * Perform loop inversion, find and classify natural loops
3787 void Compiler::optOptimizeLoops()
3789 noway_assert(!opts.MinOpts() && !opts.compDbgCode);
3794 printf("*************** In optOptimizeLoops()\n");
3798 optSetBlockWeights();
3800 /* Were there any loops in the flow graph? */
3804 /* now that we have dominator information we can find loops */
3806 optFindNaturalLoops();
3808 unsigned loopNum = 0;
3810 /* Iterate over the flow graph, marking all loops */
3812 /* We will use the following terminology:
3813 * top - the first basic block in the loop (i.e. the head of the backward edge)
3814 * bottom - the last block in the loop (i.e. the block from which we jump to the top)
3815 * lastBottom - used when we have multiple back-edges to the same top
3822 for (top = fgFirstBB; top; top = top->bbNext)
3824 BasicBlock* foundBottom = nullptr;
3826 for (pred = top->bbPreds; pred; pred = pred->flNext)
3828 /* Is this a loop candidate? - We look for "back edges" */
3830 BasicBlock* bottom = pred->flBlock;
3832 /* is this a backward edge? (from BOTTOM to TOP) */
3834 if (top->bbNum > bottom->bbNum)
3839 /* 'top' also must have the BBF_LOOP_HEAD flag set */
3841 if (top->isLoopHead() == false)
3846 /* We only consider back-edges that are BBJ_COND or BBJ_ALWAYS for loops */
3848 if ((bottom->bbJumpKind != BBJ_COND) && (bottom->bbJumpKind != BBJ_ALWAYS))
3853 /* the top block must be able to reach the bottom block */
3854 if (!fgReachable(top, bottom))
3859 /* Found a new loop, record the longest backedge in foundBottom */
3861 if ((foundBottom == nullptr) || (bottom->bbNum > foundBottom->bbNum))
3863 foundBottom = bottom;
3871 /* Mark the loop header as such */
3872 assert(FitsIn<unsigned char>(loopNum));
3873 top->bbLoopNum = (unsigned char)loopNum;
3876 /* Mark all blocks between 'top' and 'bottom' */
3878 optMarkLoopBlocks(top, foundBottom, false);
3881 // We track at most 255 loops
3885 totalUnnatLoopOverflows++;
3892 totalUnnatLoopCount += loopNum;
3900 printf("\nFound a total of %d loops.", loopNum);
3901 printf("\nAfter loop weight marking:\n");
3902 fgDispBasicBlocks();
3907 optLoopsMarked = true;
3911 //------------------------------------------------------------------------
3912 // optDeriveLoopCloningConditions: Derive loop cloning conditions.
3915 // loopNum - the current loop index for which conditions are derived.
3916 // context - data structure where all loop cloning info is kept.
3919 // "false" if conditions cannot be obtained. "true" otherwise.
3920 // The cloning conditions are updated in the "conditions"[loopNum] field
3921 // of the "context" parameter.
3924 // Inspect the loop cloning optimization candidates and populate the conditions necessary
3925 // for each optimization candidate. Checks if the loop stride is "> 0" if the loop
3926 // condition is "less than". If the initializer is "var" init then adds condition
3927 // "var >= 0", and if the loop is var limit then, "var >= 0" and "var <= a.len"
3928 // are added to "context". These conditions are checked in the pre-header block
3929 // and the cloning choice is made.
3932 // Callers should assume AND operation is used i.e., if all conditions are
3933 // true, then take the fast path.
3935 bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext* context)
3937 JITDUMP("------------------------------------------------------------\n");
3938 JITDUMP("Deriving cloning conditions for L%02u\n", loopNum);
3940 LoopDsc* loop = &optLoopTable[loopNum];
3941 ExpandArrayStack<LcOptInfo*>* optInfos = context->GetLoopOptInfo(loopNum);
3943 if (loop->lpTestOper() == GT_LT)
3945 // Stride conditions
3946 if (loop->lpIterConst() <= 0)
3948 JITDUMP("> Stride %d is invalid\n", loop->lpIterConst());
3953 if (loop->lpFlags & LPFLG_CONST_INIT)
3955 // Only allowing const init at this time.
3956 if (loop->lpConstInit < 0)
3958 JITDUMP("> Init %d is invalid\n", loop->lpConstInit);
3962 else if (loop->lpFlags & LPFLG_VAR_INIT)
3965 LC_Condition geZero(GT_GE, LC_Expr(LC_Ident(loop->lpVarInit, LC_Ident::Var)),
3966 LC_Expr(LC_Ident(0, LC_Ident::Const)));
3967 context->EnsureConditions(loopNum)->Push(geZero);
3971 JITDUMP("> Not variable init\n");
3977 if (loop->lpFlags & LPFLG_CONST_LIMIT)
3979 int limit = loop->lpConstLimit();
3982 JITDUMP("> limit %d is invalid\n", limit);
3985 ident = LC_Ident(limit, LC_Ident::Const);
3987 else if (loop->lpFlags & LPFLG_VAR_LIMIT)
3989 unsigned limitLcl = loop->lpVarLimit();
3990 ident = LC_Ident(limitLcl, LC_Ident::Var);
3992 LC_Condition geZero(GT_GE, LC_Expr(ident), LC_Expr(LC_Ident(0, LC_Ident::Const)));
3994 context->EnsureConditions(loopNum)->Push(geZero);
3996 else if (loop->lpFlags & LPFLG_ARRLEN_LIMIT)
3998 ArrIndex* index = new (getAllocator()) ArrIndex(getAllocator());
3999 if (!loop->lpArrLenLimit(this, index))
4001 JITDUMP("> ArrLen not matching");
4004 ident = LC_Ident(LC_Array(LC_Array::Jagged, index, LC_Array::ArrLen));
4006 // Ensure that this array must be dereference-able, before executing the actual condition.
4007 LC_Array array(LC_Array::Jagged, index, LC_Array::None);
4008 context->EnsureDerefs(loopNum)->Push(array);
4012 JITDUMP("> Undetected limit\n");
4016 for (unsigned i = 0; i < optInfos->Size(); ++i)
4018 LcOptInfo* optInfo = optInfos->GetRef(i);
4019 switch (optInfo->GetOptType())
4021 case LcOptInfo::LcJaggedArray:
4024 LcJaggedArrayOptInfo* arrIndexInfo = optInfo->AsLcJaggedArrayOptInfo();
4025 LC_Array arrLen(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::ArrLen);
4026 LC_Ident arrLenIdent = LC_Ident(arrLen);
4028 LC_Condition cond(GT_LE, LC_Expr(ident), LC_Expr(arrLenIdent));
4029 context->EnsureConditions(loopNum)->Push(cond);
4031 // Ensure that this array must be dereference-able, before executing the actual condition.
4032 LC_Array array(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::None);
4033 context->EnsureDerefs(loopNum)->Push(array);
4036 case LcOptInfo::LcMdArray:
4038 // limit <= mdArrLen
4039 LcMdArrayOptInfo* mdArrInfo = optInfo->AsLcMdArrayOptInfo();
4040 LC_Condition cond(GT_LE, LC_Expr(ident),
4041 LC_Expr(LC_Ident(LC_Array(LC_Array::MdArray,
4042 mdArrInfo->GetArrIndexForDim(getAllocator()),
4043 mdArrInfo->dim, LC_Array::None))));
4044 context->EnsureConditions(loopNum)->Push(cond);
4049 JITDUMP("Unknown opt\n");
4053 JITDUMP("Conditions: (");
4054 DBEXEC(verbose, context->PrintConditions(loopNum));
4061 //------------------------------------------------------------------------------------
4062 // optComputeDerefConditions: Derive loop cloning conditions for dereferencing arrays.
4065 // loopNum - the current loop index for which conditions are derived.
4066 // context - data structure where all loop cloning info is kept.
4069 // "false" if conditions cannot be obtained. "true" otherwise.
4070 // The deref conditions are updated in the "derefConditions"[loopNum] field
4071 // of the "context" parameter.
4073 // Definition of Deref Conditions:
4074 // To be able to check for the loop cloning condition that (limitVar <= a.len)
4075 // we should first be able to dereference "a". i.e., "a" is non-null.
4081 // for (k in 0..n) // Inner most loop is being cloned. Cloning needs to check if
4082 // // (n <= a[i][j].len) and other safer conditions to take the fast path
4085 // Now, we want to deref a[i][j] to invoke length operator on it to perform the cloning fast path check.
4086 // This involves deref of (a), (a[i]), (a[i][j]), therefore, the following should first
4087 // be true to do the deref.
4089 // (a != null) && (i < a.len) && (a[i] != null) && (j < a[i].len) && (a[i][j] != null) --> (1)
4091 // Note the short circuiting AND. Implication: these conditions should be performed in separate
4092 // blocks each of which will branch to slow path if the condition evaluates to false.
4094 // Now, imagine a situation where we have
4095 // a[x][y][k] = 20 and a[i][j][k] = 0
4096 // also in the inner most loop where x, y are parameters, then our conditions will have
4100 // in addition to the above conditions (1) to get rid of bounds check on index 'k'
4102 // But these conditions can be checked together with conditions
4103 // (i < a.len) without a need for a separate block. In summary, the conditions will be:
4106 // ((i < a.len) & (x < a.len)) && <-- Note the bitwise AND here.
4107 // (a[i] != null & a[x] != null) && <-- Note the bitwise AND here.
4108 // (j < a[i].len & y < a[x].len) && <-- Note the bitwise AND here.
4109 // (a[i][j] != null & a[x][y] != null) <-- Note the bitwise AND here.
4111 // This naturally yields a tree style pattern, where the nodes of the tree are
4112 // the array and indices respectively.
4128 // Notice that the variables in the same levels can have their conditions combined in the
4129 // same block with a bitwise AND. Whereas, the conditions in consecutive levels will be
4130 // combined with a short-circuiting AND (i.e., different basic blocks).
4133 // Construct a tree of array indices and the array which will generate the optimal
4134 // conditions for loop cloning.
4136 // a[i][j][k], b[i] and a[i][y][k] are the occurrences in the loop. Then, the tree should be:
4151 // In this method, we will construct such a tree by descending depth first into the array
4152 // index operation and forming a tree structure as we encounter the array or the index variables.
4154 // This tree structure will then be used to generate conditions like below:
4155 // (a != null) & (b != null) && // from the first level of the tree.
4157 // (i < a.len) & (i < b.len) && // from the second level of the tree. Levels can be combined.
4158 // (a[i] != null) & (b[i] != null) && // from the second level of the tree.
4160 // (j < a[i].len) & (y < a[i].len) && // from the third level.
4161 // (a[i][j] != null) & (a[i][y] != null) && // from the third level.
4166 bool Compiler::optComputeDerefConditions(unsigned loopNum, LoopCloneContext* context)
4168 ExpandArrayStack<LC_Deref*> nodes(getAllocator());
4171 // Get the dereference-able arrays.
4172 ExpandArrayStack<LC_Array>* deref = context->EnsureDerefs(loopNum);
4174 // For each array in the dereference list, construct a tree,
4175 // where the nodes are array and index variables and an edge 'u-v'
4176 // exists if a node 'v' indexes node 'u' directly as in u[v] or an edge
4177 // 'u-v-w' transitively if u[v][w] occurs.
4178 for (unsigned i = 0; i < deref->Size(); ++i)
4180 LC_Array& array = (*deref)[i];
4182 // First populate the array base variable.
4183 LC_Deref* node = LC_Deref::Find(&nodes, array.arrIndex->arrLcl);
4184 if (node == nullptr)
4186 node = new (getAllocator()) LC_Deref(array, 0 /*level*/);
4190 // For each dimension (level) for the array, populate the tree with the variable
4191 // from that dimension.
4192 unsigned rank = (unsigned)array.GetDimRank();
4193 for (unsigned i = 0; i < rank; ++i)
4195 node->EnsureChildren(getAllocator());
4196 LC_Deref* tmp = node->Find(array.arrIndex->indLcls[i]);
4199 tmp = new (getAllocator()) LC_Deref(array, node->level + 1);
4200 node->children->Push(tmp);
4203 // Descend one level down.
4207 // Keep the maxRank of all array dereferences.
4208 maxRank = max((int)rank, maxRank);
4214 for (unsigned i = 0; i < nodes.Size(); ++i)
4231 // First level will always yield the null-check, since it is made of the array base variables.
4232 // All other levels (dimensions) will yield two conditions ex: (i < a.length && a[i] != null)
4233 // So add 1 after rank * 2.
4234 unsigned condBlocks = (unsigned)maxRank * 2 + 1;
4236 // Heuristic to not create too many blocks;
4242 // Derive conditions into an 'array of level x array of conditions' i.e., levelCond[levels][conds]
4243 ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* levelCond = context->EnsureBlockConditions(loopNum, condBlocks);
4244 for (unsigned i = 0; i < nodes.Size(); ++i)
4246 nodes[i]->DeriveLevelConditions(levelCond);
4249 DBEXEC(verbose, context->PrintBlockConditions(loopNum));
4254 //----------------------------------------------------------------------------
4255 // optDebugLogLoopCloning: Insert a call to jithelper that prints a message.
4258 // block - the block in which the helper call needs to be inserted.
4259 // insertBefore - the tree before which the helper call will be inserted.
4261 void Compiler::optDebugLogLoopCloning(BasicBlock* block, GenTreePtr insertBefore)
4263 if (JitConfig.JitDebugLogLoopCloning() == 0)
4267 GenTreePtr logCall = gtNewHelperCallNode(CORINFO_HELP_DEBUG_LOG_LOOP_CLONING, TYP_VOID);
4268 GenTreePtr stmt = fgNewStmtFromTree(logCall);
4269 fgInsertStmtBefore(block, insertBefore, stmt);
4270 fgMorphBlockStmt(block, stmt DEBUGARG("Debug log loop cloning"));
4274 //------------------------------------------------------------------------
4275 // optPerformStaticOptimizations: Perform the optimizations for the optimization
4276 // candidates gathered during the cloning phase.
4279 // loopNum - the current loop index for which the optimizations are performed.
4280 // context - data structure where all loop cloning info is kept.
4281 // dynamicPath - If true, the optimization is performed in the fast path among the
4282 // cloned loops. If false, it means this is the only path (i.e.,
4283 // there is no slow path.)
4286 // Perform the optimizations on the fast path i.e., the path in which the
4287 // optimization candidates were collected at the time of identifying them.
4288 // The candidates store all the information necessary (the tree/stmt/block
4289 // they are from) to perform the optimization.
4292 // The unoptimized path is either already cloned when this method is called or
4293 // there is no unoptimized path (got eliminated statically.) So this method
4294 // performs the optimizations assuming that the path in which the candidates
4295 // were collected is the fast path in which the optimizations will be performed.
4297 void Compiler::optPerformStaticOptimizations(unsigned loopNum, LoopCloneContext* context DEBUGARG(bool dynamicPath))
4299 ExpandArrayStack<LcOptInfo*>* optInfos = context->GetLoopOptInfo(loopNum);
4300 for (unsigned i = 0; i < optInfos->Size(); ++i)
4302 LcOptInfo* optInfo = optInfos->GetRef(i);
4303 switch (optInfo->GetOptType())
4305 case LcOptInfo::LcJaggedArray:
4307 LcJaggedArrayOptInfo* arrIndexInfo = optInfo->AsLcJaggedArrayOptInfo();
4308 compCurBB = arrIndexInfo->arrIndex.useBlock;
4309 optRemoveRangeCheck(arrIndexInfo->arrIndex.bndsChks[arrIndexInfo->dim], arrIndexInfo->stmt, true,
4311 DBEXEC(dynamicPath, optDebugLogLoopCloning(arrIndexInfo->arrIndex.useBlock, arrIndexInfo->stmt));
4314 case LcOptInfo::LcMdArray:
4315 // TODO-CQ: CLONE: Implement.
4323 //----------------------------------------------------------------------------
4324 // optCanCloneLoops: Use the environment flag to determine whether loop
4325 // cloning is allowed to be performed.
4328 // Returns true in debug builds if COMPlus_JitCloneLoops flag is set.
4329 // Disabled for retail for now.
4331 bool Compiler::optCanCloneLoops()
4333 // Enabled for retail builds now.
4334 unsigned cloneLoopsFlag = 1;
4336 cloneLoopsFlag = JitConfig.JitCloneLoops();
4338 return (cloneLoopsFlag != 0);
4341 //----------------------------------------------------------------------------
4342 // optIsLoopClonable: Determine whether this loop can be cloned.
4345 // loopInd loop index which needs to be checked if it can be cloned.
4348 // Returns true if the loop can be cloned. If it returns false
4349 // prints a message in debug as why the loop can't be cloned.
4351 bool Compiler::optIsLoopClonable(unsigned loopInd)
4353 // First, for now, make sure the loop doesn't have any embedded exception handling -- I don't want to tackle
4354 // inserting new EH regions in the exception table yet.
4355 BasicBlock* stopAt = optLoopTable[loopInd].lpBottom->bbNext;
4356 unsigned loopRetCount = 0;
4357 for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != stopAt; blk = blk->bbNext)
4359 if (blk->bbJumpKind == BBJ_RETURN)
4363 if (bbIsTryBeg(blk))
4365 JITDUMP("Loop cloning: rejecting loop %d in %s, because it has a try begin.\n", loopInd, info.compFullName);
4370 // Is the entry block a handler or filter start? If so, then if we cloned, we could create a jump
4371 // into the middle of a handler (to go to the cloned copy.) Reject.
4372 if (bbIsHandlerBeg(optLoopTable[loopInd].lpEntry))
4374 JITDUMP("Loop cloning: rejecting loop because entry block is a handler start.\n");
4378 // If the head and entry are in different EH regions, reject.
4379 if (!BasicBlock::sameEHRegion(optLoopTable[loopInd].lpHead, optLoopTable[loopInd].lpEntry))
4381 JITDUMP("Loop cloning: rejecting loop because head and entry blocks are in different EH regions.\n");
4385 // Is the first block after the last block of the loop a handler or filter start?
4386 // Usually, we create a dummy block after the orginal loop, to skip over the loop clone
4387 // and go to where the original loop did. That raises problems when we don't actually go to
4388 // that block; this is one of those cases. This could be fixed fairly easily; for example,
4389 // we could add a dummy nop block after the (cloned) loop bottom, in the same handler scope as the
4390 // loop. This is just a corner to cut to get this working faster.
4391 BasicBlock* bbAfterLoop = optLoopTable[loopInd].lpBottom->bbNext;
4392 if (bbAfterLoop != nullptr && bbIsHandlerBeg(bbAfterLoop))
4394 JITDUMP("Loop cloning: rejecting loop because next block after bottom is a handler start.\n");
4398 // We've previously made a decision whether to have separate return epilogs, or branch to one.
4399 // There's a GCInfo limitation in the x86 case, so that there can be no more than 4 separate epilogs.
4400 // (I thought this was x86-specific, but it's not if-d. On other architectures, the decision should be made as a
4401 // heuristic tradeoff; perhaps we're just choosing to live with 4 as the limit.)
4402 if (fgReturnCount + loopRetCount > 4)
4404 JITDUMP("Loop cloning: rejecting loop because it has %d returns; if added to previously-existing %d returns, "
4405 "would exceed the limit of 4.\n",
4406 loopRetCount, fgReturnCount);
4410 // Otherwise, we're going to add those return blocks.
4411 fgReturnCount += loopRetCount;
4416 /*****************************************************************************
4418 * Identify loop cloning opportunities, derive loop cloning conditions,
4419 * perform loop cloning, use the derived conditions to choose which
4422 void Compiler::optCloneLoops()
4424 JITDUMP("\n*************** In optCloneLoops()\n");
4425 if (optLoopCount == 0 || !optCanCloneLoops())
4433 printf("Blocks/Trees at start of phase\n");
4434 fgDispBasicBlocks(true);
4438 LoopCloneContext context(optLoopCount, getAllocator());
4440 // Obtain array optimization candidates in the context.
4441 optObtainLoopCloningOpts(&context);
4443 // For each loop, derive cloning conditions for the optimization candidates.
4444 for (unsigned i = 0; i < optLoopCount; ++i)
4446 ExpandArrayStack<LcOptInfo*>* optInfos = context.GetLoopOptInfo(i);
4447 if (optInfos == nullptr)
4452 if (!optDeriveLoopCloningConditions(i, &context) || !optComputeDerefConditions(i, &context))
4454 JITDUMP("> Conditions could not be obtained\n");
4455 context.CancelLoopOptInfo(i);
4459 bool allTrue = false;
4460 bool anyFalse = false;
4461 context.EvaluateConditions(i, &allTrue, &anyFalse DEBUGARG(verbose));
4464 context.CancelLoopOptInfo(i);
4468 // Perform static optimizations on the fast path since we always
4469 // have to take the cloned path.
4470 optPerformStaticOptimizations(i, &context DEBUGARG(false));
4472 // No need to clone.
4473 context.CancelLoopOptInfo(i);
4479 // The code in this #if has been useful in debugging loop cloning issues, by
4480 // enabling selective enablement of the loop cloning optimization according to
4483 unsigned methHash = info.compMethodHash();
4484 char* lostr = getenv("loopclonehashlo");
4485 unsigned methHashLo = 0;
4488 sscanf_s(lostr, "%x", &methHashLo);
4489 // methHashLo = (unsigned(atoi(lostr)) << 2); // So we don't have to use negative numbers.
4491 char* histr = getenv("loopclonehashhi");
4492 unsigned methHashHi = UINT32_MAX;
4495 sscanf_s(histr, "%x", &methHashHi);
4496 // methHashHi = (unsigned(atoi(histr)) << 2); // So we don't have to use negative numbers.
4498 if (methHash < methHashLo || methHash > methHashHi)
4503 for (unsigned i = 0; i < optLoopCount; ++i)
4505 if (context.GetLoopOptInfo(i) != nullptr)
4508 context.OptimizeConditions(i DEBUGARG(verbose));
4509 context.OptimizeBlockConditions(i DEBUGARG(verbose));
4510 optCloneLoop(i, &context);
4517 printf("\nAfter loop cloning:\n");
4518 fgDispBasicBlocks(/*dumpTrees*/ true);
4523 void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context)
4525 assert(loopInd < optLoopCount);
4527 JITDUMP("\nCloning loop %d: [h: %d, f: %d, t: %d, e: %d, b: %d].\n", loopInd, optLoopTable[loopInd].lpHead->bbNum,
4528 optLoopTable[loopInd].lpFirst->bbNum, optLoopTable[loopInd].lpTop->bbNum,
4529 optLoopTable[loopInd].lpEntry->bbNum, optLoopTable[loopInd].lpBottom->bbNum);
4531 // Determine the depth of the loop, so we can properly weight blocks added (outside the cloned loop blocks).
4532 unsigned depth = optLoopDepth(loopInd);
4533 unsigned ambientWeight = 1;
4534 for (unsigned j = 0; j < depth; j++)
4536 unsigned lastWeight = ambientWeight;
4537 ambientWeight *= BB_LOOP_WEIGHT;
4538 // If the multiplication overflowed, stick at max.
4539 // (Strictly speaking, a multiplication could overflow and still have a result
4540 // that is >= lastWeight...but if so, the original weight must be pretty large,
4541 // and it got bigger, so that's OK.)
4542 if (ambientWeight < lastWeight)
4544 ambientWeight = BB_MAX_WEIGHT;
4549 // If we're in a non-natural loop, the ambient weight might be higher than we computed above.
4550 // Be safe by taking the max with the head block's weight.
4551 ambientWeight = max(ambientWeight, optLoopTable[loopInd].lpHead->bbWeight);
4553 // This is the containing loop, if any -- to label any blocks we create that are outside
4554 // the loop being cloned.
4555 unsigned char ambientLoop = optLoopTable[loopInd].lpParent;
4557 // First, make sure that the loop has a unique header block, creating an empty one if necessary.
4558 optEnsureUniqueHead(loopInd, ambientWeight);
4560 // We're going to make
4572 // H2--> E (Optional; if E == T == F, let H fall through to F/T/E)
4584 BasicBlock* h = optLoopTable[loopInd].lpHead;
4585 if (h->bbJumpKind != BBJ_NONE && h->bbJumpKind != BBJ_ALWAYS)
4587 // Make a new block to be the unique entry to the loop.
4588 assert(h->bbJumpKind == BBJ_COND && h->bbNext == optLoopTable[loopInd].lpEntry);
4589 BasicBlock* newH = fgNewBBafter(BBJ_NONE, h,
4590 /*extendRegion*/ true);
4591 newH->bbWeight = (newH->isRunRarely() ? 0 : ambientWeight);
4592 BlockSetOps::Assign(this, newH->bbReach, h->bbReach);
4593 // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
4594 newH->bbNatLoopNum = ambientLoop;
4596 optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h);
4599 // First, make X2 after B, if necessary. (Not necessary if b is a BBJ_ALWAYS.)
4600 // "newPred" will be the predecessor of the blocks of the cloned loop.
4601 BasicBlock* b = optLoopTable[loopInd].lpBottom;
4602 BasicBlock* newPred = b;
4603 if (b->bbJumpKind != BBJ_ALWAYS)
4605 BasicBlock* x = b->bbNext;
4608 BasicBlock* x2 = fgNewBBafter(BBJ_ALWAYS, b, /*extendRegion*/ true);
4609 x2->bbWeight = (x2->isRunRarely() ? 0 : ambientWeight);
4611 // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
4612 x2->bbNatLoopNum = ambientLoop;
4615 BlockSetOps::Assign(this, x2->bbReach, h->bbReach);
4620 // Now we'll make "h2", after "h" to go to "e" -- unless the loop is a do-while,
4621 // so that "h" already falls through to "e" (e == t == f).
4622 BasicBlock* h2 = nullptr;
4623 if (optLoopTable[loopInd].lpHead->bbNext != optLoopTable[loopInd].lpEntry)
4625 BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, optLoopTable[loopInd].lpHead,
4626 /*extendRegion*/ true);
4627 h2->bbWeight = (h2->isRunRarely() ? 0 : ambientWeight);
4629 // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
4630 h2->bbNatLoopNum = ambientLoop;
4632 h2->bbJumpDest = optLoopTable[loopInd].lpEntry;
4633 optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h2);
4636 // Now we'll clone the blocks of the loop body.
4637 BasicBlock* newFirst = nullptr;
4638 BasicBlock* newBot = nullptr;
4640 BlockToBlockMap* blockMap = new (getAllocator()) BlockToBlockMap(getAllocator());
4641 for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != optLoopTable[loopInd].lpBottom->bbNext;
4644 BasicBlock* newBlk = fgNewBBafter(blk->bbJumpKind, newPred,
4645 /*extendRegion*/ true);
4647 BasicBlock::CloneBlockState(this, newBlk, blk);
4648 // TODO-Cleanup: The above clones the bbNatLoopNum, which is incorrect. Eventually, we should probably insert
4649 // the cloned loop in the loop table. For now, however, we'll just make these blocks be part of the surrounding
4650 // loop, if one exists -- the parent of the loop we're cloning.
4651 newBlk->bbNatLoopNum = optLoopTable[loopInd].lpParent;
4653 if (newFirst == nullptr)
4657 newBot = newBlk; // Continually overwrite to make sure we get the last one.
4659 blockMap->Set(blk, newBlk);
4662 // Perform the static optimizations on the fast path.
4663 optPerformStaticOptimizations(loopInd, context DEBUGARG(true));
4665 // Now go through the new blocks, remapping their jump targets within the loop.
4666 for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != optLoopTable[loopInd].lpBottom->bbNext;
4670 BasicBlock* newblk = nullptr;
4671 bool b = blockMap->Lookup(blk, &newblk);
4672 assert(b && newblk != nullptr);
4674 assert(blk->bbJumpKind == newblk->bbJumpKind);
4676 // First copy the jump destination(s) from "blk".
4677 optCopyBlkDest(blk, newblk);
4679 // Now redirect the new block according to "blockMap".
4680 optRedirectBlock(newblk, blockMap);
4683 assert((h->bbJumpKind == BBJ_NONE && (h->bbNext == h2 || h->bbNext == optLoopTable[loopInd].lpEntry)) ||
4684 (h->bbJumpKind == BBJ_ALWAYS));
4686 // If all the conditions are true, go to E2.
4687 BasicBlock* e2 = nullptr;
4688 bool foundIt = blockMap->Lookup(optLoopTable[loopInd].lpEntry, &e2);
4690 h->bbJumpKind = BBJ_COND;
4692 // We will create the following structure
4694 // cond0 (in h) -?> cond1
4695 // slow --> e2 (slow) always
4702 // We should always have block conditions, at the minimum, the array should be deref-able
4703 assert(context->HasBlockConditions(loopInd));
4705 // Create a unique header for the slow path.
4706 BasicBlock* slowHead = fgNewBBafter(BBJ_ALWAYS, h, true);
4707 slowHead->bbWeight = (h->isRunRarely() ? 0 : ambientWeight);
4708 slowHead->bbNatLoopNum = ambientLoop;
4709 slowHead->bbJumpDest = e2;
4711 BasicBlock* condLast = optInsertLoopChoiceConditions(context, loopInd, h, slowHead);
4712 condLast->bbJumpDest = slowHead;
4714 // If h2 is present it is already the head or replace 'h' by 'condLast'.
4717 optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, condLast);
4719 assert(foundIt && e2 != nullptr);
4721 fgUpdateChangedFlowGraph();
4724 //--------------------------------------------------------------------------------------------------
4725 // optInsertLoopChoiceConditions - Insert the loop conditions for a loop between loop head and entry
4728 // context loop cloning context variable
4729 // loopNum the loop index
4730 // head loop head for "loopNum"
4731 // slowHead the slow path loop head
4737 // Create the following structure.
4739 // Note below that the cond0 is inverted in head i.e., if true jump to cond1. This is because
4740 // condn cannot jtrue to loop head h2. It has to be from a direct pred block.
4742 // cond0 (in h) -?> cond1
4743 // slowHead --> e2 (slowHead) always
4744 // !cond1 -?> slowHead
4745 // !cond2 -?> slowHead
4747 // !condn -?> slowHead
4750 // Insert condition 0 in 'h' and create other condition blocks and insert conditions in them.
4752 BasicBlock* Compiler::optInsertLoopChoiceConditions(LoopCloneContext* context,
4755 BasicBlock* slowHead)
4757 JITDUMP("Inserting loop cloning conditions\n");
4758 assert(context->HasBlockConditions(loopNum));
4760 BasicBlock* curCond = head;
4761 ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* levelCond = context->GetBlockConditions(loopNum);
4762 for (unsigned i = 0; i < levelCond->Size(); ++i)
4764 bool isHeaderBlock = (curCond == head);
4766 // Flip the condition if header block.
4767 context->CondToStmtInBlock(this, *((*levelCond)[i]), curCond, isHeaderBlock);
4769 // Create each condition block ensuring wiring between them.
4770 BasicBlock* tmp = fgNewBBafter(BBJ_COND, isHeaderBlock ? slowHead : curCond, true);
4771 curCond->bbJumpDest = isHeaderBlock ? tmp : slowHead;
4774 curCond->inheritWeight(head);
4775 curCond->bbNatLoopNum = head->bbNatLoopNum;
4776 JITDUMP("Created new block %02d for new level\n", curCond->bbNum);
4779 // Finally insert cloning conditions after all deref conditions have been inserted.
4780 context->CondToStmtInBlock(this, *(context->GetConditions(loopNum)), curCond, false);
4784 void Compiler::optEnsureUniqueHead(unsigned loopInd, unsigned ambientWeight)
4786 BasicBlock* h = optLoopTable[loopInd].lpHead;
4787 BasicBlock* t = optLoopTable[loopInd].lpTop;
4788 BasicBlock* e = optLoopTable[loopInd].lpEntry;
4789 BasicBlock* b = optLoopTable[loopInd].lpBottom;
4791 // If "h" dominates the entry block, then it is the unique header.
4792 if (fgDominate(h, e))
4797 // Otherwise, create a new empty header block, make it the pred of the entry block,
4798 // and redirect the preds of the entry block to go to this.
4800 BasicBlock* beforeTop = t->bbPrev;
4801 // Make sure that the new block is in the same region as the loop.
4802 // (We will only create loops that are entirely within a region.)
4803 BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, beforeTop, true);
4804 // This is in the containing loop.
4805 h2->bbNatLoopNum = optLoopTable[loopInd].lpParent;
4806 h2->bbWeight = (h2->isRunRarely() ? 0 : ambientWeight);
4808 // We don't care where it was put; splice it between beforeTop and top.
4809 if (beforeTop->bbNext != h2)
4811 h2->bbPrev->setNext(h2->bbNext); // Splice h2 out.
4812 beforeTop->setNext(h2); // Splice h2 in, between beforeTop and t.
4816 if (h2->bbNext != e)
4818 h2->bbJumpKind = BBJ_ALWAYS;
4821 BlockSetOps::Assign(this, h2->bbReach, e->bbReach);
4823 // Redirect paths from preds of "e" to go to "h2" instead of "e".
4824 BlockToBlockMap* blockMap = new (getAllocator()) BlockToBlockMap(getAllocator());
4825 blockMap->Set(e, h2);
4827 for (flowList* predEntry = e->bbPreds; predEntry; predEntry = predEntry->flNext)
4829 BasicBlock* predBlock = predEntry->flBlock;
4831 // Skip if predBlock is in the loop.
4832 if (t->bbNum <= predBlock->bbNum && predBlock->bbNum <= b->bbNum)
4836 optRedirectBlock(predBlock, blockMap);
4839 optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h2);
4842 /*****************************************************************************
4844 * Determine the kind of interference for the call.
4847 /* static */ inline Compiler::callInterf Compiler::optCallInterf(GenTreePtr call)
4849 assert(call->gtOper == GT_CALL);
4851 // if not a helper, kills everything
4852 if (call->gtCall.gtCallType != CT_HELPER)
4857 // setfield and array address store kill all indirections
4858 switch (eeGetHelperNum(call->gtCall.gtCallMethHnd))
4860 case CORINFO_HELP_ASSIGN_REF: // Not strictly needed as we don't make a GT_CALL with this
4861 case CORINFO_HELP_CHECKED_ASSIGN_REF: // Not strictly needed as we don't make a GT_CALL with this
4862 case CORINFO_HELP_ASSIGN_BYREF: // Not strictly needed as we don't make a GT_CALL with this
4863 case CORINFO_HELP_SETFIELDOBJ:
4864 case CORINFO_HELP_ARRADDR_ST:
4866 return CALLINT_REF_INDIRS;
4868 case CORINFO_HELP_SETFIELDFLOAT:
4869 case CORINFO_HELP_SETFIELDDOUBLE:
4870 case CORINFO_HELP_SETFIELD8:
4871 case CORINFO_HELP_SETFIELD16:
4872 case CORINFO_HELP_SETFIELD32:
4873 case CORINFO_HELP_SETFIELD64:
4875 return CALLINT_SCL_INDIRS;
4877 case CORINFO_HELP_ASSIGN_STRUCT: // Not strictly needed as we don't use this in Jit32
4878 case CORINFO_HELP_MEMSET: // Not strictly needed as we don't make a GT_CALL with this
4879 case CORINFO_HELP_MEMCPY: // Not strictly needed as we don't make a GT_CALL with this
4880 case CORINFO_HELP_SETFIELDSTRUCT:
4882 return CALLINT_ALL_INDIRS;
4888 // other helpers kill nothing
4889 return CALLINT_NONE;
4892 /*****************************************************************************
4894 * See if the given tree can be computed in the given precision (which must
4895 * be smaller than the type of the tree for this to make sense). If 'doit'
4896 * is false, we merely check to see whether narrowing is possible; if we
4897 * get called with 'doit' being true, we actually perform the narrowing.
4900 bool Compiler::optNarrowTree(GenTreePtr tree, var_types srct, var_types dstt, ValueNumPair vnpNarrow, bool doit)
4906 noway_assert(genActualType(tree->gtType) == genActualType(srct));
4908 /* Assume we're only handling integer types */
4909 noway_assert(varTypeIsIntegral(srct));
4910 noway_assert(varTypeIsIntegral(dstt));
4912 unsigned srcSize = genTypeSize(srct);
4913 unsigned dstSize = genTypeSize(dstt);
4915 /* dstt must be smaller than srct to narrow */
4916 if (dstSize >= srcSize)
4921 /* Figure out what kind of a node we have */
4922 oper = tree->OperGet();
4923 kind = tree->OperKind();
4925 if (kind & GTK_ASGOP)
4927 noway_assert(doit == false);
4931 ValueNumPair NoVNPair = ValueNumPair();
4933 if (kind & GTK_LEAF)
4937 /* Constants can usually be narrowed by changing their value */
4938 CLANG_FORMAT_COMMENT_ANCHOR;
4940 #ifndef _TARGET_64BIT_
4945 lval = tree->gtIntConCommon.LngValue();
4974 if ((lval & lmask) != lval)
4979 tree->ChangeOperConst(GT_CNS_INT);
4980 tree->gtType = TYP_INT;
4981 tree->gtIntCon.gtIconVal = (int)lval;
4982 if (vnStore != nullptr)
4984 fgValueNumberTreeConst(tree);
4994 ival = tree->gtIntCon.gtIconVal;
5013 #ifdef _TARGET_64BIT_
5020 #endif // _TARGET_64BIT_
5025 if ((ival & imask) != ival)
5030 #ifdef _TARGET_64BIT_
5033 tree->gtType = TYP_INT;
5034 tree->gtIntCon.gtIconVal = (int)ival;
5035 if (vnStore != nullptr)
5037 fgValueNumberTreeConst(tree);
5040 #endif // _TARGET_64BIT_
5044 /* Operands that are in memory can usually be narrowed
5045 simply by changing their gtType */
5048 /* We only allow narrowing long -> int for a GT_LCL_VAR */
5049 if (dstSize == sizeof(int))
5062 noway_assert(doit == false);
5066 if (kind & (GTK_BINOP | GTK_UNOP))
5069 op1 = tree->gtOp.gtOp1;
5071 op2 = tree->gtOp.gtOp2;
5073 switch (tree->gtOper)
5076 noway_assert(genActualType(tree->gtType) == genActualType(op2->gtType));
5078 // Is op2 a small constant than can be narrowed into dstt?
5079 // if so the result of the GT_AND will also fit into 'dstt' and can be narrowed
5080 if ((op2->gtOper == GT_CNS_INT) && optNarrowTree(op2, srct, dstt, NoVNPair, false))
5082 // We will change the type of the tree and narrow op2
5086 tree->gtType = genActualType(dstt);
5087 tree->SetVNs(vnpNarrow);
5089 optNarrowTree(op2, srct, dstt, NoVNPair, true);
5090 // We may also need to cast away the upper bits of op1
5093 assert(tree->gtType == TYP_INT);
5094 op1 = gtNewCastNode(TYP_INT, op1, TYP_INT);
5096 op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
5098 tree->gtOp.gtOp1 = op1;
5109 if (tree->gtOverflow() || varTypeIsSmall(dstt))
5111 noway_assert(doit == false);
5119 noway_assert(genActualType(tree->gtType) == genActualType(op1->gtType));
5120 noway_assert(genActualType(tree->gtType) == genActualType(op2->gtType));
5122 if (gtIsActiveCSE_Candidate(op1) || gtIsActiveCSE_Candidate(op2) ||
5123 !optNarrowTree(op1, srct, dstt, NoVNPair, doit) || !optNarrowTree(op2, srct, dstt, NoVNPair, doit))
5125 noway_assert(doit == false);
5129 /* Simply change the type of the tree */
5133 if (tree->gtOper == GT_MUL && (tree->gtFlags & GTF_MUL_64RSLT))
5135 tree->gtFlags &= ~GTF_MUL_64RSLT;
5138 tree->gtType = genActualType(dstt);
5139 tree->SetVNs(vnpNarrow);
5147 /* Simply change the type of the tree */
5149 if (doit && (dstSize <= genTypeSize(tree->gtType)))
5151 tree->gtType = genSignedType(dstt);
5152 tree->SetVNs(vnpNarrow);
5154 /* Make sure we don't mess up the variable type */
5155 if ((oper == GT_LCL_VAR) || (oper == GT_LCL_FLD))
5157 tree->gtFlags |= GTF_VAR_CAST;
5170 /* These can always be narrowed since they only represent 0 or 1 */
5175 var_types cast = tree->CastToType();
5176 var_types oprt = op1->TypeGet();
5177 unsigned oprSize = genTypeSize(oprt);
5184 if (varTypeIsIntegralOrI(dstt) != varTypeIsIntegralOrI(oprt))
5189 if (tree->gtOverflow())
5194 /* Is this a cast from the type we're narrowing to or a smaller one? */
5196 if (oprSize <= dstSize)
5198 /* Bash the target type of the cast */
5202 dstt = genSignedType(dstt);
5204 if (oprSize == dstSize)
5206 // Same size: change the CAST into a NOP
5207 tree->ChangeOper(GT_NOP);
5208 tree->gtType = dstt;
5209 tree->gtOp.gtOp2 = nullptr;
5210 tree->gtVNPair = op1->gtVNPair; // Set to op1's ValueNumber
5214 // oprSize is smaller
5215 assert(oprSize < dstSize);
5217 // Change the CastToType in the GT_CAST node
5218 tree->CastToType() = dstt;
5220 // The result type of a GT_CAST is never a small type.
5221 // Use genActualType to widen dstt when it is a small types.
5222 tree->gtType = genActualType(dstt);
5223 tree->SetVNs(vnpNarrow);
5233 if (!gtIsActiveCSE_Candidate(op2) && optNarrowTree(op2, srct, dstt, vnpNarrow, doit))
5235 /* Simply change the type of the tree */
5239 tree->gtType = genActualType(dstt);
5240 tree->SetVNs(vnpNarrow);
5247 noway_assert(doit == false);
5255 /*****************************************************************************
5257 * The following logic figures out whether the given variable is assigned
5258 * somewhere in a list of basic blocks (or in an entire loop).
5261 Compiler::fgWalkResult Compiler::optIsVarAssgCB(GenTreePtr* pTree, fgWalkData* data)
5263 GenTreePtr tree = *pTree;
5265 if (tree->OperKind() & GTK_ASGOP)
5267 GenTreePtr dest = tree->gtOp.gtOp1;
5268 genTreeOps destOper = dest->OperGet();
5270 isVarAssgDsc* desc = (isVarAssgDsc*)data->pCallbackData;
5271 assert(desc && desc->ivaSelf == desc);
5273 if (destOper == GT_LCL_VAR)
5275 unsigned tvar = dest->gtLclVarCommon.gtLclNum;
5276 if (tvar < lclMAX_ALLSET_TRACKED)
5278 AllVarSetOps::AddElemD(data->compiler, desc->ivaMaskVal, tvar);
5282 desc->ivaMaskIncomplete = true;
5285 if (tvar == desc->ivaVar)
5287 if (tree != desc->ivaSkip)
5293 else if (destOper == GT_LCL_FLD)
5295 /* We can't track every field of every var. Moreover, indirections
5296 may access different parts of the var as different (but
5297 overlapping) fields. So just treat them as indirect accesses */
5299 // unsigned lclNum = dest->gtLclFld.gtLclNum;
5300 // noway_assert(lvaTable[lclNum].lvAddrTaken);
5302 varRefKinds refs = varTypeIsGC(tree->TypeGet()) ? VR_IND_REF : VR_IND_SCL;
5303 desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | refs);
5305 else if (destOper == GT_CLS_VAR)
5307 desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | VR_GLB_VAR);
5309 else if (destOper == GT_IND)
5311 /* Set the proper indirection bits */
5313 varRefKinds refs = varTypeIsGC(tree->TypeGet()) ? VR_IND_REF : VR_IND_SCL;
5314 desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | refs);
5317 else if (tree->gtOper == GT_CALL)
5319 isVarAssgDsc* desc = (isVarAssgDsc*)data->pCallbackData;
5320 assert(desc && desc->ivaSelf == desc);
5322 desc->ivaMaskCall = optCallInterf(tree);
5325 return WALK_CONTINUE;
5328 /*****************************************************************************/
5330 bool Compiler::optIsVarAssigned(BasicBlock* beg, BasicBlock* end, GenTreePtr skip, unsigned var)
5335 desc.ivaSkip = skip;
5337 desc.ivaSelf = &desc;
5340 desc.ivaMaskCall = CALLINT_NONE;
5341 AllVarSetOps::AssignNoCopy(this, desc.ivaMaskVal, AllVarSetOps::MakeEmpty(this));
5347 for (GenTreeStmt* stmt = beg->firstStmt(); stmt; stmt = stmt->gtNextStmt)
5349 noway_assert(stmt->gtOper == GT_STMT);
5350 if (fgWalkTreePre(&stmt->gtStmtExpr, optIsVarAssgCB, &desc))
5372 /*****************************************************************************/
5373 int Compiler::optIsSetAssgLoop(unsigned lnum, ALLVARSET_VALARG_TP vars, varRefKinds inds)
5377 /* Get hold of the loop descriptor */
5379 noway_assert(lnum < optLoopCount);
5380 loop = optLoopTable + lnum;
5382 /* Do we already know what variables are assigned within this loop? */
5384 if (!(loop->lpFlags & LPFLG_ASGVARS_YES))
5391 /* Prepare the descriptor used by the tree walker call-back */
5393 desc.ivaVar = (unsigned)-1;
5394 desc.ivaSkip = nullptr;
5396 desc.ivaSelf = &desc;
5398 AllVarSetOps::AssignNoCopy(this, desc.ivaMaskVal, AllVarSetOps::MakeEmpty(this));
5399 desc.ivaMaskInd = VR_NONE;
5400 desc.ivaMaskCall = CALLINT_NONE;
5401 desc.ivaMaskIncomplete = false;
5403 /* Now walk all the statements of the loop */
5405 beg = loop->lpHead->bbNext;
5406 end = loop->lpBottom;
5408 for (/**/; /**/; beg = beg->bbNext)
5412 for (GenTreeStmt* stmt = beg->FirstNonPhiDef(); stmt; stmt = stmt->gtNextStmt)
5414 noway_assert(stmt->gtOper == GT_STMT);
5415 fgWalkTreePre(&stmt->gtStmtExpr, optIsVarAssgCB, &desc);
5417 if (desc.ivaMaskIncomplete)
5419 loop->lpFlags |= LPFLG_ASGVARS_INC;
5429 AllVarSetOps::Assign(this, loop->lpAsgVars, desc.ivaMaskVal);
5430 loop->lpAsgInds = desc.ivaMaskInd;
5431 loop->lpAsgCall = desc.ivaMaskCall;
5433 /* Now we know what variables are assigned in the loop */
5435 loop->lpFlags |= LPFLG_ASGVARS_YES;
5438 /* Now we can finally test the caller's mask against the loop's */
5439 if (!AllVarSetOps::IsEmptyIntersection(this, loop->lpAsgVars, vars) || (loop->lpAsgInds & inds))
5444 switch (loop->lpAsgCall)
5448 /* Can't hoist if the call might have side effect on an indirection. */
5450 if (loop->lpAsgInds != VR_NONE)
5457 case CALLINT_REF_INDIRS:
5459 /* Can't hoist if the call might have side effect on an ref indirection. */
5461 if (loop->lpAsgInds & VR_IND_REF)
5468 case CALLINT_SCL_INDIRS:
5470 /* Can't hoist if the call might have side effect on an non-ref indirection. */
5472 if (loop->lpAsgInds & VR_IND_SCL)
5479 case CALLINT_ALL_INDIRS:
5481 /* Can't hoist if the call might have side effect on any indirection. */
5483 if (loop->lpAsgInds & (VR_IND_REF | VR_IND_SCL))
5492 /* Other helpers kill nothing */
5497 noway_assert(!"Unexpected lpAsgCall value");
5503 void Compiler::optPerformHoistExpr(GenTreePtr origExpr, unsigned lnum)
5508 printf("\nHoisting a copy of ");
5509 printTreeID(origExpr);
5510 printf(" into PreHeader for loop L%02u <BB%02u..BB%02u>:\n", lnum, optLoopTable[lnum].lpFirst->bbNum,
5511 optLoopTable[lnum].lpBottom->bbNum);
5512 gtDispTree(origExpr);
5517 // This loop has to be in a form that is approved for hoisting.
5518 assert(optLoopTable[lnum].lpFlags & LPFLG_HOISTABLE);
5520 // Create a copy of the expression and mark it for CSE's.
5521 GenTreePtr hoistExpr = gtCloneExpr(origExpr, GTF_MAKE_CSE);
5523 // At this point we should have a cloned expression, marked with the GTF_MAKE_CSE flag
5524 assert(hoistExpr != origExpr);
5525 assert(hoistExpr->gtFlags & GTF_MAKE_CSE);
5527 GenTreePtr hoist = hoistExpr;
5528 // The value of the expression isn't used (unless it's an assignment).
5529 if (hoistExpr->OperGet() != GT_ASG)
5531 hoist = gtUnusedValNode(hoistExpr);
5534 /* Put the statement in the preheader */
5536 fgCreateLoopPreHeader(lnum);
5538 BasicBlock* preHead = optLoopTable[lnum].lpHead;
5539 assert(preHead->bbJumpKind == BBJ_NONE);
5541 // fgMorphTree and lvaRecursiveIncRefCounts requires that compCurBB be the block that contains
5542 // (or in this case, will contain) the expression.
5543 compCurBB = preHead;
5545 // Increment the ref counts of any local vars appearing in "hoist".
5546 // Note that we need to do this before fgMorphTree() as fgMorph() could constant
5547 // fold away some of the lcl vars referenced by "hoist".
5548 lvaRecursiveIncRefCounts(hoist);
5550 hoist = fgMorphTree(hoist);
5552 GenTreePtr hoistStmt = gtNewStmt(hoist);
5553 hoistStmt->gtFlags |= GTF_STMT_CMPADD;
5555 /* simply append the statement at the end of the preHead's list */
5557 GenTreePtr treeList = preHead->bbTreeList;
5561 /* append after last statement */
5563 GenTreePtr last = treeList->gtPrev;
5564 assert(last->gtNext == nullptr);
5566 last->gtNext = hoistStmt;
5567 hoistStmt->gtPrev = last;
5568 treeList->gtPrev = hoistStmt;
5572 /* Empty pre-header - store the single statement in the block */
5574 preHead->bbTreeList = hoistStmt;
5575 hoistStmt->gtPrev = hoistStmt;
5578 hoistStmt->gtNext = nullptr;
5583 printf("This hoisted copy placed in PreHeader (BB%02u):\n", preHead->bbNum);
5588 if (fgStmtListThreaded)
5590 gtSetStmtInfo(hoistStmt);
5591 fgSetStmtSeq(hoistStmt);
5595 if (m_nodeTestData != nullptr)
5598 // What is the depth of the loop "lnum"?
5600 unsigned lnumIter = lnum;
5601 while (optLoopTable[lnumIter].lpParent != BasicBlock::NOT_IN_LOOP)
5604 lnumIter = optLoopTable[lnumIter].lpParent;
5607 NodeToTestDataMap* testData = GetNodeTestData();
5609 TestLabelAndNum tlAndN;
5610 if (testData->Lookup(origExpr, &tlAndN) && tlAndN.m_tl == TL_LoopHoist)
5612 if (tlAndN.m_num == -1)
5615 printTreeID(origExpr);
5616 printf(" was declared 'do not hoist', but is being hoisted.\n");
5619 else if (tlAndN.m_num != depth)
5622 printTreeID(origExpr);
5623 printf(" was declared as hoistable from loop at nesting depth %d; actually hoisted from loop at depth "
5625 tlAndN.m_num, depth);
5630 // We've correctly hoisted this, so remove the annotation. Later, we'll check for any remaining "must
5631 // hoist" annotations.
5632 testData->Remove(origExpr);
5633 // Now we insert an annotation to make sure that "hoistExpr" is actually CSE'd.
5634 tlAndN.m_tl = TL_CSE_Def;
5635 tlAndN.m_num = m_loopHoistCSEClass++;
5636 testData->Set(hoistExpr, tlAndN);
5642 #if LOOP_HOIST_STATS
5643 if (!m_curLoopHasHoistedExpression)
5645 m_loopsWithHoistedExpressions++;
5646 m_curLoopHasHoistedExpression = true;
5648 m_totalHoistedExpressions++;
5649 #endif // LOOP_HOIST_STATS
5652 void Compiler::optHoistLoopCode()
5654 // If we don't have any loops in the method then take an early out now.
5655 if (optLoopCount == 0)
5661 unsigned jitNoHoist = JitConfig.JitNoHoist();
5669 // The code in this #if has been useful in debugging loop cloning issues, by
5670 // enabling selective enablement of the loop cloning optimization according to
5673 unsigned methHash = info.compMethodHash();
5674 char* lostr = getenv("loophoisthashlo");
5675 unsigned methHashLo = 0;
5678 sscanf_s(lostr, "%x", &methHashLo);
5679 // methHashLo = (unsigned(atoi(lostr)) << 2); // So we don't have to use negative numbers.
5681 char* histr = getenv("loophoisthashhi");
5682 unsigned methHashHi = UINT32_MAX;
5685 sscanf_s(histr, "%x", &methHashHi);
5686 // methHashHi = (unsigned(atoi(histr)) << 2); // So we don't have to use negative numbers.
5688 if (methHash < methHashLo || methHash > methHashHi)
5690 printf("Doing loop hoisting in %s (0x%x).\n", info.compFullName, methHash);
5692 #endif // 0 -- debugging loop cloning issues
5697 printf("\n*************** In optHoistLoopCode()\n");
5698 printf("Blocks/Trees before phase\n");
5699 fgDispBasicBlocks(true);
5704 // Consider all the loop nests, in outer-to-inner order (thus hoisting expressions outside the largest loop in which
5705 // they are invariant.)
5706 LoopHoistContext hoistCtxt(this);
5707 for (unsigned lnum = 0; lnum < optLoopCount; lnum++)
5709 if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED)
5714 if (optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP)
5716 optHoistLoopNest(lnum, &hoistCtxt);
5725 printf("Blocks/Trees after optHoistLoopCode() modified flowgraph\n");
5726 fgDispBasicBlocks(true);
5730 // Make sure that the predecessor lists are accurate
5731 fgDebugCheckBBlist();
5736 // Test Data stuff..
5737 // If we have no test data, early out.
5738 if (m_nodeTestData == nullptr)
5742 NodeToTestDataMap* testData = GetNodeTestData();
5743 for (NodeToTestDataMap::KeyIterator ki = testData->Begin(); !ki.Equal(testData->End()); ++ki)
5745 TestLabelAndNum tlAndN;
5746 GenTreePtr node = ki.Get();
5747 bool b = testData->Lookup(node, &tlAndN);
5749 if (tlAndN.m_tl != TL_LoopHoist)
5753 // Otherwise, it is a loop hoist annotation.
5754 assert(tlAndN.m_num < 100); // >= 100 indicates nested static field address, should already have been moved.
5755 if (tlAndN.m_num >= 0)
5759 printf(" was declared 'must hoist', but has not been hoisted.\n");
5766 void Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt)
5768 // Do this loop, then recursively do all nested loops.
5769 CLANG_FORMAT_COMMENT_ANCHOR;
5771 #if LOOP_HOIST_STATS
5773 m_curLoopHasHoistedExpression = false;
5774 m_loopsConsidered++;
5775 #endif // LOOP_HOIST_STATS
5777 optHoistThisLoop(lnum, hoistCtxt);
5779 VNSet* hoistedInCurLoop = hoistCtxt->ExtractHoistedInCurLoop();
5781 if (optLoopTable[lnum].lpChild != BasicBlock::NOT_IN_LOOP)
5783 // Add the ones hoisted in "lnum" to "hoistedInParents" for any nested loops.
5784 // TODO-Cleanup: we should have a set abstraction for loops.
5785 if (hoistedInCurLoop != nullptr)
5787 for (VNSet::KeyIterator keys = hoistedInCurLoop->Begin(); !keys.Equal(hoistedInCurLoop->End()); ++keys)
5791 assert(!hoistCtxt->m_hoistedInParentLoops.Lookup(keys.Get(), &b));
5793 hoistCtxt->m_hoistedInParentLoops.Set(keys.Get(), true);
5797 for (unsigned child = optLoopTable[lnum].lpChild; child != BasicBlock::NOT_IN_LOOP;
5798 child = optLoopTable[child].lpSibling)
5800 optHoistLoopNest(child, hoistCtxt);
5804 // TODO-Cleanup: we should have a set abstraction for loops.
5805 if (hoistedInCurLoop != nullptr)
5807 for (VNSet::KeyIterator keys = hoistedInCurLoop->Begin(); !keys.Equal(hoistedInCurLoop->End()); ++keys)
5809 // Note that we asserted when we added these that they hadn't been members, so removing is appropriate.
5810 hoistCtxt->m_hoistedInParentLoops.Remove(keys.Get());
5816 void Compiler::optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt)
5818 LoopDsc* pLoopDsc = &optLoopTable[lnum];
5820 /* If loop was removed continue */
5822 if (pLoopDsc->lpFlags & LPFLG_REMOVED)
5827 /* Get the head and tail of the loop */
5829 BasicBlock* head = pLoopDsc->lpHead;
5830 BasicBlock* tail = pLoopDsc->lpBottom;
5831 BasicBlock* lbeg = pLoopDsc->lpEntry;
5834 // We must have a do-while loop
5835 if ((pLoopDsc->lpFlags & LPFLG_DO_WHILE) == 0)
5840 // The loop-head must dominate the loop-entry.
5841 // TODO-CQ: Couldn't we make this true if it's not?
5842 if (!fgDominate(head, lbeg))
5847 // if lbeg is the start of a new try block then we won't be able to hoist
5848 if (!BasicBlock::sameTryRegion(head, lbeg))
5853 // We don't bother hoisting when inside of a catch block
5854 if ((lbeg->bbCatchTyp != BBCT_NONE) && (lbeg->bbCatchTyp != BBCT_FINALLY))
5859 pLoopDsc->lpFlags |= LPFLG_HOISTABLE;
5861 unsigned begn = lbeg->bbNum;
5862 unsigned endn = tail->bbNum;
5864 // Ensure the per-loop sets/tables are empty.
5865 hoistCtxt->m_curLoopVnInvariantCache.RemoveAll();
5870 printf("optHoistLoopCode for loop L%02u <BB%02u..BB%02u>:\n", lnum, begn, endn);
5871 printf(" Loop body %s a call\n", pLoopDsc->lpContainsCall ? "contains" : "does not contain");
5875 VARSET_TP VARSET_INIT_NOCOPY(loopVars, VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, pLoopDsc->lpVarUseDef));
5877 pLoopDsc->lpVarInOutCount = VarSetOps::Count(this, pLoopDsc->lpVarInOut);
5878 pLoopDsc->lpLoopVarCount = VarSetOps::Count(this, loopVars);
5879 pLoopDsc->lpHoistedExprCount = 0;
5881 #ifndef _TARGET_64BIT_
5882 unsigned longVarsCount = VarSetOps::Count(this, lvaLongVars);
5884 if (longVarsCount > 0)
5886 // Since 64-bit variables take up two registers on 32-bit targets, we increase
5887 // the Counts such that each TYP_LONG variable counts twice.
5889 VARSET_TP VARSET_INIT_NOCOPY(loopLongVars, VarSetOps::Intersection(this, loopVars, lvaLongVars));
5890 VARSET_TP VARSET_INIT_NOCOPY(inOutLongVars, VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, lvaLongVars));
5895 printf("\n LONGVARS(%d)=", VarSetOps::Count(this, lvaLongVars));
5896 lvaDispVarSet(lvaLongVars);
5899 pLoopDsc->lpLoopVarCount += VarSetOps::Count(this, loopLongVars);
5900 pLoopDsc->lpVarInOutCount += VarSetOps::Count(this, inOutLongVars);
5902 #endif // !_TARGET_64BIT_
5907 printf("\n USEDEF (%d)=", VarSetOps::Count(this, pLoopDsc->lpVarUseDef));
5908 lvaDispVarSet(pLoopDsc->lpVarUseDef);
5910 printf("\n INOUT (%d)=", pLoopDsc->lpVarInOutCount);
5911 lvaDispVarSet(pLoopDsc->lpVarInOut);
5913 printf("\n LOOPVARS(%d)=", pLoopDsc->lpLoopVarCount);
5914 lvaDispVarSet(loopVars);
5919 unsigned floatVarsCount = VarSetOps::Count(this, lvaFloatVars);
5921 if (floatVarsCount > 0)
5923 VARSET_TP VARSET_INIT_NOCOPY(loopFPVars, VarSetOps::Intersection(this, loopVars, lvaFloatVars));
5924 VARSET_TP VARSET_INIT_NOCOPY(inOutFPVars, VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, lvaFloatVars));
5926 pLoopDsc->lpLoopVarFPCount = VarSetOps::Count(this, loopFPVars);
5927 pLoopDsc->lpVarInOutFPCount = VarSetOps::Count(this, inOutFPVars);
5928 pLoopDsc->lpHoistedFPExprCount = 0;
5930 pLoopDsc->lpLoopVarCount -= pLoopDsc->lpLoopVarFPCount;
5931 pLoopDsc->lpVarInOutCount -= pLoopDsc->lpVarInOutFPCount;
5936 printf(" INOUT-FP(%d)=", pLoopDsc->lpVarInOutFPCount);
5937 lvaDispVarSet(inOutFPVars);
5939 printf("\n LOOPV-FP(%d)=", pLoopDsc->lpLoopVarFPCount);
5940 lvaDispVarSet(loopFPVars);
5944 else // (floatVarsCount == 0)
5946 pLoopDsc->lpLoopVarFPCount = 0;
5947 pLoopDsc->lpVarInOutFPCount = 0;
5948 pLoopDsc->lpHoistedFPExprCount = 0;
5951 // Find the set of definitely-executed blocks.
5952 // Ideally, the definitely-executed blocks are the ones that post-dominate the entry block.
5953 // Until we have post-dominators, we'll special-case for single-exit blocks.
5954 ExpandArrayStack<BasicBlock*> defExec(getAllocatorLoopHoist());
5955 if (pLoopDsc->lpFlags & LPFLG_ONE_EXIT)
5957 assert(pLoopDsc->lpExit != nullptr);
5958 BasicBlock* cur = pLoopDsc->lpExit;
5959 // Push dominators, until we reach "entry" or exit the loop.
5960 while (cur != nullptr && pLoopDsc->lpContains(cur) && cur != pLoopDsc->lpEntry)
5965 // If we didn't reach the entry block, give up and *just* push the entry block.
5966 if (cur != pLoopDsc->lpEntry)
5970 defExec.Push(pLoopDsc->lpEntry);
5972 else // More than one exit
5974 // We'll assume that only the entry block is definitely executed.
5975 // We could in the future do better.
5976 defExec.Push(pLoopDsc->lpEntry);
5979 while (defExec.Size() > 0)
5981 // Consider in reverse order: dominator before dominatee.
5982 BasicBlock* blk = defExec.Pop();
5983 optHoistLoopExprsForBlock(blk, lnum, hoistCtxt);
5987 // Hoist any expressions in "blk" that are invariant in loop "lnum" outside of "blk" and into a PreHead for loop "lnum".
5988 void Compiler::optHoistLoopExprsForBlock(BasicBlock* blk, unsigned lnum, LoopHoistContext* hoistCtxt)
5990 LoopDsc* pLoopDsc = &optLoopTable[lnum];
5991 bool firstBlockAndBeforeSideEffect = (blk == pLoopDsc->lpEntry);
5992 unsigned blkWeight = blk->getBBWeight(this);
5997 printf(" optHoistLoopExprsForBlock BB%02u (weight=%6s) of loop L%02u <BB%02u..BB%02u>, firstBlock is %s\n",
5998 blk->bbNum, refCntWtd2str(blkWeight), lnum, pLoopDsc->lpFirst->bbNum, pLoopDsc->lpBottom->bbNum,
5999 firstBlockAndBeforeSideEffect ? "true" : "false");
6000 if (blkWeight < (BB_UNITY_WEIGHT / 10))
6002 printf(" block weight is too small to perform hoisting.\n");
6007 if (blkWeight < (BB_UNITY_WEIGHT / 10))
6009 // Block weight is too small to perform hoisting.
6013 for (GenTreeStmt* stmt = blk->FirstNonPhiDef(); stmt; stmt = stmt->gtNextStmt)
6015 GenTreePtr stmtTree = stmt->gtStmtExpr;
6017 (void)optHoistLoopExprsForTree(stmtTree, lnum, hoistCtxt, &firstBlockAndBeforeSideEffect, &hoistable);
6020 // we will try to hoist the top-level stmtTree
6021 optHoistCandidate(stmtTree, lnum, hoistCtxt);
6026 bool Compiler::optIsProfitableToHoistableTree(GenTreePtr tree, unsigned lnum)
6028 LoopDsc* pLoopDsc = &optLoopTable[lnum];
6030 bool loopContainsCall = pLoopDsc->lpContainsCall;
6033 int hoistedExprCount;
6037 if (varTypeIsFloating(tree->TypeGet()))
6039 hoistedExprCount = pLoopDsc->lpHoistedFPExprCount;
6040 loopVarCount = pLoopDsc->lpLoopVarFPCount;
6041 varInOutCount = pLoopDsc->lpVarInOutFPCount;
6043 availRegCount = CNT_CALLEE_SAVED_FLOAT;
6044 if (!loopContainsCall)
6046 availRegCount += CNT_CALLEE_TRASH_FLOAT - 1;
6049 // For ARM each double takes two FP registers
6050 // For now on ARM we won't track singles/doubles
6051 // and instead just assume that we always have doubles.
6058 hoistedExprCount = pLoopDsc->lpHoistedExprCount;
6059 loopVarCount = pLoopDsc->lpLoopVarCount;
6060 varInOutCount = pLoopDsc->lpVarInOutCount;
6062 availRegCount = CNT_CALLEE_SAVED - 1;
6063 if (!loopContainsCall)
6065 availRegCount += CNT_CALLEE_TRASH - 1;
6067 #ifndef _TARGET_64BIT_
6068 // For our 32-bit targets Long types take two registers.
6069 if (varTypeIsLong(tree->TypeGet()))
6071 availRegCount = (availRegCount + 1) / 2;
6076 // decrement the availRegCount by the count of expression that we have already hoisted.
6077 availRegCount -= hoistedExprCount;
6079 // the variables that are read/written inside the loop should
6080 // always be a subset of the InOut variables for the loop
6081 assert(loopVarCount <= varInOutCount);
6083 // When loopVarCount >= availRegCount we believe that all of the
6084 // available registers will get used to hold LclVars inside the loop.
6085 // This pessimistically assumes that each loopVar has a conflicting
6086 // lifetime with every other loopVar.
6087 // For this case we will hoist the expression only if is profitable
6088 // to place it in a stack home location (gtCostEx >= 2*IND_COST_EX)
6089 // as we believe it will be placed in the stack or one of the other
6090 // loopVars will be spilled into the stack
6092 if (loopVarCount >= availRegCount)
6094 // Don't hoist expressions that are not heavy: tree->gtCostEx < (2*IND_COST_EX)
6095 if (tree->gtCostEx < (2 * IND_COST_EX))
6101 // When varInOutCount < availRegCount we are know that there are
6102 // some available register(s) when we enter the loop body.
6103 // When varInOutCount == availRegCount there often will be a register
6104 // available when we enter the loop body, since a loop often defines a
6105 // LclVar on exit or there is often at least one LclVar that is worth
6106 // spilling to the stack to make way for this hoisted expression.
6107 // So we are willing hoist an expression with gtCostEx == MIN_CSE_COST
6109 if (varInOutCount > availRegCount)
6111 // Don't hoist expressions that barely meet CSE cost requirements: tree->gtCostEx == MIN_CSE_COST
6112 if (tree->gtCostEx <= MIN_CSE_COST + 1)
6122 // This function returns true if 'tree' is a loop invariant expression.
6123 // It also sets '*pHoistable' to true if 'tree' can be hoisted into a loop PreHeader block
6125 bool Compiler::optHoistLoopExprsForTree(
6126 GenTreePtr tree, unsigned lnum, LoopHoistContext* hoistCtxt, bool* pFirstBlockAndBeforeSideEffect, bool* pHoistable)
6128 // First do the children.
6129 // We must keep track of whether each child node was hoistable or not
6131 unsigned nChildren = tree->NumChildren();
6132 bool childrenHoistable[GenTree::MAX_CHILDREN];
6134 // Initialize the array elements for childrenHoistable[] to false
6135 for (unsigned i = 0; i < nChildren; i++)
6137 childrenHoistable[i] = false;
6140 bool treeIsInvariant = true;
6141 for (unsigned childNum = 0; childNum < nChildren; childNum++)
6143 if (!optHoistLoopExprsForTree(tree->GetChild(childNum), lnum, hoistCtxt, pFirstBlockAndBeforeSideEffect,
6144 &childrenHoistable[childNum]))
6146 treeIsInvariant = false;
6150 // If all the children of "tree" are hoistable, then "tree" itself can be hoisted
6152 bool treeIsHoistable = treeIsInvariant;
6154 // But we must see if anything else prevents "tree" from being hoisted.
6156 if (treeIsInvariant)
6158 // Tree must be a suitable CSE candidate for us to be able to hoist it.
6159 treeIsHoistable = optIsCSEcandidate(tree);
6161 // If it's a call, it must be a helper call, and be pure.
6162 // Further, if it may run a cctor, it must be labeled as "Hoistable"
6163 // (meaning it won't run a cctor because the class is not precise-init).
6164 if (treeIsHoistable && tree->OperGet() == GT_CALL)
6166 GenTreeCall* call = tree->AsCall();
6167 if (call->gtCallType != CT_HELPER)
6169 treeIsHoistable = false;
6173 CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
6174 if (!s_helperCallProperties.IsPure(helpFunc))
6176 treeIsHoistable = false;
6178 else if (s_helperCallProperties.MayRunCctor(helpFunc) && (call->gtFlags & GTF_CALL_HOISTABLE) == 0)
6180 treeIsHoistable = false;
6185 if (treeIsHoistable)
6187 if (!(*pFirstBlockAndBeforeSideEffect))
6189 // For now, we give up on an expression that might raise an exception if it is after the
6190 // first possible global side effect (and we assume we're after that if we're not in the first block).
6191 // TODO-CQ: this is when we might do loop cloning.
6193 if ((tree->gtFlags & GTF_EXCEPT) != 0)
6195 treeIsHoistable = false;
6198 // Currently we must give up on reads from static variables (even if we are in the first block).
6200 if (tree->OperGet() == GT_CLS_VAR)
6202 // TODO-CQ: test that fails if we hoist GT_CLS_VAR: JIT\Directed\Languages\ComponentPascal\pi_r.exe
6204 treeIsHoistable = false;
6208 // Is the value of the whole tree loop invariant?
6210 optVNIsLoopInvariant(tree->gtVNPair.GetLiberal(), lnum, &hoistCtxt->m_curLoopVnInvariantCache);
6212 // Is the value of the whole tree loop invariant?
6213 if (!treeIsInvariant)
6215 treeIsHoistable = false;
6219 // Check if we need to set '*pFirstBlockAndBeforeSideEffect' to false.
6220 // If we encounter a tree with a call in it
6221 // or if we see an assignment to global we set it to false.
6223 // If we are already set to false then we can skip these checks
6225 if (*pFirstBlockAndBeforeSideEffect)
6227 // For this purpose, we only care about memory side effects. We assume that expressions will
6228 // be hoisted so that they are evaluated in the same order as they would have been in the loop,
6229 // and therefore throw exceptions in the same order. (So we don't use GTF_GLOBALLY_VISIBLE_SIDE_EFFECTS
6230 // here, since that includes exceptions.)
6233 // If it's a call, it must be a helper call that does not mutate the heap.
6234 // Further, if it may run a cctor, it must be labeled as "Hoistable"
6235 // (meaning it won't run a cctor because the class is not precise-init).
6236 GenTreeCall* call = tree->AsCall();
6237 if (call->gtCallType != CT_HELPER)
6239 *pFirstBlockAndBeforeSideEffect = false;
6243 CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
6244 if (s_helperCallProperties.MutatesHeap(helpFunc))
6246 *pFirstBlockAndBeforeSideEffect = false;
6248 else if (s_helperCallProperties.MayRunCctor(helpFunc) && (call->gtFlags & GTF_CALL_HOISTABLE) == 0)
6250 *pFirstBlockAndBeforeSideEffect = false;
6254 else if (tree->OperIsAssignment())
6256 // If the LHS of the assignment has a global reference, then assume it's a global side effect.
6257 GenTreePtr lhs = tree->gtOp.gtOp1;
6258 if (lhs->gtFlags & GTF_GLOB_REF)
6260 *pFirstBlockAndBeforeSideEffect = false;
6263 else if (tree->OperIsCopyBlkOp())
6265 GenTreePtr args = tree->gtOp.gtOp1;
6266 assert(args->OperGet() == GT_LIST);
6267 if (args->gtOp.gtOp1->gtFlags & GTF_GLOB_REF)
6269 *pFirstBlockAndBeforeSideEffect = false;
6274 // If this 'tree' is hoistable then we return and the caller will
6275 // decide to hoist it as part of larger hoistable expression.
6277 if (!treeIsHoistable)
6279 // We are not hoistable so we will now hoist any hoistable children.
6281 for (unsigned childNum = 0; childNum < nChildren; childNum++)
6283 if (childrenHoistable[childNum])
6285 // We can't hoist the LHS of an assignment, isn't a real use.
6286 if (childNum == 0 && (tree->OperIsAssignment()))
6291 GenTreePtr child = tree->GetChild(childNum);
6293 // We try to hoist this 'child' tree
6294 optHoistCandidate(child, lnum, hoistCtxt);
6299 *pHoistable = treeIsHoistable;
6300 return treeIsInvariant;
6303 void Compiler::optHoistCandidate(GenTreePtr tree, unsigned lnum, LoopHoistContext* hoistCtxt)
6305 if (lnum == BasicBlock::NOT_IN_LOOP)
6307 // The hoisted expression isn't valid at any loop head so don't hoist this expression.
6311 // The outer loop also must be suitable for hoisting...
6312 if ((optLoopTable[lnum].lpFlags & LPFLG_HOISTABLE) == 0)
6317 // If the hoisted expression isn't valid at this loop head then break
6318 if (!optTreeIsValidAtLoopHead(tree, lnum))
6323 // It must pass the hoistable profitablity tests for this loop level
6324 if (!optIsProfitableToHoistableTree(tree, lnum))
6330 if (hoistCtxt->m_hoistedInParentLoops.Lookup(tree->gtVNPair.GetLiberal(), &b))
6332 // already hoisted in a parent loop, so don't hoist this expression.
6336 if (hoistCtxt->GetHoistedInCurLoop(this)->Lookup(tree->gtVNPair.GetLiberal(), &b))
6338 // already hoisted this expression in the current loop, so don't hoist this expression.
6342 // Expression can be hoisted
6343 optPerformHoistExpr(tree, lnum);
6345 // Increment lpHoistedExprCount or lpHoistedFPExprCount
6346 if (!varTypeIsFloating(tree->TypeGet()))
6348 optLoopTable[lnum].lpHoistedExprCount++;
6349 #ifndef _TARGET_64BIT_
6350 // For our 32-bit targets Long types take two registers.
6351 if (varTypeIsLong(tree->TypeGet()))
6353 optLoopTable[lnum].lpHoistedExprCount++;
6357 else // Floating point expr hoisted
6359 optLoopTable[lnum].lpHoistedFPExprCount++;
6362 // Record the hoisted expression in hoistCtxt
6363 hoistCtxt->GetHoistedInCurLoop(this)->Set(tree->gtVNPair.GetLiberal(), true);
6366 bool Compiler::optVNIsLoopInvariant(ValueNum vn, unsigned lnum, VNToBoolMap* loopVnInvariantCache)
6368 // If it is not a VN, is not loop-invariant.
6369 if (vn == ValueNumStore::NoVN)
6374 // We'll always short-circuit constants.
6375 if (vnStore->IsVNConstant(vn) || vn == vnStore->VNForVoid())
6380 // If we've done this query previously, don't repeat.
6381 bool previousRes = false;
6382 if (loopVnInvariantCache->Lookup(vn, &previousRes))
6389 if (vnStore->GetVNFunc(vn, &funcApp))
6391 if (funcApp.m_func == VNF_PhiDef)
6393 // First, make sure it's a "proper" phi -- the definition is a Phi application.
6394 VNFuncApp phiDefValFuncApp;
6395 if (!vnStore->GetVNFunc(funcApp.m_args[2], &phiDefValFuncApp) || phiDefValFuncApp.m_func != VNF_Phi)
6397 // It's not *really* a definition, rather a pass-through of some other VN.
6398 // (This could occur, say if both sides of an if-then-else diamond made the
6399 // same assignment to a variable.)
6400 res = optVNIsLoopInvariant(funcApp.m_args[2], lnum, loopVnInvariantCache);
6404 // Is the definition within the loop? If so, is not loop-invariant.
6405 unsigned lclNum = funcApp.m_args[0];
6406 unsigned ssaNum = funcApp.m_args[1];
6407 LclSsaVarDsc* ssaDef = lvaTable[lclNum].GetPerSsaData(ssaNum);
6408 res = !optLoopContains(lnum, ssaDef->m_defLoc.m_blk->bbNatLoopNum);
6411 else if (funcApp.m_func == VNF_PhiHeapDef)
6413 BasicBlock* defnBlk = reinterpret_cast<BasicBlock*>(vnStore->ConstantValue<ssize_t>(funcApp.m_args[0]));
6414 res = !optLoopContains(lnum, defnBlk->bbNatLoopNum);
6418 for (unsigned i = 0; i < funcApp.m_arity; i++)
6420 // TODO-CQ: We need to either make sure that *all* VN functions
6421 // always take VN args, or else have a list of arg positions to exempt, as implicitly
6423 if (!optVNIsLoopInvariant(funcApp.m_args[i], lnum, loopVnInvariantCache))
6433 // Non-function "new, unique" VN's may be annotated with the loop nest where
6434 // their definition occurs.
6435 BasicBlock::loopNumber vnLoopNum = vnStore->LoopOfVN(vn);
6437 if (vnLoopNum == MAX_LOOP_NUM)
6443 res = !optLoopContains(lnum, vnLoopNum);
6447 loopVnInvariantCache->Set(vn, res);
6451 bool Compiler::optTreeIsValidAtLoopHead(GenTreePtr tree, unsigned lnum)
6453 if (tree->OperIsLocal())
6455 GenTreeLclVarCommon* lclVar = tree->AsLclVarCommon();
6456 unsigned lclNum = lclVar->gtLclNum;
6458 // The lvlVar must be have an Ssa tracked lifetime
6459 if (fgExcludeFromSsa(lclNum))
6464 // If the loop does not contains the SSA def we can hoist it.
6465 if (!optLoopTable[lnum].lpContains(lvaTable[lclNum].GetPerSsaData(lclVar->GetSsaNum())->m_defLoc.m_blk))
6470 else if (tree->OperIsConst())
6474 else // If every one of the children nodes are valid at this Loop's Head.
6476 unsigned nChildren = tree->NumChildren();
6477 for (unsigned childNum = 0; childNum < nChildren; childNum++)
6479 if (!optTreeIsValidAtLoopHead(tree->GetChild(childNum), lnum))
6489 /*****************************************************************************
6491 * Creates a pre-header block for the given loop - a preheader is a BBJ_NONE
6492 * header. The pre-header will replace the current lpHead in the loop table.
6493 * The loop has to be a do-while loop. Thus, all blocks dominated by lpHead
6494 * will also be dominated by the loop-top, lpHead->bbNext.
6498 void Compiler::fgCreateLoopPreHeader(unsigned lnum)
6500 LoopDsc* pLoopDsc = &optLoopTable[lnum];
6502 /* This loop has to be a "do-while" loop */
6504 assert(pLoopDsc->lpFlags & LPFLG_DO_WHILE);
6506 /* Have we already created a loop-preheader block? */
6508 if (pLoopDsc->lpFlags & LPFLG_HAS_PREHEAD)
6513 BasicBlock* head = pLoopDsc->lpHead;
6514 BasicBlock* top = pLoopDsc->lpTop;
6515 BasicBlock* entry = pLoopDsc->lpEntry;
6517 // if 'entry' and 'head' are in different try regions then we won't be able to hoist
6518 if (!BasicBlock::sameTryRegion(head, entry))
6523 // Ensure that lpHead always dominates lpEntry
6525 noway_assert(fgDominate(head, entry));
6527 /* Get hold of the first block of the loop body */
6529 assert(top == entry);
6531 /* Allocate a new basic block */
6533 BasicBlock* preHead = bbNewBasicBlock(BBJ_NONE);
6534 preHead->bbFlags |= BBF_INTERNAL | BBF_LOOP_PREHEADER;
6536 // Must set IL code offset
6537 preHead->bbCodeOffs = top->bbCodeOffs;
6539 // Set the default value of the preHead weight in case we don't have
6540 // valid profile data and since this blocks weight is just an estimate
6541 // we clear any BBF_PROF_WEIGHT flag that we may have picked up from head.
6543 preHead->inheritWeight(head);
6544 preHead->bbFlags &= ~BBF_PROF_WEIGHT;
6549 printf("\nCreated PreHeader (BB%02u) for loop L%02u (BB%02u - BB%02u), with weight = %s\n", preHead->bbNum,
6550 lnum, top->bbNum, pLoopDsc->lpBottom->bbNum, refCntWtd2str(preHead->getBBWeight(this)));
6554 // The preheader block is part of the containing loop (if any).
6555 preHead->bbNatLoopNum = pLoopDsc->lpParent;
6557 if (fgIsUsingProfileWeights() && (head->bbJumpKind == BBJ_COND))
6559 if ((head->bbWeight == 0) || (head->bbNext->bbWeight == 0))
6561 preHead->bbWeight = 0;
6562 preHead->bbFlags |= BBF_RUN_RARELY;
6566 bool allValidProfileWeights = ((head->bbFlags & BBF_PROF_WEIGHT) != 0) &&
6567 ((head->bbJumpDest->bbFlags & BBF_PROF_WEIGHT) != 0) &&
6568 ((head->bbNext->bbFlags & BBF_PROF_WEIGHT) != 0);
6570 if (allValidProfileWeights)
6572 double loopEnteredCount;
6573 double loopSkippedCount;
6575 if (fgHaveValidEdgeWeights)
6577 flowList* edgeToNext = fgGetPredForBlock(head->bbNext, head);
6578 flowList* edgeToJump = fgGetPredForBlock(head->bbJumpDest, head);
6579 noway_assert(edgeToNext != nullptr);
6580 noway_assert(edgeToJump != nullptr);
6583 ((double)edgeToNext->flEdgeWeightMin + (double)edgeToNext->flEdgeWeightMax) / 2.0;
6585 ((double)edgeToJump->flEdgeWeightMin + (double)edgeToJump->flEdgeWeightMax) / 2.0;
6589 loopEnteredCount = (double)head->bbNext->bbWeight;
6590 loopSkippedCount = (double)head->bbJumpDest->bbWeight;
6593 double loopTakenRatio = loopEnteredCount / (loopEnteredCount + loopSkippedCount);
6595 // Calculate a good approximation of the preHead's block weight
6596 unsigned preHeadWeight = (unsigned)(((double)head->bbWeight * loopTakenRatio) + 0.5);
6597 preHead->setBBWeight(max(preHeadWeight, 1));
6598 noway_assert(!preHead->isRunRarely());
6603 // Link in the preHead block.
6604 fgInsertBBbefore(top, preHead);
6606 // Ideally we would re-run SSA and VN if we optimized by doing loop hoisting.
6607 // However, that is too expensive at this point. Instead, we update the phi
6608 // node block references, if we created pre-header block due to hoisting.
6609 // This is sufficient because any definition participating in SSA that flowed
6610 // into the phi via the loop header block will now flow through the preheader
6611 // block from the header block.
6613 for (GenTreePtr stmt = top->bbTreeList; stmt; stmt = stmt->gtNext)
6615 GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
6616 if (tree->OperGet() != GT_ASG)
6620 GenTreePtr op2 = tree->gtGetOp2();
6621 if (op2->OperGet() != GT_PHI)
6625 GenTreeArgList* args = op2->gtGetOp1()->AsArgList();
6626 while (args != nullptr)
6628 GenTreePhiArg* phiArg = args->Current()->AsPhiArg();
6629 if (phiArg->gtPredBB == head)
6631 phiArg->gtPredBB = preHead;
6633 args = args->Rest();
6637 // The handler can't begin at the top of the loop. If it did, it would be incorrect
6638 // to set the handler index on the pre header without updating the exception table.
6639 noway_assert(!top->hasHndIndex() || fgFirstBlockOfHandler(top) != top);
6641 // Update the EH table to make the hoisted block part of the loop's EH block.
6642 fgExtendEHRegionBefore(top);
6644 // TODO-CQ: set dominators for this block, to allow loop optimizations requiring them
6645 // (e.g: hoisting expression in a loop with the same 'head' as this one)
6647 /* Update the loop entry */
6649 pLoopDsc->lpHead = preHead;
6650 pLoopDsc->lpFlags |= LPFLG_HAS_PREHEAD;
6652 /* The new block becomes the 'head' of the loop - update bbRefs and bbPreds
6653 All predecessors of 'beg', (which is the entry in the loop)
6654 now have to jump to 'preHead', unless they are dominated by 'head' */
6656 preHead->bbRefs = 0;
6657 fgAddRefPred(preHead, head);
6658 bool checkNestedLoops = false;
6660 for (flowList* pred = top->bbPreds; pred; pred = pred->flNext)
6662 BasicBlock* predBlock = pred->flBlock;
6664 if (fgDominate(top, predBlock))
6666 // note: if 'top' dominates predBlock, 'head' dominates predBlock too
6667 // (we know that 'head' dominates 'top'), but using 'top' instead of
6668 // 'head' in the test allows us to not enter here if 'predBlock == head'
6670 if (predBlock != pLoopDsc->lpBottom)
6672 noway_assert(predBlock != head);
6673 checkNestedLoops = true;
6678 switch (predBlock->bbJumpKind)
6681 noway_assert(predBlock == head);
6685 if (predBlock == head)
6687 noway_assert(predBlock->bbJumpDest != top);
6693 case BBJ_EHCATCHRET:
6694 noway_assert(predBlock->bbJumpDest == top);
6695 predBlock->bbJumpDest = preHead;
6696 preHead->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
6698 if (predBlock == head)
6700 // This is essentially the same case of predBlock being a BBJ_NONE. We may not be
6701 // able to make this a BBJ_NONE if it's an internal block (for example, a leave).
6702 // Just break, pred will be removed after switch.
6706 fgRemoveRefPred(top, predBlock);
6707 fgAddRefPred(preHead, predBlock);
6713 jumpCnt = predBlock->bbJumpSwt->bbsCount;
6714 BasicBlock** jumpTab;
6715 jumpTab = predBlock->bbJumpSwt->bbsDstTab;
6720 if ((*jumpTab) == top)
6722 (*jumpTab) = preHead;
6724 fgRemoveRefPred(top, predBlock);
6725 fgAddRefPred(preHead, predBlock);
6726 preHead->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
6728 } while (++jumpTab, --jumpCnt);
6731 noway_assert(!"Unexpected bbJumpKind");
6736 noway_assert(!fgGetPredForBlock(top, preHead));
6737 fgRemoveRefPred(top, head);
6738 fgAddRefPred(top, preHead);
6741 If we found at least one back-edge in the flowgraph pointing to the top/entry of the loop
6742 (other than the back-edge of the loop we are considering) then we likely have nested
6743 do-while loops with the same entry block and inserting the preheader block changes the head
6744 of all the nested loops. Now we will update this piece of information in the loop table, and
6745 mark all nested loops as having a preheader (the preheader block can be shared among all nested
6746 do-while loops with the same entry block).
6748 if (checkNestedLoops)
6750 for (unsigned l = 0; l < optLoopCount; l++)
6752 if (optLoopTable[l].lpHead == head)
6754 noway_assert(l != lnum); // pLoopDsc->lpHead was already changed from 'head' to 'preHead'
6755 noway_assert(optLoopTable[l].lpEntry == top);
6756 optUpdateLoopHead(l, optLoopTable[l].lpHead, preHead);
6757 optLoopTable[l].lpFlags |= LPFLG_HAS_PREHEAD;
6761 printf("Same PreHeader (BB%02u) can be used for loop L%02u (BB%02u - BB%02u)\n\n", preHead->bbNum,
6762 l, top->bbNum, optLoopTable[l].lpBottom->bbNum);
6770 bool Compiler::optBlockIsLoopEntry(BasicBlock* blk, unsigned* pLnum)
6772 unsigned lnum = blk->bbNatLoopNum;
6773 while (lnum != BasicBlock::NOT_IN_LOOP)
6775 if (optLoopTable[lnum].lpEntry == blk)
6780 lnum = optLoopTable[lnum].lpParent;
6785 void Compiler::optComputeLoopSideEffects()
6788 for (lnum = 0; lnum < optLoopCount; lnum++)
6790 VarSetOps::AssignNoCopy(this, optLoopTable[lnum].lpVarInOut, VarSetOps::MakeEmpty(this));
6791 VarSetOps::AssignNoCopy(this, optLoopTable[lnum].lpVarUseDef, VarSetOps::MakeEmpty(this));
6792 optLoopTable[lnum].lpContainsCall = false;
6795 for (lnum = 0; lnum < optLoopCount; lnum++)
6797 if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED)
6802 if (optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP)
6803 { // Is outermost...
6804 optComputeLoopNestSideEffects(lnum);
6808 VarSetOps::AssignNoCopy(this, lvaFloatVars, VarSetOps::MakeEmpty(this));
6809 #ifndef _TARGET_64BIT_
6810 VarSetOps::AssignNoCopy(this, lvaLongVars, VarSetOps::MakeEmpty(this));
6813 for (unsigned i = 0; i < lvaCount; i++)
6815 LclVarDsc* varDsc = &lvaTable[i];
6816 if (varDsc->lvTracked)
6818 if (varTypeIsFloating(varDsc->lvType))
6820 VarSetOps::AddElemD(this, lvaFloatVars, varDsc->lvVarIndex);
6822 #ifndef _TARGET_64BIT_
6823 else if (varTypeIsLong(varDsc->lvType))
6825 VarSetOps::AddElemD(this, lvaLongVars, varDsc->lvVarIndex);
6832 void Compiler::optComputeLoopNestSideEffects(unsigned lnum)
6834 assert(optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP); // Requires: lnum is outermost.
6835 BasicBlock* botNext = optLoopTable[lnum].lpBottom->bbNext;
6836 for (BasicBlock* bbInLoop = optLoopTable[lnum].lpFirst; bbInLoop != botNext; bbInLoop = bbInLoop->bbNext)
6838 optComputeLoopSideEffectsOfBlock(bbInLoop);
6842 void Compiler::optComputeLoopSideEffectsOfBlock(BasicBlock* blk)
6844 unsigned mostNestedLoop = blk->bbNatLoopNum;
6845 assert(mostNestedLoop != BasicBlock::NOT_IN_LOOP);
6847 AddVariableLivenessAllContainingLoops(mostNestedLoop, blk);
6849 bool heapHavoc = false; // True ==> there's a call or a memory store that has arbitrary heap effects.
6851 // Now iterate over the remaining statements, and their trees.
6852 for (GenTreePtr stmts = blk->FirstNonPhiDef(); (stmts != nullptr); stmts = stmts->gtNext)
6854 for (GenTreePtr tree = stmts->gtStmt.gtStmtList; (tree != nullptr); tree = tree->gtNext)
6856 genTreeOps oper = tree->OperGet();
6858 // Even after we set heapHavoc we still may want to know if a loop contains calls
6861 if (oper == GT_CALL)
6863 // Record that this loop contains a call
6864 AddContainsCallAllContainingLoops(mostNestedLoop);
6867 // If we just set lpContainsCall or it was previously set
6868 if (optLoopTable[mostNestedLoop].lpContainsCall)
6870 // We can early exit after both heapHavoc and lpContainsCall are both set to true.
6874 // We are just looking for GT_CALL nodes after heapHavoc was set.
6878 // otherwise heapHavoc is not set
6881 // This body is a distillation of the heap-side effect code of value numbering.
6882 // We also do a very limited analysis if byref PtrTo values, to cover some cases
6883 // that the compiler creates.
6885 if (GenTree::OperIsAssignment(oper))
6887 GenTreePtr lhs = tree->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true);
6889 if (lhs->OperGet() == GT_IND)
6891 GenTreePtr arg = lhs->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true);
6892 FieldSeqNode* fldSeqArrElem = nullptr;
6894 if ((tree->gtFlags & GTF_IND_VOLATILE) != 0)
6902 if (arg->TypeGet() == TYP_BYREF && arg->OperGet() == GT_LCL_VAR)
6904 // If it's a local byref for which we recorded a value number, use that...
6905 GenTreeLclVar* argLcl = arg->AsLclVar();
6906 if (!fgExcludeFromSsa(argLcl->GetLclNum()))
6909 lvaTable[argLcl->GetLclNum()].GetPerSsaData(argLcl->GetSsaNum())->m_vnPair.GetLiberal();
6911 if (argVN != ValueNumStore::NoVN && vnStore->GetVNFunc(argVN, &funcApp) &&
6912 funcApp.m_func == VNF_PtrToArrElem)
6914 assert(vnStore->IsVNHandle(funcApp.m_args[0]));
6915 CORINFO_CLASS_HANDLE elemType =
6916 CORINFO_CLASS_HANDLE(vnStore->ConstantValue<size_t>(funcApp.m_args[0]));
6917 AddModifiedElemTypeAllContainingLoops(mostNestedLoop, elemType);
6918 // Don't set heapHavoc below.
6925 // Is the LHS an array index expression?
6926 else if (lhs->ParseArrayElemForm(this, &arrInfo, &fldSeqArrElem))
6928 // We actually ignore "fldSeq" -- any modification to an S[], at any
6929 // field of "S", will lose all information about the array type.
6930 CORINFO_CLASS_HANDLE elemTypeEq = EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType);
6931 AddModifiedElemTypeAllContainingLoops(mostNestedLoop, elemTypeEq);
6935 // We are only interested in IsFieldAddr()'s fldSeq out parameter.
6937 GenTreePtr obj = nullptr; // unused
6938 GenTreePtr staticOffset = nullptr; // unused
6939 FieldSeqNode* fldSeq = nullptr;
6941 if (arg->IsFieldAddr(this, &obj, &staticOffset, &fldSeq) &&
6942 (fldSeq != FieldSeqStore::NotAField()))
6944 // Get the first (object) field from field seq. Heap[field] will yield the "field map".
6945 assert(fldSeq != nullptr);
6946 if (fldSeq->IsFirstElemFieldSeq())
6948 fldSeq = fldSeq->m_next;
6949 assert(fldSeq != nullptr);
6952 AddModifiedFieldAllContainingLoops(mostNestedLoop, fldSeq->m_fieldHnd);
6960 else if (lhs->OperIsBlk())
6962 GenTreeLclVarCommon* lclVarTree;
6964 if (!tree->DefinesLocal(this, &lclVarTree, &isEntire))
6966 // For now, assume arbitrary side effects on the heap...
6970 else if (lhs->OperGet() == GT_CLS_VAR)
6972 AddModifiedFieldAllContainingLoops(mostNestedLoop, lhs->gtClsVar.gtClsVarHnd);
6974 // Otherwise, must be local lhs form. I should assert that.
6975 else if (lhs->OperGet() == GT_LCL_VAR)
6977 GenTreeLclVar* lhsLcl = lhs->AsLclVar();
6978 GenTreePtr rhs = tree->gtOp.gtOp2;
6979 ValueNum rhsVN = rhs->gtVNPair.GetLiberal();
6980 // If we gave the RHS a value number, propagate it.
6981 if (rhsVN != ValueNumStore::NoVN)
6983 rhsVN = vnStore->VNNormVal(rhsVN);
6984 if (!fgExcludeFromSsa(lhsLcl->GetLclNum()))
6986 lvaTable[lhsLcl->GetLclNum()]
6987 .GetPerSsaData(lhsLcl->GetSsaNum())
6988 ->m_vnPair.SetLiberal(rhsVN);
6993 else // not GenTree::OperIsAssignment(oper)
6998 tree->gtVNPair = tree->gtOp.gtOp2->gtVNPair;
7002 // Is it an addr of a array index expression?
7004 GenTreePtr addrArg = tree->gtOp.gtOp1;
7005 if (addrArg->OperGet() == GT_IND)
7007 // Is the LHS an array index expression?
7008 if (addrArg->gtFlags & GTF_IND_ARR_INDEX)
7011 bool b = GetArrayInfoMap()->Lookup(addrArg, &arrInfo);
7013 CORINFO_CLASS_HANDLE elemType =
7014 EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType);
7015 tree->gtVNPair.SetBoth(
7016 vnStore->VNForFunc(TYP_BYREF, VNF_PtrToArrElem,
7017 vnStore->VNForHandle(ssize_t(elemType), GTF_ICON_CLASS_HDL),
7018 // The rest are dummy arguments.
7019 vnStore->VNForNull(), vnStore->VNForNull(),
7020 vnStore->VNForNull()));
7026 case GT_LOCKADD: // Binop
7027 case GT_XADD: // Binop
7028 case GT_XCHG: // Binop
7029 case GT_CMPXCHG: // Specialop
7037 GenTreeCall* call = tree->AsCall();
7039 // Record that this loop contains a call
7040 AddContainsCallAllContainingLoops(mostNestedLoop);
7042 if (call->gtCallType == CT_HELPER)
7044 CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
7045 if (s_helperCallProperties.MutatesHeap(helpFunc))
7049 else if (s_helperCallProperties.MayRunCctor(helpFunc))
7051 // If the call is labeled as "Hoistable", then we've checked the
7052 // class that would be constructed, and it is not precise-init, so
7053 // the cctor will not be run by this call. Otherwise, it might be,
7054 // and might have arbitrary side effects.
7055 if ((tree->gtFlags & GTF_CALL_HOISTABLE) == 0)
7069 // All other gtOper node kinds, leave 'heapHavoc' unchanged (i.e. false)
7078 // Record that all loops containing this block have heap havoc effects.
7079 unsigned lnum = mostNestedLoop;
7080 while (lnum != BasicBlock::NOT_IN_LOOP)
7082 optLoopTable[lnum].lpLoopHasHeapHavoc = true;
7083 lnum = optLoopTable[lnum].lpParent;
7088 // Marks the containsCall information to "lnum" and any parent loops.
7089 void Compiler::AddContainsCallAllContainingLoops(unsigned lnum)
7091 assert(0 <= lnum && lnum < optLoopCount);
7092 while (lnum != BasicBlock::NOT_IN_LOOP)
7094 optLoopTable[lnum].lpContainsCall = true;
7095 lnum = optLoopTable[lnum].lpParent;
7099 // Adds the variable liveness information for 'blk' to 'this' LoopDsc
7100 void Compiler::LoopDsc::AddVariableLiveness(Compiler* comp, BasicBlock* blk)
7102 VarSetOps::UnionD(comp, this->lpVarInOut, blk->bbLiveIn);
7103 VarSetOps::UnionD(comp, this->lpVarInOut, blk->bbLiveOut);
7105 VarSetOps::UnionD(comp, this->lpVarUseDef, blk->bbVarUse);
7106 VarSetOps::UnionD(comp, this->lpVarUseDef, blk->bbVarDef);
7109 // Adds the variable liveness information for 'blk' to "lnum" and any parent loops.
7110 void Compiler::AddVariableLivenessAllContainingLoops(unsigned lnum, BasicBlock* blk)
7112 assert(0 <= lnum && lnum < optLoopCount);
7113 while (lnum != BasicBlock::NOT_IN_LOOP)
7115 optLoopTable[lnum].AddVariableLiveness(this, blk);
7116 lnum = optLoopTable[lnum].lpParent;
7120 // Adds "fldHnd" to the set of modified fields of "lnum" and any parent loops.
7121 void Compiler::AddModifiedFieldAllContainingLoops(unsigned lnum, CORINFO_FIELD_HANDLE fldHnd)
7123 assert(0 <= lnum && lnum < optLoopCount);
7124 while (lnum != BasicBlock::NOT_IN_LOOP)
7126 optLoopTable[lnum].AddModifiedField(this, fldHnd);
7127 lnum = optLoopTable[lnum].lpParent;
7131 // Adds "elemType" to the set of modified array element types of "lnum" and any parent loops.
7132 void Compiler::AddModifiedElemTypeAllContainingLoops(unsigned lnum, CORINFO_CLASS_HANDLE elemClsHnd)
7134 assert(0 <= lnum && lnum < optLoopCount);
7135 while (lnum != BasicBlock::NOT_IN_LOOP)
7137 optLoopTable[lnum].AddModifiedElemType(this, elemClsHnd);
7138 lnum = optLoopTable[lnum].lpParent;
7142 /*****************************************************************************
7144 * Helper passed to Compiler::fgWalkAllTreesPre() to decrement the LclVar usage counts
7145 * The 'keepList'is either a single tree or a list of trees that are formed by
7146 * one or more GT_COMMA nodes. It is the kept side-effects as returned by the
7147 * gtExtractSideEffList method.
7151 Compiler::fgWalkResult Compiler::optRemoveTreeVisitor(GenTreePtr* pTree, fgWalkData* data)
7153 GenTreePtr tree = *pTree;
7154 Compiler* comp = data->compiler;
7155 GenTreePtr keepList = (GenTreePtr)(data->pCallbackData);
7157 // We may have a non-NULL side effect list that is being kept
7161 GenTreePtr keptTree = keepList;
7162 while (keptTree->OperGet() == GT_COMMA)
7164 assert(keptTree->OperKind() & GTK_SMPOP);
7165 GenTreePtr op1 = keptTree->gtOp.gtOp1;
7166 GenTreePtr op2 = keptTree->gtGetOp2();
7168 // For the GT_COMMA case the op1 is part of the orginal CSE tree
7169 // that is being kept because it contains some side-effect
7173 // This tree and all of its sub trees are being kept.
7174 return WALK_SKIP_SUBTREES;
7177 // For the GT_COMMA case the op2 are the remaining side-effects of the orginal CSE tree
7178 // which can again be another GT_COMMA or the final side-effect part
7182 if (tree == keptTree)
7184 // This tree and all of its sub trees are being kept.
7185 return WALK_SKIP_SUBTREES;
7189 // This node is being removed from the graph of GenTreePtr
7191 // Look for any local variable references
7193 if (tree->gtOper == GT_LCL_VAR && comp->lvaLocalVarRefCounted)
7198 /* This variable ref is going away, decrease its ref counts */
7200 lclNum = tree->gtLclVarCommon.gtLclNum;
7201 assert(lclNum < comp->lvaCount);
7202 varDsc = comp->lvaTable + lclNum;
7204 // make sure it's been initialized
7205 assert(comp->compCurBB != nullptr);
7206 assert(comp->compCurBB->bbWeight <= BB_MAX_WEIGHT);
7208 /* Decrement its lvRefCnt and lvRefCntWtd */
7210 // Use getBBWeight to determine the proper block weight.
7211 // This impacts the block weights when we have IBC data.
7212 varDsc->decRefCnts(comp->compCurBB->getBBWeight(comp), comp);
7215 return WALK_CONTINUE;
7218 /*****************************************************************************
7220 * Routine called to decrement the LclVar ref counts when removing a tree
7221 * during the remove RangeCheck phase.
7222 * This method will decrement the refcounts for any LclVars used below 'deadTree',
7223 * unless the node is found in the 'keepList' (which are saved side effects)
7224 * The keepList is communicated using the walkData.pCallbackData field
7225 * Also the compCurBB must be set to the current BasicBlock which contains
7226 * 'deadTree' as we need to fetch the block weight when decrementing the ref counts.
7229 void Compiler::optRemoveTree(GenTreePtr deadTree, GenTreePtr keepList)
7231 // We communicate this value using the walkData.pCallbackData field
7233 fgWalkTreePre(&deadTree, optRemoveTreeVisitor, (void*)keepList);
7236 /*****************************************************************************
7238 * Given an array index node, mark it as not needing a range check.
7241 void Compiler::optRemoveRangeCheck(
7242 GenTreePtr tree, GenTreePtr stmt, bool updateCSEcounts, unsigned sideEffFlags, bool forceRemove)
7258 noway_assert(!"can't remove range checks without REARRANGE_ADDS right now");
7261 noway_assert(stmt->gtOper == GT_STMT);
7262 noway_assert(tree->gtOper == GT_COMMA);
7263 noway_assert(tree->gtOp.gtOp1->gtOper == GT_ARR_BOUNDS_CHECK);
7264 noway_assert(forceRemove || optIsRangeCheckRemovable(tree->gtOp.gtOp1));
7266 GenTreeBoundsChk* bndsChk = tree->gtOp.gtOp1->AsBoundsChk();
7271 printf("Before optRemoveRangeCheck:\n");
7276 GenTreePtr sideEffList = nullptr;
7279 gtExtractSideEffList(tree->gtOp.gtOp1, &sideEffList, sideEffFlags);
7282 // Decrement the ref counts for any LclVars that are being deleted
7284 optRemoveTree(tree->gtOp.gtOp1, sideEffList);
7286 // Just replace the bndsChk with a NOP as an operand to the GT_COMMA, if there are no side effects.
7287 tree->gtOp.gtOp1 = (sideEffList != nullptr) ? sideEffList : gtNewNothingNode();
7289 // TODO-CQ: We should also remove the GT_COMMA, but in any case we can no longer CSE the GT_COMMA.
7290 tree->gtFlags |= GTF_DONT_CSE;
7292 /* Recalculate the gtCostSz, etc... */
7293 gtSetStmtInfo(stmt);
7295 /* Re-thread the nodes if necessary */
7296 if (fgStmtListThreaded)
7304 printf("After optRemoveRangeCheck:\n");
7310 /*****************************************************************************
7311 * Return the scale in an array reference, given a pointer to the
7312 * multiplication node.
7315 ssize_t Compiler::optGetArrayRefScaleAndIndex(GenTreePtr mul, GenTreePtr* pIndex DEBUGARG(bool bRngChk))
7318 assert(mul->gtOper == GT_MUL || mul->gtOper == GT_LSH);
7319 assert(mul->gtOp.gtOp2->IsCnsIntOrI());
7321 ssize_t scale = mul->gtOp.gtOp2->gtIntConCommon.IconValue();
7323 if (mul->gtOper == GT_LSH)
7325 scale = ((ssize_t)1) << scale;
7328 GenTreePtr index = mul->gtOp.gtOp1;
7330 if (index->gtOper == GT_MUL && index->gtOp.gtOp2->IsCnsIntOrI())
7332 // case of two cascading multiplications for constant int (e.g. * 20 morphed to * 5 * 4):
7333 // When index->gtOper is GT_MUL and index->gtOp.gtOp2->gtOper is GT_CNS_INT (i.e. * 5),
7334 // we can bump up the scale from 4 to 5*4, and then change index to index->gtOp.gtOp1.
7335 // Otherwise, we cannot optimize it. We will simply keep the original scale and index.
7336 scale *= index->gtOp.gtOp2->gtIntConCommon.IconValue();
7337 index = index->gtOp.gtOp1;
7340 assert(!bRngChk || index->gtOper != GT_COMMA);
7350 /*****************************************************************************
7351 * Find the last assignment to of the local variable in the block. Return
7352 * RHS or NULL. If any local variable in the RHS has been killed in
7353 * intervening code, return NULL. If the variable being searched for is killed
7354 * in the intervening code, return NULL.
7358 GenTreePtr Compiler::optFindLocalInit(BasicBlock* block,
7360 VARSET_TP* pKilledInOut,
7361 bool* pLhsRhsKilledAfterInit)
7363 assert(pKilledInOut);
7364 assert(pLhsRhsKilledAfterInit);
7366 *pLhsRhsKilledAfterInit = false;
7368 unsigned LclNum = local->gtLclVarCommon.gtLclNum;
7370 GenTreePtr list = block->bbTreeList;
7371 if (list == nullptr)
7376 GenTreePtr rhs = nullptr;
7377 GenTreePtr stmt = list;
7380 stmt = stmt->gtPrev;
7381 if (stmt == nullptr)
7386 GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
7387 // If we encounter an assignment to a local variable,
7388 if ((tree->OperKind() & GTK_ASGOP) && tree->gtOp.gtOp1->gtOper == GT_LCL_VAR)
7390 // And the assigned variable equals the input local,
7391 if (tree->gtOp.gtOp1->gtLclVarCommon.gtLclNum == LclNum)
7393 // If the assignment is '=' and it is not a conditional, then return rhs.
7394 if (tree->gtOper == GT_ASG && !(tree->gtFlags & GTF_COLON_COND))
7396 rhs = tree->gtOp.gtOp2;
7398 // If the assignment is 'op=' or a conditional equal, then the search ends here,
7399 // as we found a kill to the input local.
7402 *pLhsRhsKilledAfterInit = true;
7403 assert(rhs == nullptr);
7409 LclVarDsc* varDsc = optIsTrackedLocal(tree->gtOp.gtOp1);
7410 if (varDsc == nullptr)
7414 VarSetOps::AddElemD(this, *pKilledInOut, varDsc->lvVarIndex);
7417 } while (stmt != list);
7424 // If any local in the RHS is killed in intervening code, or RHS has an indirection, return NULL.
7425 varRefKinds rhsRefs = VR_NONE;
7426 VARSET_TP VARSET_INIT_NOCOPY(rhsLocals, VarSetOps::UninitVal());
7427 bool b = lvaLclVarRefs(rhs, nullptr, &rhsRefs, &rhsLocals);
7428 if (!b || !VarSetOps::IsEmptyIntersection(this, rhsLocals, *pKilledInOut) || (rhsRefs != VR_NONE))
7430 // If RHS has been indirectly referenced, consider it a write and a kill.
7431 *pLhsRhsKilledAfterInit = true;
7438 /*****************************************************************************
7440 * Return true if "op1" is guaranteed to be less then or equal to "op2".
7445 bool Compiler::optIsNoMore(GenTreePtr op1, GenTreePtr op2, int add1, int add2)
7447 if (op1->gtOper == GT_CNS_INT && op2->gtOper == GT_CNS_INT)
7449 add1 += op1->gtIntCon.gtIconVal;
7450 add2 += op2->gtIntCon.gtIconVal;
7454 /* Check for +/- constant on either operand */
7456 if (op1->gtOper == GT_ADD && op1->gtOp.gtOp2->gtOper == GT_CNS_INT)
7458 add1 += op1->gtOp.gtOp2->gtIntCon.gtIconVal;
7459 op1 = op1->gtOp.gtOp1;
7462 if (op2->gtOper == GT_ADD && op2->gtOp.gtOp2->gtOper == GT_CNS_INT)
7464 add2 += op2->gtOp.gtOp2->gtIntCon.gtIconVal;
7465 op2 = op2->gtOp.gtOp1;
7468 /* We only allow local variable references */
7470 if (op1->gtOper != GT_LCL_VAR)
7472 if (op2->gtOper != GT_LCL_VAR)
7474 if (op1->gtLclVarCommon.gtLclNum != op2->gtLclVarCommon.gtLclNum)
7477 /* NOTE: Caller ensures that this variable has only one def */
7479 // printf("limit [%d]:\n", add1); gtDispTree(op1);
7480 // printf("size [%d]:\n", add2); gtDispTree(op2);
7484 return (bool)(add1 <= add2);
7489 //------------------------------------------------------------------------------
7490 // optObtainLoopCloningOpts: Identify optimization candidates and update
7491 // the "context" for array optimizations.
7494 // context - data structure where all loop cloning info is kept. The
7495 // optInfo fields of the context are updated with the
7496 // identified optimization candidates.
7498 void Compiler::optObtainLoopCloningOpts(LoopCloneContext* context)
7500 for (unsigned i = 0; i < optLoopCount; i++)
7502 JITDUMP("Considering loop %d to clone for optimizations.\n", i);
7503 if (optIsLoopClonable(i))
7505 if (!(optLoopTable[i].lpFlags & LPFLG_REMOVED))
7507 optIdentifyLoopOptInfo(i, context);
7510 JITDUMP("------------------------------------------------------------\n");
7515 //------------------------------------------------------------------------
7516 // optIdentifyLoopOptInfo: Identify loop optimization candidates an also
7517 // check if the loop is suitable for the optimizations performed.
7520 // loopNum - the current loop index for which conditions are derived.
7521 // context - data structure where all loop cloning candidates will be
7525 // If the loop is not suitable for the optimizations, return false - context
7526 // should not contain any optimization candidate for the loop if false.
7527 // Else return true.
7530 // Check if the loop is well formed for this optimization and identify the
7531 // optimization candidates and update the "context" parameter with all the
7532 // contextual information necessary to perform the optimization later.
7534 bool Compiler::optIdentifyLoopOptInfo(unsigned loopNum, LoopCloneContext* context)
7536 noway_assert(loopNum < optLoopCount);
7538 LoopDsc* pLoop = &optLoopTable[loopNum];
7540 if (!(pLoop->lpFlags & LPFLG_ITER))
7542 JITDUMP("> No iter flag on loop %d.\n", loopNum);
7546 unsigned ivLclNum = pLoop->lpIterVar();
7547 if (lvaVarAddrExposed(ivLclNum))
7549 JITDUMP("> Rejected V%02u as iter var because is address-exposed.\n", ivLclNum);
7553 BasicBlock* head = pLoop->lpHead;
7554 BasicBlock* end = pLoop->lpBottom;
7555 BasicBlock* beg = head->bbNext;
7557 if (end->bbJumpKind != BBJ_COND)
7559 JITDUMP("> Couldn't find termination test.\n");
7563 if (end->bbJumpDest != beg)
7565 JITDUMP("> Branch at loop 'end' not looping to 'begin'.\n");
7569 // TODO-CQ: CLONE: Mark increasing or decreasing loops.
7570 if ((pLoop->lpIterOper() != GT_ASG_ADD && pLoop->lpIterOper() != GT_ADD) || (pLoop->lpIterConst() != 1))
7572 JITDUMP("> Loop iteration operator not matching\n");
7576 if ((pLoop->lpFlags & LPFLG_CONST_LIMIT) == 0 && (pLoop->lpFlags & LPFLG_VAR_LIMIT) == 0 &&
7577 (pLoop->lpFlags & LPFLG_ARRLEN_LIMIT) == 0)
7579 JITDUMP("> Loop limit is neither constant, variable or array length\n");
7583 if (!(((pLoop->lpTestOper() == GT_LT || pLoop->lpTestOper() == GT_LE) &&
7584 (pLoop->lpIterOper() == GT_ADD || pLoop->lpIterOper() == GT_ASG_ADD)) ||
7585 ((pLoop->lpTestOper() == GT_GT || pLoop->lpTestOper() == GT_GE) &&
7586 (pLoop->lpIterOper() == GT_SUB || pLoop->lpIterOper() == GT_ASG_SUB))))
7588 JITDUMP("> Loop test (%s) doesn't agree with the direction (%s) of the pLoop->\n",
7589 GenTree::NodeName(pLoop->lpTestOper()), GenTree::NodeName(pLoop->lpIterOper()));
7593 if (!(pLoop->lpTestTree->OperKind() & GTK_RELOP) || !(pLoop->lpTestTree->gtFlags & GTF_RELOP_ZTT))
7595 JITDUMP("> Loop inversion NOT present, loop test [%06u] may not protect entry from head.\n",
7596 pLoop->lpTestTree->gtTreeID);
7601 GenTreePtr op1 = pLoop->lpIterator();
7602 noway_assert((op1->gtOper == GT_LCL_VAR) && (op1->gtLclVarCommon.gtLclNum == ivLclNum));
7605 JITDUMP("Checking blocks BB%02d..BB%02d for optimization candidates\n", beg->bbNum,
7606 end->bbNext ? end->bbNext->bbNum : 0);
7608 LoopCloneVisitorInfo info(context, loopNum, nullptr);
7609 for (BasicBlock* block = beg; block != end->bbNext; block = block->bbNext)
7612 for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
7615 fgWalkTreePre(&stmt->gtStmt.gtStmtExpr, optCanOptimizeByLoopCloningVisitor, &info, false, false);
7622 //---------------------------------------------------------------------------------------------------------------
7623 // optExtractArrIndex: Try to extract the array index from "tree".
7626 // tree the tree to be checked if it is the array [] operation.
7627 // result the extracted GT_INDEX information is updated in result.
7628 // lhsNum for the root level (function is recursive) callers should be BAD_VAR_NUM.
7631 // Returns true if array index can be extracted, else, return false. See assumption about
7632 // what will be extracted. The "result" variable's rank parameter is advanced for every
7633 // dimension of [] encountered.
7636 // Given a "tree" extract the GT_INDEX node in "result" as ArrIndex. In FlowGraph morph
7637 // we have converted a GT_INDEX tree into a scaled index base offset expression. We need
7638 // to reconstruct this to be able to know if this is an array access.
7641 // The method extracts only if the array base and indices are GT_LCL_VAR.
7643 // TODO-CQ: CLONE: After morph make sure this method extracts values before morph.
7645 // [000000001AF828D8] ---XG------- indir int
7646 // [000000001AF872C8] ------------ const long 16 Fseq[#FirstElem]
7647 // [000000001AF87340] ------------ + byref
7648 // [000000001AF87160] -------N---- const long 2
7649 // [000000001AF871D8] ------------ << long
7650 // [000000001AF870C0] ------------ cast long <- int
7651 // [000000001AF86F30] i----------- lclVar int V04 loc0
7652 // [000000001AF87250] ------------ + byref
7653 // [000000001AF86EB8] ------------ lclVar ref V01 arg1
7654 // [000000001AF87468] ---XG------- comma int
7655 // [000000001AF87020] ---X-------- arrBndsChk void
7656 // [000000001AF86FA8] ---X-------- arrLen int
7657 // [000000001AF827E8] ------------ lclVar ref V01 arg1
7658 // [000000001AF82860] ------------ lclVar int V04 loc0
7659 // [000000001AF829F0] -A-XG------- = int
7660 // [000000001AF82978] D------N---- lclVar int V06 tmp0
7662 bool Compiler::optExtractArrIndex(GenTreePtr tree, ArrIndex* result, unsigned lhsNum)
7664 if (tree->gtOper != GT_COMMA)
7668 GenTreePtr before = tree->gtGetOp1();
7669 if (before->gtOper != GT_ARR_BOUNDS_CHECK)
7673 GenTreeBoundsChk* arrBndsChk = before->AsBoundsChk();
7674 if (arrBndsChk->gtArrLen->gtGetOp1()->gtOper != GT_LCL_VAR)
7678 if (arrBndsChk->gtIndex->gtOper != GT_LCL_VAR)
7682 unsigned arrLcl = arrBndsChk->gtArrLen->gtGetOp1()->gtLclVarCommon.gtLclNum;
7683 if (lhsNum != BAD_VAR_NUM && arrLcl != lhsNum)
7688 unsigned indLcl = arrBndsChk->gtIndex->gtLclVarCommon.gtLclNum;
7690 GenTreePtr after = tree->gtGetOp2();
7692 if (after->gtOper != GT_IND)
7696 // It used to be the case that arrBndsChks for struct types would fail the previous check because
7697 // after->gtOper was an address (for a block op). In order to avoid asmDiffs we will for now
7698 // return false if the type of 'after' is a struct type. (This was causing us to clone loops
7699 // that we were not previously cloning.)
7700 // TODO-1stClassStructs: Remove this check to enable optimization of array bounds checks for struct
7702 if (varTypeIsStruct(after))
7707 GenTreePtr sibo = after->gtGetOp1();
7708 if (sibo->gtOper != GT_ADD)
7712 GenTreePtr sib = sibo->gtGetOp1();
7713 GenTreePtr ofs = sibo->gtGetOp2();
7714 if (ofs->gtOper != GT_CNS_INT)
7718 if (sib->gtOper != GT_ADD)
7722 GenTreePtr si = sib->gtGetOp2();
7723 GenTreePtr base = sib->gtGetOp1();
7724 if (si->gtOper != GT_LSH)
7728 if (base->OperGet() != GT_LCL_VAR || base->gtLclVarCommon.gtLclNum != arrLcl)
7732 GenTreePtr scale = si->gtGetOp2();
7733 GenTreePtr index = si->gtGetOp1();
7734 if (scale->gtOper != GT_CNS_INT)
7738 #ifdef _TARGET_AMD64_
7739 if (index->gtOper != GT_CAST)
7743 GenTreePtr indexVar = index->gtGetOp1();
7745 GenTreePtr indexVar = index;
7747 if (indexVar->gtOper != GT_LCL_VAR || indexVar->gtLclVarCommon.gtLclNum != indLcl)
7751 if (lhsNum == BAD_VAR_NUM)
7753 result->arrLcl = arrLcl;
7755 result->indLcls.Push(indLcl);
7756 result->bndsChks.Push(tree);
7757 result->useBlock = compCurBB;
7763 //---------------------------------------------------------------------------------------------------------------
7764 // optReconstructArrIndex: Reconstruct array index.
7767 // tree the tree to be checked if it is an array [][][] operation.
7768 // result the extracted GT_INDEX information.
7769 // lhsNum for the root level (function is recursive) callers should be BAD_VAR_NUM.
7772 // Returns true if array index can be extracted, else, return false. "rank" field in
7773 // "result" contains the array access depth. The "indLcls" fields contain the indices.
7776 // Recursively look for a list of array indices. In the example below, we encounter,
7777 // V03 = ((V05 = V00[V01]), (V05[V02])) which corresponds to access of V00[V01][V02]
7778 // The return value would then be:
7779 // ArrIndex result { arrLcl: V00, indLcls: [V01, V02], rank: 2 }
7781 // V00[V01][V02] would be morphed as:
7783 // [000000001B366848] ---XG------- indir int
7784 // [000000001B36BC50] ------------ V05 + (V02 << 2) + 16
7785 // [000000001B36C200] ---XG------- comma int
7786 // [000000001B36BDB8] ---X-------- arrBndsChk(V05, V02)
7787 // [000000001B36C278] -A-XG------- comma int
7788 // [000000001B366730] R--XG------- indir ref
7789 // [000000001B36C2F0] ------------ V00 + (V01 << 3) + 24
7790 // [000000001B36C818] ---XG------- comma ref
7791 // [000000001B36C458] ---X-------- arrBndsChk(V00, V01)
7792 // [000000001B36BB60] -A-XG------- = ref
7793 // [000000001B36BAE8] D------N---- lclVar ref V05 tmp2
7794 // [000000001B36A668] -A-XG------- = int
7795 // [000000001B36A5F0] D------N---- lclVar int V03 tmp0
7798 // The method extracts only if the array base and indices are GT_LCL_VAR.
7800 bool Compiler::optReconstructArrIndex(GenTreePtr tree, ArrIndex* result, unsigned lhsNum)
7802 // If we can extract "tree" (which is a top level comma) return.
7803 if (optExtractArrIndex(tree, result, lhsNum))
7807 // We have a comma (check if array base expr is computed in "before"), descend further.
7808 else if (tree->OperGet() == GT_COMMA)
7810 GenTreePtr before = tree->gtGetOp1();
7811 // "before" should evaluate an array base for the "after" indexing.
7812 if (before->OperGet() != GT_ASG)
7816 GenTreePtr lhs = before->gtGetOp1();
7817 GenTreePtr rhs = before->gtGetOp2();
7819 // "rhs" should contain an GT_INDEX
7820 if (!lhs->IsLocal() || !optReconstructArrIndex(rhs, result, lhsNum))
7824 unsigned lhsNum = lhs->gtLclVarCommon.gtLclNum;
7825 GenTreePtr after = tree->gtGetOp2();
7826 // Pass the "lhsNum", so we can verify if indeed it is used as the array base.
7827 return optExtractArrIndex(after, result, lhsNum);
7833 Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloningVisitor(GenTreePtr* pTree, Compiler::fgWalkData* data)
7835 return data->compiler->optCanOptimizeByLoopCloning(*pTree, (LoopCloneVisitorInfo*)data->pCallbackData);
7838 //-------------------------------------------------------------------------
7839 // optIsStackLocalInvariant: Is stack local invariant in loop.
7842 // loopNum The loop in which the variable is tested for invariance.
7843 // lclNum The local that is tested for invariance in the loop.
7846 // Returns true if the variable is loop invariant in loopNum.
7848 bool Compiler::optIsStackLocalInvariant(unsigned loopNum, unsigned lclNum)
7850 if (lvaVarAddrExposed(lclNum))
7854 if (optIsVarAssgLoop(loopNum, lclNum))
7861 //----------------------------------------------------------------------------------------------
7862 // optCanOptimizeByLoopCloning: Check if the tree can be optimized by loop cloning and if so,
7863 // identify as potential candidate and update the loop context.
7866 // tree The tree encountered during the tree walk.
7867 // info Supplies information about the current block or stmt in which the tree is.
7868 // Also supplies the "context" pointer for updating with loop cloning
7869 // candidates. Also supplies loopNum.
7872 // If array index can be reconstructed, check if the iter var of the loop matches the
7873 // array index var in some dim. Also ensure other index vars before the identified
7874 // dim are loop invariant.
7877 // Skip sub trees if the optimization candidate is identified or else continue walking
7879 Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTreePtr tree, LoopCloneVisitorInfo* info)
7881 ArrIndex arrIndex(getAllocator());
7883 // Check if array index can be optimized.
7884 if (optReconstructArrIndex(tree, &arrIndex, BAD_VAR_NUM))
7886 assert(tree->gtOper == GT_COMMA);
7890 JITDUMP("Found ArrIndex at tree ");
7892 printf(" which is equivalent to: ");
7897 if (!optIsStackLocalInvariant(info->loopNum, arrIndex.arrLcl))
7899 return WALK_SKIP_SUBTREES;
7902 // Walk the dimensions and see if iterVar of the loop is used as index.
7903 for (unsigned dim = 0; dim < arrIndex.rank; ++dim)
7905 // Is index variable also used as the loop iter var.
7906 if (arrIndex.indLcls[dim] == optLoopTable[info->loopNum].lpIterVar())
7908 // Check the previous indices are all loop invariant.
7909 for (unsigned dim2 = 0; dim2 < dim; ++dim2)
7911 if (optIsVarAssgLoop(info->loopNum, arrIndex.indLcls[dim2]))
7913 JITDUMP("V%02d is assigned in loop\n", arrIndex.indLcls[dim2]);
7914 return WALK_SKIP_SUBTREES;
7920 JITDUMP("Loop %d can be cloned for ArrIndex ", info->loopNum);
7922 JITDUMP(" on dim %d\n", dim);
7925 // Update the loop context.
7926 info->context->EnsureLoopOptInfo(info->loopNum)
7927 ->Push(new (this, CMK_LoopOpt) LcJaggedArrayOptInfo(arrIndex, dim, info->stmt));
7931 JITDUMP("Induction V%02d is not used as index on dim %d\n", optLoopTable[info->loopNum].lpIterVar(),
7935 return WALK_SKIP_SUBTREES;
7937 else if (tree->gtOper == GT_ARR_ELEM)
7939 // TODO-CQ: CLONE: Implement.
7940 return WALK_SKIP_SUBTREES;
7942 return WALK_CONTINUE;
7945 struct optRangeCheckDsc
7947 Compiler* pCompiler;
7951 Walk to make sure that only locals and constants are contained in the index
7954 Compiler::fgWalkResult Compiler::optValidRangeCheckIndex(GenTreePtr* pTree, fgWalkData* data)
7956 GenTreePtr tree = *pTree;
7957 optRangeCheckDsc* pData = (optRangeCheckDsc*)data->pCallbackData;
7959 if (tree->gtOper == GT_IND || tree->gtOper == GT_CLS_VAR || tree->gtOper == GT_FIELD || tree->gtOper == GT_LCL_FLD)
7961 pData->bValidIndex = false;
7965 if (tree->gtOper == GT_LCL_VAR)
7967 if (pData->pCompiler->lvaTable[tree->gtLclVarCommon.gtLclNum].lvAddrExposed)
7969 pData->bValidIndex = false;
7974 return WALK_CONTINUE;
7978 returns true if a range check can legally be removed (for the moment it checks
7979 that the array is a local array (non subject to racing conditions) and that the
7980 index is either a constant or a local
7982 bool Compiler::optIsRangeCheckRemovable(GenTreePtr tree)
7984 noway_assert(tree->gtOper == GT_ARR_BOUNDS_CHECK);
7985 GenTreeBoundsChk* bndsChk = tree->AsBoundsChk();
7986 GenTreePtr pArray = bndsChk->GetArray();
7987 if (pArray == nullptr && !bndsChk->gtArrLen->IsCnsIntOrI())
7991 GenTreePtr pIndex = bndsChk->gtIndex;
7993 // The length must be a constant (the pArray == NULL case) or the array reference must be a local.
7994 // Otherwise we can be targeted by malicious race-conditions.
7995 if (pArray != nullptr)
7997 if (pArray->gtOper != GT_LCL_VAR)
8003 printf("Can't remove range check if the array isn't referenced with a local\n");
8011 noway_assert(pArray->gtType == TYP_REF);
8012 noway_assert(pArray->gtLclVarCommon.gtLclNum < lvaCount);
8014 if (lvaTable[pArray->gtLclVarCommon.gtLclNum].lvAddrExposed)
8016 // If the array address has been taken, don't do the optimization
8017 // (this restriction can be lowered a bit, but i don't think it's worth it)
8018 CLANG_FORMAT_COMMENT_ANCHOR;
8022 printf("Can't remove range check if the array has its address taken\n");
8031 optRangeCheckDsc Data;
8032 Data.pCompiler = this;
8033 Data.bValidIndex = true;
8035 fgWalkTreePre(&pIndex, optValidRangeCheckIndex, &Data);
8037 if (!Data.bValidIndex)
8042 printf("Can't remove range check with this index");
8053 /******************************************************************************
8055 * Replace x==null with (x|x)==0 if x is a GC-type.
8056 * This will stress code-gen and the emitter to make sure they support such trees.
8061 void Compiler::optOptimizeBoolsGcStress(BasicBlock* condBlock)
8063 if (!compStressCompile(STRESS_OPT_BOOLS_GC, 20))
8068 noway_assert(condBlock->bbJumpKind == BBJ_COND);
8069 GenTreePtr condStmt = condBlock->bbTreeList->gtPrev->gtStmt.gtStmtExpr;
8071 noway_assert(condStmt->gtOper == GT_JTRUE);
8076 GenTreePtr comparand = optIsBoolCond(condStmt, &relop, &isBool);
8078 if (comparand == nullptr || !varTypeIsGC(comparand->TypeGet()))
8083 if (comparand->gtFlags & (GTF_ASG | GTF_CALL | GTF_ORDER_SIDEEFF))
8088 GenTreePtr comparandClone = gtCloneExpr(comparand);
8090 // Bump up the ref-counts of any variables in 'comparandClone'
8091 compCurBB = condBlock;
8092 fgWalkTreePre(&comparandClone, Compiler::lvaIncRefCntsCB, (void*)this, true);
8094 noway_assert(relop->gtOp.gtOp1 == comparand);
8095 genTreeOps oper = compStressCompile(STRESS_OPT_BOOLS_GC, 50) ? GT_OR : GT_AND;
8096 relop->gtOp.gtOp1 = gtNewOperNode(oper, TYP_I_IMPL, comparand, comparandClone);
8098 // Comparand type is already checked, and we have const int, there is no harm
8099 // morphing it into a TYP_I_IMPL.
8100 noway_assert(relop->gtOp.gtOp2->gtOper == GT_CNS_INT);
8101 relop->gtOp.gtOp2->gtType = TYP_I_IMPL;
8106 /******************************************************************************
8107 * Function used by folding of boolean conditionals
8108 * Given a GT_JTRUE node, checks that it is a boolean comparison of the form
8109 * "if (boolVal ==/!= 0/1)". This is translated into a GT_EQ node with "op1"
8110 * being a boolean lclVar and "op2" the const 0/1.
8111 * On success, the comparand (ie. boolVal) is returned. Else NULL.
8112 * compPtr returns the compare node (i.e. GT_EQ or GT_NE node)
8113 * boolPtr returns whether the comparand is a boolean value (must be 0 or 1).
8114 * When return boolPtr equal to true, if the comparison was against a 1 (i.e true)
8115 * value then we morph the tree by reversing the GT_EQ/GT_NE and change the 1 to 0.
8118 GenTree* Compiler::optIsBoolCond(GenTree* condBranch, GenTree** compPtr, bool* boolPtr)
8120 bool isBool = false;
8122 noway_assert(condBranch->gtOper == GT_JTRUE);
8123 GenTree* cond = condBranch->gtOp.gtOp1;
8125 /* The condition must be "!= 0" or "== 0" */
8127 if ((cond->gtOper != GT_EQ) && (cond->gtOper != GT_NE))
8132 /* Return the compare node to the caller */
8136 /* Get hold of the comparands */
8138 GenTree* opr1 = cond->gtOp.gtOp1;
8139 GenTree* opr2 = cond->gtOp.gtOp2;
8141 if (opr2->gtOper != GT_CNS_INT)
8146 if (!opr2->IsIntegralConst(0) && !opr2->IsIntegralConst(1))
8151 ssize_t ival2 = opr2->gtIntCon.gtIconVal;
8153 /* Is the value a boolean?
8154 * We can either have a boolean expression (marked GTF_BOOLEAN) or
8155 * a local variable that is marked as being boolean (lvIsBoolean) */
8157 if (opr1->gtFlags & GTF_BOOLEAN)
8161 else if ((opr1->gtOper == GT_CNS_INT) && (opr1->IsIntegralConst(0) || opr1->IsIntegralConst(1)))
8165 else if (opr1->gtOper == GT_LCL_VAR)
8167 /* is it a boolean local variable */
8169 unsigned lclNum = opr1->gtLclVarCommon.gtLclNum;
8170 noway_assert(lclNum < lvaCount);
8172 if (lvaTable[lclNum].lvIsBoolean)
8178 /* Was our comparison against the constant 1 (i.e. true) */
8181 // If this is a boolean expression tree we can reverse the relop
8182 // and change the true to false.
8185 gtReverseCond(cond);
8186 opr2->gtIntCon.gtIconVal = 0;
8198 void Compiler::optOptimizeBools()
8203 printf("*************** In optOptimizeBools()\n");
8206 printf("Blocks/Trees before phase\n");
8207 fgDispBasicBlocks(true);
8217 for (BasicBlock* b1 = fgFirstBB; b1; b1 = b1->bbNext)
8219 /* We're only interested in conditional jumps here */
8221 if (b1->bbJumpKind != BBJ_COND)
8226 /* If there is no next block, we're done */
8228 BasicBlock* b2 = b1->bbNext;
8234 /* The next block must not be marked as BBF_DONT_REMOVE */
8235 if (b2->bbFlags & BBF_DONT_REMOVE)
8240 /* The next block also needs to be a condition */
8242 if (b2->bbJumpKind != BBJ_COND)
8245 optOptimizeBoolsGcStress(b1);
8250 bool sameTarget; // Do b1 and b2 have the same bbJumpDest?
8252 if (b1->bbJumpDest == b2->bbJumpDest)
8254 /* Given the following sequence of blocks :
8258 we wil try to fold it to :
8259 B1: brtrue(t1|t2, BX)
8265 else if (b1->bbJumpDest == b2->bbNext) /*b1->bbJumpDest->bbNum == n1+2*/
8267 /* Given the following sequence of blocks :
8271 we will try to fold it to :
8272 B1: brtrue((!t1)&&t2, B3)
8283 /* The second block must contain a single statement */
8285 GenTreePtr s2 = b2->bbTreeList;
8286 if (s2->gtPrev != s2)
8291 noway_assert(s2->gtOper == GT_STMT);
8292 GenTreePtr t2 = s2->gtStmt.gtStmtExpr;
8293 noway_assert(t2->gtOper == GT_JTRUE);
8295 /* Find the condition for the first block */
8297 GenTreePtr s1 = b1->bbTreeList->gtPrev;
8299 noway_assert(s1->gtOper == GT_STMT);
8300 GenTreePtr t1 = s1->gtStmt.gtStmtExpr;
8301 noway_assert(t1->gtOper == GT_JTRUE);
8303 if (b2->countOfInEdges() > 1)
8308 /* Find the branch conditions of b1 and b2 */
8312 GenTreePtr c1 = optIsBoolCond(t1, &t1, &bool1);
8318 GenTreePtr c2 = optIsBoolCond(t2, &t2, &bool2);
8324 noway_assert(t1->gtOper == GT_EQ || t1->gtOper == GT_NE && t1->gtOp.gtOp1 == c1);
8325 noway_assert(t2->gtOper == GT_EQ || t2->gtOper == GT_NE && t2->gtOp.gtOp1 == c2);
8327 // Leave out floats where the bit-representation is more complicated
8328 // - there are two representations for 0.
8330 if (varTypeIsFloating(c1->TypeGet()) || varTypeIsFloating(c2->TypeGet()))
8335 // Make sure the types involved are of the same sizes
8336 if (genTypeSize(c1->TypeGet()) != genTypeSize(c2->TypeGet()))
8340 if (genTypeSize(t1->TypeGet()) != genTypeSize(t2->TypeGet()))
8344 #ifdef _TARGET_ARMARCH_
8345 // Skip the small operand which we cannot encode.
8346 if (varTypeIsSmall(c1->TypeGet()))
8349 /* The second condition must not contain side effects */
8351 if (c2->gtFlags & GTF_GLOB_EFFECT)
8356 /* The second condition must not be too expensive */
8360 if (c2->gtCostEx > 12)
8367 var_types foldType = c1->TypeGet();
8368 if (varTypeIsGC(foldType))
8370 foldType = TYP_I_IMPL;
8375 /* Both conditions must be the same */
8377 if (t1->gtOper != t2->gtOper)
8382 if (t1->gtOper == GT_EQ)
8384 /* t1:c1==0 t2:c2==0 ==> Branch to BX if either value is 0
8385 So we will branch to BX if (c1&c2)==0 */
8392 /* t1:c1!=0 t2:c2!=0 ==> Branch to BX if either value is non-0
8393 So we will branch to BX if (c1|c2)!=0 */
8401 /* The b1 condition must be the reverse of the b2 condition */
8403 if (t1->gtOper == t2->gtOper)
8408 if (t1->gtOper == GT_EQ)
8410 /* t1:c1==0 t2:c2!=0 ==> Branch to BX if both values are non-0
8411 So we will branch to BX if (c1&c2)!=0 */
8418 /* t1:c1!=0 t2:c2==0 ==> Branch to BX if both values are 0
8419 So we will branch to BX if (c1|c2)==0 */
8426 // Anding requires both values to be 0 or 1
8428 if ((foldOp == GT_AND) && (!bool1 || !bool2))
8434 // Now update the trees
8436 GenTreePtr cmpOp1 = gtNewOperNode(foldOp, foldType, c1, c2);
8439 /* When we 'OR'/'AND' two booleans, the result is boolean as well */
8440 cmpOp1->gtFlags |= GTF_BOOLEAN;
8444 t1->gtOp.gtOp1 = cmpOp1;
8445 t1->gtOp.gtOp2->gtType = foldType; // Could have been varTypeIsGC()
8447 #if FEATURE_SET_FLAGS
8448 // For comparisons against zero we will have the GTF_SET_FLAGS set
8449 // and this can cause an assert to fire in fgMoveOpsLeft(GenTreePtr tree)
8450 // during the CSE phase.
8452 // So make sure to clear any GTF_SET_FLAGS bit on these operations
8453 // as they are no longer feeding directly into a comparisons against zero
8455 // Make sure that the GTF_SET_FLAGS bit is cleared.
8456 // Fix 388436 ARM JitStress WP7
8457 c1->gtFlags &= ~GTF_SET_FLAGS;
8458 c2->gtFlags &= ~GTF_SET_FLAGS;
8460 // The new top level node that we just created does feed directly into
8461 // a comparison against zero, so set the GTF_SET_FLAGS bit so that
8462 // we generate an instuction that sets the flags, which allows us
8463 // to omit the cmp with zero instruction.
8465 // Request that the codegen for cmpOp1 sets the condition flags
8466 // when it generates the code for cmpOp1.
8468 cmpOp1->gtRequestSetFlags();
8471 flowList* edge1 = fgGetPredForBlock(b1->bbJumpDest, b1);
8474 /* Modify the target of the conditional jump and update bbRefs and bbPreds */
8478 edge2 = fgGetPredForBlock(b2->bbJumpDest, b2);
8482 edge2 = fgGetPredForBlock(b2->bbNext, b2);
8484 fgRemoveRefPred(b1->bbJumpDest, b1);
8486 b1->bbJumpDest = b2->bbJumpDest;
8488 fgAddRefPred(b2->bbJumpDest, b1);
8491 noway_assert(edge1 != nullptr);
8492 noway_assert(edge2 != nullptr);
8494 BasicBlock::weight_t edgeSumMin = edge1->flEdgeWeightMin + edge2->flEdgeWeightMin;
8495 BasicBlock::weight_t edgeSumMax = edge1->flEdgeWeightMax + edge2->flEdgeWeightMax;
8496 if ((edgeSumMax >= edge1->flEdgeWeightMax) && (edgeSumMax >= edge2->flEdgeWeightMax))
8498 edge1->flEdgeWeightMin = edgeSumMin;
8499 edge1->flEdgeWeightMax = edgeSumMax;
8503 edge1->flEdgeWeightMin = BB_ZERO_WEIGHT;
8504 edge1->flEdgeWeightMax = BB_MAX_WEIGHT;
8507 /* Get rid of the second block (which is a BBJ_COND) */
8509 noway_assert(b1->bbJumpKind == BBJ_COND);
8510 noway_assert(b2->bbJumpKind == BBJ_COND);
8511 noway_assert(b1->bbJumpDest == b2->bbJumpDest);
8512 noway_assert(b1->bbNext == b2);
8513 noway_assert(b2->bbNext);
8516 b2->bbFlags |= BBF_REMOVED;
8518 // If b2 was the last block of a try or handler, update the EH table.
8520 ehUpdateForDeletedBlock(b2);
8522 /* Update bbRefs and bbPreds */
8524 /* Replace pred 'b2' for 'b2->bbNext' with 'b1'
8525 * Remove pred 'b2' for 'b2->bbJumpDest' */
8527 fgReplacePred(b2->bbNext, b2, b1);
8529 fgRemoveRefPred(b2->bbJumpDest, b2);
8531 /* Update the block numbers and try again */
8543 // Update loop table
8544 fgUpdateLoopsAfterCompacting(b1, b2);
8549 printf("Folded %sboolean conditions of BB%02u and BB%02u to :\n", c2->OperIsLeaf() ? "" : "non-leaf ",
8550 b1->bbNum, b2->bbNum);
8559 fgDebugCheckBBlist();