1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
10 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
11 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
17 #pragma warning ( disable : 4701 )
20 /*****************************************************************************/
// Class-static counters: range checks removed vs. range checks seen in total.
// Presumably JIT-wide statistics counters — TODO(review): confirm whether these
// are only compiled in under a stats/measure build flag (the #if is elided here).
24 unsigned Compiler::optRangeChkRmv = 0;
26 unsigned Compiler::optRangeChkAll = 0;
29 /*****************************************************************************/
// optInit: Reset the optimizer's per-method state before compiling a method.
// NOTE(review): this view of the function is elided; only the visible
// initializations are documented below.
31 void Compiler::optInit()
33 optLoopsMarked = false;
36 /* Initialize the # of tracked loops to 0 */
38 /* Keep track of the number of calls and indirect calls made by this method */
40 optIndirectCallCount = 0;
41 optNativeCallCount = 0;
// Assertion propagation starts with no assertions and no dependency table.
42 optAssertionCount = 0;
43 optAssertionDep = NULL;
// CSE bookkeeping; UINT_MAX marks "CSE numbering not started yet".
// (The matching #if FEATURE_ANYCSE is outside the visible lines.)
45 optCSECandidateTotal = 0;
46 optCSEstart = UINT_MAX;
48 #endif // FEATURE_ANYCSE
// DataFlow constructor: just captures the owning Compiler instance;
// no other work is done here (body elided/empty in this view).
52 DataFlow::DataFlow(Compiler* pCompiler)
53 : m_pCompiler(pCompiler)
57 /*****************************************************************************
// optSetBlockWeights: Walk every basic block and adjust its bbWeight:
//  - blocks unreachable from the first block are marked run-rarely;
//  - when not using profile data, a block that does not dominate all
//    BBJ_RETURN blocks gets its weight halved.
// Requires dominators to be computed; not run under MinOpts/debuggable code.
61 void Compiler::optSetBlockWeights()
63 noway_assert(!opts.MinOpts() && !opts.compDbgCode);
64 assert(fgDomsComputed);
70 bool firstBBdomsRets = true;
74 for (block = fgFirstBB; (block != NULL); block = block->bbNext)
76 /* Blocks that can't be reached via the first block are rarely executed */
77 if (!fgReachable(fgFirstBB, block))
78 block->bbSetRunRarely();
80 if (block->bbWeight != BB_ZERO_WEIGHT)
82 // Calculate our bbWeight:
84 //  o BB_UNITY_WEIGHT if we dominate all BBJ_RETURN blocks
85 //  o otherwise BB_UNITY_WEIGHT / 2
87 bool domsRets = true; // Assume that we will dominate
89 for (BasicBlockList* retBlocks = fgReturnBlocks; retBlocks != nullptr; retBlocks = retBlocks->next)
91 if (!fgDominate(block, retBlocks->block))
98 if (block == fgFirstBB)
100 firstBBdomsRets = domsRets;
103 // If we are not using profile weight then we lower the weight
104 // of blocks that do not dominate a return block
// Only meaningful when the method entry itself dominates the returns;
// otherwise halving would penalize every block.
106 if (firstBBdomsRets && (fgIsUsingProfileWeights() == false) && (domsRets == false))
111 block->modifyBBWeight(block->bbWeight / 2);
// Halving must never drive a reachable block's weight to zero.
112 noway_assert(block->bbWeight);
118 if (changed && verbose)
120 printf("\nAfter optSetBlockWeights:\n");
125 /* Check that the flowgraph data (bbNum, bbRefs, bbPreds) is up-to-date */
126 fgDebugCheckBBlist();
130 /*****************************************************************************
132 * Marks the blocks between 'begBlk' and 'endBlk' as part of a loop.
// optMarkLoopBlocks: Scale up the bbWeight of the blocks in the lexical range
// [begBlk..endBlk] that are genuinely part of the loop headed at begBlk
// (i.e. mutually reachable with begBlk). Blocks that dominate AND reach a
// backedge get the full BB_LOOP_WEIGHT multiplier; others a reduced one.
// Blocks with real profile weights (BBF_PROF_WEIGHT) are left unchanged.
135 void Compiler::optMarkLoopBlocks(BasicBlock *begBlk,
139 /* Calculate the 'loopWeight',
140 this is the amount to increase each block in the loop
141 Our heuristic is that loops are weighted eight times more
142 than straight line code.
143 Thus we increase each block by 7 times the weight of
144 the loop header block,
145 if the loops are all properly formed gives us:
146 (assuming that BB_LOOP_WEIGHT is 8)
148 1 -- non loop basic block
149 8 -- single loop nesting
150 64 -- double loop nesting
151 512 -- triple loop nesting
155 noway_assert(begBlk->bbNum <= endBlk->bbNum);
156 noway_assert(begBlk->isLoopHead());
157 noway_assert(fgReachable(begBlk, endBlk));
161 printf("\nMarking loop L%02u", begBlk->bbLoopNum);
165 noway_assert(!opts.MinOpts());
167 /* Build list of backedges for block begBlk */
// A backedge here is any pred whose bbNum >= begBlk's, i.e. a lexically
// backward (or self) edge into the loop head.
168 flowList * backedgeList = NULL;
170 for (flowList* pred = begBlk->bbPreds;
174 /* Is this a backedge? */
175 if (pred->flBlock->bbNum >= begBlk->bbNum)
177 flowList* flow = new (this, CMK_FlowList) flowList();
179 #if MEASURE_BLOCK_SIZE
181 genFlowNodeSize += sizeof(flowList);
182 #endif // MEASURE_BLOCK_SIZE
// Prepend to the local backedge list.
184 flow->flNext = backedgeList;
185 flow->flBlock = pred->flBlock;
190 /* At least one backedge must have been found (the one from endBlk) */
191 noway_assert(backedgeList);
193 BasicBlock * curBlk = begBlk;
197 noway_assert(curBlk);
199 // For curBlk to be part of a loop that starts at begBlk
200 // curBlk must be reachable from begBlk and (since this is a loop)
201 // likewise begBlk must be reachable from curBlk.
204 if (fgReachable(curBlk, begBlk) && fgReachable(begBlk, curBlk))
206 /* If this block reaches any of the backedge blocks we set reachable */
207 /* If this block dominates any of the backedge blocks we set dominates */
208 bool reachable = false;
209 bool dominates = false;
211 for (flowList* tmp = backedgeList;
215 BasicBlock * backedge = tmp->flBlock;
217 if (!curBlk->isRunRarely())
219 reachable |= fgReachable(curBlk, backedge);
220 dominates |= fgDominate (curBlk, backedge);
// Both properties established — no need to test further backedges.
222 if (dominates && reachable)
229 noway_assert(curBlk->bbWeight > BB_ZERO_WEIGHT);
233 if ((curBlk->bbFlags & BBF_PROF_WEIGHT) != 0)
235 // We have real profile weights, so we aren't going to change this blocks weight
236 weight = curBlk->bbWeight;
// Full multiplier vs. half multiplier — which one applies depends on the
// dominates/reachable result (branch structure elided in this view).
241 weight = curBlk->bbWeight * BB_LOOP_WEIGHT;
244 weight = curBlk->bbWeight * (BB_LOOP_WEIGHT/2);
248 // The multiplication may have caused us to overflow
250 if (weight < curBlk->bbWeight)
252 // The multiplication caused us to overflow
253 weight = BB_MAX_WEIGHT;
256 // Set the new weight
258 curBlk->modifyBBWeight(weight);
263 printf("\n    BB%02u(wt=%s)",
265 refCntWtd2str(curBlk->getBBWeight(this)));
271 /* Stop if we've reached the last block in the loop */
273 if (curBlk == endBlk)
276 curBlk = curBlk->bbNext;
278 /* If we are excluding the endBlk then stop if we've reached endBlk */
280 if (excludeEndBlk && (curBlk == endBlk))
285 /*****************************************************************************
287 * Unmark the blocks between 'begBlk' and 'endBlk' as part of a loop.
// optUnmarkLoopBlocks: Inverse of optMarkLoopBlocks — divide the bbWeight of
// the blocks in [begBlk..endBlk] back down by BB_LOOP_WEIGHT when a loop is
// dissolved. Only performed when the loop head has exactly one remaining
// BBJ_COND/BBJ_ALWAYS backedge; BB_MAX_WEIGHT and profile-weighted blocks
// are left untouched.
290 void Compiler::optUnmarkLoopBlocks(BasicBlock *begBlk,
293 /* A set of blocks that were previously marked as a loop are now
294 to be unmarked, since we have decided that for some reason this
295 loop no longer exists.
296 Basically we are just reseting the blocks bbWeight to their
300 noway_assert(begBlk->bbNum <= endBlk->bbNum);
301 noway_assert(begBlk->isLoopHead());
303 noway_assert(!opts.MinOpts());
// Count the qualifying backedges into begBlk.
306 unsigned backEdgeCount = 0;
308 for (flowList * pred = begBlk->bbPreds;
312 curBlk = pred->flBlock;
314 /* is this a backward edge? (from curBlk to begBlk) */
316 if (begBlk->bbNum > curBlk->bbNum)
319 /* We only consider back-edges that are BBJ_COND or BBJ_ALWAYS for loops */
321 if ((curBlk->bbJumpKind != BBJ_COND)   &&
322 (curBlk->bbJumpKind != BBJ_ALWAYS)    )
328 /* Only unmark the loop blocks if we have exactly one loop back edge */
329 if (backEdgeCount != 1)
334 if (backEdgeCount > 0)
336 printf("\nNot removing loop L%02u, due to an additional back edge",
339 else if (backEdgeCount == 0)
341 printf("\nNot removing loop L%02u, due to no back edge",
348 noway_assert(backEdgeCount == 1);
349 noway_assert(fgReachable(begBlk, endBlk));
353 printf("\nUnmarking loop L%02u", begBlk->bbLoopNum);
359 noway_assert(curBlk);
361 // For curBlk to be part of a loop that starts at begBlk
362 // curBlk must be reachable from begBlk and (since this is a loop)
363 // likewise begBlk must be reachable from curBlk.
365 if (!curBlk->isRunRarely() && fgReachable(curBlk, begBlk) && fgReachable(begBlk, curBlk))
367 unsigned weight = curBlk->bbWeight;
369 // Don't unmark blocks that are set to BB_MAX_WEIGHT
370 // Don't unmark blocks when we are using profile weights
372 if (!curBlk->isMaxBBWeight() && ((curBlk->bbFlags & BBF_PROF_WEIGHT) == 0))
373 if (!fgDominate(curBlk, endBlk))
380 /* Merging of blocks can disturb the Dominates
381 information (see RAID #46649) */
382 if (weight < BB_LOOP_WEIGHT)
386 // We can overflow here so check for it
387 if (weight < curBlk->bbWeight)
388 weight = BB_MAX_WEIGHT;
// Guaranteed divisible path: restore the pre-loop-marking weight.
390 assert (weight >= BB_LOOP_WEIGHT);
392 curBlk->modifyBBWeight(weight / BB_LOOP_WEIGHT);
398 printf("\n    BB%02u(wt=%s)",
400 refCntWtd2str(curBlk->getBBWeight(this)));
404 /* Stop if we've reached the last block in the loop */
406 if (curBlk == endBlk)
409 curBlk = curBlk->bbNext;
411 /* Stop if we go past the last block in the loop, as it may have been deleted */
412 if (curBlk->bbNum > endBlk->bbNum)
417 /*****************************************************************************************************
419 * Function called to update the loop table and bbWeight before removing a block
// optUpdateLoopsBeforeRemoveBlock: Keep the loop table and loop block weights
// consistent before 'block' is removed from the flowgraph:
//  - a loop whose entry or bottom is 'block' is flagged LPFLG_REMOVED;
//  - a loop whose recorded exit is 'block' loses its exit and LPFLG_ONE_EXIT;
//  - a loop whose entry becomes unreachable (no other predecessor outside
//    the loop jumps/falls into it) is flagged LPFLG_REMOVED;
//  - a loop whose head is 'block' has its head moved to block->bbPrev;
//  - finally, if 'block' carries the lone backedge of a marked loop (and
//    unless skipUnmarkLoop is set), the loop blocks are unmarked.
// Fix in this revision: removed a stray double semicolon after the
// ~LPFLG_ONE_EXIT flag clear (cosmetic empty statement; no behavior change).
422 void Compiler::optUpdateLoopsBeforeRemoveBlock(BasicBlock * block,
428 noway_assert(!opts.MinOpts());
430 bool removeLoop = false;
432 /* If an unreachable block was part of a loop entry or bottom then the loop is unreachable */
433 /* Special case: the block was the head of a loop - or pointing to a loop entry */
435 for (unsigned loopNum = 0; loopNum < optLoopCount; loopNum++)
437 /* Some loops may have been already removed by
438 * loop unrolling or conditional folding */
440 if (optLoopTable[loopNum].lpFlags & LPFLG_REMOVED)
443 if (block == optLoopTable[loopNum].lpEntry ||
444 block == optLoopTable[loopNum].lpBottom  )
446 optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED;
453 printf("\nUpdateLoopsBeforeRemoveBlock Before: ");
454 optPrintLoopInfo(loopNum);
458 /* If the loop is still in the table
459 * any block in the loop must be reachable !!! */
461 noway_assert(optLoopTable[loopNum].lpEntry != block);
462 noway_assert(optLoopTable[loopNum].lpBottom != block);
464 if (optLoopTable[loopNum].lpExit == block)
466 optLoopTable[loopNum].lpExit = NULL;
467 optLoopTable[loopNum].lpFlags &= ~LPFLG_ONE_EXIT;
471 /* If this points to the actual entry in the loop
472 * then the whole loop may become unreachable */
474 switch (block->bbJumpKind)
477 BasicBlock * * jumpTab;
481 if (block->bbNext == optLoopTable[loopNum].lpEntry)
486 if (block->bbJumpKind == BBJ_NONE)
492 noway_assert(block->bbJumpDest);
493 if (block->bbJumpDest == optLoopTable[loopNum].lpEntry)
500 jumpCnt = block->bbJumpSwt->bbsCount;
501 jumpTab = block->bbJumpSwt->bbsDstTab;
504 noway_assert(*jumpTab);
505 if ((*jumpTab) == optLoopTable[loopNum].lpEntry)
509 } while (++jumpTab, --jumpCnt);
518 /* Check if the entry has other predecessors outside the loop
519 * TODO: Replace this when predecessors are available */
521 BasicBlock * auxBlock;
522 for (auxBlock = fgFirstBB; auxBlock; auxBlock = auxBlock->bbNext)
524 /* Ignore blocks in the loop */
526 if (auxBlock->bbNum >  optLoopTable[loopNum].lpHead->bbNum &&
527 auxBlock->bbNum <= optLoopTable[loopNum].lpBottom->bbNum)
530 switch (auxBlock->bbJumpKind)
533 BasicBlock * * jumpTab;
537 if (auxBlock->bbNext == optLoopTable[loopNum].lpEntry)
542 if (auxBlock->bbJumpKind == BBJ_NONE)
548 noway_assert(auxBlock->bbJumpDest);
549 if (auxBlock->bbJumpDest == optLoopTable[loopNum].lpEntry)
556 jumpCnt = auxBlock->bbJumpSwt->bbsCount;
557 jumpTab = auxBlock->bbJumpSwt->bbsDstTab;
561 noway_assert(*jumpTab);
562 if ((*jumpTab) == optLoopTable[loopNum].lpEntry)
566 } while (++jumpTab, --jumpCnt);
576 optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED;
579 else if (optLoopTable[loopNum].lpHead == block)
581 /* The loop has a new head - Just update the loop table */
582 optLoopTable[loopNum].lpHead = block->bbPrev;
587 printf("\nUpdateLoopsBeforeRemoveBlock After: ");
588 optPrintLoopInfo(loopNum);
// If 'block' is the source of a lone backedge into a marked loop head, unmark
// the loop's blocks now — the backedge disappears with the block. The epoch
// check guards against stale dominator/reachability info.
593 if ((skipUnmarkLoop == false)                                              &&
594 ((block->bbJumpKind == BBJ_ALWAYS) || (block->bbJumpKind == BBJ_COND)) &&
595 (block->bbJumpDest->isLoopHead())                                      &&
596 (block->bbJumpDest->bbNum <= block->bbNum)                             &&
598 (fgCurBBEpochSize == fgDomBBcount + 1)                                 &&
599 fgReachable(block->bbJumpDest, block))
601 optUnmarkLoopBlocks(block->bbJumpDest, block);
607 /*****************************************************************************
609 * Given the beginBlock of the loop, return the index of this loop
// optFindLoopNumberFromBeginBlock: Return the loop-table index of the loop
// whose head's fall-through successor is 'begBlk'. Asserts (noway) if no
// recorded loop matches.
613 unsigned Compiler::optFindLoopNumberFromBeginBlock(BasicBlock* begBlk)
617 for (lnum = 0; lnum < optLoopCount; lnum++)
619 if (optLoopTable[lnum].lpHead->bbNext == begBlk)
626 noway_assert(!"Loop number not found.");
631 /*****************************************************************************
633 * Print loop info in an uniform way.
// optPrintLoopInfo (long form): Dump one loop's blocks (first/top/bottom/
// head/entry/exit) and its parent, taking everything as explicit arguments
// so it can be used before the loop is fully recorded in the table.
636 void Compiler::optPrintLoopInfo(unsigned loopInd,
638 BasicBlock * lpFirst,
640 BasicBlock * lpEntry,
641 BasicBlock * lpBottom,
642 unsigned char lpExitCnt,
647 noway_assert(lpHead);
650 // NOTE: we take "loopInd" as an argument instead of using the one
651 // stored in begBlk->bbLoopNum because sometimes begBlk->bbLoopNum
652 // has not been set correctly. For example, in optRecordLoop().
653 // However, in most of the cases, loops should have been recorded.
654 // Therefore the correct way is to call the Compiler::optPrintLoopInfo(unsigned lnum)
655 // version of this method.
657 printf("L%02u, from BB%02u", loopInd, lpFirst->bbNum);
658 if (lpTop != lpFirst)
660 printf(" (loop top is BB%02u)", lpTop->bbNum);
663 printf(" to BB%02u (Head=BB%02u, Entry=BB%02u, ExitCnt=%d",
// Only a single-exit loop has a well-defined exit block to print.
670 if (lpExitCnt == 1) {
671 printf(" at BB%02u", lpExit->bbNum);
674 if (parentLoop != BasicBlock::NOT_IN_LOOP)
676 printf(", parent loop = L%02u", parentLoop);
681 /*****************************************************************************
683 * Print loop information given the index of the loop in the loop table.
// optPrintLoopInfo (short form): Look up the loop in the loop table by index
// and forward everything to the long form above.
686 void Compiler::optPrintLoopInfo(unsigned lnum)
688 noway_assert(lnum < optLoopCount);
690 LoopDsc* ldsc = &optLoopTable[lnum]; // lnum is the INDEX to the loop table.
692 optPrintLoopInfo(lnum,
707 //------------------------------------------------------------------------
708 // optPopulateInitInfo: Populate loop init info in the loop table.
711 // init - the tree that is supposed to initialize the loop iterator.
712 // iterVar - loop iteration variable.
715 // "false" if the loop table could not be populated with the loop iterVar init info.
718 // The 'init' tree is checked if its lhs is a local and rhs is either
719 // a const or a local.
// optPopulateInitInfo: Record the loop iterator's initialization in the loop
// table. Accepts only "iterVar = <int const>" (LPFLG_CONST_INIT) or
// "iterVar = <lclVar>" (LPFLG_VAR_INIT); anything else fails.
721 bool Compiler::optPopulateInitInfo(unsigned loopInd, GenTreePtr init, unsigned iterVar)
723 // Operator should be =
724 if (init->gtOper != GT_ASG)
729 GenTreePtr lhs = init->gtOp.gtOp1;
730 GenTreePtr rhs = init->gtOp.gtOp2;
731 // LHS has to be local and should equal iterVar.
732 if (lhs->gtOper != GT_LCL_VAR || lhs->gtLclVarCommon.gtLclNum != iterVar)
737 // RHS can be constant or local var.
738 // TODO-CQ: CLONE: Add arr length for descending loops.
739 if (rhs->gtOper == GT_CNS_INT && rhs->TypeGet() == TYP_INT)
741 optLoopTable[loopInd].lpFlags |= LPFLG_CONST_INIT;
742 optLoopTable[loopInd].lpConstInit = (int)rhs->gtIntCon.gtIconVal;
744 else if (rhs->gtOper == GT_LCL_VAR)
746 optLoopTable[loopInd].lpFlags |= LPFLG_VAR_INIT;
747 optLoopTable[loopInd].lpVarInit = rhs->gtLclVarCommon.gtLclNum;
756 //----------------------------------------------------------------------------------
757 // optCheckIterInLoopTest: Check if iter var is used in loop test.
760 // test "jtrue" tree or an asg of the loop iter termination condition
761 // from/to blocks (beg, end) which are part of the loop.
762 // iterVar loop iteration variable.
763 // loopInd loop index.
766 // The test tree is parsed to check if "iterVar" matches the lhs of the condition
767 // and the rhs limit is extracted from the "test" tree. The limit information is
768 // added to the loop table.
771 // "false" if the loop table could not be populated with the loop test info or
772 // if the test condition doesn't involve iterVar.
// optCheckIterInLoopTest: Verify the loop's termination condition uses the
// iteration variable, classify the limit (constant / loop-invariant local /
// array length) into lpFlags, and store the relop as lpTestTree.
774 bool Compiler::optCheckIterInLoopTest(unsigned loopInd, GenTreePtr test, BasicBlock* from, BasicBlock* to, unsigned iterVar)
776 // Obtain the relop from the "test" tree.
// The test arrives either as JTRUE(relop) or as "tmp = relop" (the
// eval-into-temp form recognized by optIsLoopTestEvalIntoTemp).
778 if (test->gtOper == GT_JTRUE)
780 relop = test->gtGetOp1();
784 assert(test->gtOper == GT_ASG);
785 relop = test->gtGetOp2();
788 noway_assert(relop->OperKind() & GTK_RELOP);
790 GenTreePtr opr1 = relop->gtOp.gtOp1;
791 GenTreePtr opr2 = relop->gtOp.gtOp2;
796 // Make sure op1 or op2 is the iterVar.
797 if (opr1->gtOper == GT_LCL_VAR && opr1->gtLclVarCommon.gtLclNum == iterVar)
802 else if (opr2->gtOper == GT_LCL_VAR && opr2->gtLclVarCommon.gtLclNum == iterVar)
// Only TYP_INT iterators are supported.
812 if (iterOp->gtType != TYP_INT)
817 // Mark the iterator node.
818 iterOp->gtFlags |= GTF_VAR_ITERATOR;
820 // Check what type of limit we have - constant, variable or arr-len.
821 if (limitOp->gtOper == GT_CNS_INT)
823 optLoopTable[loopInd].lpFlags |= LPFLG_CONST_LIMIT;
// A local-var limit only qualifies if it is not reassigned inside the loop.
825 else if (limitOp->gtOper == GT_LCL_VAR && !optIsVarAssigned(from, to, nullptr, limitOp->gtLclVarCommon.gtLclNum))
827 optLoopTable[loopInd].lpFlags |= LPFLG_VAR_LIMIT;
829 else if (limitOp->gtOper == GT_ARR_LENGTH)
831 optLoopTable[loopInd].lpFlags |= LPFLG_ARRLEN_LIMIT;
837 // Save the type of the comparison between the iterator and the limit.
838 optLoopTable[loopInd].lpTestTree = relop;
842 //----------------------------------------------------------------------------------
843 // optIsLoopIncrTree: Check if loop is a tree of form v += 1 or v = v + 1
846 // incr The incr tree to be checked. Whether incr tree is
847 // oper-equal(+=, -=...) type nodes or v=v+1 type ASG nodes.
850 // The test tree is parsed to check if "iterVar" matches the lhs of the condition
851 // and the rhs limit is extracted from the "test" tree. The limit information is
852 // added to the loop table.
855 // iterVar local num if the iterVar is found, otherwise BAD_VAR_NUM.
// optIsLoopIncrTree: If 'incr' is a local-var update of the form "v = v op y"
// or "v op= y" with an integer-constant increment, return v's local number;
// otherwise return BAD_VAR_NUM.
857 unsigned Compiler::optIsLoopIncrTree(GenTreePtr incr)
860 genTreeOps updateOper;
861 unsigned iterVar = incr->IsLclVarUpdateTree(&incrVal, &updateOper);
862 if (iterVar != BAD_VAR_NUM)
864 // We have v = v op y type asg node.
877 // Increment should be by a const int.
878 // TODO-CQ: CLONE: allow variable increments.
879 if ((incrVal->gtOper != GT_CNS_INT) || (incrVal->TypeGet() != TYP_INT))
888 //----------------------------------------------------------------------------------
889 // optComputeIterInfo: Check tree is loop increment of a lcl that is loop-invariant.
892 // from, to - are blocks (beg, end) which are part of the loop.
893 // incr - tree that increments the loop iterator. v+=1 or v=v+1.
894 // pIterVar - see return value.
897 // Returns true if iterVar "v" can be returned in "pIterVar", otherwise returns
901 // Check if the "incr" tree is a "v=v+1 or v+=1" type tree and make sure it is not
902 // assigned in the loop.
// optComputeIterInfo: Confirm 'incr' is a recognized iterator increment and
// that the iterator local is not assigned anywhere else in the loop body
// [from..to] (excluding 'incr' itself); on success the var num is reported
// through pIterVar.
904 bool Compiler::optComputeIterInfo(GenTreePtr incr, BasicBlock* from, BasicBlock* to, unsigned* pIterVar)
907 unsigned iterVar = optIsLoopIncrTree(incr);
908 if (iterVar == BAD_VAR_NUM)
912 if (optIsVarAssigned(from, to, incr, iterVar))
914 JITDUMP("iterVar is assigned in loop\n");
922 //----------------------------------------------------------------------------------
923 // optIsLoopTestEvalIntoTemp:
924 // Pattern match if the test tree is computed into a tmp
925 // and the "tmp" is used as jump condition for loop termination.
928 // testStmt - is the JTRUE statement that is of the form: jmpTrue (Vtmp != 0)
929 // where Vtmp contains the actual loop test result.
930 // newStmt - contains the statement that is the actual test stmt involving
931 // the loop iterator.
934 // Returns true if a new test tree can be obtained.
937 // Scan if the current stmt is a jtrue with (Vtmp != 0) as condition
938 // Then returns the rhs for def of Vtmp as the "test" node.
941 // This method just retrieves what it thinks is the "test" node,
942 // the callers are expected to verify that "iterVar" is used in the test.
// optIsLoopTestEvalIntoTemp: Pattern-match the two-statement form
//     tmp = <relop>            (previous statement)
//     JTRUE(tmp != 0)          (testStmt)
// and, on a match, report the assignment statement as the real loop test via
// *newTest. The caller is still responsible for checking that the relop uses
// the iteration variable.
944 bool Compiler::optIsLoopTestEvalIntoTemp(GenTreePtr testStmt, GenTreePtr* newTest)
946 GenTreePtr test = testStmt->gtStmt.gtStmtExpr;
948 if (test->gtOper != GT_JTRUE)
953 GenTreePtr relop = test->gtGetOp1();
954 noway_assert(relop->OperIsCompare());
956 GenTreePtr opr1 = relop->gtOp.gtOp1;
957 GenTreePtr opr2 = relop->gtOp.gtOp2;
959 // Make sure we have jtrue (vtmp != 0)
960 if ((relop->OperGet() == GT_NE) &&
961 (opr1->OperGet() == GT_LCL_VAR) &&
962 (opr2->OperGet() == GT_CNS_INT) &&
963 opr2->IsIntegralConst(0))
965 // Get the previous statement to get the def (rhs) of Vtmp to see
966 // if the "test" is evaluated into Vtmp.
967 GenTreePtr prevStmt = testStmt->gtPrev;
968 if (prevStmt == nullptr)
973 GenTreePtr tree = prevStmt->gtStmt.gtStmtExpr;
974 if (tree->OperGet() == GT_ASG)
976 GenTreePtr lhs = tree->gtOp.gtOp1;
977 GenTreePtr rhs = tree->gtOp.gtOp2;
979 // Return as the new test node.
// The temp defined here must be the same local tested by the JTRUE.
980 if (lhs->gtOper == GT_LCL_VAR && lhs->AsLclVarCommon()->GetLclNum() == opr1->AsLclVarCommon()->GetLclNum())
982 if (rhs->OperIsCompare())
993 //----------------------------------------------------------------------------------
994 // optExtractInitTestIncr:
995 // Extract the "init", "test" and "incr" nodes of the loop.
998 // head - Loop head block
999 // bottom - Loop bottom block
1000 // top - Loop top block
1001 // ppInit - The init stmt of the loop if found.
1002 // ppTest - The test stmt of the loop if found.
1003 // ppIncr - The incr stmt of the loop if found.
1006 // The results are put in "ppInit", "ppTest" and "ppIncr" if the method
1007 // returns true. Returns false if the information can't be extracted.
1010 // Check if the "test" stmt is last stmt in the loop "bottom". If found good,
1011 // "test" stmt is found. Try to find the "incr" stmt. Check previous stmt of
1012 // "test" to get the "incr" stmt. If it is not found it could be a loop of the
1015 // +-------<-----------------<-----------+
1018 // BBinit(head) -> BBcond(top) -> BBLoopBody(bottom) ---^
1020 // Check if the "incr" tree is present in the loop "top" node as the last stmt.
1021 // Also check if the "test" tree is assigned to a tmp node and the tmp is used
1022 // in the jtrue condition.
1025 // This method just retrieves what it thinks is the "test" node,
1026 // the callers are expected to verify that "iterVar" is used in the test.
// optExtractInitTestIncr: Locate the loop's "init" (last stmt of the
// pre-header 'head', skipping a duplicated zero-trip-test JTRUE), "test"
// (last stmt of 'bottom', possibly evaluated into a temp), and "incr"
// (stmt before the test in 'bottom', or last stmt of 'top' for rotated
// loops). Outputs go to ppInit/ppTest/ppIncr; returns false when any of
// the three cannot be identified.
1028 bool Compiler::optExtractInitTestIncr(BasicBlock* head, BasicBlock* bottom, BasicBlock* top, GenTreePtr* ppInit, GenTreePtr* ppTest, GenTreePtr* ppIncr)
1030 assert(ppInit != nullptr);
1031 assert(ppTest != nullptr);
1032 assert(ppIncr != nullptr);
1034 // Check if last two statements in the loop body are the increment of the iterator
1035 // and the loop termination test.
1036 noway_assert(bottom->bbTreeList != nullptr);
// bbTreeList->gtPrev is the last statement (statement list is circular).
1037 GenTreePtr test = bottom->bbTreeList->gtPrev;
1038 noway_assert(test != nullptr && test->gtNext == nullptr);
1041 if (optIsLoopTestEvalIntoTemp(test, &newTest))
1046 // Check if we have the incr tree before the test tree, if we don't,
1047 // check if incr is part of the loop "top".
1048 GenTreePtr incr = test->gtPrev;
1049 if (incr == nullptr || optIsLoopIncrTree(incr->gtStmt.gtStmtExpr) == BAD_VAR_NUM)
1051 if (top == nullptr || top->bbTreeList == nullptr || top->bbTreeList->gtPrev == nullptr)
1056 // If the prev stmt to loop test is not incr, then check if we have loop test evaluated into a tmp.
1057 GenTreePtr topLast = top->bbTreeList->gtPrev;
1058 if (optIsLoopIncrTree(topLast->gtStmt.gtStmtExpr) != BAD_VAR_NUM)
1068 assert(test != incr);
1070 // Find the last statement in the loop pre-header which we expect to be the initialization of
1071 // the loop iterator.
1072 GenTreePtr phdr = head->bbTreeList;
1073 if (phdr == nullptr)
1078 GenTreePtr init = phdr->gtPrev;
1079 noway_assert(init != nullptr && (init->gtNext == 0));
1081 // If it is a duplicated loop condition, skip it.
1082 if (init->gtFlags & GTF_STMT_CMPADD)
1084 // Must be a duplicated loop condition.
1085 noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
1086 init = init->gtPrev;
1087 noway_assert(init != nullptr);
1090 noway_assert(init->gtOper == GT_STMT);
1091 noway_assert(test->gtOper == GT_STMT);
1092 noway_assert(incr->gtOper == GT_STMT);
// Unwrap the GT_STMT nodes for the caller.
1094 *ppInit = init->gtStmt.gtStmtExpr;
1095 *ppTest = test->gtStmt.gtStmtExpr;
1096 *ppIncr = incr->gtStmt.gtStmtExpr;
1101 /*****************************************************************************
1103 * Record the loop in the loop table.
// optRecordLoop: Add a loop (head/first/top/entry/bottom/exit/exitCnt) to the
// loop table, keeping the table sorted so that any loop containing another
// appears after it. Also tries to recognize a for-like iterator pattern
// (init/test/incr) and, when found, records LPFLG_ITER and the init/limit
// classification; a const-init + const-limit loop additionally gets
// LPFLG_CONST. Silently sets loopOverflowThisMethod when the table is full.
1106 void Compiler::optRecordLoop(BasicBlock * head,
1110 BasicBlock * bottom,
1112 unsigned char exitCnt)
1114 // Record this loop in the table, if there's room.
1116 assert(optLoopCount <= MAX_LOOP_NUM);
1117 if (optLoopCount == MAX_LOOP_NUM)
1120 loopOverflowThisMethod = true;
1125 // Assumed preconditions on the loop we're adding.
// Lexical ordering invariant: first <= top <= entry <= bottom, and the head
// lies outside [top..bottom].
1126 assert(first->bbNum <= top->bbNum);
1127 assert(top->bbNum <= entry->bbNum);
1128 assert(entry->bbNum <= bottom->bbNum);
1129 assert(head->bbNum < top->bbNum || head->bbNum > bottom->bbNum);
1131 // If the new loop contains any existing ones, add it in the right place.
1132 unsigned char loopInd = optLoopCount;
1133 for (unsigned char prevPlus1 = optLoopCount; prevPlus1 > 0; prevPlus1--)
1135 unsigned char prev = prevPlus1 - 1;
1136 if (optLoopTable[prev].lpContainedBy(first, bottom))
1139 // Move up any loops if necessary.
1140 for (unsigned j = optLoopCount; j > loopInd; j--)
1142 optLoopTable[j] = optLoopTable[j-1];
// Sanity: every loop after the insertion point is either disjoint from or
// contained in the new loop.
1146 for (unsigned i = loopInd+1; i < optLoopCount; i++)
1148 // The loop is well-formed.
1149 assert(optLoopTable[i].lpWellFormed());
1150 // Check for disjoint.
1151 if (optLoopTable[i].lpDisjoint(first, bottom)) continue;
1152 // Otherwise, assert complete containment (of optLoopTable[i] in new loop).
1153 assert(optLoopTable[i].lpContainedBy(first, bottom));
1157 optLoopTable[loopInd].lpHead = head;
1158 optLoopTable[loopInd].lpFirst = first;
1159 optLoopTable[loopInd].lpTop = top;
1160 optLoopTable[loopInd].lpBottom = bottom;
1161 optLoopTable[loopInd].lpEntry = entry;
1162 optLoopTable[loopInd].lpExit = exit;
1163 optLoopTable[loopInd].lpExitCnt = exitCnt;
// Parent/child/sibling links are established later by the loop-nest builder.
1165 optLoopTable[loopInd].lpParent = BasicBlock::NOT_IN_LOOP;
1166 optLoopTable[loopInd].lpChild = BasicBlock::NOT_IN_LOOP;
1167 optLoopTable[loopInd].lpSibling = BasicBlock::NOT_IN_LOOP;
1169 optLoopTable[loopInd].lpFlags = 0;
1171 // We haven't yet recorded any side effects.
1172 optLoopTable[loopInd].lpLoopHasHeapHavoc = false;
1173 optLoopTable[loopInd].lpFieldsModified = nullptr;
1174 optLoopTable[loopInd].lpArrayElemTypesModified = 0;
1176 // If DO-WHILE loop mark it as such.
1177 if (head->bbNext == entry)
1179 optLoopTable[loopInd].lpFlags |= LPFLG_DO_WHILE;
1182 // If single exit loop mark it as such.
1186 optLoopTable[loopInd].lpFlags |= LPFLG_ONE_EXIT;
1190 // Try to find loops that have an iterator (i.e. for-like loops) "for (init; test; incr){ ... }"
1191 // We have the following restrictions:
1192 //     1. The loop condition must be a simple one i.e. only one JTRUE node
1193 //     2. There must be a loop iterator (a local var) that is
1194 //        incremented (decremented or lsh, rsh, mul) with a constant value
1195 //     3. The iterator is incremented exactly once
1196 //     4. The loop condition must use the iterator.
1198 if (bottom->bbJumpKind == BBJ_COND)
1203 if (!optExtractInitTestIncr(head, bottom, top, &init, &test, &incr))
1208 unsigned iterVar = BAD_VAR_NUM;
1209 if (!optComputeIterInfo(incr, head->bbNext, bottom, &iterVar))
1214 // Make sure the "iterVar" initialization is never skipped,
1215 // i.e. HEAD dominates the ENTRY.
1216 if (!fgDominate(head, entry))
1221 if (!optPopulateInitInfo(loopInd, init, iterVar))
1226 // Check that the iterator is used in the loop condition.
1227 if (!optCheckIterInLoopTest(loopInd, test, head->bbNext, bottom, iterVar))
1232 // We know the loop has an iterator at this point ->flag it as LPFLG_ITER
1233 // Record the iterator, the pointer to the test node
1234 // and the initial value of the iterator (constant or local var)
1235 optLoopTable[loopInd].lpFlags |= LPFLG_ITER;
1238 optLoopTable[loopInd].lpIterTree = incr;
1240 // Save the initial value of the iterator - can be lclVar or constant
1241 // Flag the loop accordingly.
1248 simpleTestLoopCount++;
1251 // Check if a constant iteration loop.
1252 if ((optLoopTable[loopInd].lpFlags & LPFLG_CONST_INIT) &&
1253 (optLoopTable[loopInd].lpFlags & LPFLG_CONST_LIMIT))
1255 // This is a constant loop.
1256 optLoopTable[loopInd].lpFlags |= LPFLG_CONST;
1258 constIterLoopCount++;
1265 printf("\nConstant loop initializer:\n");
1268 printf("\nConstant loop body:\n");
1270 BasicBlock* block = head;
1273 block = block->bbNext;
1274 for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
1276 if (stmt->gtStmt.gtStmtExpr == incr)
1279 gtDispTree(stmt->gtStmt.gtStmtExpr);
1282 while (block != bottom);
1289 DBEXEC(verbose, optPrintLoopRecording(loopInd));
1294 //------------------------------------------------------------------------
1295 // optPrintLoopRecording: Print a recording of the loop.
1298 // loopInd - loop index.
// optPrintLoopRecording: Debug dump of a just-recorded loop: its block
// structure, and for iterator loops the iterator variable, initialization,
// and termination test/limit.
1300 void Compiler::optPrintLoopRecording(unsigned loopInd)
1302 printf("Recorded loop %s", (loopInd != optLoopCount ? "(extended) " : ""));
1303 optPrintLoopInfo(optLoopCount, // Not necessarily the loop index, but the number of loops that have been added.
1304 optLoopTable[loopInd].lpHead,
1305 optLoopTable[loopInd].lpFirst,
1306 optLoopTable[loopInd].lpTop,
1307 optLoopTable[loopInd].lpEntry,
1308 optLoopTable[loopInd].lpBottom,
1309 optLoopTable[loopInd].lpExitCnt,
1310 optLoopTable[loopInd].lpExit
1313 // If an iterator loop print the iterator and the initialization.
1314 if (optLoopTable[loopInd].lpFlags & LPFLG_ITER)
1316 printf(" [over V%02u", optLoopTable[loopInd].lpIterVar());
1318 printf(GenTree::NodeName(optLoopTable[loopInd].lpIterOper()));
1320 printf("%d )", optLoopTable[loopInd].lpIterConst());
1322 if (optLoopTable[loopInd].lpFlags & LPFLG_CONST_INIT)
1323 printf(" from %d", optLoopTable[loopInd].lpConstInit);
1324 if (optLoopTable[loopInd].lpFlags & LPFLG_VAR_INIT)
1325 printf(" from V%02u", optLoopTable[loopInd].lpVarInit);
1327 // If a simple test condition print operator and the limits */
1328 printf(GenTree::NodeName(optLoopTable[loopInd].lpTestOper()));
1330 if (optLoopTable[loopInd].lpFlags & LPFLG_CONST_LIMIT)
1331 printf("%d ", optLoopTable[loopInd].lpConstLimit());
1333 if (optLoopTable[loopInd].lpFlags & LPFLG_VAR_LIMIT)
1334 printf("V%02u ", optLoopTable[loopInd].lpVarLimit());
// optCheckPreds: Debug-only consistency check of the predecessor lists —
// every recorded pred must exist in the block list and must actually branch
// or fall through to the block that lists it. O(blocks^2) by design; only
// meant for checked builds.
1342 void Compiler::optCheckPreds()
1345 BasicBlock * blockPred;
1348 for (block = fgFirstBB; block; block = block->bbNext)
1350 for (pred = block->bbPreds; pred; pred = pred->flNext)
1352 // make sure this pred is part of the BB list
1353 for (blockPred = fgFirstBB; blockPred; blockPred = blockPred->bbNext)
1355 if (blockPred == pred->flBlock)
1358 noway_assert(blockPred);
1359 switch (blockPred->bbJumpKind)
1362 if (blockPred->bbJumpDest == block)
// Fall-through pred: must be the lexically preceding block.
1366 noway_assert(blockPred->bbNext == block);
1368 case BBJ_EHFILTERRET:
1370 case BBJ_EHCATCHRET:
1371 noway_assert(blockPred->bbJumpDest == block);
1382 /*****************************************************************************
1383 * Find the natural loops, using dominators. Note that the test for
1384 * a loop is slightly different from the standard one, because we have
1385 * not done a depth first reordering of the basic blocks.
1388 void Compiler::optFindNaturalLoops()
1392 printf("*************** In optFindNaturalLoops()\n");
1397 flowList * predEntry;
1399 noway_assert(fgDomsComputed);
// Per-method loop statistics (reset here; updated as loops are recorded below).
1403 hasMethodLoops = false;
1404 loopsThisMethod = 0;
1405 loopOverflowThisMethod = false;
1408 /* We will use the following terminology:
1409 * HEAD - the basic block that flows into the loop ENTRY block (Currently MUST be lexically before entry).
1410 Not part of the loop.
1411 * FIRST - the lexically first basic block (in bbNext order) within this loop. (May be part of a nested loop, but not the outer loop. ???)
1412 * TOP - the target of the backward edge from BOTTOM. In most cases FIRST and TOP are the same.
1413 * BOTTOM - the lexically last block in the loop (i.e. the block from which we jump to the top)
1414 * EXIT - the loop exit or the block right after the bottom
1415 * ENTRY - the entry in the loop (not necessarily the TOP), but there must be only one entry
1417 * We (currently) require the body of a loop to be a contiguous (in bbNext order) sequence of basic blocks.
1448 BasicBlock * bottom;
1451 unsigned char exitCount;
// Candidate search: 'head' walks all blocks; 'top' is presumably head's
// lexical successor (assignment not visible here — confirm against full source).
1454 for (head = fgFirstBB; head->bbNext; head = head->bbNext)
1460 // Blocks that are rarely run have a zero bbWeight and should
1461 // never be optimized here
1463 if (top->bbWeight == BB_ZERO_WEIGHT)
1466 for (pred = top->bbPreds; pred; pred = pred->flNext)
1468 /* Is this a loop candidate? - We look for "back edges", i.e. an edge from BOTTOM
1469 * to TOP (note that this is an abuse of notation since this is not necessarily a back edge
1470 * as the definition says, but merely an indication that we have a loop there).
1471 * Thus, we have to be very careful and after entry discovery check that it is indeed
1472 * the only place we enter the loop (especially for non-reducible flow graphs).
1475 bottom = pred->flBlock;
1478 if (top->bbNum <= bottom->bbNum) // is this a backward edge? (from BOTTOM to TOP)
1480 if ((bottom->bbJumpKind == BBJ_EHFINALLYRET) ||
1481 (bottom->bbJumpKind == BBJ_EHFILTERRET) ||
1482 (bottom->bbJumpKind == BBJ_EHCATCHRET) ||
1483 (bottom->bbJumpKind == BBJ_CALLFINALLY) ||
1484 (bottom->bbJumpKind == BBJ_SWITCH) )
1486 /* BBJ_EHFINALLYRET, BBJ_EHFILTERRET, BBJ_EHCATCHRET, and BBJ_CALLFINALLY can never form a loop.
1487 * BBJ_SWITCH that has a backward jump appears only for labeled break. */
1491 BasicBlock * loopBlock;
1493 /* The presence of a "back edge" is an indication that a loop might be present here
1496 * 1. A collection of STRONGLY CONNECTED nodes i.e. there is a path from any
1497 * node in the loop to any other node in the loop (wholly within the loop)
1498 * 2. The loop has a unique ENTRY, i.e. there is only one way to reach a node
1499 * in the loop from outside the loop, and that is through the ENTRY
1502 /* Let's find the loop ENTRY */
1504 if (head->bbJumpKind == BBJ_ALWAYS)
1506 if (head->bbJumpDest->bbNum <= bottom->bbNum &&
1507 head->bbJumpDest->bbNum >= top->bbNum )
1509 /* OK - we enter somewhere within the loop */
1510 entry = head->bbJumpDest;
1512 /* some useful asserts
1513 * Cannot enter at the top - should have being caught by redundant jumps */
1515 assert ((entry != top) || (head->bbFlags & BBF_KEEP_BBJ_ALWAYS));
1519 /* special case - don't consider now */
1520 //assert (!"Loop entered in weird way!");
1524 // Can we fall through into the loop?
1525 else if (head->bbJumpKind == BBJ_NONE || head->bbJumpKind == BBJ_COND)
1527 /* The ENTRY is at the TOP (a do-while loop) */
1532 goto NO_LOOP; // head does not flow into the loop bail for now
1535 // Now we find the "first" block -- the earliest block reachable within the loop.
1536 // This is usually the same as "top", but can differ in rare cases where "top" is
1537 // the entry block of a nested loop, and that nested loop branches backwards to a
1538 // a block before "top". We find this by searching for such backwards branches
1539 // in the loop known so far.
1540 BasicBlock* first = top;
1541 BasicBlock* newFirst;
1542 bool blocksToSearch = true;
1543 BasicBlock* validatedAfter = bottom->bbNext;
// Fixed-point search: keep extending [first..bottom] while some block in the
// known loop range branches to an even earlier block.
1544 while (blocksToSearch)
1546 blocksToSearch = false;
1548 blocksToSearch = false;
1549 for (loopBlock = first; loopBlock != validatedAfter; loopBlock = loopBlock->bbNext)
1551 unsigned nSucc = loopBlock->NumSucc();
1552 for (unsigned j = 0; j < nSucc; j++)
1554 BasicBlock* succ = loopBlock->GetSucc(j);
1555 if ( (newFirst == nullptr && succ->bbNum < first->bbNum)
1556 || (newFirst != nullptr && succ->bbNum < newFirst->bbNum))
1562 if (newFirst != nullptr)
1564 validatedAfter = first;
1566 blocksToSearch = true;
1570 // Is "head" still before "first"? If not, we don't have a valid loop...
1571 if (head->bbNum >= first->bbNum)
1573 JITDUMP("Extending loop [BB%02u..BB%02u] 'first' to BB%02u captures head BB%02u. Rejecting loop.\n",
1574 top->bbNum, bottom->bbNum, first->bbNum, head->bbNum);
1578 /* Make sure ENTRY dominates all blocks in the loop
1579 * This is necessary to ensure condition 2. above
1580 * At the same time check if the loop has a single exit
1581 * point - those loops are easier to optimize */
1583 for (loopBlock = top;
1584 loopBlock != bottom->bbNext;
1585 loopBlock = loopBlock->bbNext)
1587 if (!fgDominate(entry, loopBlock))
1592 if (loopBlock == bottom)
1594 if (bottom->bbJumpKind != BBJ_ALWAYS)
1596 /* there is an exit at the bottom */
1598 noway_assert(bottom->bbJumpDest == top);
1605 BasicBlock * exitPoint;
// Classify each block's jump kind to count exits leaving [top..bottom].
1607 switch (loopBlock->bbJumpKind)
1610 case BBJ_CALLFINALLY:
1612 case BBJ_EHCATCHRET:
1613 assert (loopBlock->bbJumpDest);
1614 exitPoint = loopBlock->bbJumpDest;
1616 if (exitPoint->bbNum < top->bbNum ||
1617 exitPoint->bbNum > bottom->bbNum )
1619 /* exit from a block other than BOTTOM */
1628 case BBJ_EHFINALLYRET:
1629 case BBJ_EHFILTERRET:
1630 /* The "try" associated with this "finally" must be in the
1631 * same loop, so the finally block will return control inside the loop */
1636 /* those are exits from the loop */
1643 unsigned jumpCnt; jumpCnt = loopBlock->bbJumpSwt->bbsCount;
1644 BasicBlock * * jumpTab; jumpTab = loopBlock->bbJumpSwt->bbsDstTab;
1648 noway_assert(*jumpTab);
1649 exitPoint = *jumpTab;
1651 if (exitPoint->bbNum < top->bbNum ||
1652 exitPoint->bbNum > bottom->bbNum )
1658 while (++jumpTab, --jumpCnt);
1662 noway_assert(!"Unexpected bbJumpKind");
1667 /* Make sure we can iterate the loop (i.e. there is a way back to ENTRY)
1668 * This is to ensure condition 1. above which prevents marking fake loops
1670 * Below is an example:
1678 * The example above is not a loop since we bail after the first iteration
1680 * The condition we have to check for is
1681 * 1. ENTRY must have at least one predecessor inside the loop. Since we know that that block is reachable,
1682 * it can only be reached through ENTRY, therefore we have a way back to ENTRY
1684 * 2. If we have a GOTO (BBJ_ALWAYS) outside of the loop and that block dominates the
1685 * loop bottom then we cannot iterate
1687 * NOTE that this doesn't entirely satisfy condition 1. since "break" statements are not
1688 * part of the loop nodes (as per definition they are loop exits executed only once),
1689 * but we have no choice but to include them because we consider all blocks within TOP-BOTTOM */
1691 for (loopBlock = top; loopBlock != bottom; loopBlock = loopBlock->bbNext)
1693 switch (loopBlock->bbJumpKind)
1698 if (fgDominate(loopBlock, bottom))
1705 bool canIterateLoop = false;
1707 for (predEntry = entry->bbPreds; predEntry; predEntry = predEntry->flNext)
1709 if (predEntry->flBlock->bbNum >= top->bbNum &&
1710 predEntry->flBlock->bbNum <= bottom->bbNum )
1712 canIterateLoop = true;
1715 else if (predEntry->flBlock != head)
1717 // The entry block has multiple predecessors outside the loop; the 'head'
1718 // block isn't the only one. We only support a single 'head', so bail.
1723 if (!canIterateLoop)
1726 /* Double check - make sure that all loop blocks except ENTRY
1727 * have no predecessors outside the loop - this ensures only one loop entry and prevents
1728 * us from considering non-loops due to incorrectly assuming that we had a back edge
1731 * Loops of the form "while (a || b)" will be treated as 2 nested loops (with the same header)
1734 for (loopBlock = top;
1735 loopBlock != bottom->bbNext;
1736 loopBlock = loopBlock->bbNext)
1738 if (loopBlock == entry)
1741 for (predTop = loopBlock->bbPreds;
1743 predTop = predTop->flNext)
1745 if (predTop->flBlock->bbNum < top->bbNum ||
1746 predTop->flBlock->bbNum > bottom->bbNum )
1748 //noway_assert(!"Found loop with multiple entries");
1754 // Disqualify loops where the first block of the loop is less nested in EH than
1755 // the bottom block. That is, we don't want to handle loops where the back edge
1756 // goes from within an EH region to a first block that is outside that same EH
1757 // region. Note that we *do* handle loops where the first block is the *first*
1758 // block of a more nested EH region (since it is legal to branch to the first
1759 // block of an immediately more nested EH region). So, for example, disqualify
1766 // BB10 BBJ_COND => BB02
1770 // Here, BB10 is more nested than BB02.
1772 if (bottom->hasTryIndex() &&
1773 !bbInTryRegions(bottom->getTryIndex(), first))
1775 JITDUMP("Loop 'first' BB%02u is in an outer EH region compared to loop 'bottom' BB%02u. Rejecting loop.\n",
1776 first->bbNum, bottom->bbNum);
1780 #if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
1781 // Disqualify loops where the first block of the loop is a finally target.
1782 // The main problem is when multiple loops share a 'first' block that is a finally
1783 // target and we canonicalize the loops by adding a new loop head. In that case, we
1784 // need to update the blocks so the finally target bit is moved to the newly created
1785 // block, and removed from the old 'first' block. This is 'hard', so at this point
1786 // in the RyuJIT codebase (when we don't expect to keep the "old" ARM32 code generator
1787 // long-term), it's easier to disallow the loop than to update the flow graph to
1788 // support this case.
1790 if ((first->bbFlags & BBF_FINALLY_TARGET) != 0)
1792 JITDUMP("Loop 'first' BB%02u is a finally target. Rejecting loop.\n",
1796 #endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
1798 /* At this point we have a loop - record it in the loop table
1799 * If we found only one exit, record it in the table too
1800 * (otherwise an exit = 0 in the loop table means multiple exits) */
1807 optRecordLoop(head, first, top, entry, bottom, exit, exitCount);
1810 if (!hasMethodLoops)
1812 /* mark the method as containing natural loops */
1814 hasMethodLoops = true;
1817 /* increment total number of loops found */
1821 /* keep track of the number of exits */
1822 loopExitCountTable.record(static_cast<unsigned>(exitCount));
1823 #endif // COUNT_LOOPS
1826 /* current predecessor not good for a loop - continue with another one, if any */
1832 loopCountTable.record(loopsThisMethod);
1833 if (maxLoopsPerMethod < loopsThisMethod)
1835 maxLoopsPerMethod = loopsThisMethod;
1837 if (loopOverflowThisMethod)
1839 totalLoopOverflows++;
1841 #endif // COUNT_LOOPS
1843 // Now the loop indices are stable. We can figure out parent/child relationships
1844 // (using table indices to name loops), and label blocks.
// Parents precede children in optLoopTable, so scanning backwards from loopInd
// finds the nearest (innermost) containing loop first.
1845 for (unsigned char loopInd = 1; loopInd < optLoopCount; loopInd++)
1847 for (unsigned char possibleParent = loopInd; possibleParent > 0;)
1850 if (optLoopTable[possibleParent].lpContains(optLoopTable[loopInd]))
1852 optLoopTable[loopInd].lpParent = possibleParent;
1853 optLoopTable[loopInd].lpSibling = optLoopTable[possibleParent].lpChild;
1854 optLoopTable[possibleParent].lpChild = loopInd;
1860 // Now label the blocks with the innermost loop to which they belong. Since parents
1861 // precede children in the table, doing the labeling for each loop in order will achieve
1862 // this -- the innermost loop labeling will be done last.
1863 for (unsigned char loopInd = 0; loopInd < optLoopCount; loopInd++)
1865 BasicBlock* first = optLoopTable[loopInd].lpFirst;
1866 BasicBlock* bottom = optLoopTable[loopInd].lpBottom;
1867 for (BasicBlock* blk = first; blk != nullptr; blk = blk->bbNext)
1869 blk->bbNatLoopNum = loopInd;
1870 if (blk == bottom) break;
1871 assert(blk->bbNext != nullptr); // We should never reach nullptr.
1875 // Make sure that loops are canonical: that every loop has a unique "top", by creating an empty "nop"
1876 // one, if necessary, for loops containing others that share a "top."
1878 for (unsigned char loopInd = 0; loopInd < optLoopCount; loopInd++)
1880 // Traverse the outermost loops as entries into the loop nest; so skip non-outermost.
1881 if (optLoopTable[loopInd].lpParent != BasicBlock::NOT_IN_LOOP)
1885 if (optCanonicalizeLoopNest(loopInd))
1892 fgUpdateChangedFlowGraph();
1896 if (verbose && optLoopCount > 0)
1898 printf("\nFinal natural loop table:\n");
1899 for (unsigned loopInd = 0; loopInd < optLoopCount; loopInd++)
1901 optPrintLoopInfo(loopInd);
// optRedirectBlock: rewrite blk's jump target(s) using 'redirectMap'.
// Any successor target that appears as a key in the map is replaced by the
// mapped block. Jump kinds without a destination are left alone. Note that
// the predecessor lists (bbPreds) are NOT updated here (see callers).
1908 void Compiler::optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap)
1910 BasicBlock* newJumpDest = nullptr;
1911 switch (blk->bbJumpKind)
1916 case BBJ_EHFILTERRET:
1917 case BBJ_EHFINALLYRET:
1918 case BBJ_EHCATCHRET:
1919 // These have no jump destination to update.
1924 case BBJ_CALLFINALLY:
1926 // All of these have a single jump destination to update.
1927 if (redirectMap->Lookup(blk->bbJumpDest, &newJumpDest))
1929 blk->bbJumpDest = newJumpDest;
// Switch: redirect every entry of the destination table that has a mapping.
1935 bool redirected = false;
1936 for (unsigned i = 0; i < blk->bbJumpSwt->bbsCount; i++)
1938 if (redirectMap->Lookup(blk->bbJumpSwt->bbsDstTab[i], &newJumpDest))
1940 blk->bbJumpSwt->bbsDstTab[i] = newJumpDest;
1944 // If any redirections happened, invalidate the switch table map for the switch.
1946 GetSwitchDescMap()->Remove(blk);
1955 // TODO-Cleanup: This should be a static member of the BasicBlock class.
// optCopyBlkDest: copy the jump destination(s) of "from" into "to".
// The two blocks must already share the same bbJumpKind (asserted below).
// For switches a fresh BBswtDesc and destination table are allocated, so
// "to" does not alias "from"'s table.
1956 void Compiler::optCopyBlkDest(BasicBlock* from, BasicBlock* to)
1958 assert(from->bbJumpKind == to->bbJumpKind); // Precondition.
1960 // copy the jump destination(s) from "from" to "to".
1961 switch (to->bbJumpKind)
1965 case BBJ_CALLFINALLY:
1967 // All of these have a single jump destination to update.
1968 to->bbJumpDest = from->bbJumpDest;
// Switch: deep-copy the descriptor and its destination table.
1973 to->bbJumpSwt = new (this, CMK_BasicBlock) BBswtDesc();
1974 to->bbJumpSwt->bbsCount = from->bbJumpSwt->bbsCount;
1975 to->bbJumpSwt->bbsDstTab = new (this, CMK_BasicBlock) BasicBlock*[from->bbJumpSwt->bbsCount];
1977 for (unsigned i = 0; i < from->bbJumpSwt->bbsCount; i++)
1979 to->bbJumpSwt->bbsDstTab[i] = from->bbJumpSwt->bbsDstTab[i];
1989 // Canonicalize the loop nest rooted at parent loop 'loopInd'.
1990 // Returns 'true' if the flow graph is modified.
// optCanonicalizeLoopNest: canonicalize this loop (give it a unique "top" if
// needed), then recurse over its children via the sibling chain.
// Returns true if the flow graph was modified anywhere in the nest.
1991 bool Compiler::optCanonicalizeLoopNest(unsigned char loopInd)
1993 bool modified = false;
1995 // Is the top of the current loop not in any nested loop?
1996 if (optLoopTable[loopInd].lpTop->bbNatLoopNum != loopInd)
1998 if (optCanonicalizeLoop(loopInd))
2004 for (unsigned char child = optLoopTable[loopInd].lpChild;
2005 child != BasicBlock::NOT_IN_LOOP;
2006 child = optLoopTable[child].lpSibling)
2008 if (optCanonicalizeLoopNest(child))
// optCanonicalizeLoop: give loop 'loopInd' a "top" block that belongs to it
// alone (not to a more deeply nested loop), by inserting a new empty block
// "newT" before the current top and redirecting the loop's bottom -- and any
// non-loop predecessors of the old top -- to it. Presumably returns true when
// the flow graph was modified (cf. optCanonicalizeLoopNest) -- the return
// statements are not visible in this view.
2017 bool Compiler::optCanonicalizeLoop(unsigned char loopInd)
2019 // Is the top uniquely part of the current loop?
2020 BasicBlock* t = optLoopTable[loopInd].lpTop;
2022 if (t->bbNatLoopNum == loopInd)
2025 JITDUMP("in optCanonicalizeLoop: L%02u has top BB%02u (bottom BB%02u) with natural loop number L%02u: need to canonicalize\n",
2026 loopInd, t->bbNum, optLoopTable[loopInd].lpBottom->bbNum, t->bbNatLoopNum);
2028 // Otherwise, the top of this loop is also part of a nested loop.
2030 // Insert a new unique top for this loop. We must be careful to put this new
2031 // block in the correct EH region. Note that f->bbPrev might be in a different
2032 // EH region. For example:
2040 // In this case, first->bbPrev is BB07, which is in a different 'try' region.
2041 // On the other hand, the first block of multiple loops might be the first
2042 // block of a 'try' region that is completely contained in the multiple loops.
2047 // BB10 BBJ_ALWAYS => BB08
2049 // BB12 BBJ_ALWAYS => BB08
2051 // Here, we have two loops, both with BB08 as the "first" block. Block BB08
2052 // is a single-block "try" region. Neither loop "bottom" block is in the same
2053 // "try" region as BB08. This is legal because you can jump to the first block
2054 // of a try region. With EH normalization, no two "try" regions will share
2055 // this block. In this case, we need to insert a new block for the outer loop
2056 // in the same EH region as the branch from the "bottom":
2061 // BB10 BBJ_ALWAYS => BB08
2063 // BB12 BBJ_ALWAYS => BB30
2065 // Another possibility is that the "first" block of the loop nest can be the first block
2066 // of a "try" region that also has other predecessors than those in the loop, or even in
2067 // the "try" region (since blocks can target the first block of a "try" region). For example:
2071 // BB10 BBJ_ALWAYS => BB08
2073 // BB12 BBJ_ALWAYS => BB08
2076 // BB20 BBJ_ALWAYS => BB08
2078 // BB25 BBJ_ALWAYS => BB08
2080 // Here, BB08 has 4 flow graph predecessors: BB10, BB12, BB20, BB25. These are all potential loop
2081 // bottoms, for four possible nested loops. However, we require all the loop bottoms to be in the
2082 // same EH region. For loops BB08..BB10 and BB08..BB12, we need to add a new "top" block within
2083 // the try region, immediately before BB08. The bottom of the loop BB08..BB10 loop will target the
2084 // old BB08, and the bottom of the BB08..BB12 loop will target the new loop header. The other branches
2085 // (BB20, BB25) must target the new loop header, both for correctness, and to avoid the illegal
2086 // situation of branching to a non-first block of a 'try' region.
2088 // We can also have a loop nest where the "first" block is outside of a "try" region
2089 // and the back edges are inside a "try" region, for example:
2093 // BB09 try { BBJ_COND => BB02
2095 // BB15 BBJ_COND => BB02
2097 // BB21 } // end of "try"
2099 // In this case, both loop back edges were formed by "leave" instructions that were
2100 // imported into branches that were later made conditional. In this case, we don't
2101 // want to copy the EH region of the back edge, since that would create a block
2102 // outside of and disjoint with the "try" region of the back edge. However, to
2103 // simplify things, we disqualify this type of loop, so we should never see this here.
2105 BasicBlock* h = optLoopTable[loopInd].lpHead;
2106 BasicBlock* f = optLoopTable[loopInd].lpFirst;
2107 BasicBlock* b = optLoopTable[loopInd].lpBottom;
2109 // The loop must be entirely contained within a single handler region.
2110 assert(BasicBlock::sameHndRegion(f, b));
2112 // If the bottom block is in the same "try" region, then we extend the EH
2113 // region. Otherwise, we add the new block outside the "try" region.
2114 bool extendRegion = BasicBlock::sameTryRegion(f, b);
2115 BasicBlock* newT = fgNewBBbefore(BBJ_NONE, f, extendRegion);
2118 // We need to set the EH region manually. Set it to be the same
2119 // as the bottom block.
2120 newT->copyEHRegion(b);
// The new top is reachable from exactly the same blocks as the old top.
2123 BlockSetOps::Assign(this, newT->bbReach, t->bbReach);
2125 // Redirect the "bottom" of the current loop to "newT".
2126 BlockToBlockMap* blockMap = new (getAllocatorLoopHoist()) BlockToBlockMap(getAllocatorLoopHoist());
2127 blockMap->Set(t, newT);
2128 optRedirectBlock(b, blockMap);
2130 // Redirect non-loop preds of "t" to also go to "newT". Inner loops that also branch to "t" should continue
2131 // to do so. However, there may be other predecessors from outside the loop nest that need to be updated
2132 // to point to "newT". This normally wouldn't happen, since they too would be part of the loop nest. However,
2133 // they might have been prevented from participating in the loop nest due to different EH nesting, or some
2136 // Note that optRedirectBlock doesn't update the predecessors list. So, if the same 't' block is processed
2137 // multiple times while canonicalizing multiple loop nests, we'll attempt to redirect a predecessor multiple times.
2138 // This is ok, because after the first redirection, the topPredBlock branch target will no longer match the source
2139 // edge of the blockMap, so nothing will happen.
2140 for (flowList* topPred = t->bbPreds; topPred != nullptr; topPred = topPred->flNext)
2142 BasicBlock* topPredBlock = topPred->flBlock;
2144 // Skip if topPredBlock is in the loop.
2145 // Note that this uses block number to detect membership in the loop. We are adding blocks during canonicalization,
2146 // and those block numbers will be new, and larger than previous blocks. However, we work outside-in, so we
2147 // shouldn't encounter the new blocks at the loop boundaries, or in the predecessor lists.
2148 if (t->bbNum <= topPredBlock->bbNum && topPredBlock->bbNum <= b->bbNum)
2150 JITDUMP("in optCanonicalizeLoop: 'top' predecessor BB%02u is in the range of L%02u (BB%02u..BB%02u); not redirecting its bottom edge\n",
2151 topPredBlock->bbNum, loopInd, t->bbNum, b->bbNum);
2155 JITDUMP("in optCanonicalizeLoop: redirect top predecessor BB%02u to BB%02u\n", topPredBlock->bbNum, newT->bbNum);
2156 optRedirectBlock(topPredBlock, blockMap);
2159 assert(newT->bbNext == f);
// Make newT an explicit (empty, GT_NOP-only) jump to the old top.
2162 newT->bbJumpKind = BBJ_ALWAYS;
2163 newT->bbJumpDest = t;
2164 newT->bbTreeList = nullptr;
2165 fgInsertStmtAtEnd(newT, fgNewStmtFromTree(gtNewOperNode(GT_NOP, TYP_VOID, nullptr)));
2168 // If it had been a do-while loop (top == entry), update entry, as well.
2169 BasicBlock* origE = optLoopTable[loopInd].lpEntry;
2170 if (optLoopTable[loopInd].lpTop == origE)
2171 optLoopTable[loopInd].lpEntry = newT;
2172 optLoopTable[loopInd].lpTop = newT;
2173 optLoopTable[loopInd].lpFirst = newT;
2175 newT->bbNatLoopNum = loopInd;
2177 JITDUMP("in optCanonicalizeLoop: made new block BB%02u [%p] the new unique top of loop %d.\n", newT->bbNum, dspPtr(newT), loopInd);
2179 // Make sure the head block still goes to the entry...
2180 if (h->bbJumpKind == BBJ_NONE &&
2181 h->bbNext != optLoopTable[loopInd].lpEntry)
2183 h->bbJumpKind = BBJ_ALWAYS;
2184 h->bbJumpDest = optLoopTable[loopInd].lpEntry;
2186 else if (h->bbJumpKind == BBJ_COND &&
2187 h->bbNext == newT &&
2188 newT != optLoopTable[loopInd].lpEntry)
// Insert an intermediate always-jump block "h2" so head still reaches entry.
2190 BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, h, /*extendRegion*/true);
2191 optLoopTable[loopInd].lpHead = h2;
2192 h2->bbJumpDest = optLoopTable[loopInd].lpEntry;
2193 h2->bbTreeList = nullptr;
2194 fgInsertStmtAtEnd(h2, fgNewStmtFromTree(gtNewOperNode(GT_NOP, TYP_VOID, nullptr)));
2197 // If any loops nested in "loopInd" have the same head and entry as "loopInd",
2198 // it must be the case that they were do-while's (since "h" fell through to the entry).
2199 // The new node "newT" becomes the head of such loops.
2200 for (unsigned char childLoop = optLoopTable[loopInd].lpChild;
2201 childLoop != BasicBlock::NOT_IN_LOOP;
2202 childLoop = optLoopTable[childLoop].lpSibling)
2204 if ( optLoopTable[childLoop].lpEntry == origE
2205 && optLoopTable[childLoop].lpHead == h
2206 && newT->bbJumpKind == BBJ_NONE
2207 && newT->bbNext == origE)
2209 optUpdateLoopHead(childLoop, h, newT);
// optLoopContains: returns true iff loop 'l1' contains loop 'l2' (a loop
// contains itself). Walks l2's parent chain; BasicBlock::NOT_IN_LOOP
// terminates the chain ("no enclosing loop").
2215 bool Compiler::optLoopContains(unsigned l1, unsigned l2)
2217 assert(l1 != BasicBlock::NOT_IN_LOOP);
2218 if (l1 == l2) return true;
2219 else if (l2 == BasicBlock::NOT_IN_LOOP) return false;
2220 else return optLoopContains(l1, optLoopTable[l2].lpParent);
// optUpdateLoopHead: change the head of loop 'loopInd' from 'from' to 'to',
// and recursively do the same for every child loop that shared the old head.
2223 void Compiler::optUpdateLoopHead(unsigned loopInd, BasicBlock* from, BasicBlock* to)
2225 assert(optLoopTable[loopInd].lpHead == from); // caller must pass the current head
2226 optLoopTable[loopInd].lpHead = to;
2227 for (unsigned char childLoop = optLoopTable[loopInd].lpChild;
2228 childLoop != BasicBlock::NOT_IN_LOOP;
2229 childLoop = optLoopTable[childLoop].lpSibling)
2231 if (optLoopTable[childLoop].lpHead == from)
2232 optUpdateLoopHead(childLoop, from, to);
2236 /*****************************************************************************
2237 * If the "i += const" will cause an overflow exception for the small types.
// jitIterSmallOverflow: returns true when the iterator value at loop exit
// (iterAtExit) exceeds the maximum of the small integer type 'incrType',
// i.e. the "i += const" increment would overflow that type. For
// TYP_INT/TYP_UINT this returns false -- 32-bit overflow is detected
// separately by the caller (see the TYP_UINT comment below).
2240 bool jitIterSmallOverflow(int iterAtExit, var_types incrType)
2246 case TYP_BYTE: type_MAX = SCHAR_MAX; break;
2247 case TYP_UBYTE: type_MAX = UCHAR_MAX; break;
2248 case TYP_SHORT: type_MAX = SHRT_MAX; break;
2249 case TYP_CHAR: type_MAX = USHRT_MAX; break;
2251 case TYP_UINT: // Detected by checking for 32bit ....
2252 case TYP_INT: return false; // ... overflow same as done for TYP_INT
2254 default: NO_WAY("Bad type");
2257 if (iterAtExit > type_MAX)
2263 /*****************************************************************************
2264 * If the "i -= const" will cause an underflow exception for the small types
// jitIterSmallUnderflow: returns true when the iterator value at loop exit
// (iterAtExit) falls below the minimum of the small integer type 'decrType',
// i.e. the "i -= const" decrement would underflow that type. For
// TYP_INT/TYP_UINT this returns false -- 32-bit underflow is detected
// separately by the caller.
2267 bool jitIterSmallUnderflow(int iterAtExit, var_types decrType)
2273 case TYP_BYTE: type_MIN = SCHAR_MIN; break;
2274 case TYP_SHORT: type_MIN = SHRT_MIN; break;
2275 case TYP_UBYTE: type_MIN = 0; break;
2276 case TYP_CHAR: type_MIN = 0; break;
2278 case TYP_UINT: // Detected by checking for 32bit ....
2279 case TYP_INT: return false; // ... underflow same as done for TYP_INT
2281 default: NO_WAY("Bad type");
2284 if (iterAtExit < type_MIN)
2290 /*****************************************************************************
2292 * Helper for unroll loops - Computes the number of repetitions
2293 * in a constant loop. If it cannot prove the number is constant returns false
// optComputeLoopRep: compute the constant trip count (*iterCount) of a loop
// with constant init/limit/increment. Returns false when the count cannot be
// proven constant (infinite loops, wrap-around arithmetic, small-type
// overflow/underflow at the exit value, unexpected operators).
2296 bool Compiler::optComputeLoopRep(int constInit,
2299 genTreeOps iterOper,
2300 var_types iterOperType,
2301 genTreeOps testOper,
2304 unsigned * iterCount)
2306 noway_assert(genActualType(iterOperType) == TYP_INT);
// All arithmetic below is done in 64 bits so 32-bit wrap can be observed.
2309 __int64 constLimitX;
2314 // Using this, we can just do a signed comparison with other 32 bit values.
2315 if (unsTest) constLimitX = (unsigned int)constLimit;
2316 else constLimitX = ( signed int)constLimit;
2318 switch (iterOperType)
2320 // For small types, the iteration operator will narrow these values if big
2322 #define INIT_ITER_BY_TYPE(type) constInitX = (type)constInit; iterInc = (type)iterInc;
2324 case TYP_BYTE: INIT_ITER_BY_TYPE( signed char ); break;
2325 case TYP_UBYTE: INIT_ITER_BY_TYPE(unsigned char ); break;
2326 case TYP_SHORT: INIT_ITER_BY_TYPE( signed short); break;
2327 case TYP_CHAR: INIT_ITER_BY_TYPE(unsigned short); break;
2329 // For the big types, 32 bit arithmetic is performed
2332 case TYP_UINT: if (unsTest) constInitX = (unsigned int)constInit;
2333 else constInitX = ( signed int)constInit;
2337 noway_assert(!"Bad type");
2341 /* If iterInc is zero we have an infinite loop */
2345 /* Set iterSign to +1 for positive iterInc and -1 for negative iterInc */
2346 iterSign = (iterInc > 0) ? +1 : -1;
2348 /* Initialize loopCount to zero */
2351 // If dupCond is true then the loop head contains a test which skips
2352 // this loop, if the constInit does not pass the loop test
2353 // Such a loop can execute zero times.
2354 // If dupCond is false then we have a true do-while loop which we
2355 // always execute the loop once before performing the loop test
2359 constInitX += iterInc;
2362 // bail if count is based on wrap-around math
2365 if (constLimitX < constInitX)
2368 else if (constLimitX > constInitX)
2371 /* Compute the number of repetitions */
// Each testOper case below: compute the trip count, then the iterator value
// at exit, and reject the loop on small-type or 32-bit overflow/underflow.
2375 __int64 iterAtExitX;
2378 /* something like "for (i=init; i == lim; i++)" doesn't make any sense */
2382 /* "for (i=init; i != lim; i+=const)" - this is tricky since it may
2383 * have a constant number of iterations or loop forever -
2384 * we have to compute (lim-init) mod iterInc to see if it is zero.
2385 * If mod iterInc is not zero then the limit test will miss and a wrap will occur
2386 * which is probably not what the end user wanted, but it is legal.
2391 /* Stepping by one, i.e. Mod with 1 is always zero */
2394 if (((constLimitX - constInitX) % iterInc) != 0)
2400 noway_assert(iterInc < 0);
2401 /* Stepping by -1, i.e. Mod with 1 is always zero */
2404 if (((constInitX - constLimitX) % (-iterInc)) != 0)
2418 if (constInitX != constLimitX)
2419 loopCount += (unsigned) ((constLimitX - constInitX - iterSign) / iterInc) + 1;
2421 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2424 iterAtExitX = (unsigned)iterAtExitX;
2426 // Check if iteration incr will cause overflow for small types
2427 if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
2430 // iterator with 32bit overflow. Bad for TYP_(U)INT
2431 if (iterAtExitX < constLimitX)
2434 *iterCount = loopCount;
2450 noway_assert(!"Unknown operator for loop iterator");
2464 if (constInitX < constLimitX)
2465 loopCount += (unsigned) ((constLimitX - constInitX - iterSign) / iterInc) + 1;
2467 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2470 iterAtExitX = (unsigned)iterAtExitX;
2472 // Check if iteration incr will cause overflow for small types
2473 if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
2476 // iterator with 32bit overflow. Bad for TYP_(U)INT
2477 if (iterAtExitX < constLimitX)
2480 *iterCount = loopCount;
2496 noway_assert(!"Unknown operator for loop iterator");
2510 if (constInitX <= constLimitX)
2511 loopCount += (unsigned) ((constLimitX - constInitX) / iterInc) + 1;
2513 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2516 iterAtExitX = (unsigned)iterAtExitX;
2518 // Check if iteration incr will cause overflow for small types
2519 if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
2522 // iterator with 32bit overflow. Bad for TYP_(U)INT
2523 if (iterAtExitX <= constLimitX)
2526 *iterCount = loopCount;
2542 noway_assert(!"Unknown operator for loop iterator");
2556 if (constInitX > constLimitX)
2557 loopCount += (unsigned) ((constLimitX - constInitX - iterSign) / iterInc) + 1;
2559 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2562 iterAtExitX = (unsigned)iterAtExitX;
2564 // Check if small types will underflow
2565 if (jitIterSmallUnderflow((int)iterAtExitX, iterOperType))
2568 // iterator with 32bit underflow. Bad for TYP_INT and unsigneds
2569 if (iterAtExitX > constLimitX)
2572 *iterCount = loopCount;
2588 noway_assert(!"Unknown operator for loop iterator");
2602 if (constInitX >= constLimitX)
2603 loopCount += (unsigned) ((constLimitX - constInitX) / iterInc) + 1;
2605 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2608 iterAtExitX = (unsigned)iterAtExitX;
2610 // Check if small types will underflow
2611 if (jitIterSmallUnderflow((int)iterAtExitX, iterOperType))
2614 // iterator with 32bit underflow. Bad for TYP_INT and unsigneds
2615 if (iterAtExitX >= constLimitX)
2618 *iterCount = loopCount;
2634 noway_assert(!"Unknown operator for loop iterator");
2639 noway_assert(!"Unknown operator for loop condition");
2646 /*****************************************************************************
2648 * Look for loop unrolling candidates and unroll them
2652 #pragma warning(push)
2653 #pragma warning(disable:21000) // Suppress PREFast warning about overly large function
2655 void Compiler::optUnrollLoops()
2657 if (compCodeOpt() == SMALL_CODE)
2660 if (optLoopCount == 0)
2664 if (JitConfig.JitNoUnroll())
2670 if (optCanCloneLoops())
2677 printf("*************** In optUnrollLoops()\n");
2679 /* Look for loop unrolling candidates */
2681 /* Double loop so that after unrolling an inner loop we set change to true
2682 * and we then go back over all of the loop candidates and try to unroll
2683 * the next outer loop, until we don't unroll any loops,
2684 * then change will be false and we are done.
2688 bool change = false;
2690 for (unsigned lnum = 0; lnum < optLoopCount; lnum++)
2694 BasicBlock * bottom;
2704 int lbeg; // initial value for iterator
2705 int llim; // limit value for iterator
2706 unsigned lvar; // iterator lclVar #
2707 int iterInc; // value to increment the iterator
2708 genTreeOps iterOper; // type of iterator increment (i.e. ASG_ADD, ASG_SUB, etc.)
2709 var_types iterOperType; // type result of the oper (for overflow instrs)
2710 genTreeOps testOper; // type of loop test (i.e. GT_LE, GT_GE, etc.)
2711 bool unsTest; // Is the comparison u/int
2713 unsigned totalIter; // total number of iterations in the constant loop
2714 unsigned loopCostSz; // Cost is size of one iteration
2715 unsigned loopFlags; // actual lpFlags
2716 unsigned requiredFlags; // required lpFlags
2718 GenTree * loopList; // new stmt list of the unrolled loop
2721 static const int ITER_LIMIT[COUNT_OPT_CODE + 1] =
2729 noway_assert(ITER_LIMIT[ SMALL_CODE] == 0);
2730 noway_assert(ITER_LIMIT[COUNT_OPT_CODE] == 0);
2732 unsigned iterLimit = (unsigned)ITER_LIMIT[compCodeOpt()];
2735 if (compStressCompile(STRESS_UNROLL_LOOPS, 50))
2739 static const int UNROLL_LIMIT_SZ[COUNT_OPT_CODE + 1] =
2747 noway_assert(UNROLL_LIMIT_SZ[ SMALL_CODE] == 0);
2748 noway_assert(UNROLL_LIMIT_SZ[COUNT_OPT_CODE] == 0);
2750 int unrollLimitSz = (unsigned)UNROLL_LIMIT_SZ[compCodeOpt()];
2753 if (compStressCompile(STRESS_UNROLL_LOOPS, 50))
2754 unrollLimitSz *= 10;
2757 loopFlags = optLoopTable[lnum].lpFlags;
2758 requiredFlags = LPFLG_DO_WHILE | LPFLG_ONE_EXIT | LPFLG_CONST;
2761 /* Ignore the loop if we don't have a do-while with a single exit
2762 that has a constant number of iterations */
2764 if ((loopFlags & requiredFlags) != requiredFlags)
2767 /* ignore if removed or marked as not unrollable */
2769 if (optLoopTable[lnum].lpFlags & (LPFLG_DONT_UNROLL | LPFLG_REMOVED))
2772 head = optLoopTable[lnum].lpHead; noway_assert(head);
2773 bottom = optLoopTable[lnum].lpBottom; noway_assert(bottom);
2775 /* The single exit must be at the bottom of the loop */
2776 noway_assert(optLoopTable[lnum].lpExit);
2777 if (optLoopTable[lnum].lpExit != bottom)
2780 /* Unrolling loops with jumps in them is not worth the headache
2781 * Later we might consider unrolling loops after un-switching */
2786 block = block->bbNext; noway_assert(block);
2788 if (block->bbJumpKind != BBJ_NONE)
2790 if (block != bottom)
2794 while (block != bottom);
2796 /* Get the loop data:
2800 - iterator increment
2801 - increment operation type (i.e. ASG_ADD, ASG_SUB, etc...)
2802 - loop test type (i.e. GT_GE, GT_LT, etc...)
2805 lbeg = optLoopTable[lnum].lpConstInit;
2806 llim = optLoopTable[lnum].lpConstLimit();
2807 testOper = optLoopTable[lnum].lpTestOper();
2809 lvar = optLoopTable[lnum].lpIterVar();
2810 iterInc = optLoopTable[lnum].lpIterConst();
2811 iterOper = optLoopTable[lnum].lpIterOper();
2813 iterOperType= optLoopTable[lnum].lpIterOperType();
2814 unsTest =(optLoopTable[lnum].lpTestTree->gtFlags & GTF_UNSIGNED) != 0;
2816 if (lvaTable[lvar].lvAddrExposed) // If the loop iteration variable is address-exposed then bail
2818 if (lvaTable[lvar].lvIsStructField) // If the loop iteration variable is a promoted field from a struct then bail
2821 /* Locate the pre-header and initialization and increment/test statements */
2823 phdr = head->bbTreeList; noway_assert(phdr);
2824 loop = bottom->bbTreeList; noway_assert(loop);
2826 init = head->lastStmt(); noway_assert(init && (init->gtNext == 0));
2827 test = bottom->lastStmt(); noway_assert(test && (test->gtNext == 0));
2828 incr = test->gtPrev; noway_assert(incr);
2830 if (init->gtFlags & GTF_STMT_CMPADD)
2832 /* Must be a duplicated loop condition */
2833 noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
2836 init = init->gtPrev; noway_assert(init);
2841 /* Find the number of iterations - the function returns false if not a constant number */
2843 if (!optComputeLoopRep(lbeg, llim,
2844 iterInc, iterOper, iterOperType,
2845 testOper, unsTest, dupCond,
2849 /* Forget it if there are too many repetitions or not a constant loop */
2851 if (totalIter > iterLimit)
2854 noway_assert(init->gtOper == GT_STMT); init = init->gtStmt.gtStmtExpr;
2855 noway_assert(test->gtOper == GT_STMT); test = test->gtStmt.gtStmtExpr;
2856 noway_assert(incr->gtOper == GT_STMT); incr = incr->gtStmt.gtStmtExpr;
2858 // Don't unroll loops we don't understand.
2859 if (incr->gtOper == GT_ASG)
2864 /* Make sure everything looks ok */
2866 (init->gtOper != GT_ASG) ||
2867 (init->gtOp.gtOp1->gtOper != GT_LCL_VAR) ||
2868 (init->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) ||
2869 (init->gtOp.gtOp2->gtOper != GT_CNS_INT) ||
2870 (init->gtOp.gtOp2->gtIntCon.gtIconVal != lbeg) ||
2872 !((incr->gtOper == GT_ASG_ADD) || (incr->gtOper == GT_ASG_SUB)) ||
2873 (incr->gtOp.gtOp1->gtOper != GT_LCL_VAR) ||
2874 (incr->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) ||
2875 (incr->gtOp.gtOp2->gtOper != GT_CNS_INT) ||
2876 (incr->gtOp.gtOp2->gtIntCon.gtIconVal != iterInc) ||
2878 (test->gtOper != GT_JTRUE) )
2880 noway_assert(!"Bad precondition in Compiler::optUnrollLoops()");
2884 /* heuristic - Estimated cost in code size of the unrolled loop */
2892 block = block->bbNext;
2894 /* Visit all the statements in the block */
2896 for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
2898 /* Get the expression and stop if end reached */
2900 GenTreePtr expr = stmt->gtStmtExpr;
2904 /* Calculate gtCostSz */
2905 gtSetStmtInfo(stmt);
2907 /* Update loopCostSz */
2908 loopCostSz += stmt->gtCostSz;
2911 while (block != bottom);
2913 /* Compute the estimated increase in code size for the unrolled loop */
2915 unsigned int fixedLoopCostSz; fixedLoopCostSz = 8;
2917 int unrollCostSz; unrollCostSz = (loopCostSz * totalIter) - (loopCostSz + fixedLoopCostSz);
2919 /* Don't unroll if too much code duplication would result. */
2921 if (unrollCostSz > unrollLimitSz)
2923 /* prevent this loop from being revisited */
2924 optLoopTable[lnum].lpFlags |= LPFLG_DONT_UNROLL;
2928 /* Looks like a good idea to unroll this loop, let's do it! */
2933 printf("\nUnrolling loop BB%02u", head->bbNext->bbNum);
2934 if (head->bbNext->bbNum != bottom->bbNum)
2935 printf("..BB%02u", bottom->bbNum);
2936 printf(" over V%02u from %u to %u", lvar, lbeg, llim);
2937 printf(" unrollCostSz = %d\n", unrollCostSz);
2942 /* Create the unrolled loop statement list */
2947 for (lval = lbeg; totalIter; totalIter--)
2956 block = block->bbNext; noway_assert(block);
2958 /* Visit all the statements in the block */
2960 for (stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
2962 /* Stop if we've reached the end of the loop */
2964 if (stmt->gtStmtExpr == incr)
2967 /* Clone/substitute the expression */
2969 expr = gtCloneExpr(stmt, 0, lvar, lval);
2971 // cloneExpr doesn't handle everything
2975 optLoopTable[lnum].lpFlags |= LPFLG_DONT_UNROLL;
2979 /* Append the expression to our list */
2982 loopLast->gtNext = expr;
2986 expr->gtPrev = loopLast;
2990 while (block != bottom);
2992 /* update the new value for the unrolled iterator */
3006 noway_assert(!"Unrolling not implemented for this loop iterator");
3010 noway_assert(!"Unknown operator for constant loop iterator");
3015 /* Finish the linked list */
3019 loopList->gtPrev = loopLast;
3020 loopLast->gtNext = 0;
3023 /* Replace the body with the unrolled one */
3029 block = block->bbNext; noway_assert(block);
3030 block->bbTreeList = 0;
3031 block->bbJumpKind = BBJ_NONE;
3032 block->bbFlags &= ~BBF_NEEDS_GCPOLL;
3034 while (block != bottom);
3036 bottom->bbJumpKind = BBJ_NONE;
3037 bottom->bbTreeList = loopList;
3038 bottom->bbFlags &= ~BBF_NEEDS_GCPOLL;
3039 bottom->modifyBBWeight(bottom->bbWeight / BB_LOOP_WEIGHT);
3043 fgMorphStmts(bottom, &dummy, &dummy, &dummy);
3045 /* Update bbRefs and bbPreds */
3046 /* Here head->bbNext is bottom !!! - Replace it */
3048 fgRemoveRefPred(head->bbNext, bottom);
3050 /* Now change the initialization statement in the HEAD to "lvar = lval;"
3051 * (the last value of the iterator in the loop)
3052 * and drop the jump condition since the unrolled loop will always execute */
3054 init->gtOp.gtOp2->gtIntCon.gtIconVal = lval;
3056 /* if the HEAD is a BBJ_COND drop the condition (and make HEAD a BBJ_NONE block) */
3058 if (head->bbJumpKind == BBJ_COND)
3060 phdr = head->bbTreeList; noway_assert(phdr);
3061 test = phdr->gtPrev;
3063 noway_assert(test && (test->gtNext == 0));
3064 noway_assert(test->gtOper == GT_STMT);
3065 noway_assert(test->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
3067 init = test->gtPrev; noway_assert(init && (init->gtNext == test));
3068 noway_assert(init->gtOper == GT_STMT);
3071 phdr->gtPrev = init;
3072 head->bbJumpKind = BBJ_NONE;
3073 head->bbFlags &= ~BBF_NEEDS_GCPOLL;
3075 /* Update bbRefs and bbPreds */
3077 fgRemoveRefPred(head->bbJumpDest, head);
3081 /* the loop must execute */
3082 noway_assert(head->bbJumpKind == BBJ_NONE);
3088 printf("Whole unrolled loop:\n");
3090 GenTreePtr s = loopList;
3094 noway_assert(s->gtOper == GT_STMT);
3105 /* Remember that something has changed */
3109 /* Make sure to update loop table */
3111 /* Use the LPFLG_REMOVED flag and update the bbLoopMask accordingly
3112 * (also make head and bottom NULL - to hit an assert or GPF) */
3114 optLoopTable[lnum].lpFlags |= LPFLG_REMOVED;
3115 optLoopTable[lnum].lpHead =
3116 optLoopTable[lnum].lpBottom = nullptr;
3126 fgDebugCheckBBlist();
3130 #pragma warning(pop)
3133 /*****************************************************************************
3135 * Return non-zero if there is a code path from 'topBB' to 'botBB' that will
3136 * not execute a method call.
// optReachWithoutCall: Determine whether some path from 'topBB' down to 'botBB'
// could execute without encountering a GC safe point (a block flagged with
// BBF_GC_SAFE_POINT). Returns false only when a safe point is provably on every
// path; otherwise returns a conservative true.
// NOTE(review): the second parameter ('botBB') and several statements (early
// returns, loop braces) fall on lines not visible in this dump — confirm
// against the full source before relying on the exact control flow.
3139 bool Compiler::optReachWithoutCall(BasicBlock *topBB,
3142 // TODO-Cleanup: Currently BBF_GC_SAFE_POINT is not set for helper calls,
3143 // as some helper calls are neither interruptible nor hijackable.
3144 // When we can determine this, then we can set BBF_GC_SAFE_POINT for
3145 // those helpers too.
// The caller guarantees a lexically forward range: topBB precedes botBB.
3147 noway_assert(topBB->bbNum <= botBB->bbNum);
3149 // We can always check topBB and botBB for any gc safe points and early out
3151 if ((topBB->bbFlags | botBB->bbFlags) & BBF_GC_SAFE_POINT)
3154 // Otherwise we will need to rely upon the dominator sets
3156 if (!fgDomsComputed)
3158 // return a conservative answer of true when we don't have the dominator sets
// Walk every block lexically from topBB toward botBB.
3162 BasicBlock *curBB = topBB;
3165 noway_assert(curBB);
3167 // If we added a loop pre-header block then we will
3168 // have a bbNum greater than fgLastBB, and we won't have
3169 // any dominator information about this block, so skip it.
3171 if (curBB->bbNum <= fgLastBB->bbNum)
3173 noway_assert(curBB->bbNum <= botBB->bbNum);
3175 // Does this block contain a gc safe point?
3177 if (curBB->bbFlags & BBF_GC_SAFE_POINT)
3179 // Will this block always execute on the way to botBB ?
3181 // Since we are checking every block in [topBB .. botBB] and we are using
3182 // a lexical definition of a loop
3183 // (all that we know is that botBB is a back-edge to topBB)
3184 // Thus while walking blocks in this range we may encounter some blocks
3185 // that are not really part of the loop, and so we need to perform
3186 // some additional checks:
3188 // We will check that the current 'curBB' is reachable from 'topBB'
3189 // and that it dominates the block containing the back-edge 'botBB'
3190 // When both of these are true then we know that the gcsafe point in 'curBB'
3191 // will be encountered in the loop and we can return false
3193 if (fgDominate(curBB, botBB) && fgReachable(topBB, curBB))
3198 // If we've reached the destination block, then we're done
3205 curBB = curBB->bbNext;
3208 // If we didn't find any blocks that contained a gc safe point and
3209 // also met the fgDominate and fgReachable criteria then we must return true
3214 /*****************************************************************************
3216 * Find the loop termination test at the bottom of the loop
// optFindLoopTermTest: Return the last statement of block 'bottom', which is
// expected to hold the loop's termination test.
// The candidate is fetched via the first statement's gtPrev link (presumably
// the statement list's back-pointer to the tail — TODO confirm convention);
// the linear gtNext walk below then asserts that this really is the tail.
3220 GenTreePtr optFindLoopTermTest(BasicBlock *bottom)
3222 GenTreePtr testt = bottom->bbTreeList;
// The block must have at least one statement, wrapped in a GT_STMT node.
3224 assert(testt && testt->gtOper == GT_STMT);
3226 GenTreePtr result = testt->gtPrev;
// Debug-style cross-check: walk forward to the actual last statement.
3229 while (testt->gtNext)
3230 testt = testt->gtNext;
3232 assert(testt == result);
3238 /*****************************************************************************
3239 * Optimize "jmp C; do{} C:while(cond);" loops to "if (cond){ do{}while(cond); }"
// fgOptWhileLoop: Convert a "while"-shaped loop into a "do { } while" loop.
// The expected shape is:
//     block : BBJ_ALWAYS, forward jump to bTest
//     ...loop body...
//     bTest : BBJ_COND (jtrue(cond)), backward jump to block->bbNext
// If the condition is cheap enough to duplicate, the reversed condition is
// cloned onto the end of 'block' (made BBJ_COND), so the loop body becomes a
// bottom-tested do/while — a prerequisite for our loop hoisting logic.
// Profile weights (when valid) and an estimated duplication cost gate the
// transformation. Edge weights and pred lists are fixed up afterwards.
3242 void Compiler::fgOptWhileLoop(BasicBlock * block)
3244 noway_assert(!opts.MinOpts() && !opts.compDbgCode);
3245 noway_assert(compCodeOpt() != SMALL_CODE);
3248 Optimize while loops into do { } while loop
3249 Our loop hoisting logic requires do { } while loops.
3250 Specifically, we're looking for the following case:
3261 If we find this, and the condition is simple enough, we change
3262 the loop to the following:
3267 // else fall-through
3278 /* Does the BB end with an unconditional jump? */
3280 if (block->bbJumpKind != BBJ_ALWAYS ||
3281 (block->bbFlags & BBF_KEEP_BBJ_ALWAYS)) // It can't be one of the ones we use for our exception magic
3284 // It has to be a forward jump
3285 // TODO-CQ: Check if we can also optimize the backwards jump as well.
3287 if (fgIsForwardBranch(block) == false)
3290 // Get hold of the jump target
3291 BasicBlock * bTest = block->bbJumpDest;
3293 // Does the block consist of 'jtrue(cond) block' ?
3294 if (bTest->bbJumpKind != BBJ_COND)
3297 // bTest must be a backwards jump to block->bbNext
3298 if (bTest->bbJumpDest != block->bbNext)
3301 // Since test is a BBJ_COND it will have a bbNext
3302 noway_assert(bTest->bbNext);
3304 // 'block' must be in the same try region as the condition, since we're going to insert
3305 // a duplicated condition in 'block', and the condition might include exception throwing code.
3306 if (!BasicBlock::sameTryRegion(block, bTest))
3309 // We're going to change 'block' to branch to bTest->bbNext, so that also better be in the
3310 // same try region (or no try region) to avoid generating illegal flow.
3311 BasicBlock* bTestNext = bTest->bbNext;
3312 if (bTestNext->hasTryIndex() && !BasicBlock::sameTryRegion(block, bTestNext))
3315 GenTreePtr condStmt = optFindLoopTermTest(bTest);
3317 // bTest must contain only a jtrue with no other stmts; we will only clone
3318 // the conditional, so any other statements will not get cloned
3319 // TODO-CQ: consider cloning the whole bTest block and inserting it after block.
3321 if (bTest->bbTreeList != condStmt)
3324 /* Get to the condition node from the statement tree */
3326 noway_assert(condStmt->gtOper == GT_STMT);
3328 GenTreePtr condTree = condStmt->gtStmt.gtStmtExpr;
3329 noway_assert(condTree->gtOper == GT_JTRUE);
3331 condTree = condTree->gtOp.gtOp1;
3333 // The condTree has to be a RelOp comparison
3334 // TODO-CQ: Check if we can also optimize the backwards jump as well.
3336 if (condTree->OperIsCompare() == false)
3339 /* We call gtPrepareCost to measure the cost of duplicating this tree */
3341 gtPrepareCost(condTree);
3342 unsigned estDupCostSz = condTree->gtCostSz;
// Default iteration estimate used when profile data is absent or untrusted.
3344 double loopIterations = (double) BB_LOOP_WEIGHT;
3346 bool allProfileWeightsAreValid = false;
3347 BasicBlock::weight_t weightBlock = block->bbWeight;
3348 BasicBlock::weight_t weightTest = bTest->bbWeight;
3349 BasicBlock::weight_t weightNext = block->bbNext->bbWeight;
3351 // If we have profile data then we calculate the number of times
3352 // the loop will iterate into loopIterations
3353 if (fgIsUsingProfileWeights())
3355 // Only rely upon the profile weight when all three of these blocks
3356 // have good profile weights
3357 if ((block->bbFlags & BBF_PROF_WEIGHT) &&
3358 (bTest->bbFlags & BBF_PROF_WEIGHT) &&
3359 (block->bbNext->bbFlags & BBF_PROF_WEIGHT))
3361 allProfileWeightsAreValid = true;
3363 // If this while loop never iterates then don't bother transforming
3364 if (weightNext == 0)
3367 // with (weightNext > 0) we should also have (weightTest >= weightBlock)
3368 // if the profile weights are all valid.
3370 // weightNext is the number of times this loop iterates
3371 // weightBlock is the number of times that we enter the while loop
3372 // loopIterations is the average number of times that this loop iterates
3374 if (weightTest >= weightBlock)
3376 loopIterations = (double) block->bbNext->bbWeight / (double) block->bbWeight;
// Size budget (in gtCostSz units) we are willing to spend duplicating the test.
3381 unsigned maxDupCostSz = 32;
3383 // optFastCodeOrBlendedLoop(bTest->bbWeight) does not work here as we have not
3384 // set loop weights yet
3385 if ((compCodeOpt() == FAST_CODE) ||
3386 compStressCompile(STRESS_DO_WHILE_LOOPS, 30))
3391 // If this loop iterates a lot then raise the maxDupCost
3392 if (loopIterations >= 12.0)
3394 if (loopIterations >= 96.0)
3397 // If the loop condition has a shared static helper, we really want this loop converted
3398 // as not converting the loop will disable loop hoisting, meaning the shared helper will
3399 // be executed on every loop iteration.
3400 int countOfHelpers = 0;
3401 fgWalkTreePre(&condTree, CountSharedStaticHelper, &countOfHelpers);
3403 if (countOfHelpers > 0 && compCodeOpt() != SMALL_CODE)
3405 maxDupCostSz += 24 * min(countOfHelpers, (int) (loopIterations + 1.5));
3408 // If the compare has too high cost then we don't want to dup
3410 bool costIsTooHigh = (estDupCostSz > maxDupCostSz);
3415 printf("\nDuplication of loop condition [%06u] is %s, because the cost of duplication (%i) is %s than %i,"
3416 "\n loopIterations = %7.3f, countOfHelpers = %d, validProfileWeights = %s\n",
3418 costIsTooHigh ? "not done" : "performed",
3420 costIsTooHigh ? "greater" : "less or equal",
3422 loopIterations, countOfHelpers,
3423 allProfileWeightsAreValid ? "true" : "false");
3432 /* Looks good - duplicate the condition test */
// GTF_RELOP_ZTT marks the relop for the zero-trip-test pattern; the clone
// must carry the same flag (asserted below).
3434 condTree->gtFlags |= GTF_RELOP_ZTT;
3436 condTree = gtCloneExpr(condTree);
// Reverse the sense of the cloned condition: 'block' now jumps AROUND the
// loop body when the original loop condition is false.
3437 gtReverseCond(condTree);
3439 // Make sure clone expr copied the flag
3440 assert(condTree->gtFlags & GTF_RELOP_ZTT);
3442 condTree = gtNewOperNode(GT_JTRUE, TYP_VOID, condTree);
3444 /* Create a statement entry out of the condition and
3445 append the condition test at the end of 'block' */
3447 GenTreePtr copyOfCondStmt = fgInsertStmtAtEnd(block, condTree);
// Tag the duplicated statement so later phases can recognize it as an
// inserted (compiler-added) compare.
3449 copyOfCondStmt->gtFlags |= GTF_STMT_CMPADD;
3451 #ifdef DEBUGGING_SUPPORT
3452 if (opts.compDbgInfo)
3453 copyOfCondStmt->gtStmt.gtStmtILoffsx = condStmt->gtStmt.gtStmtILoffsx;
3456 // If we have profile data for all blocks and we know that we are cloning the
3457 // bTest block into block and thus changing the control flow from block so
3458 // that it no longer goes directly to bTest anymore, we have to adjust the
3459 // weight of bTest by subtracting out the weight of block.
3461 if (allProfileWeightsAreValid)
3464 // Some additional sanity checks before adjusting the weight of bTest
3466 if ((weightNext > 0) && (weightTest >= weightBlock) && (weightTest != BB_MAX_WEIGHT))
3468 // Get the two edges that flow out of bTest
3469 flowList * edgeToNext = fgGetPredForBlock(bTest->bbNext, bTest);
3470 flowList * edgeToJump = fgGetPredForBlock(bTest->bbJumpDest, bTest);
3472 // Calculate the new weight for block bTest
3474 BasicBlock::weight_t newWeightTest = (weightTest > weightBlock)
3475 ? (weightTest - weightBlock)
3477 bTest->bbWeight = newWeightTest;
3479 if (newWeightTest == BB_ZERO_WEIGHT)
3481 bTest->bbFlags |= BBF_RUN_RARELY;
3482 // All out edge weights are set to zero
3483 edgeToNext->flEdgeWeightMin = BB_ZERO_WEIGHT;
3484 edgeToNext->flEdgeWeightMax = BB_ZERO_WEIGHT;
3485 edgeToJump->flEdgeWeightMin = BB_ZERO_WEIGHT;
3486 edgeToJump->flEdgeWeightMax = BB_ZERO_WEIGHT;
3490 // Update our edge weights
3491 edgeToNext->flEdgeWeightMin = BB_ZERO_WEIGHT;
3492 edgeToNext->flEdgeWeightMax = min(edgeToNext->flEdgeWeightMax, newWeightTest);
3493 edgeToJump->flEdgeWeightMin = BB_ZERO_WEIGHT;
3494 edgeToJump->flEdgeWeightMax = min(edgeToJump->flEdgeWeightMax, newWeightTest);
3499 /* Change the block to end with a conditional jump */
3501 block->bbJumpKind = BBJ_COND;
3502 block->bbJumpDest = bTest->bbNext;
3504 /* Mark the jump dest block as being a jump target */
3505 block->bbJumpDest->bbFlags |= BBF_JMP_TARGET|BBF_HAS_LABEL;
3507 /* Update bbRefs and bbPreds for 'block->bbNext' 'bTest' and 'bTest->bbNext' */
3509 fgAddRefPred(block->bbNext, block);
3511 fgRemoveRefPred(bTest, block);
3512 fgAddRefPred(bTest->bbNext, block);
3517 printf("\nDuplicating loop condition in BB%02u for loop (BB%02u - BB%02u)",
3518 block->bbNum, block->bbNext->bbNum, bTest->bbNum);
3519 printf("\nEstimated code size expansion is %d\n ", estDupCostSz);
3521 gtDispTree(copyOfCondStmt);
3527 /*****************************************************************************
3529 * Optimize the BasicBlock layout of the method
// optOptimizeLayout: Optimize the BasicBlock layout of the method.
// Validates flowgraph invariants, runs fgOptWhileLoop over every block
// (except under SMALL_CODE), then recomputes edge weights and updates the
// flow graph if anything changed. Requires optimizations to be enabled
// (not MinOpts / debuggable code).
3532 void Compiler::optOptimizeLayout()
3534 noway_assert(!opts.MinOpts() && !opts.compDbgCode);
3539 printf("*************** In optOptimizeLayout()\n");
3543 /* Check that the flowgraph data (bbNum, bbRefs, bbPreds) is up-to-date */
3544 fgDebugCheckBBlist();
// Loop inversion below is the only expected source of flowgraph changes,
// so fgModified must start out clear.
3547 noway_assert(fgModified == false);
3549 for (BasicBlock *block = fgFirstBB; block; block = block->bbNext)
3551 /* Make sure the appropriate fields are initialized */
3553 if (block->bbWeight == BB_ZERO_WEIGHT)
3555 /* Zero weighted block can't have a LOOP_HEAD flag */
3556 noway_assert(block->isLoopHead() == false);
// Loop numbering has not been assigned yet at this point.
3560 assert(block->bbLoopNum == 0);
3562 if (compCodeOpt() != SMALL_CODE)
3564 /* Optimize "while(cond){}" loops to "cond; do{}while(cond);" */
3566 fgOptWhileLoop(block);
3572 // Recompute the edge weight if we have modified the flow graph in fgOptWhileLoop
3573 fgComputeEdgeWeights();
3576 fgUpdateFlowGraph(true);
3578 fgUpdateFlowGraph();
3581 /*****************************************************************************
3583 * Perform loop inversion, find and classify natural loops
// optOptimizeLoops: Find and classify natural loops and mark their blocks.
// Sets block weights, runs natural-loop recognition (optFindNaturalLoops),
// then scans the flowgraph for back-edges to loop heads, numbering each loop
// header found and weighting the blocks in the loop body via
// optMarkLoopBlocks. Terminology used below:
//   top    - first block of the loop (target of the backward edge)
//   bottom - last block of the loop (source of the backward edge)
3586 void Compiler::optOptimizeLoops()
3588 noway_assert(!opts.MinOpts() && !opts.compDbgCode);
3592 printf("*************** In optOptimizeLoops()\n");
3595 optSetBlockWeights();
3597 /* Were there any loops in the flow graph? */
3601 /* now that we have dominator information we can find loops */
3603 optFindNaturalLoops();
3605 unsigned loopNum = 0;
3607 /* Iterate over the flow graph, marking all loops */
3609 /* We will use the following terminology:
3610 * top - the first basic block in the loop (i.e. the head of the backward edge)
3611 * bottom - the last block in the loop (i.e. the block from which we jump to the top)
3612 * lastBottom - used when we have multiple back-edges to the same top
3619 for (top = fgFirstBB; top; top = top->bbNext)
3621 BasicBlock * foundBottom = NULL;
// Inspect every predecessor edge of 'top' looking for back-edges.
3623 for (pred = top->bbPreds; pred; pred = pred->flNext)
3625 /* Is this a loop candidate? - We look for "back edges" */
3627 BasicBlock * bottom = pred->flBlock;
3629 /* is this a backward edge? (from BOTTOM to TOP) */
3631 if (top->bbNum > bottom->bbNum)
3634 /* 'top' also must have the BBF_LOOP_HEAD flag set */
3636 if (top->isLoopHead() == false)
3639 /* We only consider back-edges that are BBJ_COND or BBJ_ALWAYS for loops */
3641 if ((bottom->bbJumpKind != BBJ_COND) &&
3642 (bottom->bbJumpKind != BBJ_ALWAYS) )
3645 /* the top block must be able to reach the bottom block */
3646 if (!fgReachable(top, bottom))
3649 /* Found a new loop, record the longest backedge in foundBottom */
3651 if ((foundBottom == NULL) || (bottom->bbNum > foundBottom->bbNum))
3653 foundBottom = bottom;
3661 /* Mark the loop header as such */
3662 assert(FitsIn<unsigned char>(loopNum));
3663 top->bbLoopNum = (unsigned char) loopNum;
3666 /* Mark all blocks between 'top' and 'bottom' */
3668 optMarkLoopBlocks(top, foundBottom, false);
3671 // We track at most 255 loops
3675 totalUnnatLoopOverflows++;
3682 totalUnnatLoopCount += loopNum;
3690 printf("\nFound a total of %d loops.", loopNum);
3691 printf("\nAfter loop weight marking:\n");
3692 fgDispBasicBlocks();
// Record that loop marking has run (checked/used by later phases).
3697 optLoopsMarked = true;
3701 //------------------------------------------------------------------------
3702 // optDeriveLoopCloningConditions: Derive loop cloning conditions.
3705 // loopNum - the current loop index for which conditions are derived.
3706 // context - data structure where all loop cloning info is kept.
3709 // "false" if conditions cannot be obtained. "true" otherwise.
3710 // The cloning conditions are updated in the "conditions"[loopNum] field
3711 // of the "context" parameter.
3714 // Inspect the loop cloning optimization candidates and populate the conditions necessary
3715 // for each optimization candidate. Checks if the loop stride is "> 0" if the loop
3716 // condition is "less than". If the initializer is "var" init then adds condition
3717 // "var >= 0", and if the loop is var limit then, "var >= 0" and "var <= a.len"
3718 // are added to "context". These conditions are checked in the pre-header block
3719 // and the cloning choice is made.
3722 // Callers should assume AND operation is used i.e., if all conditions are
3723 // true, then take the fast path.
// optDeriveLoopCloningConditions: Derive the runtime conditions under which
// the fast (bounds-check-free) copy of loop 'loopNum' may be taken.
// Validates stride/init for GT_LT-tested loops, derives an identifier for the
// loop limit (constant, variable, or array length), pushes "var >= 0" style
// guards and per-candidate "limit <= arr.len" conditions into 'context'.
// Returns false when usable conditions cannot be derived. Conditions are
// combined with AND: all must hold to take the fast path.
3725 bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext* context)
3727 JITDUMP("------------------------------------------------------------\n");
3728 JITDUMP("Deriving cloning conditions for L%02u\n", loopNum);
3730 LoopDsc* loop = &optLoopTable[loopNum];
3731 ExpandArrayStack<LcOptInfo*>* optInfos = context->GetLoopOptInfo(loopNum);
// Only "i < limit" style loops are handled here.
3733 if (loop->lpTestOper() == GT_LT)
3735 // Stride conditions
3736 if (loop->lpIterConst() <= 0)
3738 JITDUMP("> Stride %d is invalid\n", loop->lpIterConst());
3743 if (loop->lpFlags & LPFLG_CONST_INIT)
3745 // Only allowing const init at this time.
3746 if (loop->lpConstInit < 0)
3748 JITDUMP("> Init %d is invalid\n", loop->lpConstInit);
3752 else if (loop->lpFlags & LPFLG_VAR_INIT)
// Variable init: require "initVar >= 0" at runtime.
3755 LC_Condition geZero(GT_GE,
3756 LC_Expr(LC_Ident(loop->lpVarInit, LC_Ident::Var)),
3757 LC_Expr(LC_Ident(0, LC_Ident::Const)));
3758 context->EnsureConditions(loopNum)->Push(geZero);
3762 JITDUMP("> Not variable init\n");
// Derive 'ident' describing the loop limit.
3768 if (loop->lpFlags & LPFLG_CONST_LIMIT)
3770 int limit = loop->lpConstLimit();
3773 JITDUMP("> limit %d is invalid\n", limit);
3776 ident = LC_Ident(limit, LC_Ident::Const);
3778 else if (loop->lpFlags & LPFLG_VAR_LIMIT)
3780 unsigned limitLcl = loop->lpVarLimit();
3781 ident = LC_Ident(limitLcl, LC_Ident::Var);
// Variable limit: also require "limitVar >= 0" at runtime.
3783 LC_Condition geZero(GT_GE,
3785 LC_Expr(LC_Ident(0, LC_Ident::Const)));
3787 context->EnsureConditions(loopNum)->Push(geZero);
3789 else if (loop->lpFlags & LPFLG_ARRLEN_LIMIT)
3791 ArrIndex* index = new (getAllocator()) ArrIndex(getAllocator());
3792 if (!loop->lpArrLenLimit(this, index))
3794 JITDUMP("> ArrLen not matching");
3797 ident = LC_Ident(LC_Array(LC_Array::Jagged, index, LC_Array::ArrLen));
3799 // Ensure that this array must be dereference-able, before executing the actual condition.
3800 LC_Array array(LC_Array::Jagged, index, LC_Array::None);
3801 context->EnsureDerefs(loopNum)->Push(array);
3805 JITDUMP("> Undetected limit\n");
// For each optimization candidate, emit "limit <= arrLen" plus deref guards.
3809 for (unsigned i = 0; i < optInfos->Size(); ++i)
3811 LcOptInfo* optInfo = optInfos->GetRef(i);
3812 switch (optInfo->GetOptType())
3814 case LcOptInfo::LcJaggedArray:
3817 LcJaggedArrayOptInfo* arrIndexInfo = optInfo->AsLcJaggedArrayOptInfo();
3818 LC_Array arrLen(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::ArrLen);
3819 LC_Ident arrLenIdent = LC_Ident(arrLen);
3821 LC_Condition cond(GT_LE, LC_Expr(ident), LC_Expr(arrLenIdent));
3822 context->EnsureConditions(loopNum)->Push(cond);
3824 // Ensure that this array must be dereference-able, before executing the actual condition.
3825 LC_Array array(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::None);
3826 context->EnsureDerefs(loopNum)->Push(array);
3829 case LcOptInfo::LcMdArray:
3831 // limit <= mdArrLen
3832 LcMdArrayOptInfo* mdArrInfo = optInfo->AsLcMdArrayOptInfo();
3833 LC_Condition cond(GT_LE,
3837 LC_Array::MdArray, mdArrInfo->GetArrIndexForDim(getAllocator()), mdArrInfo->dim, LC_Array::None)
3839 context->EnsureConditions(loopNum)->Push(cond);
3844 JITDUMP("Unknown opt\n");
3848 JITDUMP("Conditions: (");
3849 DBEXEC(verbose, context->PrintConditions(loopNum));
3856 //------------------------------------------------------------------------------------
3857 // optComputeDerefConditions: Derive loop cloning conditions for dereferencing arrays.
3860 // loopNum - the current loop index for which conditions are derived.
3861 // context - data structure where all loop cloning info is kept.
3864 // "false" if conditions cannot be obtained. "true" otherwise.
3865 // The deref conditions are updated in the "derefConditions"[loopNum] field
3866 // of the "context" parameter.
3868 // Definition of Deref Conditions:
3869 // To be able to check for the loop cloning condition that (limitVar <= a.len)
3870 // we should first be able to dereference "a". i.e., "a" is non-null.
3876 // for (k in 0..n) // Inner most loop is being cloned. Cloning needs to check if
3877 // // (n <= a[i][j].len) and other safer conditions to take the fast path
3880 // Now, we want to deref a[i][j] to invoke length operator on it to perform the cloning fast path check.
3881 // This involves deref of (a), (a[i]), (a[i][j]), therefore, the following should first
3882 // be true to do the deref.
3884 // (a != null) && (i < a.len) && (a[i] != null) && (j < a[i].len) && (a[i][j] != null) --> (1)
3886 // Note the short circuiting AND. Implication: these conditions should be performed in separate
3887 // blocks each of which will branch to slow path if the condition evaluates to false.
3889 // Now, imagine a situation where we have
3890 // a[x][y][k] = 20 and a[i][j][k] = 0
3891 // also in the inner most loop where x, y are parameters, then our conditions will have
3895 // in addition to the above conditions (1) to get rid of bounds check on index 'k'
3897 // But these conditions can be checked together with conditions
3898 // (i < a.len) without a need for a separate block. In summary, the conditions will be:
3901 // ((i < a.len) & (x < a.len)) && <-- Note the bitwise AND here.
3902 // (a[i] != null & a[x] != null) && <-- Note the bitwise AND here.
3903 // (j < a[i].len & y < a[x].len) && <-- Note the bitwise AND here.
3904 // (a[i][j] != null & a[x][y] != null) <-- Note the bitwise AND here.
3906 // This naturally yields a tree style pattern, where the nodes of the tree are
3907 // the array and indices respectively.
3923 // Notice that the variables in the same levels can have their conditions combined in the
3924 // same block with a bitwise AND. Whereas, the conditions in consecutive levels will be
3925 // combined with a short-circuiting AND (i.e., different basic blocks).
3928 // Construct a tree of array indices and the array which will generate the optimal
3929 // conditions for loop cloning.
3931 // a[i][j][k], b[i] and a[i][y][k] are the occurrences in the loop. Then, the tree should be:
3946 // In this method, we will construct such a tree by descending depth first into the array
3947 // index operation and forming a tree structure as we encounter the array or the index variables.
3949 // This tree structure will then be used to generate conditions like below:
3950 // (a != null) & (b != null) && // from the first level of the tree.
3952 // (i < a.len) & (i < b.len) && // from the second level of the tree. Levels can be combined.
3953 // (a[i] != null) & (b[i] != null) && // from the second level of the tree.
3955 // (j < a[i].len) & (y < a[i].len) && // from the third level.
3956 // (a[i][j] != null) & (a[i][y] != null) && // from the third level.
// optComputeDerefConditions: Build per-level dereference conditions for loop
// 'loopNum' (see the long comment block above for the full scheme).
// Constructs a forest keyed by array base locals, where each tree level holds
// the index variables of one dimension; then flattens it into
// levelCond[level][conditions] inside 'context'. Conditions on the same level
// can be combined with bitwise AND in one block; consecutive levels require
// short-circuiting AND (separate blocks).
3961 bool Compiler::optComputeDerefConditions(unsigned loopNum, LoopCloneContext* context)
3963 ExpandArrayStack<LC_Deref*> nodes(getAllocator());
3966 // Get the dereference-able arrays.
3967 ExpandArrayStack<LC_Array>* deref = context->EnsureDerefs(loopNum);
3969 // For each array in the dereference list, construct a tree,
3970 // where the nodes are array and index variables and an edge 'u-v'
3971 // exists if a node 'v' indexes node 'u' directly as in u[v] or an edge
3972 // 'u-v-w' transitively if u[v][w] occurs.
3973 for (unsigned i = 0; i < deref->Size(); ++i)
3975 LC_Array& array = (*deref)[i];
3977 // First populate the array base variable.
3978 LC_Deref* node = LC_Deref::Find(&nodes, array.arrIndex->arrLcl);
3979 if (node == nullptr)
// Level 0 node: the array base local itself (yields the null check).
3981 node = new (getAllocator()) LC_Deref(array, 0 /*level*/);
3985 // For each dimension (level) for the array, populate the tree with the variable
3986 // from that dimension.
3987 unsigned rank = (unsigned) array.GetDimRank();
3988 for (unsigned i = 0; i < rank; ++i)
3990 node->EnsureChildren(getAllocator());
3991 LC_Deref* tmp = node->Find(array.arrIndex->indLcls[i]);
3994 tmp = new (getAllocator()) LC_Deref(array, node->level + 1);
3995 node->children->Push(tmp);
3998 // Descend one level down.
4002 // Keep the maxRank of all array dereferences.
4003 maxRank = max((int) rank, maxRank);
// (Debug-only) dump the forest of deref nodes.
4009 for (unsigned i = 0; i < nodes.Size(); ++i)
4011 if (i != 0) printf(",");
4023 // First level will always yield the null-check, since it is made of the array base variables.
4024 // All other levels (dimensions) will yield two conditions ex: (i < a.length && a[i] != null)
4025 // So add 1 after rank * 2.
4026 unsigned condBlocks = (unsigned) maxRank * 2 + 1;
4028 // Heuristic to not create too many blocks;
4034 // Derive conditions into an 'array of level x array of conditions' i.e., levelCond[levels][conds]
4035 ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* levelCond = context->EnsureBlockConditions(loopNum, condBlocks);
4036 for (unsigned i = 0; i < nodes.Size(); ++i)
4038 nodes[i]->DeriveLevelConditions(levelCond);
4041 DBEXEC(verbose, context->PrintBlockConditions(loopNum));
4046 //----------------------------------------------------------------------------
4047 // optDebugLogLoopCloning: Insert a call to jithelper that prints a message.
4050 // block - the block in which the helper call needs to be inserted.
4051 // insertBefore - the tree before which the helper call will be inserted.
// Inserts a call to the CORINFO_HELP_DEBUG_LOG_LOOP_CLONING jit helper into 'block',
// immediately before 'insertBefore', then morphs the freshly created statement.
// Gated on the JitDebugLogLoopCloning config switch; a no-op when the switch is 0.
4053 void Compiler::optDebugLogLoopCloning(BasicBlock* block, GenTreePtr insertBefore)
     // Debug logging is off by default; bail out unless explicitly enabled.
4055     if (JitConfig.JitDebugLogLoopCloning() == 0)
     // Build the void helper call and wrap it in a statement node.
4059     GenTreePtr logCall = gtNewHelperCallNode(CORINFO_HELP_DEBUG_LOG_LOOP_CLONING, TYP_VOID);
4060     GenTreePtr stmt = fgNewStmtFromTree(logCall);
4061     fgInsertStmtBefore(block, insertBefore, stmt);
     // Morph the new statement so it is in the canonical form the backend expects.
4062     fgMorphBlockStmt(block, stmt DEBUGARG("Debug log loop cloning"));
4066 //------------------------------------------------------------------------
4067 // optPerformStaticOptimizations: Perform the optimizations for the optimization
4068 // candidates gathered during the cloning phase.
4071 // loopNum - the current loop index for which the optimizations are performed.
4072 // context - data structure where all loop cloning info is kept.
4073 // dynamicPath - If true, the optimization is performed in the fast path among the
4074 // cloned loops. If false, it means this is the only path (i.e.,
4075 // there is no slow path.)
4078 // Perform the optimizations on the fast path i.e., the path in which the
4079 // optimization candidates were collected at the time of identifying them.
4080 // The candidates store all the information necessary (the tree/stmt/block
4081 // they are from) to perform the optimization.
4084 // The unoptimized path is either already cloned when this method is called or
4085 // there is no unoptimized path (got eliminated statically.) So this method
4086 // performs the optimizations assuming that the path in which the candidates
4087 // were collected is the fast path in which the optimizations will be performed.
// Applies the statically-decidable optimizations recorded for loop 'loopNum' on the
// fast path: for each collected candidate, dispatch on its kind and perform the
// transformation directly on the tree/stmt/block the candidate recorded.
// 'dynamicPath' (debug only) indicates we are on the cloned fast path, and enables
// the debug logging helper insertion.
4089 void Compiler::optPerformStaticOptimizations(unsigned loopNum, LoopCloneContext* context DEBUGARG(bool dynamicPath))
4091     ExpandArrayStack<LcOptInfo*>* optInfos = context->GetLoopOptInfo(loopNum);
4092     for (unsigned i = 0; i < optInfos->Size(); ++i)
4094         LcOptInfo* optInfo = optInfos->GetRef(i);
4095         switch (optInfo->GetOptType())
4097         case LcOptInfo::LcJaggedArray:
4099                 LcJaggedArrayOptInfo* arrIndexInfo = optInfo->AsLcJaggedArrayOptInfo();
              // optRemoveRangeCheck operates relative to compCurBB, so point it at the
              // block that contains the bounds check we are about to remove.
4100                 compCurBB = arrIndexInfo->arrIndex.useBlock;
              // Remove the bounds check for the dimension this candidate was recorded for.
4101                 optRemoveRangeCheck(
4102                         arrIndexInfo->arrIndex.bndsChks[arrIndexInfo->dim],
4103                         arrIndexInfo->stmt, true, GTF_ASG, true);
              // Debug-only: optionally log that cloning-based optimization fired here.
4104                 DBEXEC(dynamicPath, optDebugLogLoopCloning(arrIndexInfo->arrIndex.useBlock, arrIndexInfo->stmt));
4107         case LcOptInfo::LcMdArray:
4108             // TODO-CQ: CLONE: Implement.
4117 //----------------------------------------------------------------------------
4118 // optCanCloneLoops: Use the environment flag to determine whether loop
4119 // cloning is allowed to be performed.
4122 //      Returns true when the JitCloneLoops config value is non-zero (the default
4123 //      is 1, so cloning is now enabled for retail builds as well).
// Returns true if loop cloning is permitted for this compile. Cloning defaults to
// enabled (flag value 1) and can be turned off via the JitCloneLoops config value.
4125 bool Compiler::optCanCloneLoops()
4127     // Enabled for retail builds now.
4128     unsigned cloneLoopsFlag = 1;
     // NOTE(review): in the full source this read is presumably guarded by a debug
     // #ifdef; as visible here it unconditionally overrides the default — confirm.
4130     cloneLoopsFlag = JitConfig.JitCloneLoops();
4132     return (cloneLoopsFlag != 0);
4136 //----------------------------------------------------------------------------
4137 // optIsLoopClonable: Determine whether this loop can be cloned.
4140 // loopInd loop index which needs to be checked if it can be cloned.
4143 // Returns true if the loop can be cloned. If it returns false
4144 // prints a message in debug as why the loop can't be cloned.
// Decides whether loop 'loopInd' may be cloned. Rejects loops that interact with EH
// regions in ways cloning cannot yet handle, and loops whose extra return blocks
// would exceed the 4-epilog limit. On success, pre-reserves the cloned loop's
// return blocks by bumping fgReturnCount.
4146 bool Compiler::optIsLoopClonable(unsigned loopInd)
4148     // First, for now, make sure the loop doesn't have any embedded exception handling -- I don't want to tackle
4149     // inserting new EH regions in the exception table yet.
4150     BasicBlock* stopAt = optLoopTable[loopInd].lpBottom->bbNext;
4151     unsigned loopRetCount = 0;
     // Walk every block in the loop: count returns (for the epilog limit below) and
     // reject outright if any block begins a 'try' region.
4152     for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != stopAt; blk = blk->bbNext)
4154         if (blk->bbJumpKind == BBJ_RETURN) loopRetCount++;
4155         if (bbIsTryBeg(blk))
4157             JITDUMP("Loop cloning: rejecting loop %d in %s, because it has a try begin.\n",
4158                 loopInd, info.compFullName);
4163     // Is the entry block a handler or filter start? If so, then if we cloned, we could create a jump
4164     // into the middle of a handler (to go to the cloned copy.) Reject.
4165     if (bbIsHandlerBeg(optLoopTable[loopInd].lpEntry))
4167         JITDUMP("Loop cloning: rejecting loop because entry block is a handler start.\n");
4171     // If the head and entry are in different EH regions, reject.
4172     if (!BasicBlock::sameEHRegion(optLoopTable[loopInd].lpHead, optLoopTable[loopInd].lpEntry))
4174         JITDUMP("Loop cloning: rejecting loop because head and entry blocks are in different EH regions.\n");
4178     // Is the first block after the last block of the loop a handler or filter start?
4179     // Usually, we create a dummy block after the original loop, to skip over the loop clone
4180     // and go to where the original loop did. That raises problems when we don't actually go to
4181     // that block; this is one of those cases. This could be fixed fairly easily; for example,
4182     // we could add a dummy nop block after the (cloned) loop bottom, in the same handler scope as the
4183     // loop. This is just a corner to cut to get this working faster.
4184     BasicBlock* bbAfterLoop = optLoopTable[loopInd].lpBottom->bbNext;
4185     if (bbAfterLoop != nullptr && bbIsHandlerBeg(bbAfterLoop))
4187         JITDUMP("Loop cloning: rejecting loop because next block after bottom is a handler start.\n");
4191     // We've previously made a decision whether to have separate return epilogs, or branch to one.
4192     // There's a GCInfo limitation in the x86 case, so that there can be no more than 4 separate epilogs.
4193     // (I thought this was x86-specific, but it's not if-d. On other architectures, the decision should be made as a heuristic tradeoff;
4194     // perhaps we're just choosing to live with 4 as the limit.)
4195     if (fgReturnCount + loopRetCount > 4)
4197         JITDUMP("Loop cloning: rejecting loop because it has %d returns; if added to previously-existing %d returns, would exceed the limit of 4.\n", loopRetCount, fgReturnCount);
4201     // Otherwise, we're going to add those return blocks.
     // Cloning duplicates each BBJ_RETURN block, so account for them up front.
4202     fgReturnCount += loopRetCount;
4207 /*****************************************************************************
4209 * Identify loop cloning opportunities, derive loop cloning conditions,
4210 * perform loop cloning, use the derived conditions to choose which
// Driver for the loop cloning phase: gathers per-loop optimization candidates,
// derives and evaluates cloning conditions, performs static optimizations where the
// conditions are statically known, and clones the remaining eligible loops.
4213 void Compiler::optCloneLoops()
4215     JITDUMP("\n*************** In optCloneLoops()\n");
     // Nothing to do if there are no loops or cloning is disabled by config.
4216     if (optLoopCount == 0 || !optCanCloneLoops())
4224         printf("Blocks/Trees at start of phase\n");
4225         fgDispBasicBlocks(true);
4229     LoopCloneContext context(optLoopCount, getAllocator());
4231     // Obtain array optimization candidates in the context.
4232     optObtainLoopCloningOpts(&context);
4234     // For each loop, derive cloning conditions for the optimization candidates.
4235     for (unsigned i = 0; i < optLoopCount; ++i)
4237         ExpandArrayStack<LcOptInfo*>* optInfos = context.GetLoopOptInfo(i);
         // Loops with no recorded candidates are skipped.
4238         if (optInfos == nullptr)
4243         if (!optDeriveLoopCloningConditions(i, &context) || !optComputeDerefConditions(i, &context))
4245             JITDUMP("> Conditions could not be obtained\n");
4246             context.CancelLoopOptInfo(i);
4250         bool allTrue = false;
4251         bool anyFalse = false;
         // Statically evaluate the derived conditions where possible.
4252         context.EvaluateConditions(i, &allTrue, &anyFalse DEBUGARG(verbose));
         // Some condition is statically false: cloning can never help; cancel.
4255             context.CancelLoopOptInfo(i);
4259             // Perform static optimizations on the fast path since we always
4260             // have to take the cloned path.
4261             optPerformStaticOptimizations(i, &context DEBUGARG(false));
4263             // No need to clone.
4264             context.CancelLoopOptInfo(i);
4271     // The code in this #if has been useful in debugging loop cloning issues, by
4272     // enabling selective enablement of the loop cloning optimization according to
     // Debug-only method-hash filtering: restrict cloning to methods whose hash falls
     // in [loopclonehashlo, loopclonehashhi], driven by environment variables.
4275     unsigned methHash = info.compMethodHash();
4276     char* lostr = getenv("loopclonehashlo");
4277     unsigned methHashLo = 0;
4280         sscanf_s(lostr, "%x", &methHashLo);
4281         // methHashLo = (unsigned(atoi(lostr)) << 2); // So we don't have to use negative numbers.
4283     char* histr = getenv("loopclonehashhi");
4284     unsigned methHashHi = UINT32_MAX;
4287         sscanf_s(histr, "%x", &methHashHi);
4288         // methHashHi = (unsigned(atoi(histr)) << 2); // So we don't have to use negative numbers.
4290     if (methHash < methHashLo || methHash > methHashHi)
     // Clone every loop that still has surviving (non-cancelled) opt info, after
     // optimizing its condition sets.
4295     for (unsigned i = 0; i < optLoopCount; ++i)
4297         if (context.GetLoopOptInfo(i) != nullptr)
4300             context.OptimizeConditions(i DEBUGARG(verbose));
4301             context.OptimizeBlockConditions(i DEBUGARG(verbose));
4302             optCloneLoop(i, &context);
4309         printf("\nAfter loop cloning:\n");
4310         fgDispBasicBlocks(/*dumpTrees*/true);
// Clones loop 'loopInd': duplicates the loop body blocks as a slow path, inserts the
// derived runtime condition blocks between head and the two loop entries, and wires
// the flow graph so that when all conditions hold the original (fast, optimized)
// loop runs, otherwise control transfers to the cloned (slow) copy.
4315 void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context)
4317     assert(loopInd < optLoopCount);
4319     JITDUMP("\nCloning loop %d: [h: %d, f: %d, t: %d, e: %d, b: %d].\n",
4321         optLoopTable[loopInd].lpHead->bbNum,
4322         optLoopTable[loopInd].lpFirst->bbNum,
4323         optLoopTable[loopInd].lpTop->bbNum,
4324         optLoopTable[loopInd].lpEntry->bbNum,
4325         optLoopTable[loopInd].lpBottom->bbNum);
4327     // Determine the depth of the loop, so we can properly weight blocks added (outside the cloned loop blocks).
4328     unsigned depth = optLoopDepth(loopInd);
4329     unsigned ambientWeight = 1;
     // Scale the ambient weight by BB_LOOP_WEIGHT once per nesting level.
4330     for (unsigned j = 0; j < depth; j++)
4332         unsigned lastWeight = ambientWeight;
4333         ambientWeight *= BB_LOOP_WEIGHT;
4334         // If the multiplication overflowed, stick at max.
4335         // (Strictly speaking, a multiplication could overflow and still have a result
4336         // that is >= lastWeight...but if so, the original weight must be pretty large,
4337         // and it got bigger, so that's OK.)
4338         if (ambientWeight < lastWeight)
4340             ambientWeight = BB_MAX_WEIGHT;
4345     // If we're in a non-natural loop, the ambient weight might be higher than we computed above.
4346     // Be safe by taking the max with the head block's weight.
4347     ambientWeight = max(ambientWeight, optLoopTable[loopInd].lpHead->bbWeight);
4349     // This is the containing loop, if any -- to label any blocks we create that are outside
4350     // the loop being cloned.
4351     unsigned char ambientLoop = optLoopTable[loopInd].lpParent;
4353     // First, make sure that the loop has a unique header block, creating an empty one if necessary.
4354     optEnsureUniqueHead(loopInd, ambientWeight);
4356     // We're going to make
4368     // H2--> E    (Optional; if E == T == F, let H fall through to F/T/E)
4380     BasicBlock* h = optLoopTable[loopInd].lpHead;
     // If the head conditionally falls into the loop we need a fresh, unconditional
     // entry block so the condition blocks can be inserted after it.
4381     if (h->bbJumpKind != BBJ_NONE && h->bbJumpKind != BBJ_ALWAYS)
4383         // Make a new block to be the unique entry to the loop.
4384         assert(h->bbJumpKind == BBJ_COND && h->bbNext == optLoopTable[loopInd].lpEntry);
4385         BasicBlock* newH = fgNewBBafter(BBJ_NONE,
4387                                         /*extendRegion*/true);
4388         newH->bbWeight = (newH->isRunRarely() ? 0 : ambientWeight);
4389         BlockSetOps::Assign(this, newH->bbReach, h->bbReach);
4390         // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
4391         newH->bbNatLoopNum = ambientLoop;
     // NOTE(review): this updates the head to 'h', not 'newH' -- elided lines above
     // presumably reassign 'h = newH' first; confirm against the full source.
4393         optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h);
4396     // First, make X2 after B, if necessary.  (Not necessary if b is a BBJ_ALWAYS.)
4397     // "newPred" will be the predecessor of the blocks of the cloned loop.
4398     BasicBlock* b = optLoopTable[loopInd].lpBottom;
4399     BasicBlock* newPred = b;
4400     if (b->bbJumpKind != BBJ_ALWAYS)
4402         BasicBlock* x = b->bbNext;
4405             BasicBlock* x2 = fgNewBBafter(BBJ_ALWAYS, b, /*extendRegion*/true);
4406             x2->bbWeight = (x2->isRunRarely() ? 0 : ambientWeight);
4408             // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
4409             x2->bbNatLoopNum = ambientLoop;
4412             BlockSetOps::Assign(this, x2->bbReach, h->bbReach);
4417     // Now we'll make "h2", after "h" to go to "e" -- unless the loop is a do-while,
4418     // so that "h" already falls through to "e" (e == t == f).
4419     BasicBlock* h2 = nullptr;
4420     if (optLoopTable[loopInd].lpHead->bbNext != optLoopTable[loopInd].lpEntry)
         // NOTE(review): this declares a new 'h2' that shadows the outer 'h2' above,
         // so the outer variable stays nullptr for the later head assertion -- confirm
         // whether the shadowing is intentional.
4422         BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS,
4423                                       optLoopTable[loopInd].lpHead,
4424                                       /*extendRegion*/true);
4425         h2->bbWeight = (h2->isRunRarely() ? 0 : ambientWeight);
4427         // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
4428         h2->bbNatLoopNum = ambientLoop;
4430         h2->bbJumpDest = optLoopTable[loopInd].lpEntry;
4431         optUpdateLoopHead(loopInd,optLoopTable[loopInd].lpHead, h2);
4434     // Now we'll clone the blocks of the loop body.
4435     BasicBlock* newFirst = nullptr;
4436     BasicBlock* newBot = nullptr;
4438     BlockToBlockMap* blockMap = new (getAllocator()) BlockToBlockMap(getAllocator());
     // Clone each block of the loop, remembering old->new in blockMap for the
     // jump-target remapping pass below.
4439     for (BasicBlock* blk = optLoopTable[loopInd].lpFirst;
4440          blk != optLoopTable[loopInd].lpBottom->bbNext;
4443         BasicBlock* newBlk = fgNewBBafter(blk->bbJumpKind,
4445                                           /*extendRegion*/true);
4447         BasicBlock::CloneBlockState(this, newBlk, blk);
4448         // TODO-Cleanup: The above clones the bbNatLoopNum, which is incorrect.  Eventually, we should probably insert the
4449         // cloned loop in the loop table.  For now, however, we'll just make these blocks be part of the surrounding loop, if one
4450         // exists -- the parent of the loop we're cloning.
4451         newBlk->bbNatLoopNum = optLoopTable[loopInd].lpParent;
4453         if (newFirst == nullptr) newFirst = newBlk;
4454         newBot = newBlk; // Continually overwrite to make sure we get the last one.
4456         blockMap->Set(blk, newBlk);
4459     // Perform the static optimizations on the fast path.
4460     optPerformStaticOptimizations(loopInd, context DEBUGARG(true));
4462     // Now go through the new blocks, remapping their jump targets within the loop.
4463     for (BasicBlock* blk = optLoopTable[loopInd].lpFirst;
4464          blk != optLoopTable[loopInd].lpBottom->bbNext;
4468         BasicBlock* newblk = nullptr;
4469         bool b = blockMap->Lookup(blk, &newblk);
4470         assert(b && newblk != nullptr);
4472         assert(blk->bbJumpKind == newblk->bbJumpKind);
4474         // First copy the jump destination(s) from "blk".
4475         optCopyBlkDest(blk, newblk);
4477         // Now redirect the new block according to "blockMap".
4478         optRedirectBlock(newblk, blockMap);
4481     assert((h->bbJumpKind == BBJ_NONE && (h->bbNext == h2 || h->bbNext == optLoopTable[loopInd].lpEntry))
4482            || (h->bbJumpKind == BBJ_ALWAYS));
4484     // If all the conditions are true, go to E2.
4485     BasicBlock* e2 = nullptr;
4486     bool foundIt = blockMap->Lookup(optLoopTable[loopInd].lpEntry, &e2);
     // The head now becomes the first condition block (conditional branch).
4488     h->bbJumpKind = BBJ_COND;
4490     // We will create the following structure
4492     // cond0 (in h)  -?> cond1
4493     // slow          --> e2 (slow) always
4500     // We should always have block conditions, at the minimum, the array should be deref-able
4501     assert(context->HasBlockConditions(loopInd));
4503     // Create a unique header for the slow path.
4504     BasicBlock* slowHead = fgNewBBafter(BBJ_ALWAYS, h, true);
4505     slowHead->bbWeight = (h->isRunRarely() ? 0 : ambientWeight);
4506     slowHead->bbNatLoopNum = ambientLoop;
4507     slowHead->bbJumpDest = e2;
4509     BasicBlock* condLast = optInsertLoopChoiceConditions(context, loopInd, h, slowHead);
     // The final condition block falls through to the fast loop and jumps to the
     // slow-path header when its (inverted) condition fails.
4510     condLast->bbJumpDest = slowHead;
4512     // If h2 is present it is already the head or replace 'h' by 'condLast'.
4515         optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, condLast);
4517     assert(foundIt && e2 != NULL);
     // Flow graph changed substantially: recompute reachability/dominators.
4519     fgUpdateChangedFlowGraph();
4522 //--------------------------------------------------------------------------------------------------
4523 // optInsertLoopChoiceConditions - Insert the loop conditions for a loop between loop head and entry
4526 // context loop cloning context variable
4527 // loopNum the loop index
4528 // head loop head for "loopNum"
4529 // slowHead the slow path loop head
4535 // Create the following structure.
4537 // Note below that the cond0 is inverted in head i.e., if true jump to cond1. This is because
4538 // condn cannot jtrue to loop head h2. It has to be from a direct pred block.
4540 // cond0 (in h) -?> cond1
4541 // slowHead --> e2 (slowHead) always
4542 // !cond1 -?> slowHead
4543 // !cond2 -?> slowHead
4545 // !condn -?> slowHead
4548 // Insert condition 0 in 'h' and create other condition blocks and insert conditions in them.
// Materializes the loop-cloning conditions as a chain of BBJ_COND blocks between
// 'head' and the loop entry. The first (header) condition is inverted so it jumps
// forward into the chain; subsequent conditions jump to 'slowHead' on failure.
// Returns the last condition block created, into which the final cloning
// conditions are inserted.
4550 BasicBlock* Compiler::optInsertLoopChoiceConditions(LoopCloneContext* context, unsigned loopNum, BasicBlock* head, BasicBlock* slowHead)
4552     JITDUMP("Inserting loop cloning conditions\n");
4553     assert(context->HasBlockConditions(loopNum));
4555     BasicBlock* curCond = head;
4556     ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* levelCond = context->GetBlockConditions(loopNum);
     // One condition block per dereference level; each level's conditions were
     // derived in optComputeDerefConditions.
4557     for (unsigned i = 0; i < levelCond->Size(); ++i)
4559         bool isHeaderBlock = (curCond == head);
4561         // Flip the condition if header block.
4562         context->CondToStmtInBlock(this, *((*levelCond)[i]), curCond, isHeaderBlock);
4564         // Create each condition block ensuring wiring between them.
4565         BasicBlock* tmp = fgNewBBafter(BBJ_COND, isHeaderBlock ? slowHead : curCond, true);
         // Header jumps forward to the next condition; later blocks jump to the slow
         // path when their condition fails.
4566         curCond->bbJumpDest = isHeaderBlock ? tmp : slowHead;
4569         curCond->inheritWeight(head);
4570         curCond->bbNatLoopNum = head->bbNatLoopNum;
4571         JITDUMP("Created new block %02d for new level\n", curCond->bbNum);
4574     // Finally insert cloning conditions after all deref conditions have been inserted.
4575     context->CondToStmtInBlock(this, *(context->GetConditions(loopNum)), curCond, false);
// Ensures loop 'loopInd' has a unique header block that dominates the entry. If the
// current head does not dominate the entry, creates an empty header block 'h2'
// spliced in just before the loop top, and redirects all non-loop predecessors of
// the entry to go through it. 'ambientWeight' is the weight assigned to the new
// block (unless it is run-rarely).
4579 void Compiler::optEnsureUniqueHead(unsigned loopInd, unsigned ambientWeight)
4581     BasicBlock* h = optLoopTable[loopInd].lpHead;
4582     BasicBlock* t = optLoopTable[loopInd].lpTop;
4583     BasicBlock* e = optLoopTable[loopInd].lpEntry;
4584     BasicBlock* b = optLoopTable[loopInd].lpBottom;
4586     // If "h" dominates the entry block, then it is the unique header.
4587     if (fgDominate(h, e)) return;
4589     // Otherwise, create a new empty header block, make it the pred of the entry block,
4590     // and redirect the preds of the entry block to go to this.
4592     BasicBlock* beforeTop = t->bbPrev;
4593     // Make sure that the new block is in the same region as the loop.
4594     // (We will only create loops that are entirely within a region.)
4595     BasicBlock * h2 = fgNewBBafter(BBJ_ALWAYS, beforeTop, true);
4596     // This is in the containing loop.
4597     h2->bbNatLoopNum = optLoopTable[loopInd].lpParent;
4598     h2->bbWeight = (h2->isRunRarely() ? 0 : ambientWeight);
4600     // We don't care where it was put; splice it between beforeTop and top.
4601     if (beforeTop->bbNext != h2)
4603         h2->bbPrev->setNext(h2->bbNext); // Splice h2 out.
4604         beforeTop->setNext(h2); // Splice h2 in, between beforeTop and t.
     // If h2 does not fall through to the entry it must branch to it explicitly.
4608     if (h2->bbNext != e)
4610         h2->bbJumpKind = BBJ_ALWAYS;
     // h2 reaches exactly what the entry reaches.
4613     BlockSetOps::Assign(this, h2->bbReach, e->bbReach);
4615     // Redirect paths from preds of "e" to go to "h2" instead of "e".
4616     BlockToBlockMap* blockMap = new (getAllocator()) BlockToBlockMap(getAllocator());
4617     blockMap->Set(e, h2);
4619     for (flowList* predEntry = e->bbPreds; predEntry; predEntry = predEntry->flNext)
4621         BasicBlock* predBlock = predEntry->flBlock;
4623         // Skip if predBlock is in the loop.
         // Loop membership test uses bbNum range [top, bottom] -- valid because loop
         // blocks are laid out contiguously by number at this point.
4624         if (t->bbNum <= predBlock->bbNum && predBlock->bbNum <= b->bbNum) continue;
4625         optRedirectBlock(predBlock, blockMap);
4628     optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h2);
4631 /*****************************************************************************
4633 * Determine the kind of interference for the call.
// Classifies the interference of a GT_CALL node: which kinds of indirections the
// call may write to. Non-helper calls kill everything; specific helpers are mapped
// to ref-only, scalar-only, or all-indirection interference; all other helpers are
// known to kill nothing.
4637 Compiler::callInterf    Compiler::optCallInterf(GenTreePtr call)
4639     assert(call->gtOper == GT_CALL);
4641     // if not a helper, kills everything
4642     if  (call->gtCall.gtCallType != CT_HELPER)
4645     // setfield and array address store kill all indirections
4646     switch (eeGetHelperNum(call->gtCall.gtCallMethHnd))
4648     case CORINFO_HELP_ASSIGN_REF:         // Not strictly needed as we don't make a GT_CALL with this
4649     case CORINFO_HELP_CHECKED_ASSIGN_REF: // Not strictly needed as we don't make a GT_CALL with this
4650     case CORINFO_HELP_ASSIGN_BYREF:       // Not strictly needed as we don't make a GT_CALL with this
4651     case CORINFO_HELP_SETFIELDOBJ:
4652     case CORINFO_HELP_ARRADDR_ST:
4654         return CALLINT_REF_INDIRS;
4656     case CORINFO_HELP_SETFIELDFLOAT:
4657     case CORINFO_HELP_SETFIELDDOUBLE:
4658     case CORINFO_HELP_SETFIELD8:
4659     case CORINFO_HELP_SETFIELD16:
4660     case CORINFO_HELP_SETFIELD32:
4661     case CORINFO_HELP_SETFIELD64:
4663         return CALLINT_SCL_INDIRS;
4665     case CORINFO_HELP_ASSIGN_STRUCT:  // Not strictly needed as we don't use this in Jit32
4666     case CORINFO_HELP_MEMSET:         // Not strictly needed as we don't make a GT_CALL with this
4667     case CORINFO_HELP_MEMCPY:         // Not strictly needed as we don't make a GT_CALL with this
4668     case CORINFO_HELP_SETFIELDSTRUCT:
4670         return CALLINT_ALL_INDIRS;
4676     // other helpers kill nothing
4677     return CALLINT_NONE;
4680 /*****************************************************************************
4682 * See if the given tree can be computed in the given precision (which must
4683 * be smaller than the type of the tree for this to make sense). If 'doit'
4684 * is false, we merely check to see whether narrowing is possible; if we
4685 * get called with 'doit' being true, we actually perform the narrowing.
// Determines whether 'tree' (of source type 'srct') can be computed in the narrower
// type 'dstt'. When 'doit' is false this is a pure query; when true, the narrowing
// is actually performed in place (types bashed, constants truncated, value numbers
// updated to 'vnpNarrow'). Returns false if narrowing is not possible; callers must
// first query with doit==false before committing.
4688 bool                Compiler::optNarrowTree(GenTreePtr     tree,
4691                                             ValueNumPair   vnpNarrow,
4698     noway_assert(genActualType(tree->gtType) == genActualType(srct));
4700     /* Assume we're only handling integer types */
4701     noway_assert(varTypeIsIntegral(srct));
4702     noway_assert(varTypeIsIntegral(dstt));
4704     unsigned srcSize      = genTypeSize(srct);
4705     unsigned dstSize      = genTypeSize(dstt);
4707     /* dstt must be smaller than srct to narrow */
4708     if (dstSize >= srcSize)
4711     /* Figure out what kind of a node we have */
4712     oper = tree->OperGet();
4713     kind = tree->OperKind();
     // Assignment-style operators are never narrowed; a query must already have
     // rejected them, so 'doit' cannot be true here.
4715     if  (kind & GTK_ASGOP)
4717         noway_assert(doit == false);
4721     ValueNumPair NoVNPair = ValueNumPair();
4723     if  (kind & GTK_LEAF)
4727         /* Constants can usually be narrowed by changing their value */
4729 #ifndef _TARGET_64BIT_
     // 32-bit targets: long constants live in a separate node kind; check the value
     // fits in the destination type's mask before rewriting to an int constant.
4734             lval  = tree->gtIntConCommon.LngValue();
4739             case TYP_BYTE : lmask = 0x0000007F; break;
4741             case TYP_UBYTE: lmask = 0x000000FF; break;
4742             case TYP_SHORT: lmask = 0x00007FFF; break;
4743             case TYP_CHAR : lmask = 0x0000FFFF; break;
4744             case TYP_INT  : lmask = 0x7FFFFFFF; break;
4745             case TYP_UINT : lmask = 0xFFFFFFFF; break;
4747             default:        return false;
     // Value has bits outside the mask: cannot represent it in 'dstt'.
4750             if  ((lval & lmask) != lval)
4755                 tree->ChangeOperConst     (GT_CNS_INT);
4756                 tree->gtType             = TYP_INT;
4757                 tree->gtIntCon.gtIconVal = (int) lval;
4758                 if (vnStore != nullptr)
4760                     fgValueNumberTreeConst(tree);
     // Int-sized constants: same masking scheme; on 64-bit targets narrowing
     // long->int also goes through here.
4769             ssize_t  ival;  ival  = tree->gtIntCon.gtIconVal;
4770             ssize_t  imask; imask = 0;
4774             case TYP_BYTE : imask = 0x0000007F; break;
4776             case TYP_UBYTE: imask = 0x000000FF; break;
4777             case TYP_SHORT: imask = 0x00007FFF; break;
4778             case TYP_CHAR : imask = 0x0000FFFF; break;
4779 #ifdef _TARGET_64BIT_
4780             case TYP_INT  : imask = 0x7FFFFFFF; break;
4781             case TYP_UINT : imask = 0xFFFFFFFF; break;
4782 #endif // _TARGET_64BIT_
4783             default:        return false;
4786             if  ((ival & imask) != ival)
4789 #ifdef _TARGET_64BIT_
4792                 tree->gtType             = TYP_INT;
4793                 tree->gtIntCon.gtIconVal = (int) ival;
4794                 if (vnStore != nullptr)
4796                     fgValueNumberTreeConst(tree);
4799 #endif // _TARGET_64BIT_
4803         /* Operands that are in memory can usually be narrowed
4804            simply by changing their gtType */
4807             /* We only allow narrowing long -> int for a GT_LCL_VAR */
4808             if (dstSize == sizeof(int))
4819         noway_assert(doit == false);
4824     if (kind & (GTK_BINOP|GTK_UNOP))
4826         GenTreePtr      op1; op1 = tree->gtOp.gtOp1;
4827         GenTreePtr      op2; op2 = tree->gtOp.gtOp2;
4829         switch (tree->gtOper)
4832             noway_assert(genActualType(tree->gtType) == genActualType(op2->gtType));
4834             // Is op2 a small constant than can be narrowed into dstt?
4835             // if so the result of the GT_AND will also fit into 'dstt' and can be narrowed
4836             if ((op2->gtOper == GT_CNS_INT)  &&  optNarrowTree(op2, srct, dstt, NoVNPair, false))
4838                 // We will change the type of the tree and narrow op2
4842                     tree->gtType = genActualType(dstt);
4843                     tree->SetVNs(vnpNarrow);
4845                     optNarrowTree(op2, srct, dstt, NoVNPair, true);
4846                     // We may also need to cast away the upper bits of op1
4849                         assert(tree->gtType == TYP_INT);
4850                         op1 = gtNewCastNode(TYP_INT, op1, TYP_INT);
4852                         op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
4854                         tree->gtOp.gtOp1 = op1;
         // Overflow-checked ops and small destination types cannot be narrowed here.
4865             if (tree->gtOverflow() || varTypeIsSmall(dstt))
4867                 noway_assert(doit == false);
4875             noway_assert(genActualType(tree->gtType) == genActualType(op1->gtType));
4876             noway_assert(genActualType(tree->gtType) == genActualType(op2->gtType));
         // Both operands must be narrowable (and not live CSE candidates) for the
         // whole binary node to be narrowed.
4878             if (gtIsActiveCSE_Candidate(op1)          ||
4879                 gtIsActiveCSE_Candidate(op2)          ||
4880                 !optNarrowTree(op1, srct, dstt, NoVNPair, doit) ||
4881                 !optNarrowTree(op2, srct, dstt, NoVNPair, doit)   )
4883                 noway_assert(doit == false);
4887             /* Simply change the type of the tree */
4891                 if (tree->gtOper == GT_MUL && (tree->gtFlags & GTF_MUL_64RSLT))
4892                     tree->gtFlags &= ~GTF_MUL_64RSLT;
4894                 tree->gtType = genActualType(dstt);
4895                 tree->SetVNs(vnpNarrow);
4903             /* Simply change the type of the tree */
4905             if  (doit && (dstSize <= genTypeSize(tree->gtType)))
4907                 tree->gtType = genSignedType(dstt);
4908                 tree->SetVNs(vnpNarrow);
4910                 /* Make sure we don't mess up the variable type */
4911                 if  ((oper == GT_LCL_VAR) || (oper == GT_LCL_FLD))
4912                     tree->gtFlags |= GTF_VAR_CAST;
4924             /* These can always be narrowed since they only represent 0 or 1 */
     // GT_CAST: narrowing may be folded into the cast itself.
4929                 var_types       cast    = tree->CastToType();
4930                 var_types       oprt    = op1->TypeGet();
4931                 unsigned        oprSize = genTypeSize(oprt);
         // Don't cross the integral/floating boundary, and never touch
         // overflow-checked casts.
4936                 if (varTypeIsIntegralOrI(dstt) != varTypeIsIntegralOrI(oprt))
4939                 if (tree->gtOverflow())
4942                 /* Is this a cast from the type we're narrowing to or a smaller one? */
4944                 if  (oprSize <= dstSize)
4946                     /* Bash the target type of the cast */
4950                         dstt = genSignedType(dstt);
4952                     if (oprSize == dstSize)
4954                         // Same size: change the CAST into a NOP
4955                         tree->ChangeOper         (GT_NOP);
4956                         tree->gtType             = dstt;
4957                         tree->gtOp.gtOp2         = nullptr;
4958                         tree->gtVNPair           = op1->gtVNPair; // Set to op1's ValueNumber
4962                         // oprSize is smaller
4963                         assert(oprSize < dstSize);
4965                         // Change the CastToType in the GT_CAST node
4966                         tree->CastToType() = dstt;
4968                         // The result type of a GT_CAST is never a small type.
4969                         // Use genActualType to widen dstt when it is a small types.
4970                         tree->gtType = genActualType(dstt);
4971                         tree->SetVNs(vnpNarrow);
     // Comma-like case: only the value-producing operand (op2) needs narrowing.
4981             if (!gtIsActiveCSE_Candidate(op2)  &&
4982                 optNarrowTree(op2, srct, dstt, vnpNarrow, doit))
4984                 /* Simply change the type of the tree */
4988                     tree->gtType = genActualType(dstt);
4989                     tree->SetVNs(vnpNarrow);
4996             noway_assert(doit == false);
5006 /*****************************************************************************
5008 * The following logic figures out whether the given variable is assigned
5009 * somewhere in a list of basic blocks (or in an entire loop).
// Tree-walk callback used to discover what a span of code assigns to. Records, in
// the isVarAssgDsc carried via pCallbackData: the set of tracked locals assigned
// (ivaMaskVal), indirect-store kinds (ivaMaskInd), whether the tracked-set overflowed
// (ivaMaskIncomplete), and the interference class of any call seen (ivaMaskCall).
5013 Compiler::fgWalkResult      Compiler::optIsVarAssgCB(GenTreePtr *pTree, fgWalkData *data)
5015     GenTreePtr tree = *pTree;
5017     if  (tree->OperKind() & GTK_ASGOP)
5019         GenTreePtr      dest     = tree->gtOp.gtOp1;
5020         genTreeOps      destOper = dest->OperGet();
5022         isVarAssgDsc *  desc = (isVarAssgDsc*)data->pCallbackData;
5023         assert(desc && desc->ivaSelf == desc);
5025         if  (destOper == GT_LCL_VAR)
5027             unsigned        tvar = dest->gtLclVarCommon.gtLclNum;
             // Track the assignment if the local fits in the all-var set; otherwise
             // mark the mask as incomplete so callers treat it conservatively.
5028             if  (tvar < lclMAX_ALLSET_TRACKED)
5029                 AllVarSetOps::AddElemD(data->compiler, desc->ivaMaskVal, tvar);
5031                 desc->ivaMaskIncomplete  = true;
             // If this is the specific variable the caller asked about, and not the
             // tree the caller asked us to skip, report it (elided lines presumably
             // return WALK_ABORT here -- confirm against the full source).
5033             if  (tvar == desc->ivaVar)
5035                 if  (tree != desc->ivaSkip)
5039         else if (destOper == GT_LCL_FLD)
5041             /* We can't track every field of every var. Moreover, indirections
5042                may access different parts of the var as different (but
5043                overlapping) fields. So just treat them as indirect accesses */
5045             // unsigned    lclNum = dest->gtLclFld.gtLclNum;
5046             // noway_assert(lvaTable[lclNum].lvAddrTaken);
5048             varRefKinds refs = varTypeIsGC(tree->TypeGet()) ? VR_IND_REF
5050             desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | refs);
5052         else if (destOper == GT_CLS_VAR)
5054             desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | VR_GLB_VAR);
5056         else if (destOper == GT_IND)
5058             /* Set the proper indirection bits */
5060             varRefKinds refs = varTypeIsGC(tree->TypeGet()) ? VR_IND_REF
5062             desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | refs);
5065     else if (tree->gtOper == GT_CALL)
5067         isVarAssgDsc *  desc = (isVarAssgDsc*)data->pCallbackData;
5068         assert(desc && desc->ivaSelf == desc);
         // Record the strongest interference class of any call encountered.
5070         desc->ivaMaskCall = optCallInterf(tree);
5073     return  WALK_CONTINUE;
5076 /*****************************************************************************/
// Returns whether a given variable is assigned anywhere in the block range starting
// at 'beg' (skipping the single tree 'skip'), by walking every statement with
// optIsVarAssgCB and checking whether the walk aborted on a matching assignment.
5078 bool                Compiler::optIsVarAssigned(BasicBlock *   beg,
5086     desc.ivaSkip     = skip;
5088     desc.ivaSelf     = &desc;
5091     desc.ivaMaskCall = CALLINT_NONE;
5092     AllVarSetOps::AssignNoCopy(this, desc.ivaMaskVal, AllVarSetOps::MakeEmpty(this));
     // Walk each statement of the current block; the callback aborts the walk when
     // it finds an assignment to the variable of interest.
5098         for (GenTreeStmt* stmt = beg->firstStmt(); stmt; stmt = stmt->gtNextStmt)
5100             noway_assert(stmt->gtOper == GT_STMT);
5101             if  (fgWalkTreePre(&stmt->gtStmtExpr, optIsVarAssgCB, &desc))
5121 /*****************************************************************************/
// Checks whether loop 'lnum' assigns any of the variables in 'vars' or performs
// stores of the indirection kinds in 'inds'. The per-loop assignment summary
// (lpAsgVars / lpAsgInds / lpAsgCall) is computed lazily on first query and cached
// in the loop descriptor via the LPFLG_ASGVARS_* flags.
5122 int                 Compiler::optIsSetAssgLoop(unsigned           lnum,
5123                                                ALLVARSET_VALARG_TP vars,
5128     /* Get hold of the loop descriptor */
5130     noway_assert(lnum < optLoopCount);
5131     loop = optLoopTable + lnum;
5133     /* Do we already know what variables are assigned within this loop? */
5135     if  (!(loop->lpFlags & LPFLG_ASGVARS_YES))
5142         /* Prepare the descriptor used by the tree walker call-back */
     // ivaVar = -1: we are not looking for one specific variable, just summarizing.
5144         desc.ivaVar     = (unsigned)-1;
5145         desc.ivaSkip    = NULL;
5147         desc.ivaSelf    = &desc;
5149         AllVarSetOps::AssignNoCopy(this, desc.ivaMaskVal, AllVarSetOps::MakeEmpty(this));
5150         desc.ivaMaskInd        = VR_NONE;
5151         desc.ivaMaskCall       = CALLINT_NONE;
5152         desc.ivaMaskIncomplete = false;
5154         /* Now walk all the statements of the loop */
5156         beg = loop->lpHead->bbNext;
5157         end = loop->lpBottom;
5159         for (/**/; /**/; beg = beg->bbNext)
     // Skip SSA phi definitions; they are not real assignments in this sense.
5163             for (GenTreeStmt* stmt = beg->FirstNonPhiDef(); stmt; stmt = stmt->gtNextStmt)
5165                 noway_assert(stmt->gtOper == GT_STMT);
5166                 fgWalkTreePre(&stmt->gtStmtExpr, optIsVarAssgCB, &desc);
             // Set overflow means we could not record every assigned local; flag the
             // loop so callers treat the summary conservatively.
5168                 if  (desc.ivaMaskIncomplete)
5170                     loop->lpFlags |= LPFLG_ASGVARS_INC;
     // Cache the computed summary in the loop descriptor.
5178         AllVarSetOps::Assign(this, loop->lpAsgVars, desc.ivaMaskVal);
5179         loop->lpAsgInds = desc.ivaMaskInd;
5180         loop->lpAsgCall = desc.ivaMaskCall;
5182         /* Now we know what variables are assigned in the loop */
5184         loop->lpFlags |= LPFLG_ASGVARS_YES;
5187     /* Now we can finally test the caller's mask against the loop's */
5188     if  (!AllVarSetOps::IsEmptyIntersection(this, loop->lpAsgVars, vars) ||
5189          (loop->lpAsgInds & inds))
     // Calls inside the loop may clobber indirections even when no tracked variable
     // intersects; classify by the recorded call interference.
5194     switch (loop->lpAsgCall)
5198         /* Can't hoist if the call might have side effect on an indirection. */
5200         if (loop->lpAsgInds != VR_NONE)
5205     case CALLINT_REF_INDIRS:
5207         /* Can't hoist if the call might have side effect on an ref indirection. */
5209         if (loop->lpAsgInds & VR_IND_REF)
5214     case CALLINT_SCL_INDIRS:
5216         /* Can't hoist if the call might have side effect on an non-ref indirection. */
5218         if (loop->lpAsgInds & VR_IND_SCL)
5223     case CALLINT_ALL_INDIRS:
5225         /* Can't hoist if the call might have side effect on any indirection. */
5227         if (loop->lpAsgInds & (VR_IND_REF|VR_IND_SCL))
5234         /* Other helpers kill nothing */
5239         noway_assert(!"Unexpected lpAsgCall value");
// optPerformHoistExpr: hoist a copy of 'origExpr' out of loop 'lnum'.
// Clones the expression (marked GTF_MAKE_CSE so the CSE phase will commonize it
// with the original), morphs it, and appends it as the last statement of the
// loop's pre-header block (creating the pre-header if needed).
// NOTE(review): this listing has elided lines (braces/blank lines); comments
// below describe only the statements that are visible.
5245 void Compiler::optPerformHoistExpr(GenTreePtr origExpr, unsigned lnum)
5250 printf("\nHoisting a copy of ");
5251 printTreeID(origExpr);
5252 printf(" into PreHeader for loop L%02u <BB%02u..BB%02u>:\n",
5253 lnum, optLoopTable[lnum].lpFirst->bbNum, optLoopTable[lnum].lpBottom->bbNum);
5254 gtDispTree(origExpr);
5259 // This loop has to be in a form that is approved for hoisting.
5260 assert (optLoopTable[lnum].lpFlags & LPFLG_HOISTABLE);
5262 // Create a copy of the expression and mark it for CSE's.
5263 GenTreePtr hoistExpr = gtCloneExpr(origExpr, GTF_MAKE_CSE);
5265 // At this point we should have a cloned expression, marked with the GTF_MAKE_CSE flag
5266 assert(hoistExpr != origExpr);
5267 assert(hoistExpr->gtFlags & GTF_MAKE_CSE);
5269 GenTreePtr hoist = hoistExpr;
5270 // The value of the expression isn't used (unless it's an assignment).
5271 if (hoistExpr->OperGet() != GT_ASG)
// Wrap in a GT_COMMA-style "unused value" node so the evaluated value is discarded.
5273 hoist = gtUnusedValNode(hoistExpr);
5276 /* Put the statement in the preheader */
5278 fgCreateLoopPreHeader(lnum);
// After fgCreateLoopPreHeader, lpHead is the (BBJ_NONE) pre-header block.
5280 BasicBlock * preHead = optLoopTable[lnum].lpHead;
5281 assert (preHead->bbJumpKind == BBJ_NONE);
5283 // fgMorphTree and lvaRecursiveIncRefCounts requires that compCurBB be the block that contains
5284 // (or in this case, will contain) the expression.
5285 compCurBB = preHead;
5287 // Increment the ref counts of any local vars appearing in "hoist".
5288 // Note that we need to do this before fgMorphTree() as fgMorph() could constant
5289 // fold away some of the lcl vars referenced by "hoist".
5290 lvaRecursiveIncRefCounts(hoist);
5292 hoist = fgMorphTree(hoist);
5294 GenTreePtr hoistStmt = gtNewStmt(hoist);
// GTF_STMT_CMPADD marks this as a compiler-added statement.
5295 hoistStmt->gtFlags |= GTF_STMT_CMPADD;
5297 /* simply append the statement at the end of the preHead's list */
5299 GenTreePtr treeList = preHead->bbTreeList;
5303 /* append after last statement */
// Statement lists are circular in gtPrev: the head's gtPrev is the last statement.
5305 GenTreePtr last = treeList->gtPrev;
5306 assert (last->gtNext == 0);
5308 last->gtNext = hoistStmt;
5309 hoistStmt->gtPrev = last;
5310 treeList->gtPrev = hoistStmt;
5314 /* Empty pre-header - store the single statement in the block */
5316 preHead->bbTreeList = hoistStmt;
// Single-statement list: gtPrev points back at itself (circular), gtNext is null.
5317 hoistStmt->gtPrev = hoistStmt;
5320 hoistStmt->gtNext = nullptr;
5325 printf("This hoisted copy placed in PreHeader (BB%02u):\n", preHead->bbNum);
// If statements are already threaded, cost and link the new statement's nodes.
5330 if (fgStmtListThreaded)
5332 gtSetStmtInfo(hoistStmt);
5333 fgSetStmtSeq(hoistStmt);
// Test-data bookkeeping (DEBUG-style checking of "must hoist"/"do not hoist" annotations).
5337 if (m_nodeTestData != NULL)
5340 // What is the depth of the loop "lnum"?
5342 unsigned lnumIter = lnum;
5343 while (optLoopTable[lnumIter].lpParent != BasicBlock::NOT_IN_LOOP)
5346 lnumIter = optLoopTable[lnumIter].lpParent;
5349 NodeToTestDataMap* testData = GetNodeTestData();
5351 TestLabelAndNum tlAndN;
5352 if (testData->Lookup(origExpr, &tlAndN) && tlAndN.m_tl == TL_LoopHoist)
// m_num == -1 encodes "do not hoist"; report the violation.
5354 if (tlAndN.m_num == -1)
5357 printTreeID(origExpr);
5358 printf(" was declared 'do not hoist', but is being hoisted.\n");
5361 else if (tlAndN.m_num != depth)
5364 printTreeID(origExpr);
5365 printf(" was declared as hoistable from loop at nesting depth %d; actually hoisted from loop at depth %d.\n",
5366 tlAndN.m_num, depth);
5371 // We've correctly hoisted this, so remove the annotation. Later, we'll check for any remaining "must hoist" annotations.
5372 testData->Remove(origExpr);
5373 // Now we insert an annotation to make sure that "hoistExpr" is actually CSE'd.
5374 tlAndN.m_tl = TL_CSE_Def;
5375 tlAndN.m_num = m_loopHoistCSEClass++;
5376 testData->Set(hoistExpr, tlAndN);
5382 #if LOOP_HOIST_STATS
// Count each loop at most once in m_loopsWithHoistedExpressions.
5383 if (!m_curLoopHasHoistedExpression)
5385 m_loopsWithHoistedExpressions++;
5386 m_curLoopHasHoistedExpression = true;
5388 m_totalHoistedExpressions++;
5389 #endif // LOOP_HOIST_STATS
// optHoistLoopCode: phase driver for loop-invariant code hoisting.
// Walks all top-level loops (outer-to-inner, so expressions are hoisted out of
// the outermost loop in which they are invariant) and hoists via optHoistLoopNest.
// Ends with a DEBUG pass that reports any unmet "must hoist" test annotations.
5392 void Compiler::optHoistLoopCode()
5394 // If we don't have any loops in the method then take an early out now.
5395 if (optLoopCount == 0)
// JitNoHoist config knob can disable this phase (early return, presumably — line elided).
5399 unsigned jitNoHoist = JitConfig.JitNoHoist();
5407 // The code in this #if has been useful in debugging loop cloning issues, by
5408 // enabling selective enablement of the loop cloning optimization according to
// Method-hash range filter driven by environment variables (debug-only scaffold).
5411 unsigned methHash = info.compMethodHash();
5412 char* lostr = getenv("loophoisthashlo")
5413 unsigned methHashLo = 0;
5416 sscanf_s(lostr, "%x", &methHashLo);
5417 // methHashLo = (unsigned(atoi(lostr)) << 2); // So we don't have to use negative numbers.
5419 char* histr = getenv("loophoisthashhi");
5420 unsigned methHashHi = UINT32_MAX;
5423 sscanf_s(histr, "%x", &methHashHi);
5424 // methHashHi = (unsigned(atoi(histr)) << 2); // So we don't have to use negative numbers.
5426 if (methHash < methHashLo || methHash > methHashHi)
5428 printf("Doing loop hoisting in %s (0x%x).\n", info.compFullName, methHash);
5430 #endif // 0 -- debugging loop cloning issues
5435 printf("\n*************** In optHoistLoopCode()\n");
5436 printf("Blocks/Trees before phase\n");
5437 fgDispBasicBlocks(true);
5442 // Consider all the loop nests, in outer-to-inner order (thus hoisting expressions outside the largest loop in which
5443 // they are invariant.)
5444 LoopHoistContext hoistCtxt(this);
5445 for (unsigned lnum = 0; lnum < optLoopCount; lnum++)
// Skip removed loops; only recurse from loops with no parent (outermost loops).
5447 if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED)
5450 if (optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP)
5452 optHoistLoopNest(lnum, &hoistCtxt);
5461 printf("Blocks/Trees after optHoistLoopCode() modified flowgraph\n");
5462 fgDispBasicBlocks(true);
5466 // Make sure that the predecessor lists are accurate
5467 fgDebugCheckBBlist();
5471 // Test Data stuff..
5473 // If we have no test data, early out.
5474 if (m_nodeTestData == NULL) return;
5475 NodeToTestDataMap* testData = GetNodeTestData();
// Verify no TL_LoopHoist "must hoist" annotations survived the phase.
5476 for (NodeToTestDataMap::KeyIterator ki = testData->Begin(); !ki.Equal(testData->End()); ++ki)
5478 TestLabelAndNum tlAndN;
5479 GenTreePtr node = ki.Get();
5480 bool b = testData->Lookup(node, &tlAndN);
5482 if (tlAndN.m_tl != TL_LoopHoist) continue;
5483 // Otherwise, it is a loop hoist annotation.
5484 assert(tlAndN.m_num < 100); // >= 100 indicates nested static field address, should already have been moved.
// m_num >= 0 means "must hoist from depth m_num"; if still present, it wasn't hoisted.
5485 if (tlAndN.m_num >= 0)
5489 printf(" was declared 'must hoist', but has not been hoisted.\n");
// optHoistLoopNest: hoist from loop 'lnum', then recursively from all its
// child loops. While recursing, the VNs hoisted in 'lnum' are temporarily
// added to hoistCtxt->m_hoistedInParentLoops so nested loops don't re-hoist
// the same value; they are removed again before returning.
5496 void Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt)
5498 // Do this loop, then recursively do all nested loops.
5500 #if LOOP_HOIST_STATS
// Reset the per-loop flag used by optPerformHoistExpr's statistics.
5502 m_curLoopHasHoistedExpression = false;
5503 m_loopsConsidered++;
5504 #endif // LOOP_HOIST_STATS
5506 optHoistThisLoop(lnum, hoistCtxt);
// Take ownership of the set of VNs hoisted in this loop (may be null).
5508 VNSet* hoistedInCurLoop = hoistCtxt->ExtractHoistedInCurLoop();
5510 if (optLoopTable[lnum].lpChild != BasicBlock::NOT_IN_LOOP)
5512 // Add the ones hoisted in "lnum" to "hoistedInParents" for any nested loops.
5513 // TODO-Cleanup: we should have a set abstraction for loops.
5514 if (hoistedInCurLoop != nullptr)
5516 for (VNSet::KeyIterator keys = hoistedInCurLoop->Begin(); !keys.Equal(hoistedInCurLoop->End()); ++keys)
// Each VN must not already be in the parent set (we remove them below).
5520 assert(!hoistCtxt->m_hoistedInParentLoops.Lookup(keys.Get(), &b));
5522 hoistCtxt->m_hoistedInParentLoops.Set(keys.Get(), true);
// Recurse into each child loop via the sibling chain.
5526 for (unsigned child = optLoopTable[lnum].lpChild; child != BasicBlock::NOT_IN_LOOP; child = optLoopTable[child].lpSibling)
5528 optHoistLoopNest(child, hoistCtxt);
5532 // TODO-Cleanup: we should have a set abstraction for loops.
5533 if (hoistedInCurLoop != nullptr)
5535 for (VNSet::KeyIterator keys = hoistedInCurLoop->Begin(); !keys.Equal(hoistedInCurLoop->End()); ++keys)
5537 // Note that we asserted when we added these that they hadn't been members, so removing is appropriate.
5538 hoistCtxt->m_hoistedInParentLoops.Remove(keys.Get());
// optHoistThisLoop: attempt hoisting for a single loop 'lnum'.
// Checks legality preconditions (do-while shape, head dominates entry, same
// try region, not in a catch), computes register-pressure statistics used by
// the profitability heuristic, then processes the loop's definitely-executed
// blocks (dominators of the single exit, or just the entry block).
5544 void Compiler::optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt)
5546 LoopDsc* pLoopDsc = &optLoopTable[lnum];
5548 /* If loop was removed continue */
5550 if (pLoopDsc->lpFlags & LPFLG_REMOVED)
5553 /* Get the head and tail of the loop */
5555 BasicBlock* head = pLoopDsc->lpHead;
5556 BasicBlock* tail = pLoopDsc->lpBottom;
5557 BasicBlock* lbeg = pLoopDsc->lpEntry;
5560 // We must have a do-while loop
5561 if ((pLoopDsc->lpFlags & LPFLG_DO_WHILE) == 0)
5564 // The loop-head must dominate the loop-entry.
5565 // TODO-CQ: Couldn't we make this true if it's not?
5566 if (!fgDominate(head, lbeg))
5569 // if lbeg is the start of a new try block then we won't be able to hoist
5570 if (!BasicBlock::sameTryRegion(head, lbeg))
5573 // We don't bother hoisting when inside of a catch block
5574 if ((lbeg->bbCatchTyp != BBCT_NONE) && (lbeg->bbCatchTyp != BBCT_FINALLY))
// All legality checks passed: mark the loop as an approved hoisting target.
5577 pLoopDsc->lpFlags |= LPFLG_HOISTABLE;
5579 unsigned begn = lbeg->bbNum;
5580 unsigned endn = tail->bbNum;
5582 // Ensure the per-loop sets/tables are empty.
5583 hoistCtxt->m_curLoopVnInvariantCache.RemoveAll();
5588 printf("optHoistLoopCode for loop L%02u <BB%02u..BB%02u>:\n", lnum, begn, endn);
5589 printf(" Loop body %s a call\n", pLoopDsc->lpContainsCall ? "contains" : "does not contain");
// loopVars = variables both live across the loop boundary AND used/defined inside it.
5593 VARSET_TP VARSET_INIT_NOCOPY(loopVars, VarSetOps::Intersection(this, pLoopDsc->lpVarInOut,
5594 pLoopDsc->lpVarUseDef));
5596 pLoopDsc->lpVarInOutCount = VarSetOps::Count(this, pLoopDsc->lpVarInOut);
5597 pLoopDsc->lpLoopVarCount = VarSetOps::Count(this, loopVars);
5598 pLoopDsc->lpHoistedExprCount = 0;
5600 #ifndef _TARGET_64BIT_
5601 unsigned longVarsCount = VarSetOps::Count(this, lvaLongVars);
5603 if (longVarsCount > 0)
5605 // Since 64-bit variables take up two registers on 32-bit targets, we increase
5606 // the Counts such that each TYP_LONG variable counts twice.
5608 VARSET_TP VARSET_INIT_NOCOPY(loopLongVars, VarSetOps::Intersection(this, loopVars, lvaLongVars));
5609 VARSET_TP VARSET_INIT_NOCOPY(inOutLongVars, VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, lvaLongVars));
5614 printf("\n LONGVARS(%d)=", VarSetOps::Count(this, lvaLongVars));
5615 lvaDispVarSet(lvaLongVars);
5618 pLoopDsc->lpLoopVarCount += VarSetOps::Count(this, loopLongVars);
5619 pLoopDsc->lpVarInOutCount += VarSetOps::Count(this, inOutLongVars);
5621 #endif // !_TARGET_64BIT_
5626 printf("\n USEDEF (%d)=", VarSetOps::Count(this, pLoopDsc->lpVarUseDef));
5627 lvaDispVarSet(pLoopDsc->lpVarUseDef);
5629 printf("\n INOUT (%d)=", pLoopDsc->lpVarInOutCount);
5630 lvaDispVarSet(pLoopDsc->lpVarInOut);
5632 printf("\n LOOPVARS(%d)=", pLoopDsc->lpLoopVarCount);
5633 lvaDispVarSet(loopVars);
// Split out floating-point variable counts; FP and integer registers are
// budgeted separately by optIsProfitableToHoistableTree.
5638 unsigned floatVarsCount = VarSetOps::Count(this, lvaFloatVars);
5640 if (floatVarsCount > 0)
5642 VARSET_TP VARSET_INIT_NOCOPY(loopFPVars, VarSetOps::Intersection(this, loopVars, lvaFloatVars));
5643 VARSET_TP VARSET_INIT_NOCOPY(inOutFPVars, VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, lvaFloatVars));
5645 pLoopDsc->lpLoopVarFPCount = VarSetOps::Count(this, loopFPVars);
5646 pLoopDsc->lpVarInOutFPCount = VarSetOps::Count(this, inOutFPVars);
5647 pLoopDsc->lpHoistedFPExprCount = 0;
// Remove FP vars from the integer counts so the two budgets don't overlap.
5649 pLoopDsc->lpLoopVarCount -= pLoopDsc->lpLoopVarFPCount;
5650 pLoopDsc->lpVarInOutCount -= pLoopDsc->lpVarInOutFPCount;
5655 printf( " INOUT-FP(%d)=", pLoopDsc->lpVarInOutFPCount);
5656 lvaDispVarSet(inOutFPVars);
5658 printf("\n LOOPV-FP(%d)=", pLoopDsc->lpLoopVarFPCount);
5659 lvaDispVarSet(loopFPVars);
5663 else // (floatVarsCount == 0)
5665 pLoopDsc->lpLoopVarFPCount = 0;
5666 pLoopDsc->lpVarInOutFPCount = 0;
5667 pLoopDsc->lpHoistedFPExprCount = 0;
5670 // Find the set of definitely-executed blocks.
5671 // Ideally, the definitely-executed blocks are the ones that post-dominate the entry block.
5672 // Until we have post-dominators, we'll special-case for single-exit blocks.
5673 ExpandArrayStack<BasicBlock*> defExec(getAllocatorLoopHoist());
5674 if (pLoopDsc->lpFlags & LPFLG_ONE_EXIT)
5676 assert(pLoopDsc->lpExit != nullptr);
5677 BasicBlock* cur = pLoopDsc->lpExit;
5678 // Push dominators, until we reach "entry" or exit the loop.
5679 while ( cur != nullptr
5680 && pLoopDsc->lpContains(cur)
5681 && cur != pLoopDsc->lpEntry)
5686 // If we didn't reach the entry block, give up and *just* push the entry block.
5687 if (cur != pLoopDsc->lpEntry)
5691 defExec.Push(pLoopDsc->lpEntry)
5693 else // More than one exit
5695 // We'll assume that only the entry block is definitely executed.
5696 // We could in the future do better.
5697 defExec.Push(pLoopDsc->lpEntry);
// Pop in reverse push order so dominators are processed before dominatees.
5700 while (defExec.Size() > 0)
5702 // Consider in reverse order: dominator before dominatee.
5703 BasicBlock* blk = defExec.Pop();
5704 optHoistLoopExprsForBlock(blk, lnum, hoistCtxt);
5708 // Hoist any expressions in "blk" that are invariant in loop "lnum" outside of "blk" and into a PreHead for loop "lnum".
// Walks each statement of 'blk' (skipping phi definitions); the recursive tree
// walk hoists hoistable sub-expressions, and a fully-hoistable top-level tree
// is offered to optHoistCandidate directly. Blocks whose weight is below
// BB_UNITY_WEIGHT/10 are skipped as not worth hoisting from.
5709 void Compiler::optHoistLoopExprsForBlock(BasicBlock* blk,
5711 LoopHoistContext* hoistCtxt)
5713 LoopDsc* pLoopDsc = &optLoopTable[lnum];
// True only while we're in the entry block and haven't yet seen a global side effect;
// exception-throwing trees may not be hoisted once this becomes false.
5714 bool firstBlockAndBeforeSideEffect = (blk == pLoopDsc->lpEntry);
5715 unsigned blkWeight = blk->getBBWeight(this);
5720 printf(" optHoistLoopExprsForBlock BB%02u (weight=%6s) of loop L%02u <BB%02u..BB%02u>, firstBlock is %s\n",
5722 refCntWtd2str(blkWeight),
5724 pLoopDsc->lpFirst->bbNum,
5725 pLoopDsc->lpBottom->bbNum,
5726 firstBlockAndBeforeSideEffect ? "true" : "false");
5727 if (blkWeight < (BB_UNITY_WEIGHT / 10))
5729 printf(" block weight is too small to perform hoisting.\n");
5734 if (blkWeight < (BB_UNITY_WEIGHT / 10))
5736 // Block weight is too small to perform hoisting.
5740 for (GenTreeStmt* stmt = blk->FirstNonPhiDef(); stmt; stmt = stmt->gtNextStmt)
5742 GenTreePtr stmtTree = stmt->gtStmtExpr;
// The walk hoists hoistable children internally; 'hoistable' reports whether
// the whole statement tree can itself be hoisted.
5744 (void)optHoistLoopExprsForTree(stmtTree, lnum, hoistCtxt, &firstBlockAndBeforeSideEffect, &hoistable);
5747 // we will try to hoist the top-level stmtTree
5748 optHoistCandidate(stmtTree, lnum, hoistCtxt);
// optIsProfitableToHoistableTree: register-pressure heuristic deciding whether
// hoisting 'tree' out of loop 'lnum' is likely to pay off.
// Estimates the callee-saved (plus callee-trash, when the loop makes no calls)
// register budget for the tree's register class (FP vs integer), subtracts
// expressions already hoisted, and compares against the loop's variable counts:
// the tighter the expected register pressure, the higher the gtCostEx bar.
5753 bool Compiler::optIsProfitableToHoistableTree(GenTreePtr tree, unsigned lnum)
5755 LoopDsc* pLoopDsc = &optLoopTable[lnum];
5757 bool loopContainsCall = pLoopDsc->lpContainsCall;
5760 int hoistedExprCount;
// FP trees are budgeted against the floating-point register file.
5764 if (varTypeIsFloating(tree->TypeGet()))
5766 hoistedExprCount = pLoopDsc->lpHoistedFPExprCount;
5767 loopVarCount = pLoopDsc->lpLoopVarFPCount;
5768 varInOutCount = pLoopDsc->lpVarInOutFPCount;
5770 availRegCount = CNT_CALLEE_SAVED_FLOAT;
// With no calls in the loop, caller-saved FP registers are also usable.
5771 if (!loopContainsCall)
5773 availRegCount += CNT_CALLEE_TRASH_FLOAT-1;
5776 // For ARM each double takes two FP registers
5777 // For now on ARM we won't track singles/doubles
5778 // and instead just assume that we always have doubles.
5785 hoistedExprCount = pLoopDsc->lpHoistedExprCount;
5786 loopVarCount = pLoopDsc->lpLoopVarCount;
5787 varInOutCount = pLoopDsc->lpVarInOutCount;
5789 availRegCount = CNT_CALLEE_SAVED-1;
5790 if (!loopContainsCall)
5792 availRegCount += CNT_CALLEE_TRASH-1;
5794 #ifndef _TARGET_64BIT_
5795 // For our 32-bit targets Long types take two registers.
5796 if (varTypeIsLong(tree->TypeGet()))
5798 availRegCount = (availRegCount+1) / 2;
5803 // decrement the availRegCount by the count of expression that we have already hoisted.
5804 availRegCount -= hoistedExprCount;
5806 // the variables that are read/written inside the loop should
5807 // always be a subset of the InOut variables for the loop
5808 assert(loopVarCount <= varInOutCount);
5810 // When loopVarCount >= availRegCount we believe that all of the
5811 // available registers will get used to hold LclVars inside the loop.
5812 // This pessimistically assumes that each loopVar has a conflicting
5813 // lifetime with every other loopVar.
5814 // For this case we will hoist the expression only if is profitable
5815 // to place it in a stack home location (gtCostEx >= 2*IND_COST_EX)
5816 // as we believe it will be placed in the stack or one of the other
5817 // loopVars will be spilled into the stack
5819 if (loopVarCount >= availRegCount)
5821 // Don't hoist expressions that are not heavy: tree->gtCostEx < (2*IND_COST_EX)
5822 if (tree->gtCostEx < (2*IND_COST_EX))
5826 // When varInOutCount < availRegCount we are know that there are
5827 // some available register(s) when we enter the loop body.
5828 // When varInOutCount == availRegCount there often will be a register
5829 // available when we enter the loop body, since a loop often defines a
5830 // LclVar on exit or there is often at least one LclVar that is worth
5831 // spilling to the stack to make way for this hoisted expression.
5832 // So we are willing hoist an expression with gtCostEx == MIN_CSE_COST
5834 if (varInOutCount > availRegCount)
5836 // Don't hoist expressions that barely meet CSE cost requirements: tree->gtCostEx == MIN_CSE_COST
5837 if (tree->gtCostEx <= MIN_CSE_COST+1)
5845 // This function returns true if 'tree' is a loop invariant expression.
5846 // It also sets '*pHoistable' to true if 'tree' can be hoisted into a loop PreHeader block
// Post-order recursion: a tree is invariant only if all children are, its
// liberal VN is invariant in 'lnum', and nothing else (call purity, exceptions
// after a side effect, GT_CLS_VAR reads) blocks hoisting. If the tree itself
// is NOT hoistable, its hoistable children are hoisted here instead; a
// hoistable tree is left for the caller to hoist as part of a larger tree.
5848 bool Compiler::optHoistLoopExprsForTree(GenTreePtr tree,
5850 LoopHoistContext* hoistCtxt,
5851 bool* pFirstBlockAndBeforeSideEffect,
5854 // First do the children.
5855 // We must keep track of whether each child node was hoistable or not
5857 unsigned nChildren = tree->NumChildren();
5858 bool childrenHoistable[GenTree::MAX_CHILDREN];
5860 // Initialize the array elements for childrenHoistable[] to false
5861 for (unsigned i = 0; i < nChildren; i++)
5863 childrenHoistable[i] = false;
5866 bool treeIsInvariant = true;
5867 for (unsigned childNum = 0; childNum < nChildren; childNum++)
5869 if (!optHoistLoopExprsForTree(tree->GetChild(childNum), lnum, hoistCtxt, pFirstBlockAndBeforeSideEffect, &childrenHoistable[childNum]))
5871 treeIsInvariant = false;
5875 // If all the children of "tree" are hoistable, then "tree" itself can be hoisted
5877 bool treeIsHoistable = treeIsInvariant;
5879 // But we must see if anything else prevents "tree" from being hoisted.
5881 if (treeIsInvariant)
5883 // Tree must be a suitable CSE candidate for us to be able to hoist it.
5884 treeIsHoistable = optIsCSEcandidate(tree);
5886 // If it's a call, it must be a helper call, and be pure.
5887 // Further, if it may run a cctor, it must be labeled as "Hoistable"
5888 // (meaning it won't run a cctor because the class is not precise-init).
5889 if (treeIsHoistable && tree->OperGet() == GT_CALL)
5891 GenTreeCall* call = tree->AsCall();
5892 if (call->gtCallType != CT_HELPER)
5894 treeIsHoistable = false;
5898 CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
5899 if (!s_helperCallProperties.IsPure(helpFunc))
5901 treeIsHoistable = false;
5903 else if ( s_helperCallProperties.MayRunCctor(helpFunc)
5904 && (call->gtFlags & GTF_CALL_HOISTABLE) == 0)
5906 treeIsHoistable = false;
5911 if (treeIsHoistable)
5913 if (!(*pFirstBlockAndBeforeSideEffect))
5915 // For now, we give up on an expression that might raise an exception if it is after the
5916 // first possible global side effect (and we assume we're after that if we're not in the first block).
5917 //TODO-CQ: this is when we might do loop cloning.
5919 if ((tree->gtFlags & GTF_EXCEPT) != 0)
5921 treeIsHoistable = false;
5924 // Currently we must give up on reads from static variables (even if we are in the first block).
5926 if (tree->OperGet() == GT_CLS_VAR)
5928 // TODO-CQ: test that fails if we hoist GT_CLS_VAR: JIT\Directed\Languages\ComponentPascal\pi_r.exe method Main
5929 treeIsHoistable = false;
5933 // Is the value of the whole tree loop invariant?
// Uses the liberal value number; results are memoized in m_curLoopVnInvariantCache.
5934 treeIsInvariant = optVNIsLoopInvariant(tree->gtVNPair.GetLiberal(), lnum, &hoistCtxt->m_curLoopVnInvariantCache);
5936 // Is the value of the whole tree loop invariant?
5937 if (!treeIsInvariant)
5939 treeIsHoistable = false;
5943 // Check if we need to set '*pFirstBlockAndBeforeSideEffect' to false.
5944 // If we encounter a tree with a call in it
5945 // or if we see an assignment to global we set it to false.
5947 // If we are already set to false then we can skip these checks
5949 if (*pFirstBlockAndBeforeSideEffect)
5951 // For this purpose, we only care about memory side effects. We assume that expressions will
5952 // be hoisted so that they are evaluated in the same order as they would have been in the loop,
5953 // and therefore throw exceptions in the same order. (So we don't use GTF_GLOBALLY_VISIBLE_SIDE_EFFECTS
5954 // here, since that includes exceptions.)
5955 if (tree->gtFlags & GTF_CALL)
5957 *pFirstBlockAndBeforeSideEffect = false;
5959 else if (tree->OperIsAssignment())
5961 // If the LHS of the assignment has a global reference, then assume it's a global side effect.
5962 GenTreePtr lhs = tree->gtOp.gtOp1;
5963 if (lhs->gtFlags & GTF_GLOB_REF)
5965 *pFirstBlockAndBeforeSideEffect = false;
5967 } else if (tree->OperIsCopyBlkOp())
5969 GenTreePtr args = tree->gtOp.gtOp1;
5970 assert(args->OperGet() == GT_LIST);
// First list operand is the destination of the block copy.
5971 if (args->gtOp.gtOp1->gtFlags & GTF_GLOB_REF)
5973 *pFirstBlockAndBeforeSideEffect = false;
5978 // If this 'tree' is hoistable then we return and the caller will
5979 // decide to hoist it as part of larger hoistable expression.
5981 if (!treeIsHoistable)
5983 // We are not hoistable so we will now hoist any hoistable children.
5985 for (unsigned childNum = 0; childNum < nChildren; childNum++)
5987 if (childrenHoistable[childNum])
5989 // We can't hoist the LHS of an assignment, isn't a real use.
5990 if (childNum == 0 && (tree->OperIsAssignment()))
5993 GenTreePtr child = tree->GetChild(childNum);
5995 // We try to hoist this 'child' tree
5996 optHoistCandidate(child, lnum, hoistCtxt);
6001 *pHoistable = treeIsHoistable;
6002 return treeIsInvariant;
// optHoistCandidate: final gatekeeper for hoisting 'tree' out of loop 'lnum'.
// Rejects trees not valid/profitable at the loop head, and trees whose VN was
// already hoisted in this loop or an enclosing one; otherwise performs the
// hoist and updates the per-loop hoisted-expression counters and VN set.
6005 void Compiler::optHoistCandidate(GenTreePtr tree, unsigned lnum, LoopHoistContext* hoistCtxt)
6007 if (lnum == BasicBlock::NOT_IN_LOOP)
6009 // The hoisted expression isn't valid at any loop head so don't hoist this expression.
6013 // The outer loop also must be suitable for hoisting...
6014 if ((optLoopTable[lnum].lpFlags & LPFLG_HOISTABLE) == 0)
6017 // If the hoisted expression isn't valid at this loop head then break
6018 if (!optTreeIsValidAtLoopHead(tree, lnum))
6021 // It must pass the hoistable profitablity tests for this loop level
6022 if (!optIsProfitableToHoistableTree(tree, lnum))
// Deduplicate by liberal value number against enclosing loops...
6027 if (hoistCtxt->m_hoistedInParentLoops.Lookup(tree->gtVNPair.GetLiberal(), &b))
6029 // already hoisted in a parent loop, so don't hoist this expression.
// ...and against what this loop has already hoisted.
6033 if (hoistCtxt->GetHoistedInCurLoop(this)->Lookup(tree->gtVNPair.GetLiberal(), &b))
6035 // already hoisted this expression in the current loop, so don't hoist this expression.
6039 // Expression can be hoisted
6040 optPerformHoistExpr(tree, lnum);
6042 // Increment lpHoistedExprCount or lpHoistedFPExprCount
6043 if (!varTypeIsFloating(tree->TypeGet()))
6045 optLoopTable[lnum].lpHoistedExprCount++;
6046 #ifndef _TARGET_64BIT_
6047 // For our 32-bit targets Long types take two registers.
6048 if (varTypeIsLong(tree->TypeGet()))
// Count a long twice: it occupies two registers on 32-bit targets.
6050 optLoopTable[lnum].lpHoistedExprCount++;
6054 else // Floating point expr hoisted
6056 optLoopTable[lnum].lpHoistedFPExprCount++;
6059 // Record the hoisted expression in hoistCtxt
6060 hoistCtxt->GetHoistedInCurLoop(this)->Set(tree->gtVNPair.GetLiberal(), true);
// optVNIsLoopInvariant: is value number 'vn' invariant with respect to loop 'lnum'?
// Constants are always invariant. A PhiDef/PhiHeapDef is invariant iff its
// defining block lies outside the loop; any other VN function is invariant iff
// all of its arguments are. Results are memoized in 'loopVnInvariantCache'.
6064 bool Compiler::optVNIsLoopInvariant(ValueNum vn, unsigned lnum, VNToBoolMap* loopVnInvariantCache)
6066 // If it is not a VN, is not loop-invariant.
6067 if (vn == ValueNumStore::NoVN) return false;
6069 // We'll always short-circuit constants.
6070 if (vnStore->IsVNConstant(vn) || vn == vnStore->VNForVoid())
6073 // If we've done this query previously, don't repeat.
6074 bool previousRes = false;
6075 if (loopVnInvariantCache->Lookup(vn, &previousRes))
6080 if (vnStore->GetVNFunc(vn, &funcApp))
6082 if (funcApp.m_func == VNF_PhiDef)
6084 // First, make sure it's a "proper" phi -- the definition is a Phi application.
6085 VNFuncApp phiDefValFuncApp;
6086 if ( !vnStore->GetVNFunc(funcApp.m_args[2], &phiDefValFuncApp)
6087 || phiDefValFuncApp.m_func != VNF_Phi)
6089 // It's not *really* a definition, rather a pass-through of some other VN.
6090 // (This could occur, say if both sides of an if-then-else diamond made the
6091 // same assignment to a variable.)
6092 res = optVNIsLoopInvariant(funcApp.m_args[2], lnum, loopVnInvariantCache)
6096 // Is the definition within the loop? If so, is not loop-invariant.
// PhiDef args encode (lclNum, ssaNum, value); look up the SSA def's block.
6097 unsigned lclNum = funcApp.m_args[0];
6098 unsigned ssaNum = funcApp.m_args[1];
6099 LclSsaVarDsc* ssaDef = lvaTable[lclNum].GetPerSsaData(ssaNum);
6100 res = !optLoopContains(lnum, ssaDef->m_defLoc.m_blk->bbNatLoopNum);
6103 else if (funcApp.m_func == VNF_PhiHeapDef)
// Heap phi: the defining block is stored as a constant (pointer) argument.
6105 BasicBlock* defnBlk = reinterpret_cast<BasicBlock*>(vnStore->ConstantValue<ssize_t>(funcApp.m_args[0]));
6106 res = !optLoopContains(lnum, defnBlk->bbNatLoopNum);
// General VN function: invariant only if every argument VN is invariant.
6110 for (unsigned i = 0; i < funcApp.m_arity; i++)
6112 // TODO-CQ: We need to either make sure that *all* VN functions
6113 // always take VN args, or else have a list of arg positions to exempt, as implicitly
6115 if (!optVNIsLoopInvariant(funcApp.m_args[i], lnum, loopVnInvariantCache))
6125 // Otherwise, assume non-function "new, unique" VN's are not loop invariant.
6129 loopVnInvariantCache->Set(vn, res);
// optTreeIsValidAtLoopHead: can 'tree' be safely evaluated at the head of
// loop 'lnum'? Locals must be SSA-tracked with their SSA definition outside
// the loop; constants are always valid; other nodes are valid iff all of
// their children are.
6133 bool Compiler::optTreeIsValidAtLoopHead(GenTreePtr tree, unsigned lnum)
6135 if (tree->OperIsLocal())
6137 GenTreeLclVarCommon* lclVar = tree->AsLclVarCommon();
6138 unsigned lclNum = lclVar->gtLclNum;
6140 // The lvlVar must be have an Ssa tracked lifetime
6141 if (fgExcludeFromSsa(lclNum))
6144 // If the loop does not contains the SSA def we can hoist it.
6145 if (!optLoopTable[lnum].lpContains(lvaTable[lclNum].GetPerSsaData(lclVar->GetSsaNum())->m_defLoc.m_blk))
6148 else if (tree->OperIsConst())
6152 else // If every one of the children nodes are valid at this Loop's Head.
6154 unsigned nChildren = tree->NumChildren();
6155 for (unsigned childNum = 0; childNum < nChildren; childNum++)
6157 if (!optTreeIsValidAtLoopHead(tree->GetChild(childNum), lnum))
6166 /*****************************************************************************
6168 * Creates a pre-header block for the given loop - a preheader is a BBJ_NONE
6169 * header. The pre-header will replace the current lpHead in the loop table.
6170 * The loop has to be a do-while loop. Thus, all blocks dominated by lpHead
6171 * will also be dominated by the loop-top, lpHead->bbNext.
6175 void Compiler::fgCreateLoopPreHeader(unsigned lnum)
6177 LoopDsc* pLoopDsc = &optLoopTable[lnum];
6179 /* This loop has to be a "do-while" loop */
6181 assert (pLoopDsc->lpFlags & LPFLG_DO_WHILE);
6183 /* Have we already created a loop-preheader block? */
// Idempotent: a shared pre-header may have been created for a sibling loop.
6185 if (pLoopDsc->lpFlags & LPFLG_HAS_PREHEAD)
6188 BasicBlock* head = pLoopDsc->lpHead;
6189 BasicBlock* top = pLoopDsc->lpTop;
6190 BasicBlock* entry = pLoopDsc->lpEntry;
6192 // if 'entry' and 'head' are in different try regions then we won't be able to hoist
6193 if (!BasicBlock::sameTryRegion(head, entry))
6196 // Ensure that lpHead always dominates lpEntry
6198 noway_assert(fgDominate(head, entry));
6200 /* Get hold of the first block of the loop body */
6202 assert (top == entry);
6204 /* Allocate a new basic block */
6206 BasicBlock * preHead = bbNewBasicBlock(BBJ_NONE);
6207 preHead->bbFlags |= BBF_INTERNAL | BBF_LOOP_PREHEADER;
6209 // Must set IL code offset
6210 preHead->bbCodeOffs = top->bbCodeOffs;
6212 // Set the default value of the preHead weight in case we don't have
6213 // valid profile data and since this blocks weight is just an estimate
6214 // we clear any BBF_PROF_WEIGHT flag that we may have picked up from head.
6216 preHead->inheritWeight(head);
6217 preHead->bbFlags &= ~BBF_PROF_WEIGHT;
6221 printf("\nCreated PreHeader (BB%02u) for loop L%02u (BB%02u - BB%02u), with weight = %s\n",
6222 preHead->bbNum, lnum, top->bbNum, pLoopDsc->lpBottom->bbNum,
6223 refCntWtd2str(preHead->getBBWeight(this)));
6226 // The preheader block is part of the containing loop (if any).
6227 preHead->bbNatLoopNum = pLoopDsc->lpParent;
// With profile data and a conditional head, compute a better pre-header weight
// from the taken/not-taken split of the loop-entry test.
6229 if (fgIsUsingProfileWeights() && (head->bbJumpKind == BBJ_COND))
6231 if ((head->bbWeight == 0) || (head->bbNext->bbWeight == 0))
6233 preHead->bbWeight = 0;
6234 preHead->bbFlags |= BBF_RUN_RARELY;
6238 bool allValidProfileWeights = ((head->bbFlags & BBF_PROF_WEIGHT) != 0)
6239 && ((head->bbJumpDest->bbFlags & BBF_PROF_WEIGHT) != 0)
6240 && ((head->bbNext->bbFlags & BBF_PROF_WEIGHT) != 0);
6242 if (allValidProfileWeights)
6244 double loopEnteredCount;
6245 double loopSkippedCount;
6247 if (fgHaveValidEdgeWeights)
6249 flowList * edgeToNext = fgGetPredForBlock(head->bbNext, head);
6250 flowList * edgeToJump = fgGetPredForBlock(head->bbJumpDest, head);
6251 noway_assert(edgeToNext != NULL);
6252 noway_assert(edgeToJump != NULL);
// Use the midpoint of each edge-weight [min, max] range as its estimate.
6254 loopEnteredCount = ((double) edgeToNext->flEdgeWeightMin + (double) edgeToNext->flEdgeWeightMax) / 2.0;
6255 loopSkippedCount = ((double) edgeToJump->flEdgeWeightMin + (double) edgeToJump->flEdgeWeightMax) / 2.0;
6259 loopEnteredCount = (double) head->bbNext->bbWeight;
6260 loopSkippedCount = (double) head->bbJumpDest->bbWeight;
6263 double loopTakenRatio = loopEnteredCount / (loopEnteredCount + loopSkippedCount);
6265 // Calculate a good approximation of the preHead's block weight
6266 unsigned preHeadWeight = (unsigned) (((double) head->bbWeight * loopTakenRatio) + 0.5);
6267 preHead->setBBWeight(max(preHeadWeight, 1));
6268 noway_assert(!preHead->isRunRarely());
6273 // Link in the preHead block.
6274 fgInsertBBbefore(top, preHead);
6276 // Ideally we would re-run SSA and VN if we optimized by doing loop hoisting.
6277 // However, that is too expensive at this point. Instead, we update the phi
6278 // node block references, if we created pre-header block due to hoisting.
6279 // This is sufficient because any definition participating in SSA that flowed
6280 // into the phi via the loop header block will now flow through the preheader
6281 // block from the header block.
6283 for (GenTreePtr stmt = top->bbTreeList; stmt; stmt = stmt->gtNext)
6285 GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
6286 if (tree->OperGet() != GT_ASG)
6290 GenTreePtr op2 = tree->gtGetOp2();
6291 if (op2->OperGet() != GT_PHI)
6295 GenTreeArgList* args = op2->gtGetOp1()->AsArgList();
6296 while (args != nullptr)
6298 GenTreePhiArg* phiArg = args->Current()->AsPhiArg();
// Retarget phi args that came in via the old head to come in via the pre-header.
6299 if (phiArg->gtPredBB == head)
6301 phiArg->gtPredBB = preHead;
6303 args = args->Rest();
6307 // The handler can't begin at the top of the loop. If it did, it would be incorrect
6308 // to set the handler index on the pre header without updating the exception table.
6309 noway_assert(!top->hasHndIndex() || fgFirstBlockOfHandler(top) != top);
6311 // Update the EH table to make the hoisted block part of the loop's EH block.
6312 fgExtendEHRegionBefore(top);
6314 // TODO-CQ: set dominators for this block, to allow loop optimizations requiring them
6315 // (e.g: hoisting expression in a loop with the same 'head' as this one)
6317 /* Update the loop entry */
6319 pLoopDsc->lpHead = preHead;
6320 pLoopDsc->lpFlags |= LPFLG_HAS_PREHEAD;
6322 /* The new block becomes the 'head' of the loop - update bbRefs and bbPreds
6323 All predecessors of 'beg', (which is the entry in the loop)
6324 now have to jump to 'preHead', unless they are dominated by 'head' */
6326 preHead->bbRefs = 0;
6327 fgAddRefPred(preHead, head);
6328 bool checkNestedLoops = false;
// Redirect each predecessor of 'top' to the pre-header, except back-edges
// from inside the loop (blocks dominated by 'top').
6330 for (flowList * pred = top->bbPreds; pred; pred = pred->flNext)
6332 BasicBlock * predBlock = pred->flBlock;
6334 if (fgDominate(top, predBlock))
6336 // note: if 'top' dominates predBlock, 'head' dominates predBlock too
6337 // (we know that 'head' dominates 'top'), but using 'top' instead of
6338 // 'head' in the test allows us to not enter here if 'predBlock == head'
// A back-edge that is not this loop's own bottom block suggests another
// (nested) loop shares this entry; remember to fix up the loop table below.
6340 if (predBlock != pLoopDsc->lpBottom)
6342 noway_assert(predBlock != head);
6343 checkNestedLoops = true;
6348 switch (predBlock->bbJumpKind)
6351 noway_assert(predBlock == head);
6355 if (predBlock == head)
6357 noway_assert(predBlock->bbJumpDest != top);
6363 case BBJ_EHCATCHRET:
6364 noway_assert(predBlock->bbJumpDest == top);
6365 predBlock->bbJumpDest = preHead;
6366 preHead->bbFlags |= BBF_JMP_TARGET|BBF_HAS_LABEL;
6368 if (predBlock == head)
6370 // This is essentially the same case of predBlock being a BBJ_NONE. We may not be
6371 // able to make this a BBJ_NONE if it's an internal block (for example, a leave).
6372 // Just break, pred will be removed after switch.
6376 fgRemoveRefPred(top, predBlock);
6377 fgAddRefPred(preHead, predBlock);
6382 unsigned jumpCnt; jumpCnt = predBlock->bbJumpSwt->bbsCount;
6383 BasicBlock * * jumpTab; jumpTab = predBlock->bbJumpSwt->bbsDstTab;
// Update every switch-table slot that targeted 'top'.
6388 if ((*jumpTab) == top)
6390 (*jumpTab) = preHead;
6392 fgRemoveRefPred(top, predBlock);
6393 fgAddRefPred(preHead, predBlock);
6394 preHead->bbFlags |= BBF_JMP_TARGET|BBF_HAS_LABEL;
6397 while (++jumpTab, --jumpCnt);
6400 noway_assert(!"Unexpected bbJumpKind");
// After redirection, 'top' is reached only through the pre-header.
6405 noway_assert(!fgGetPredForBlock(top, preHead));
6406 fgRemoveRefPred(top, head);
6407 fgAddRefPred(top, preHead);
6410 If we found at least one back-edge in the flowgraph pointing to the top/entry of the loop
6411 (other than the back-edge of the loop we are considering) then we likely have nested
6412 do-while loops with the same entry block and inserting the preheader block changes the head
6413 of all the nested loops. Now we will update this piece of information in the loop table, and
6414 mark all nested loops as having a preheader (the preheader block can be shared among all nested
6415 do-while loops with the same entry block).
6417 if (checkNestedLoops)
6419 for (unsigned l = 0; l < optLoopCount; l++)
6421 if (optLoopTable[l].lpHead == head)
6423 noway_assert(l != lnum); // pLoopDsc->lpHead was already changed from 'head' to 'preHead'
6424 noway_assert(optLoopTable[l].lpEntry == top);
6425 optUpdateLoopHead(l, optLoopTable[l].lpHead, preHead);
6426 optLoopTable[l].lpFlags |= LPFLG_HAS_PREHEAD;
6429 printf("Same PreHeader (BB%02u) can be used for loop L%02u (BB%02u - BB%02u)\n\n",
6430 preHead->bbNum, l, top->bbNum, optLoopTable[l].lpBottom->bbNum);
// optBlockIsLoopEntry: Determine whether 'blk' is the entry block of some
// loop that contains it. Starts at the block's innermost natural loop
// (bbNatLoopNum) and walks outward through the lpParent links, comparing
// 'blk' against each loop's lpEntry.
// NOTE(review): the lines that report the matching loop number through
// 'pLnum' and the function's return statements are elided in this listing.
6437 bool Compiler::optBlockIsLoopEntry(BasicBlock* blk, unsigned* pLnum)
6439 unsigned lnum = blk->bbNatLoopNum;
6440 while (lnum != BasicBlock::NOT_IN_LOOP)
6442 if (optLoopTable[lnum].lpEntry == blk)
// Not the entry of this loop; try the enclosing loop.
6447 lnum = optLoopTable[lnum].lpParent;
// optComputeLoopSideEffects: Compute the side-effect summaries for all loops
// in the loop table. First resets each loop's lpVarInOut / lpVarUseDef sets
// and lpContainsCall flag, then computes side effects once per outermost
// loop nest. Also (re)builds lvaFloatVars and, on 32-bit targets,
// lvaLongVars from the tracked locals.
6452 void Compiler::optComputeLoopSideEffects()
// Pass 1: reset every loop's summary state.
6455 for (lnum = 0; lnum < optLoopCount; lnum++)
6457 VarSetOps::AssignNoCopy(this, optLoopTable[lnum].lpVarInOut, VarSetOps::MakeEmpty(this));
6458 VarSetOps::AssignNoCopy(this, optLoopTable[lnum].lpVarUseDef, VarSetOps::MakeEmpty(this));
6459 optLoopTable[lnum].lpContainsCall = false;
// Pass 2: walk each surviving outermost loop; inner loops are summarized
// while walking the enclosing nest.
6462 for (lnum = 0; lnum < optLoopCount; lnum++)
6464 if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED)
6467 if (optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP) // Is outermost...
6468 optComputeLoopNestSideEffects(lnum);
// Build the set of tracked floating-point locals (and tracked longs on
// 32-bit targets).
6471 VarSetOps::AssignNoCopy(this, lvaFloatVars, VarSetOps::MakeEmpty(this));
6472 #ifndef _TARGET_64BIT_
6473 VarSetOps::AssignNoCopy(this, lvaLongVars, VarSetOps::MakeEmpty(this));
6476 for (unsigned i = 0; i < lvaCount; i++)
6478 LclVarDsc* varDsc = &lvaTable[i];
6479 if (varDsc->lvTracked)
6481 if (varTypeIsFloating(varDsc->lvType))
6483 VarSetOps::AddElemD(this, lvaFloatVars, varDsc->lvVarIndex);
6485 #ifndef _TARGET_64BIT_
6486 else if (varTypeIsLong(varDsc->lvType))
6488 VarSetOps::AddElemD(this, lvaLongVars, varDsc->lvVarIndex);
// optComputeLoopNestSideEffects: Compute side effects for the whole loop
// nest rooted at the outermost loop 'lnum' by visiting every block from
// the loop's lpFirst up to (but not including) the block after its bottom.
6495 void Compiler::optComputeLoopNestSideEffects(unsigned lnum)
6497 assert(optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP); // Requires: lnum is outermost.
6498 BasicBlock* botNext = optLoopTable[lnum].lpBottom->bbNext;
6499 for (BasicBlock* bbInLoop = optLoopTable[lnum].lpFirst; bbInLoop != botNext; bbInLoop = bbInLoop->bbNext)
6501 optComputeLoopSideEffectsOfBlock(bbInLoop);
// optComputeLoopSideEffectsOfBlock: Accumulate the side effects of one
// block into its innermost containing loop and all of that loop's
// ancestors. Records: variable liveness/use-def, whether a call appears,
// which fields and array element types are modified, and finally — if any
// tree has arbitrary memory effects — sets lpLoopHasHeapHavoc on every
// containing loop.
6505 void Compiler::optComputeLoopSideEffectsOfBlock(BasicBlock* blk)
6507 unsigned mostNestedLoop = blk->bbNatLoopNum;
6508 assert(mostNestedLoop != BasicBlock::NOT_IN_LOOP);
// Liveness info for this block is merged into every containing loop.
6510 AddVariableLivenessAllContainingLoops(mostNestedLoop, blk);
6512 bool heapHavoc = false; // True ==> there's a call or a memory store that has arbitrary heap effects.
6514 // Now iterate over the remaining statements, and their trees.
6515 for (GenTreePtr stmts = blk->FirstNonPhiDef(); (stmts != nullptr); stmts = stmts->gtNext)
6517 for (GenTreePtr tree = stmts->gtStmt.gtStmtList; (tree != nullptr); tree = tree->gtNext)
6519 genTreeOps oper = tree->OperGet();
6521 // Even after we set heapHavoc we still may want to know if a loop contains calls
6524 if (oper == GT_CALL)
6526 // Record that this loop contains a call
6527 AddContainsCallAllContainingLoops(mostNestedLoop);
6530 // If we just set lpContainsCall or it was previously set
6531 if (optLoopTable[mostNestedLoop].lpContainsCall)
6533 // We can early exit after both heapHavoc and lpContainsCall are both set to true.
6537 // We are just looking for GT_CALL nodes after heapHavoc was set.
6541 // otherwise heapHavoc is not set
6544 // This body is a distillation of the heap-side effect code of value numbering.
6545 // We also do a very limited analysis of byref PtrTo values, to cover some cases
6546 // that the compiler creates.
6548 if (GenTree::OperIsAssignment(oper))
6550 GenTreePtr lhs = tree->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/true);
// Store through an indirection: classify the address to decide whether
// this is an array element, a field, or an unknown (havoc) location.
6552 if (lhs->OperGet() == GT_IND)
6554 GenTreePtr arg = lhs->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/true);
6555 FieldSeqNode* fldSeqArrElem = nullptr;
// Volatile stores are treated conservatively.
6557 if ((tree->gtFlags & GTF_IND_VOLATILE) != 0)
6565 if (arg->TypeGet() == TYP_BYREF && arg->OperGet() == GT_LCL_VAR)
6567 // If it's a local byref for which we recorded a value number, use that...
6568 GenTreeLclVar* argLcl = arg->AsLclVar();
6569 if (!fgExcludeFromSsa(argLcl->GetLclNum()))
6571 ValueNum argVN = lvaTable[argLcl->GetLclNum()].GetPerSsaData(argLcl->GetSsaNum())->m_vnPair.GetLiberal();
// A VN of the form PtrToArrElem identifies a store to a known
// element type: record only that element type as modified.
6573 if (argVN != ValueNumStore::NoVN && vnStore->GetVNFunc(argVN, &funcApp) && funcApp.m_func == VNF_PtrToArrElem)
6575 assert(vnStore->IsVNHandle(funcApp.m_args[0]));
6576 CORINFO_CLASS_HANDLE elemType = CORINFO_CLASS_HANDLE(vnStore->ConstantValue<size_t>(funcApp.m_args[0]));
6577 AddModifiedElemTypeAllContainingLoops(mostNestedLoop, elemType);
6578 // Don't set heapHavoc below.
6585 // Is the LHS an array index expression?
6586 else if (lhs->ParseArrayElemForm(this, &arrInfo, &fldSeqArrElem))
6588 // We actually ignore "fldSeq" -- any modification to an S[], at any
6589 // field of "S", will lose all information about the array type.
6590 CORINFO_CLASS_HANDLE elemTypeEq = EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType);
6591 AddModifiedElemTypeAllContainingLoops(mostNestedLoop, elemTypeEq);
6595 // We are only interested in IsFieldAddr()'s fldSeq out parameter.
6597 GenTreePtr obj = nullptr; // unused
6598 GenTreePtr staticOffset = nullptr; // unused
6599 FieldSeqNode* fldSeq = nullptr;
6601 if (arg->IsFieldAddr(this, &obj, &staticOffset, &fldSeq) &&
6602 (fldSeq != FieldSeqStore::NotAField()))
6604 // Get the first (object) field from field seq. Heap[field] will yield the "field map".
6605 assert(fldSeq != nullptr);
6606 if (fldSeq->IsFirstElemFieldSeq())
6608 fldSeq = fldSeq->m_next;
6609 assert(fldSeq != nullptr);
6612 AddModifiedFieldAllContainingLoops(mostNestedLoop, fldSeq->m_fieldHnd);
// Store to a static field (class variable).
6620 else if (lhs->OperGet() == GT_CLS_VAR)
6622 AddModifiedFieldAllContainingLoops(mostNestedLoop, lhs->gtClsVar.gtClsVarHnd);
6624 // Otherwise, must be local lhs form. I should assert that.
6625 else if (lhs->OperGet() == GT_LCL_VAR)
6627 GenTreeLclVar* lhsLcl = lhs->AsLclVar();
6628 GenTreePtr rhs = tree->gtOp.gtOp2;
6629 ValueNum rhsVN = rhs->gtVNPair.GetLiberal();
6630 // If we gave the RHS a value number, propagate it.
6631 if (rhsVN != ValueNumStore::NoVN)
6633 rhsVN = vnStore->VNNormVal(rhsVN);
6634 if (!fgExcludeFromSsa(lhsLcl->GetLclNum()))
6636 lvaTable[lhsLcl->GetLclNum()].GetPerSsaData(lhsLcl->GetSsaNum())->m_vnPair.SetLiberal(rhsVN);
6641 else // not GenTree::OperIsAssignment(oper)
6646 tree->gtVNPair = tree->gtOp.gtOp2->gtVNPair;
6650 // Is it an addr of a array index expression?
6652 GenTreePtr addrArg = tree->gtOp.gtOp1;
6653 if (addrArg->OperGet() == GT_IND)
6655 // Is the LHS an array index expression?
6656 if (addrArg->gtFlags & GTF_IND_ARR_INDEX)
6659 bool b = GetArrayInfoMap()->Lookup(addrArg, &arrInfo);
// Label this address with a PtrToArrElem VN so a later store through
// it can be recognized (see the byref-local case above).
6661 CORINFO_CLASS_HANDLE elemType = EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType);
6662 tree->gtVNPair.SetBoth(vnStore->VNForFunc(TYP_BYREF, VNF_PtrToArrElem,
6663 vnStore->VNForHandle(ssize_t(elemType), GTF_ICON_CLASS_HDL),
6664 // The rest are dummy arguments.
6665 vnStore->VNForNull(),
6666 vnStore->VNForNull(),
6667 vnStore->VNForNull()));
6677 GenTreeLclVarCommon* lclVarTree;
6679 if (!tree->DefinesLocal(this, &lclVarTree, &isEntire))
6681 // For now, assume arbitrary side effects on the heap...
6682 // TODO-CQ: Why not be complete, and get this case right?
// Interlocked operations mutate memory with unknown targets.
6688 case GT_LOCKADD: // Binop
6689 case GT_XADD: // Binop
6690 case GT_XCHG: // Binop
6691 case GT_CMPXCHG: // Specialop
6699 GenTreeCall* call = tree->AsCall();
6701 // Record that this loop contains a call
6702 AddContainsCallAllContainingLoops(mostNestedLoop);
// Helper calls can be exempted from heap havoc when the helper is
// known not to mutate the heap (or is a hoistable cctor trigger).
6704 if (call->gtCallType == CT_HELPER)
6706 CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
6707 if (s_helperCallProperties.MutatesHeap(helpFunc))
6711 else if (s_helperCallProperties.MayRunCctor(helpFunc))
6713 // If the call is labeled as "Hoistable", then we've checked the
6714 // class that would be constructed, and it is not precise-init, so
6715 // the cctor will not be run by this call. Otherwise, it might be,
6716 // and might have arbitrary side effects.
6717 if ((tree->gtFlags & GTF_CALL_HOISTABLE) == 0)
6731 // All other gtOper node kinds, leave 'heapHavoc' unchanged (i.e. false)
6740 // Record that all loops containing this block have heap havoc effects.
6741 unsigned lnum = mostNestedLoop;
6742 while (lnum != BasicBlock::NOT_IN_LOOP)
6744 optLoopTable[lnum].lpLoopHasHeapHavoc = true;
6745 lnum = optLoopTable[lnum].lpParent;
6750 // Marks the containsCall information to "lnum" and any parent loops.
// Propagates lpContainsCall = true outward along the lpParent chain, so
// every loop enclosing the call site knows it contains a call.
6751 void Compiler::AddContainsCallAllContainingLoops(unsigned lnum)
6753 assert(0 <= lnum && lnum < optLoopCount);
6754 while (lnum != BasicBlock::NOT_IN_LOOP)
6756 optLoopTable[lnum].lpContainsCall = true;
6757 lnum = optLoopTable[lnum].lpParent;
6761 // Adds the variable liveness information for 'blk' to 'this' LoopDsc
// lpVarInOut accumulates variables live into or out of the block;
// lpVarUseDef accumulates variables used or defined by the block.
6762 void Compiler::LoopDsc::AddVariableLiveness(Compiler* comp, BasicBlock * blk)
6764 VarSetOps::UnionD(comp, this->lpVarInOut, blk->bbLiveIn);
6765 VarSetOps::UnionD(comp, this->lpVarInOut, blk->bbLiveOut);
6767 VarSetOps::UnionD(comp, this->lpVarUseDef, blk->bbVarUse);
6768 VarSetOps::UnionD(comp, this->lpVarUseDef, blk->bbVarDef);
6771 // Adds the variable liveness information for 'blk' to "lnum" and any parent loops.
// Walks outward along the lpParent chain, merging the block's liveness
// into each containing loop's summary (see LoopDsc::AddVariableLiveness).
6772 void Compiler::AddVariableLivenessAllContainingLoops(unsigned lnum, BasicBlock * blk)
6774 assert(0 <= lnum && lnum < optLoopCount);
6775 while (lnum != BasicBlock::NOT_IN_LOOP)
6777 optLoopTable[lnum].AddVariableLiveness(this, blk);
6778 lnum = optLoopTable[lnum].lpParent;
6782 // Adds "fldHnd" to the set of modified fields of "lnum" and any parent loops.
// Used by loop side-effect analysis so later phases know which object /
// static fields a loop may write.
6783 void Compiler::AddModifiedFieldAllContainingLoops(unsigned lnum, CORINFO_FIELD_HANDLE fldHnd)
6785 assert(0 <= lnum && lnum < optLoopCount);
6786 while (lnum != BasicBlock::NOT_IN_LOOP)
6788 optLoopTable[lnum].AddModifiedField(this, fldHnd);
6789 lnum = optLoopTable[lnum].lpParent;
6793 // Adds "elemType" to the set of modified array element types of "lnum" and any parent loops.
// Companion of AddModifiedFieldAllContainingLoops, but for array element
// stores keyed by the (encoded) element class handle.
6794 void Compiler::AddModifiedElemTypeAllContainingLoops(unsigned lnum, CORINFO_CLASS_HANDLE elemClsHnd)
6796 assert(0 <= lnum && lnum < optLoopCount);
6797 while (lnum != BasicBlock::NOT_IN_LOOP)
6799 optLoopTable[lnum].AddModifiedElemType(this, elemClsHnd);
6800 lnum = optLoopTable[lnum].lpParent;
6804 /*****************************************************************************
6806 * Helper passed to Compiler::fgWalkAllTreesPre() to decrement the LclVar usage counts
6807 * The 'keepList' is either a single tree or a list of trees that are formed by
6808 * one or more GT_COMMA nodes. It is the kept side-effects as returned by the
6809 * gtExtractSideEffList method.
6813 Compiler::fgWalkResult Compiler::optRemoveTreeVisitor(GenTreePtr *pTree, fgWalkData *data)
6815 GenTreePtr tree = *pTree;
6816 Compiler * comp = data->compiler;
6817 GenTreePtr keepList = (GenTreePtr)(data->pCallbackData)
// First check whether 'tree' is one of the kept side-effect trees; kept
// trees (and their subtrees) must not have their ref counts decremented.
6819 // We may have a non-NULL side effect list that is being kept
6823 GenTreePtr keptTree = keepList;
6824 while (keptTree->OperGet() == GT_COMMA)
6826 assert(keptTree->OperKind() & GTK_SMPOP);
6827 GenTreePtr op1 = keptTree->gtOp.gtOp1;
6828 GenTreePtr op2 = keptTree->gtGetOp2();
6830 // For the GT_COMMA case the op1 is part of the original CSE tree
6831 // that is being kept because it contains some side-effect
6835 // This tree and all of its sub trees are being kept.
6836 return WALK_SKIP_SUBTREES;
6839 // For the GT_COMMA case the op2 are the remaining side-effects of the original CSE tree
6840 // which can again be another GT_COMMA or the final side-effect part
6844 if (tree == keptTree)
6846 // This tree and all of its sub trees are being kept.
6847 return WALK_SKIP_SUBTREES;
6851 // This node is being removed from the graph of GenTreePtr
6853 // Look for any local variable references
6855 if (tree->gtOper == GT_LCL_VAR && comp->lvaLocalVarRefCounted)
6860 /* This variable ref is going away, decrease its ref counts */
6862 lclNum = tree->gtLclVarCommon.gtLclNum;
6863 assert(lclNum < comp->lvaCount);
6864 varDsc = comp->lvaTable + lclNum;
6866 // make sure it's been initialized
6867 assert(comp->compCurBB != nullptr);
6868 assert(comp->compCurBB->bbWeight <= BB_MAX_WEIGHT);
6870 /* Decrement its lvRefCnt and lvRefCntWtd */
6872 // Use getBBWeight to determine the proper block weight.
6873 // This impacts the block weights when we have IBC data.
6874 varDsc->decRefCnts(comp->compCurBB->getBBWeight(comp), comp);
6877 return WALK_CONTINUE;
6880 /*****************************************************************************
6882 * Routine called to decrement the LclVar ref counts when removing a tree
6883 * during the remove RangeCheck phase.
6884 * This method will decrement the refcounts for any LclVars used below 'deadTree',
6885 * unless the node is found in the 'keepList' (which are saved side effects)
6886 * The keepList is communicated using the walkData.pCallbackData field
6887 * Also the compCurBB must be set to the current BasicBlock which contains
6888 * 'deadTree' as we need to fetch the block weight when decrementing the ref counts.
6891 void Compiler::optRemoveTree(GenTreePtr deadTree, GenTreePtr keepList)
6893 // We communicate this value using the walkData.pCallbackData field
// Thin wrapper: the per-node work is done by optRemoveTreeVisitor.
6895 fgWalkTreePre(&deadTree, optRemoveTreeVisitor, (void*)keepList);
6898 /*****************************************************************************
6900 * Given an array index node, mark it as not needing a range check.
// 'tree' is a GT_COMMA whose op1 is the GT_ARR_BOUNDS_CHECK to remove.
// Side effects of the bounds check (per 'sideEffFlags') are preserved by
// splicing the extracted side-effect list back in as the comma's op1;
// otherwise the check is replaced with a nothing node.
6903 void Compiler::optRemoveRangeCheck(GenTreePtr tree,
6905 bool updateCSEcounts,
6906 unsigned sideEffFlags,
6923 noway_assert(!"can't remove range checks without REARRANGE_ADDS right now");
6926 noway_assert(stmt->gtOper == GT_STMT);
6927 noway_assert(tree->gtOper == GT_COMMA);
6928 noway_assert(tree->gtOp.gtOp1->gtOper == GT_ARR_BOUNDS_CHECK);
6929 noway_assert(forceRemove || optIsRangeCheckRemovable(tree->gtOp.gtOp1));
6931 GenTreeBoundsChk* bndsChk = tree->gtOp.gtOp1->AsBoundsChk();
6936 printf("Before optRemoveRangeCheck:\n");
6941 GenTreePtr sideEffList = nullptr;
6944 gtExtractSideEffList(tree->gtOp.gtOp1, &sideEffList, sideEffFlags);
6947 // Decrement the ref counts for any LclVars that are being deleted
6949 optRemoveTree(tree->gtOp.gtOp1, sideEffList);
6951 // Just replace the bndsChk with a NOP as an operand to the GT_COMMA, if there are no side effects.
6952 tree->gtOp.gtOp1 = (sideEffList != NULL) ? sideEffList : gtNewNothingNode();
6954 // TODO-CQ: We should also remove the GT_COMMA, but in any case we can no longer CSE the GT_COMMA.
6955 tree->gtFlags |= GTF_DONT_CSE;
6957 /* Recalculate the gtCostSz, etc... */
6958 gtSetStmtInfo(stmt);
6960 /* Re-thread the nodes if necessary */
6961 if (fgStmtListThreaded)
6969 printf("After optRemoveRangeCheck:\n");
6976 /*****************************************************************************
6977 * Return the scale in an array reference, given a pointer to the
6978 * multiplication node.
// Accepts a GT_MUL or GT_LSH node; for GT_LSH the shift amount is
// converted to a multiplicative scale. Also folds one level of cascaded
// constant multiplication into the scale, and reports the remaining index
// tree through 'pIndex'.
6981 ssize_t Compiler::optGetArrayRefScaleAndIndex(GenTreePtr mul, GenTreePtr *pIndex DEBUGARG(bool bRngChk))
6984 assert (mul->gtOper == GT_MUL || mul->gtOper == GT_LSH);
6985 assert (mul->gtOp.gtOp2->IsCnsIntOrI());
6987 ssize_t scale = mul->gtOp.gtOp2->gtIntConCommon.IconValue();
// A left shift by a constant is a multiplication by 2^constant.
6989 if (mul->gtOper == GT_LSH)
6990 scale = ((ssize_t)1) << scale;
6992 GenTreePtr index = mul->gtOp.gtOp1;
6994 if (index->gtOper == GT_MUL && index->gtOp.gtOp2->IsCnsIntOrI())
6996 // case of two cascading multiplications for constant int (e.g. * 20 morphed to * 5 * 4):
6997 // When index->gtOper is GT_MUL and index->gtOp.gtOp2->gtOper is GT_CNS_INT (i.e. * 5),
6998 // we can bump up the scale from 4 to 5*4, and then change index to index->gtOp.gtOp1.
6999 // Otherwise, we cannot optimize it. We will simply keep the original scale and index.
7000 scale *= index->gtOp.gtOp2->gtIntConCommon.IconValue();
7001 index = index->gtOp.gtOp1;
7004 assert (!bRngChk || index->gtOper != GT_COMMA);
7012 /*****************************************************************************
7013 * Find the last assignment to the local variable in the block. Return
7014 * RHS or NULL. If any local variable in the RHS has been killed in
7015 * intervening code, return NULL. If the variable being searched for is killed
7016 * in the intervening code, return NULL.
// Out params: '*pKilledInOut' accumulates the tracked locals assigned in
// the code scanned; '*pLhsRhsKilledAfterInit' is set when the found init
// (or its RHS operands) was killed after the initialization point.
7020 GenTreePtr Compiler::optFindLocalInit(BasicBlock *block, GenTreePtr local, VARSET_TP* pKilledInOut, bool* pLhsRhsKilledAfterInit)
7022 assert(pKilledInOut);
7023 assert(pLhsRhsKilledAfterInit);
7025 *pLhsRhsKilledAfterInit = false;
7027 unsigned LclNum = local->gtLclVarCommon.gtLclNum;
7029 GenTreePtr list = block->bbTreeList;
7035 GenTreePtr rhs = NULL;
7036 GenTreePtr stmt = list;
// Scan the block's statements backwards (gtPrev) so the first matching
// assignment found is the last one executed.
7039 stmt = stmt->gtPrev;
7045 GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
7046 // If we encounter an assignment to a local variable,
7047 if ((tree->OperKind() & GTK_ASGOP) && tree->gtOp.gtOp1->gtOper == GT_LCL_VAR)
7049 // And the assigned variable equals the input local,
7050 if (tree->gtOp.gtOp1->gtLclVarCommon.gtLclNum == LclNum)
7052 // If the assignment is '=' and it is not a conditional, then return rhs.
7053 if (tree->gtOper == GT_ASG && !(tree->gtFlags & GTF_COLON_COND))
7055 rhs = tree->gtOp.gtOp2;
7057 // If the assignment is 'op=' or a conditional equal, then the search ends here,
7058 // as we found a kill to the input local.
7061 *pLhsRhsKilledAfterInit = true;
7062 assert(rhs == NULL);
// Record any other tracked local assigned between the init and the end
// of the block; these are 'kills' for the caller's purposes.
7068 LclVarDsc* varDsc = optIsTrackedLocal(tree->gtOp.gtOp1);
7073 VarSetOps::AddElemD(this, *pKilledInOut, varDsc->lvVarIndex);
7077 while (stmt != list);
7084 // If any local in the RHS is killed in intervening code, or RHS has an indirection, return NULL.
7085 varRefKinds rhsRefs = VR_NONE;
7086 VARSET_TP VARSET_INIT_NOCOPY(rhsLocals, VarSetOps::UninitVal());
7087 bool b = lvaLclVarRefs(rhs, NULL, &rhsRefs, &rhsLocals);
7089 !VarSetOps::IsEmptyIntersection(this, rhsLocals, *pKilledInOut) ||
7090 (rhsRefs != VR_NONE))
7092 // If RHS has been indirectly referenced, consider it a write and a kill.
7093 *pLhsRhsKilledAfterInit = true;
7100 /*****************************************************************************
7102 * Return true if "op1" is guaranteed to be less than or equal to "op2".
// Handles two shapes: both operands integer constants, or both reducible
// (after stripping a '+ constant') to the SAME local variable, in which
// case only the constant addends need comparing.
7107 bool Compiler::optIsNoMore(GenTreePtr op1, GenTreePtr op2,
// Constant vs constant: compare directly (with any accumulated addends).
7110 if (op1->gtOper == GT_CNS_INT &&
7111 op2->gtOper == GT_CNS_INT)
7113 add1 += op1->gtIntCon.gtIconVal;
7114 add2 += op2->gtIntCon.gtIconVal;
7118 /* Check for +/- constant on either operand */
7120 if (op1->gtOper == GT_ADD && op1->gtOp.gtOp2->gtOper == GT_CNS_INT)
7122 add1 += op1->gtOp.gtOp2->gtIntCon.gtIconVal;
7123 op1 = op1->gtOp.gtOp1;
7126 if (op2->gtOper == GT_ADD && op2->gtOp.gtOp2->gtOper == GT_CNS_INT)
7128 add2 += op2->gtOp.gtOp2->gtIntCon.gtIconVal;
7129 op2 = op2->gtOp.gtOp1;
7132 /* We only allow local variable references */
7134 if (op1->gtOper != GT_LCL_VAR)
7136 if (op2->gtOper != GT_LCL_VAR)
7138 if (op1->gtLclVarCommon.gtLclNum != op2->gtLclVarCommon.gtLclNum)
7141 /* NOTE: Caller ensures that this variable has only one def */
7143 // printf("limit [%d]:\n", add1); gtDispTree(op1);
7144 // printf("size [%d]:\n", add2); gtDispTree(op2);
7149 return (bool)(add1 <= add2);
7154 //------------------------------------------------------------------------------
7155 // optObtainLoopCloningOpts: Identify optimization candidates and update
7156 // the "context" for array optimizations.
// Arguments:
7159 // context - data structure where all loop cloning info is kept. The
7160 // optInfo fields of the context are updated with the
7161 // identified optimization candidates.
7163 void Compiler::optObtainLoopCloningOpts(LoopCloneContext* context)
// Consider every loop in the table; only clonable, non-removed loops are
// examined for candidates.
7165 for (unsigned i = 0; i < optLoopCount; i++)
7167 JITDUMP("Considering loop %d to clone for optimizations.\n", i);
7168 if (optIsLoopClonable(i))
7170 if (!(optLoopTable[i].lpFlags & LPFLG_REMOVED))
7172 optIdentifyLoopOptInfo(i, context);
7175 JITDUMP("------------------------------------------------------------\n");
7180 //------------------------------------------------------------------------
7181 // optIdentifyLoopOptInfo: Identify loop optimization candidates and also
7182 // check if the loop is suitable for the optimizations performed.
// Arguments:
7185 // loopNum - the current loop index for which conditions are derived.
7186 // context - data structure where all loop cloning candidates will be
// Return Value:
7190 // If the loop is not suitable for the optimizations, return false - context
7191 // should not contain any optimization candidate for the loop if false.
7192 // Else return true.
// Operation:
7195 // Check if the loop is well formed for this optimization and identify the
7196 // optimization candidates and update the "context" parameter with all the
7197 // contextual information necessary to perform the optimization later.
7199 bool Compiler::optIdentifyLoopOptInfo(unsigned loopNum, LoopCloneContext* context)
7201 noway_assert(loopNum < optLoopCount);
7203 LoopDsc* pLoop = &optLoopTable[loopNum];
// The loop must have a recognized induction variable...
7205 if (!(pLoop->lpFlags & LPFLG_ITER))
7207 JITDUMP("> No iter flag on loop %d.\n", loopNum);
// ...and that variable must not be address-exposed.
7211 unsigned ivLclNum = pLoop->lpIterVar();
7212 if (lvaVarAddrExposed(ivLclNum))
7214 JITDUMP("> Rejected V%02u as iter var because is address-exposed.\n", ivLclNum);
7218 BasicBlock* head = pLoop->lpHead;
7219 BasicBlock* end = pLoop->lpBottom;
7220 BasicBlock* beg = head->bbNext;
// The loop must be bottom-tested: a conditional jump at the bottom back
// to the block after the head.
7222 if (end->bbJumpKind != BBJ_COND)
7224 JITDUMP("> Couldn't find termination test.\n");
7228 if (end->bbJumpDest != beg)
7230 JITDUMP("> Branch at loop 'end' not looping to 'begin'.\n");
7234 // TODO-CQ: CLONE: Mark increasing or decreasing loops.
// Only unit-increment loops are currently handled.
7235 if ((pLoop->lpIterOper() != GT_ASG_ADD && pLoop->lpIterOper() != GT_ADD) || (pLoop->lpIterConst() != 1))
7237 JITDUMP("> Loop iteration operator not matching\n")
// The limit must be a constant, a variable, or an array length.
7241 if ((pLoop->lpFlags & LPFLG_CONST_LIMIT) == 0 &&
7242 (pLoop->lpFlags & LPFLG_VAR_LIMIT) == 0 &&
7243 (pLoop->lpFlags & LPFLG_ARRLEN_LIMIT) == 0)
7245 JITDUMP("> Loop limit is neither constant, variable or array length\n")
// The test direction must agree with the iteration direction
// (< / <= with increment, > / >= with decrement).
7249 if (!(((pLoop->lpTestOper() == GT_LT || pLoop->lpTestOper() == GT_LE) && (pLoop->lpIterOper() == GT_ADD || pLoop->lpIterOper() == GT_ASG_ADD)) ||
7250 ((pLoop->lpTestOper() == GT_GT || pLoop->lpTestOper() == GT_GE) && (pLoop->lpIterOper() == GT_SUB || pLoop->lpIterOper() == GT_ASG_SUB))))
7252 JITDUMP("> Loop test (%s) doesn't agree with the direction (%s) of the pLoop->\n",
7253 GenTree::NodeName(pLoop->lpTestOper()), GenTree::NodeName(pLoop->lpIterOper()));
// Require a zero-trip-test (loop inversion) so the entry is guarded.
7257 if (!(pLoop->lpTestTree->OperKind() & GTK_RELOP) || !(pLoop->lpTestTree->gtFlags & GTF_RELOP_ZTT))
7259 JITDUMP("> Loop inversion NOT present, loop test [%06u] may not protect entry from head.\n", pLoop->lpTestTree->gtTreeID);
7264 GenTreePtr op1 = pLoop->lpIterator();
7265 noway_assert((op1->gtOper == GT_LCL_VAR) && (op1->gtLclVarCommon.gtLclNum == ivLclNum));
7268 JITDUMP("Checking blocks BB%02d..BB%02d for optimization candidates\n", beg->bbNum, end->bbNext ? end->bbNext->bbNum : 0);
// Walk every statement of every block in the loop, collecting cloning
// candidates via optCanOptimizeByLoopCloningVisitor.
7270 LoopCloneVisitorInfo info(context, loopNum, nullptr);
7271 for (BasicBlock* block = beg; block != end->bbNext; block = block->bbNext)
7274 for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
7277 fgWalkTreePre(&stmt->gtStmt.gtStmtExpr, optCanOptimizeByLoopCloningVisitor, &info, false, false);
7284 //---------------------------------------------------------------------------------------------------------------
7285 // optExtractArrIndex: Try to extract the array index from "tree".
// Arguments:
7288 // tree the tree to be checked if it is the array [] operation.
7289 // result the extracted GT_INDEX information is updated in result.
7290 // lhsNum for the root level (function is recursive) callers should be BAD_VAR_NUM.
// Return Value:
7293 // Returns true if array index can be extracted, else, return false. See assumption about
7294 // what will be extracted. The "result" variable's rank parameter is advanced for every
7295 // dimension of [] encountered.
// Operation:
7298 // Given a "tree" extract the GT_INDEX node in "result" as ArrIndex. In FlowGraph morph
7299 // we have converted a GT_INDEX tree into a scaled index base offset expression. We need
7300 // to reconstruct this to be able to know if this is an array access.
// Assumption:
7303 // The method extracts only if the array base and indices are GT_LCL_VAR.
7305 // TODO-CQ: CLONE: After morph make sure this method extracts values before morph.
// Example morphed tree shape this method matches (comma of bounds check
// and scaled-index-base-offset indirection):
7307 // [000000001AF828D8] ---XG------- indir int
7308 // [000000001AF872C8] ------------ const long 16 Fseq[#FirstElem]
7309 // [000000001AF87340] ------------ + byref
7310 // [000000001AF87160] -------N---- const long 2
7311 // [000000001AF871D8] ------------ << long
7312 // [000000001AF870C0] ------------ cast long <- int
7313 // [000000001AF86F30] i----------- lclVar int V04 loc0
7314 // [000000001AF87250] ------------ + byref
7315 // [000000001AF86EB8] ------------ lclVar ref V01 arg1
7316 // [000000001AF87468] ---XG------- comma int
7317 // [000000001AF87020] ---X-------- arrBndsChk void
7318 // [000000001AF86FA8] ---X-------- arrLen int
7319 // [000000001AF827E8] ------------ lclVar ref V01 arg1
7320 // [000000001AF82860] ------------ lclVar int V04 loc0
7321 // [000000001AF829F0] -A-XG------- = int
7322 // [000000001AF82978] D------N---- lclVar int V06 tmp0
7324 bool Compiler::optExtractArrIndex(GenTreePtr tree, ArrIndex* result, unsigned lhsNum)
// The shape must be GT_COMMA(GT_ARR_BOUNDS_CHECK, GT_IND(...)).
7326 if (tree->gtOper != GT_COMMA)
7330 GenTreePtr before = tree->gtGetOp1();
7331 if (before->gtOper != GT_ARR_BOUNDS_CHECK)
// Both the array (under arrLen) and the index of the bounds check must
// be simple locals.
7335 GenTreeBoundsChk* arrBndsChk = before->AsBoundsChk();
7336 if (arrBndsChk->gtArrLen->gtGetOp1()->gtOper != GT_LCL_VAR)
7340 if (arrBndsChk->gtIndex->gtOper != GT_LCL_VAR)
7344 unsigned arrLcl = arrBndsChk->gtArrLen->gtGetOp1()->gtLclVarCommon.gtLclNum;
// For inner dimensions, the array local must match the temp assigned by
// the enclosing dimension (passed in as 'lhsNum').
7345 if (lhsNum != BAD_VAR_NUM && arrLcl != lhsNum)
7350 unsigned indLcl = arrBndsChk->gtIndex->gtLclVarCommon.gtLclNum;
// Decompose the indirection's address: ((base + (index << scale)) + offset).
7352 GenTreePtr after = tree->gtGetOp2();
7354 if (after->gtOper != GT_IND)
7358 GenTreePtr sibo = after->gtGetOp1();
7359 if (sibo->gtOper != GT_ADD)
7363 GenTreePtr sib = sibo->gtGetOp1();
7364 GenTreePtr ofs = sibo->gtGetOp2();
7365 if (ofs->gtOper != GT_CNS_INT)
7369 if (sib->gtOper != GT_ADD)
7373 GenTreePtr si = sib->gtGetOp2();
7374 GenTreePtr base = sib->gtGetOp1();
7375 if (si->gtOper != GT_LSH)
// The base must be the same array local as in the bounds check.
7379 if (base->OperGet() != GT_LCL_VAR || base->gtLclVarCommon.gtLclNum != arrLcl)
7383 GenTreePtr scale = si->gtGetOp2();
7384 GenTreePtr index = si->gtGetOp1();
7385 if (scale->gtOper != GT_CNS_INT)
// On AMD64 the index local is widened via a cast; look through it.
7389 #ifdef _TARGET_AMD64_
7390 if (index->gtOper != GT_CAST)
7394 GenTreePtr indexVar = index->gtGetOp1();
7396 GenTreePtr indexVar = index;
// The scaled index must be the same local as the bounds-check index.
7398 if (indexVar->gtOper != GT_LCL_VAR || indexVar->gtLclVarCommon.gtLclNum != indLcl)
// Record the extracted dimension; arrLcl is only recorded at root level.
7402 if (lhsNum == BAD_VAR_NUM)
7404 result->arrLcl = arrLcl;
7406 result->indLcls.Push(indLcl);
7407 result->bndsChks.Push(tree);
7408 result->useBlock = compCurBB;
7414 //---------------------------------------------------------------------------------------------------------------
7415 // optReconstructArrIndex: Reconstruct array index.
// Arguments:
7418 // tree the tree to be checked if it is an array [][][] operation.
7419 // result the extracted GT_INDEX information.
7420 // lhsNum for the root level (function is recursive) callers should be BAD_VAR_NUM.
// Return Value:
7423 // Returns true if array index can be extracted, else, return false. "rank" field in
7424 // "result" contains the array access depth. The "indLcls" fields contain the indices.
// Operation:
7427 // Recursively look for a list of array indices. In the example below, we encounter,
7428 // V03 = ((V05 = V00[V01]), (V05[V02])) which corresponds to access of V00[V01][V02]
7429 // The return value would then be:
7430 // ArrIndex result { arrLcl: V00, indLcls: [V01, V02], rank: 2 }
// Example:
7432 // V00[V01][V02] would be morphed as:
7434 // [000000001B366848] ---XG------- indir int
7435 // [000000001B36BC50] ------------ V05 + (V02 << 2) + 16
7436 // [000000001B36C200] ---XG------- comma int
7437 // [000000001B36BDB8] ---X-------- arrBndsChk(V05, V02)
7438 // [000000001B36C278] -A-XG------- comma int
7439 // [000000001B366730] R--XG------- indir ref
7440 // [000000001B36C2F0] ------------ V00 + (V01 << 3) + 24
7441 // [000000001B36C818] ---XG------- comma ref
7442 // [000000001B36C458] ---X-------- arrBndsChk(V00, V01)
7443 // [000000001B36BB60] -A-XG------- = ref
7444 // [000000001B36BAE8] D------N---- lclVar ref V05 tmp2
7445 // [000000001B36A668] -A-XG------- = int
7446 // [000000001B36A5F0] D------N---- lclVar int V03 tmp0
// Assumption:
7449 // The method extracts only if the array base and indices are GT_LCL_VAR.
7451 bool Compiler::optReconstructArrIndex(GenTreePtr tree, ArrIndex* result, unsigned lhsNum)
7453 // If we can extract "tree" (which is a top level comma) return.
7454 if (optExtractArrIndex(tree, result, lhsNum))
7458 // We have a comma (check if array base expr is computed in "before"), descend further.
7459 else if (tree->OperGet() == GT_COMMA)
7461 GenTreePtr before = tree->gtGetOp1();
7462 // "before" should evaluate an array base for the "after" indexing.
7463 if (before->OperGet() != GT_ASG)
7467 GenTreePtr lhs = before->gtGetOp1();
7468 GenTreePtr rhs = before->gtGetOp2();
7470 // "rhs" should contain an GT_INDEX
7471 if (!lhs->IsLocal() || !optReconstructArrIndex(rhs, result, lhsNum))
// The temp assigned by the outer dimension becomes the expected array
// base ('lhsNum') for the inner dimension's extraction.
7475 unsigned lhsNum = lhs->gtLclVarCommon.gtLclNum;
7476 GenTreePtr after = tree->gtGetOp2();
7477 // Pass the "lhsNum", so we can verify if indeed it is used as the array base.
7478 return optExtractArrIndex(after, result, lhsNum);
// Tree-walk callback adapter: unpacks the LoopCloneVisitorInfo from the
// walk data and forwards to optCanOptimizeByLoopCloning.
7484 Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloningVisitor(GenTreePtr* pTree, Compiler::fgWalkData* data)
7486 return data->compiler->optCanOptimizeByLoopCloning(*pTree, (LoopCloneVisitorInfo*) data->pCallbackData);
7490 //-------------------------------------------------------------------------
7491 // optIsStackLocalInvariant: Is stack local invariant in loop.
// Arguments:
7494 // loopNum The loop in which the variable is tested for invariance.
7495 // lclNum The local that is tested for invariance in the loop.
// Return Value:
7498 // Returns true if the variable is loop invariant in loopNum.
7500 bool Compiler::optIsStackLocalInvariant(unsigned loopNum, unsigned lclNum)
// Address-exposed locals can be modified through indirections, so they
// cannot be proven invariant.
7502 if (lvaVarAddrExposed(lclNum))
// Nor can locals that are directly assigned inside the loop.
7506 if (optIsVarAssgLoop(loopNum, lclNum))
7513 //----------------------------------------------------------------------------------------------
7514 // optCanOptimizeByLoopCloning: Check if the tree can be optimized by loop cloning and if so,
7515 // identify as potential candidate and update the loop context.
// Arguments:
7518 // tree The tree encountered during the tree walk.
7519 // info Supplies information about the current block or stmt in which the tree is.
7520 // Also supplies the "context" pointer for updating with loop cloning
7521 // candidates. Also supplies loopNum.
// Operation:
7524 // If array index can be reconstructed, check if the iter var of the loop matches the
7525 // array index var in some dim. Also ensure other index vars before the identified
7526 // dim are loop invariant.
// Return Value:
7529 // Skip sub trees if the optimization candidate is identified or else continue walking
7531 Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTreePtr tree, LoopCloneVisitorInfo* info)
7533 ArrIndex arrIndex(getAllocator());
7535 // Check if array index can be optimized.
7536 if (optReconstructArrIndex(tree, &arrIndex, BAD_VAR_NUM))
7538 assert(tree->gtOper == GT_COMMA);
7542 JITDUMP("Found ArrIndex at tree ");
7544 printf(" which is equivalent to: ");
// The array base itself must be invariant in the loop, or cloning the
// bounds checks away would be unsound.
7549 if (!optIsStackLocalInvariant(info->loopNum, arrIndex.arrLcl))
7551 return WALK_SKIP_SUBTREES;
7554 // Walk the dimensions and see if iterVar of the loop is used as index.
7555 for (unsigned dim = 0; dim < arrIndex.rank; ++dim)
7557 // Is index variable also used as the loop iter var.
7558 if (arrIndex.indLcls[dim] == optLoopTable[info->loopNum].lpIterVar())
7560 // Check the previous indices are all loop invariant.
7561 for (unsigned dim2 = 0; dim2 < dim; ++dim2)
7563 if (optIsVarAssgLoop(info->loopNum, arrIndex.indLcls[dim2]))
7565 JITDUMP("V%02d is assigned in loop\n", arrIndex.indLcls[dim2]);
7566 return WALK_SKIP_SUBTREES;
7572 JITDUMP("Loop %d can be cloned for ArrIndex ", info->loopNum);
7574 JITDUMP(" on dim %d\n", dim);
7577 // Update the loop context.
7578 info->context->EnsureLoopOptInfo(info->loopNum)->Push(new (this, CMK_LoopOpt) LcJaggedArrayOptInfo(arrIndex, dim, info->stmt));
7582 JITDUMP("Induction V%02d is not used as index on dim %d\n", optLoopTable[info->loopNum].lpIterVar(), dim);
// The comma subtree was fully classified; no need to walk into it.
7585 return WALK_SKIP_SUBTREES;
7587 else if (tree->gtOper == GT_ARR_ELEM)
7589 // TODO-CQ: CLONE: Implement.
7590 return WALK_SKIP_SUBTREES;
7592 return WALK_CONTINUE;
// Callback payload for the optValidRangeCheckIndex tree walk: carries the
// compiler instance plus a 'bValidIndex' flag that the walker clears when
// the index expression contains a node unsafe for range-check removal.
7595 struct optRangeCheckDsc
7597 Compiler* pCompiler;
7601 Walk to make sure that only locals and constants are contained in the index
7604 Compiler::fgWalkResult Compiler::optValidRangeCheckIndex(GenTreePtr * pTree, fgWalkData *data)
7606 GenTreePtr tree = *pTree;
7607 optRangeCheckDsc* pData= (optRangeCheckDsc*) data->pCallbackData;
// Indirections, class (static) variables, fields and local fields can be
// modified by code we don't see (e.g. other threads), so an index that
// reads one of them is not safe for range-check removal.
7609 if (tree->gtOper == GT_IND || tree->gtOper == GT_CLS_VAR ||
7610 tree->gtOper == GT_FIELD || tree->gtOper == GT_LCL_FLD)
7612 pData->bValidIndex = false;
7616 if (tree->gtOper == GT_LCL_VAR)
// An address-exposed local can likewise be changed through an alias.
7618 if (pData->pCompiler->lvaTable[tree->gtLclVarCommon.gtLclNum].lvAddrExposed)
7620 pData->bValidIndex = false;
7625 return WALK_CONTINUE;
7629 returns true if a range check can legally be removed (for the moment it checks
7630 that the array is a local array (non subject to racing conditions) and that the
7631 index is either a constant or a local
7633 bool Compiler::optIsRangeCheckRemovable(GenTreePtr tree)
7635 noway_assert(tree->gtOper == GT_ARR_BOUNDS_CHECK);
7636 GenTreeBoundsChk* bndsChk = tree->AsBoundsChk();
7637 GenTreePtr pArray = bndsChk->GetArray();
// With no array reference available we need a constant length to reason about.
7638 if (pArray == NULL && !bndsChk->gtArrLen->IsCnsIntOrI())
7642 GenTreePtr pIndex = bndsChk->gtIndex;
7644 // The length must be a constant (the pArray == NULL case) or the array reference must be a local.
7645 // Otherwise we can be targeted by malicious race-conditions.
7648 if ( pArray->gtOper != GT_LCL_VAR )
7654 printf("Can't remove range check if the array isn't referenced with a local\n");
7662 noway_assert(pArray->gtType == TYP_REF);
7663 noway_assert(pArray->gtLclVarCommon.gtLclNum < lvaCount);
// If the local's address escapes, another thread could replace the array
// underneath us, invalidating the length the check was proven against.
7665 if (lvaTable[pArray->gtLclVarCommon.gtLclNum].lvAddrExposed)
7667 // If the array address has been taken, don't do the optimization
7668 // (this restriction can be lowered a bit, but i don't think it's worth it)
7672 printf("Can't remove range check if the array has its address taken\n");
// Verify the index expression contains only constants and non-exposed
// locals, via the optValidRangeCheckIndex tree walk above.
7683 optRangeCheckDsc Data;
7684 Data.pCompiler =this;
7685 Data.bValidIndex=true;
7687 fgWalkTreePre(&pIndex, optValidRangeCheckIndex, &Data);
7689 if (!Data.bValidIndex)
7694 printf("Can't remove range check with this index");
7706 /******************************************************************************
7708 * Replace x==null with (x|x)==0 if x is a GC-type.
7709 * This will stress code-gen and the emitter to make sure they support such trees.
7714 void Compiler::optOptimizeBoolsGcStress(BasicBlock * condBlock)
7716 if (!compStressCompile(STRESS_OPT_BOOLS_GC, 20))
7719 noway_assert(condBlock->bbJumpKind == BBJ_COND);
7720 GenTreePtr condStmt = condBlock->bbTreeList->gtPrev->gtStmt.gtStmtExpr;
7722 noway_assert(condStmt->gtOper == GT_JTRUE);
7727 GenTreePtr comparand = optIsBoolCond(condStmt, &relop, &isBool);
7729 if (comparand == NULL || !varTypeIsGC(comparand->TypeGet()))
7732 if (comparand->gtFlags & (GTF_ASG|GTF_CALL|GTF_ORDER_SIDEEFF))
7735 GenTreePtr comparandClone = gtCloneExpr(comparand);
7737 // Bump up the ref-counts of any variables in 'comparandClone'
7738 compCurBB = condBlock;
7739 fgWalkTreePre(&comparandClone, Compiler::lvaIncRefCntsCB, (void *)this, true);
7741 noway_assert(relop->gtOp.gtOp1 == comparand);
7742 genTreeOps oper = compStressCompile(STRESS_OPT_BOOLS_GC, 50) ? GT_OR : GT_AND;
7743 relop->gtOp.gtOp1 = gtNewOperNode(oper, TYP_I_IMPL, comparand, comparandClone);
7745 // Comparand type is already checked, and we have const int, there is no harm
7746 // morphing it into a TYP_I_IMPL.
7747 noway_assert(relop->gtOp.gtOp2->gtOper == GT_CNS_INT);
7748 relop->gtOp.gtOp2->gtType = TYP_I_IMPL;
7753 /******************************************************************************
7754 * Function used by folding of boolean conditionals
7755 * Given a GT_JTRUE node, checks that it is a boolean comparison of the form
7756 * "if (boolVal ==/!= 0/1)". This is translated into a GT_EQ node with "op1"
7757 * being a boolean lclVar and "op2" the const 0/1.
7758 * On success, the comparand (ie. boolVal) is returned. Else NULL.
7759 * compPtr returns the compare node (i.e. GT_EQ or GT_NE node)
7760 * boolPtr returns whether the comparand is a boolean value (must be 0 or 1).
7761 * When return boolPtr equal to true, if the comparison was against a 1 (i.e true)
7762 * value then we morph the tree by reversing the GT_EQ/GT_NE and change the 1 to 0.
7765 GenTree * Compiler::optIsBoolCond(GenTree * condBranch,
7766 GenTree * * compPtr,
7769 bool isBool = false;
7771 noway_assert(condBranch->gtOper == GT_JTRUE);
7772 GenTree * cond = condBranch->gtOp.gtOp1;
7774 /* The condition must be "!= 0" or "== 0" */
7776 if ((cond->gtOper != GT_EQ) && (cond->gtOper != GT_NE))
7779 /* Return the compare node to the caller */
7783 /* Get hold of the comparands */
7785 GenTree * opr1 = cond->gtOp.gtOp1;
7786 GenTree * opr2 = cond->gtOp.gtOp2;
7788 if (opr2->gtOper != GT_CNS_INT)
7791 if (!opr2->IsIntegralConst(0) && !opr2->IsIntegralConst(1))
7794 ssize_t ival2 = opr2->gtIntCon.gtIconVal;
7796 /* Is the value a boolean?
7797 * We can either have a boolean expression (marked GTF_BOOLEAN) or
7798 * a local variable that is marked as being boolean (lvIsBoolean) */
7800 if (opr1->gtFlags & GTF_BOOLEAN)
7804 else if ((opr1->gtOper == GT_CNS_INT) &&
7805 (opr1->IsIntegralConst(0) || opr1->IsIntegralConst(1)))
7809 else if (opr1->gtOper == GT_LCL_VAR)
7811 /* is it a boolean local variable */
7813 unsigned lclNum = opr1->gtLclVarCommon.gtLclNum;
7814 noway_assert(lclNum < lvaCount);
7816 if (lvaTable[lclNum].lvIsBoolean)
7820 /* Was our comparison against the constant 1 (i.e. true) */
7823 // If this is a boolean expression tree we can reverse the relop
7824 // and change the true to false.
7827 gtReverseCond(cond);
7828 opr2->gtIntCon.gtIconVal = 0;
// Fold pairs of adjacent BBJ_COND blocks whose conditions are boolean
// compares against 0 into a single conditional branch on (c1 AND/OR c2).
7839 void Compiler::optOptimizeBools()
7844 printf("*************** In optOptimizeBools()\n");
7847 printf("Blocks/Trees before phase\n");
7848 fgDispBasicBlocks(true);
7858 for (BasicBlock * b1 = fgFirstBB; b1; b1 = b1->bbNext)
7860 /* We're only interested in conditional jumps here */
7862 if (b1->bbJumpKind != BBJ_COND)
7865 /* If there is no next block, we're done */
7867 BasicBlock * b2 = b1->bbNext;
7871 /* The next block must not be marked as BBF_DONT_REMOVE */
7872 if (b2->bbFlags & BBF_DONT_REMOVE)
7875 /* The next block also needs to be a condition */
7877 if (b2->bbJumpKind != BBJ_COND)
7880 optOptimizeBoolsGcStress(b1);
7885 bool sameTarget; // Do b1 and b2 have the same bbJumpDest?
7887 if (b1->bbJumpDest == b2->bbJumpDest)
7889 /* Given the following sequence of blocks :
7893 we will try to fold it to :
7894 B1: brtrue(t1|t2, BX)
7900 else if (b1->bbJumpDest == b2->bbNext) /*b1->bbJumpDest->bbNum == n1+2*/
7902 /* Given the following sequence of blocks :
7906 we will try to fold it to :
7907 B1: brtrue((!t1)&&t2, B3)
7918 /* The second block must contain a single statement */
7920 GenTreePtr s2 = b2->bbTreeList;
7921 if (s2->gtPrev != s2)
7924 noway_assert(s2->gtOper == GT_STMT);
7925 GenTreePtr t2 = s2->gtStmt.gtStmtExpr;
7926 noway_assert(t2->gtOper == GT_JTRUE);
7928 /* Find the condition for the first block */
7930 GenTreePtr s1 = b1->bbTreeList->gtPrev;
7932 noway_assert(s1->gtOper == GT_STMT);
7933 GenTreePtr t1 = s1->gtStmt.gtStmtExpr;
7934 noway_assert(t1->gtOper == GT_JTRUE);
// Folding would duplicate b2's condition for its other predecessors.
7936 if (b2->countOfInEdges() > 1)
7939 /* Find the branch conditions of b1 and b2 */
7943 GenTreePtr c1 = optIsBoolCond(t1, &t1, &bool1);
7946 GenTreePtr c2 = optIsBoolCond(t2, &t2, &bool2);
// NOTE(review): '&&' binds tighter than '||', so these asserts read as
// "GT_EQ || (GT_NE && gtOp1 == c)" — the gtOp1 check is skipped in the
// GT_EQ arm. The intent was likely "(GT_EQ || GT_NE) && gtOp1 == c".
7949 noway_assert(t1->gtOper == GT_EQ || t1->gtOper == GT_NE && t1->gtOp.gtOp1 == c1);
7950 noway_assert(t2->gtOper == GT_EQ || t2->gtOper == GT_NE && t2->gtOp.gtOp1 == c2);
7952 // Leave out floats where the bit-representation is more complicated
7953 // - there are two representations for 0.
7955 if (varTypeIsFloating(c1->TypeGet()) || varTypeIsFloating(c2->TypeGet()))
7958 // Make sure the types involved are of the same sizes
7959 if (genTypeSize(c1->TypeGet()) != genTypeSize(c2->TypeGet()))
7961 if (genTypeSize(t1->TypeGet()) != genTypeSize(t2->TypeGet()))
7963 #ifdef _TARGET_ARMARCH_
7964 // Skip the small operand which we cannot encode.
7965 if (varTypeIsSmall(c1->TypeGet()))
7968 /* The second condition must not contain side effects */
7970 if (c2->gtFlags & GTF_GLOB_EFFECT)
7973 /* The second condition must not be too expensive */
7977 if (c2->gtCostEx > 12)
7982 var_types foldType = c1->TypeGet();
7983 if (varTypeIsGC(foldType))
7985 foldType = TYP_I_IMPL;
7990 /* Both conditions must be the same */
7992 if (t1->gtOper != t2->gtOper)
7995 if (t1->gtOper == GT_EQ)
7997 /* t1:c1==0 t2:c2==0 ==> Branch to BX if either value is 0
7998 So we will branch to BX if (c1&c2)==0 */
8005 /* t1:c1!=0 t2:c2!=0 ==> Branch to BX if either value is non-0
8006 So we will branch to BX if (c1|c2)!=0 */
8014 /* The b1 condition must be the reverse of the b2 condition */
8016 if (t1->gtOper == t2->gtOper)
8019 if (t1->gtOper == GT_EQ)
8021 /* t1:c1==0 t2:c2!=0 ==> Branch to BX if both values are non-0
8022 So we will branch to BX if (c1&c2)!=0 */
8029 /* t1:c1!=0 t2:c2==0 ==> Branch to BX if both values are 0
8030 So we will branch to BX if (c1|c2)==0 */
8037 // Anding requires both values to be 0 or 1
8039 if ((foldOp == GT_AND) && (!bool1 || !bool2))
8043 // Now update the trees
8045 GenTreePtr cmpOp1 = gtNewOperNode(foldOp, foldType, c1, c2);
8048 /* When we 'OR'/'AND' two booleans, the result is boolean as well */
8049 cmpOp1->gtFlags |= GTF_BOOLEAN;
// Splice the folded condition into b1's compare and normalize its type.
8053 t1->gtOp.gtOp1 = cmpOp1;
8054 t1->gtOp.gtOp2->gtType = foldType; // Could have been varTypeIsGC()
8056 #if FEATURE_SET_FLAGS
8057 // For comparisons against zero we will have the GTF_SET_FLAGS set
8058 // and this can cause an assert to fire in fgMoveOpsLeft(GenTreePtr tree)
8059 // during the CSE phase.
8061 // So make sure to clear any GTF_SET_FLAGS bit on these operations
8062 // as they are no longer feeding directly into a comparisons against zero
8064 // Make sure that the GTF_SET_FLAGS bit is cleared.
8065 // Fix 388436 ARM JitStress WP7
8066 c1->gtFlags &= ~GTF_SET_FLAGS;
8067 c2->gtFlags &= ~GTF_SET_FLAGS;
8069 // The new top level node that we just created does feed directly into
8070 // a comparison against zero, so set the GTF_SET_FLAGS bit so that
8071 // we generate an instruction that sets the flags, which allows us
8072 // to omit the cmp with zero instruction.
8074 // Request that the codegen for cmpOp1 sets the condition flags
8075 // when it generates the code for cmpOp1.
8077 cmpOp1->gtRequestSetFlags();
8080 flowList * edge1 = fgGetPredForBlock(b1->bbJumpDest, b1);
8083 /* Modify the target of the conditional jump and update bbRefs and bbPreds */
8087 edge2 = fgGetPredForBlock(b2->bbJumpDest, b2);
8091 edge2 = fgGetPredForBlock(b2->bbNext, b2);
8093 fgRemoveRefPred(b1->bbJumpDest, b1);
8095 b1->bbJumpDest = b2->bbJumpDest;
8097 fgAddRefPred(b2->bbJumpDest, b1);
8100 noway_assert(edge1 != NULL);
8101 noway_assert(edge2 != NULL);
// Merge the two edges' weight ranges, guarding against overflow of the sum.
8103 BasicBlock::weight_t edgeSumMin = edge1->flEdgeWeightMin + edge2->flEdgeWeightMin;
8104 BasicBlock::weight_t edgeSumMax = edge1->flEdgeWeightMax + edge2->flEdgeWeightMax;
8105 if ((edgeSumMax >= edge1->flEdgeWeightMax) && (edgeSumMax >= edge2->flEdgeWeightMax))
8107 edge1->flEdgeWeightMin = edgeSumMin;
8108 edge1->flEdgeWeightMax = edgeSumMax;
// Overflow detected: fall back to the widest possible weight range.
8112 edge1->flEdgeWeightMin = BB_ZERO_WEIGHT;
8113 edge1->flEdgeWeightMax = BB_MAX_WEIGHT;
8116 /* Get rid of the second block (which is a BBJ_COND) */
8118 noway_assert(b1->bbJumpKind == BBJ_COND);
8119 noway_assert(b2->bbJumpKind == BBJ_COND);
8120 noway_assert(b1->bbJumpDest == b2->bbJumpDest);
8121 noway_assert(b1->bbNext == b2); noway_assert(b2->bbNext);
8124 b2->bbFlags |= BBF_REMOVED;
8126 // If b2 was the last block of a try or handler, update the EH table.
8128 ehUpdateForDeletedBlock(b2);
8130 /* Update bbRefs and bbPreds */
8132 /* Replace pred 'b2' for 'b2->bbNext' with 'b1'
8133 * Remove pred 'b2' for 'b2->bbJumpDest' */
8135 fgReplacePred(b2->bbNext, b2, b1);
8137 fgRemoveRefPred(b2->bbJumpDest, b2);
8139 /* Update the block numbers and try again */
8151 // Update loop table
8152 fgUpdateLoopsAfterCompacting(b1, b2);
8157 printf("Folded %sboolean conditions of BB%02u and BB%02u to :\n",
8158 c2->OperIsLeaf() ? "" : "non-leaf ",
8159 b1->bbNum, b2->bbNum);
8160 gtDispTree(s1); printf("\n");
8168 fgDebugCheckBBlist();