1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
10 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
11 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
17 #pragma warning ( disable : 4701 )
20 /*****************************************************************************/
// Static statistics counters for range-check optimization.
// Presumably optRangeChkRmv counts range checks removed and optRangeChkAll
// counts all range checks encountered -- TODO confirm against the full source
// (these are typically compiled in only under a measurement #ifdef).
24 unsigned Compiler::optRangeChkRmv = 0;
26 unsigned Compiler::optRangeChkAll = 0;
29 /*****************************************************************************/
// optInit: Reset the optimizer's per-method state before compiling a method.
// NOTE(review): this excerpt elides interior lines (braces and possibly more
// fields) -- confirm against the complete source file.
31 void Compiler::optInit()
33     optLoopsMarked      = false;
36     /* Initialize the # of tracked loops to 0 */
38     /* Keep track of the number of calls and indirect calls made by this method */
40     optIndirectCallCount = 0;
41     optNativeCallCount  = 0;
// Assertion-propagation state: no assertions recorded yet, no dependency table.
42     optAssertionCount      = 0;
43     optAssertionDep        = NULL;
// CSE bookkeeping: no candidates yet; UINT_MAX marks "CSE phase not started".
45     optCSECandidateTotal = 0;
46     optCSEstart          = UINT_MAX;
48 #endif // FEATURE_ANYCSE
// DataFlow constructor: stores the owning Compiler instance; no other
// initialization is visible in this excerpt.
52 DataFlow::DataFlow(Compiler* pCompiler)
53     : m_pCompiler(pCompiler)
57 /*****************************************************************************
// optSetBlockWeights: Adjust basic-block weights using dominator/reachability
// info. Requires dominators to be computed and is not run under MinOpts or
// debuggable code. NOTE(review): interior lines are elided in this excerpt.
61 void Compiler::optSetBlockWeights()
63     noway_assert(!opts.MinOpts() && !opts.compDbgCode);
64     assert(fgDomsComputed);
70     bool firstBBdomsRets = true;
74     for (block = fgFirstBB; (block != NULL); block = block->bbNext)
76         /* Blocks that can't be reached via the first block are rarely executed */
77         if (!fgReachable(fgFirstBB, block))
78             block->bbSetRunRarely();
80         if (block->bbWeight != BB_ZERO_WEIGHT)
82             // Calculate our bbWeight:
84             //  o BB_UNITY_WEIGHT if we dominate all BBJ_RETURN blocks
85             //  o otherwise BB_UNITY_WEIGHT / 2
87             bool domsRets = true;     // Assume that we will dominate
// Check this block against every return block recorded in fgReturnBlocks.
89             for (BasicBlockList* retBlocks = fgReturnBlocks; retBlocks != nullptr; retBlocks = retBlocks->next)
91                 if (!fgDominate(block, retBlocks->block))
98             if (block == fgFirstBB)
100                 firstBBdomsRets = domsRets;
103             // If we are not using profile weight then we lower the weight
104             // of blocks that do not dominate a return block
106             if (firstBBdomsRets && (fgIsUsingProfileWeights() == false) && (domsRets == false))
// Halve the weight; the assert guards against the weight dropping to zero.
111                 block->modifyBBWeight(block->bbWeight / 2);
112                 noway_assert(block->bbWeight);
// Debug-only dump of the result when anything changed.
118     if (changed && verbose)
120         printf("\nAfter optSetBlockWeights:\n");
125     /* Check that the flowgraph data (bbNum, bbRefs, bbPreds) is up-to-date */
126     fgDebugCheckBBlist();
130 /*****************************************************************************
132 * Marks the blocks between 'begBlk' and 'endBlk' as part of a loop.
// optMarkLoopBlocks: Scale up the bbWeight of every block in the lexical range
// [begBlk, endBlk] that is truly part of the loop (mutually reachable with the
// loop head). NOTE(review): interior lines are elided in this excerpt --
// confirm control-flow structure against the full source.
135 void                Compiler::optMarkLoopBlocks(BasicBlock *begBlk,
139     /* Calculate the 'loopWeight',
140        this is the amount to increase each block in the loop
141        Our heuristic is that loops are weighted eight times more
142        than straight line code.
143        Thus we increase each block by 7 times the weight of
144        the loop header block,
145        if the loops are all properly formed gives us:
146        (assuming that BB_LOOP_WEIGHT is 8)
148           1 -- non loop basic block
149           8 -- single loop nesting
150          64 -- double loop nesting
151         512 -- triple loop nesting
// Preconditions: begBlk lexically precedes endBlk, is a loop head, and can
// reach endBlk.
155     noway_assert(begBlk->bbNum <= endBlk->bbNum);
156     noway_assert(begBlk->isLoopHead());
157     noway_assert(fgReachable(begBlk, endBlk));
161         printf("\nMarking loop L%02u", begBlk->bbLoopNum);
165     noway_assert(!opts.MinOpts());
167     /* Build list of backedges for block begBlk */
168     flowList *  backedgeList = NULL;
170     for (flowList* pred  = begBlk->bbPreds;
174         /* Is this a backedge? */
// A predecessor with a bbNum >= begBlk's is lexically at/after the head,
// so its edge into the head is treated as a backedge.
175         if (pred->flBlock->bbNum >= begBlk->bbNum)
177             flowList *  flow = new (this, CMK_FlowList) flowList();
179 #if MEASURE_BLOCK_SIZE
181             genFlowNodeSize += sizeof(flowList);
182 #endif // MEASURE_BLOCK_SIZE
// Prepend to the singly-linked backedge list.
184             flow->flNext  = backedgeList;
185             flow->flBlock = pred->flBlock;
190     /* At least one backedge must have been found (the one from endBlk) */
191     noway_assert(backedgeList);
193     BasicBlock * curBlk = begBlk;
197         noway_assert(curBlk);
199         // For curBlk to be part of a loop that starts at begBlk
200         // curBlk must be reachable from begBlk and (since this is a loop)
201         // likewise begBlk must be reachable from curBlk.
204         if (fgReachable(curBlk, begBlk) && fgReachable(begBlk, curBlk))
206             /* If this block reaches any of the backedge blocks we set reachable   */
207             /* If this block dominates any of the backedge blocks we set dominates */
208             bool          reachable = false;
209             bool          dominates = false;
211             for (flowList* tmp = backedgeList;
215                 BasicBlock *  backedge = tmp->flBlock;
// Run-rarely blocks keep reachable/dominates false so they are not scaled.
217                 if (!curBlk->isRunRarely())
219                     reachable |= fgReachable(curBlk, backedge);
220                     dominates |= fgDominate (curBlk, backedge);
// Once both facts are established there is no need to scan more backedges.
222                     if (dominates && reachable)
229                 noway_assert(curBlk->bbWeight > BB_ZERO_WEIGHT);
233                 if ((curBlk->bbFlags & BBF_PROF_WEIGHT) != 0)
235                     // We have real profile weights, so we aren't going to change this blocks weight
236                     weight = curBlk->bbWeight;
// Full scaling when the block dominates a backedge; half scaling otherwise
// (reachable-only) -- TODO confirm the exact branch conditions, as the
// surrounding if/else lines are elided here.
241                     weight = curBlk->bbWeight * BB_LOOP_WEIGHT;
244                     weight = curBlk->bbWeight * (BB_LOOP_WEIGHT/2);
248                 // The multiplication may have caused us to overflow
250                 if (weight < curBlk->bbWeight)
252                     // The multiplication caused us to overflow
253                     weight = BB_MAX_WEIGHT;
256                 //  Set the new weight
258                 curBlk->modifyBBWeight(weight);
263                 printf("\n    BB%02u(wt=%s)",
265                        refCntWtd2str(curBlk->getBBWeight(this)));
271         /* Stop if we've reached the last block in the loop */
273         if  (curBlk == endBlk)
276         curBlk = curBlk->bbNext;
278         /* If we are excluding the endBlk then stop if we've reached endBlk */
280         if  (excludeEndBlk && (curBlk == endBlk))
285 /*****************************************************************************
287 * Unmark the blocks between 'begBlk' and 'endBlk' as part of a loop.
// optUnmarkLoopBlocks: Undo optMarkLoopBlocks for [begBlk, endBlk] -- divide
// each loop block's weight back down by BB_LOOP_WEIGHT. Only performed when
// exactly one loop back edge remains. NOTE(review): interior lines are elided
// in this excerpt.
290 void                Compiler::optUnmarkLoopBlocks(BasicBlock *begBlk,
293     /* A set of blocks that were previously marked as a loop are now
294        to be unmarked, since we have decided that for some reason this
295        loop no longer exists.
296        Basically we are just reseting the blocks bbWeight to their
300     noway_assert(begBlk->bbNum <= endBlk->bbNum);
301     noway_assert(begBlk->isLoopHead());
303     noway_assert(!opts.MinOpts());
306     unsigned backEdgeCount = 0;
// Count the qualifying back edges into begBlk.
308     for (flowList * pred  = begBlk->bbPreds;
312         curBlk = pred->flBlock;
314         /* is this a backward edge? (from curBlk to begBlk) */
316         if (begBlk->bbNum > curBlk->bbNum)
319         /* We only consider back-edges that are BBJ_COND or BBJ_ALWAYS for loops */
321         if ((curBlk->bbJumpKind != BBJ_COND)   &&
322             (curBlk->bbJumpKind != BBJ_ALWAYS)    )
328     /* Only unmark the loop blocks if we have exactly one loop back edge */
329     if (backEdgeCount != 1)
334         if (backEdgeCount > 0)
336             printf("\nNot removing loop L%02u, due to an additional back edge",
339         else if (backEdgeCount == 0)
341             printf("\nNot removing loop L%02u, due to no back edge",
348     noway_assert(backEdgeCount == 1);
349     noway_assert(fgReachable(begBlk, endBlk));
353         printf("\nUnmarking loop L%02u", begBlk->bbLoopNum);
359         noway_assert(curBlk);
361         // For curBlk to be part of a loop that starts at begBlk
362         // curBlk must be reachable from begBlk and (since this is a loop)
363         // likewise begBlk must be reachable from curBlk.
365         if (!curBlk->isRunRarely() && fgReachable(curBlk, begBlk) && fgReachable(begBlk, curBlk))
367             unsigned weight = curBlk->bbWeight;
369             // Don't unmark blocks that are set to BB_MAX_WEIGHT
370             // Don't unmark blocks when we are using profile weights
372             if (!curBlk->isMaxBBWeight() && ((curBlk->bbFlags & BBF_PROF_WEIGHT) == 0))
373             {
374                 if (!fgDominate(curBlk, endBlk))
380                     /* Merging of blocks can disturb the Dominates
381                        information (see RAID #46649) */
382                     if (weight < BB_LOOP_WEIGHT)
386                     // We can overflow here so check for it
387                     if (weight < curBlk->bbWeight)
388                         weight = BB_MAX_WEIGHT;
// After adjustment the weight must still carry at least one loop factor,
// so the division below cannot produce zero.
390                 assert (weight >= BB_LOOP_WEIGHT);
392                 curBlk->modifyBBWeight(weight / BB_LOOP_WEIGHT);
398             printf("\n    BB%02u(wt=%s)",
400                    refCntWtd2str(curBlk->getBBWeight(this)));
404         /* Stop if we've reached the last block in the loop */
406         if  (curBlk == endBlk)
409         curBlk = curBlk->bbNext;
411         /* Stop if we go past the last block in the loop, as it may have been deleted */
412         if (curBlk->bbNum > endBlk->bbNum)
417 /*****************************************************************************************************
419 * Function called to update the loop table and bbWeight before removing a block
// optUpdateLoopsBeforeRemoveBlock: Fix up the loop table (and possibly loop
// block weights) before 'block' is removed from the flowgraph. Loops whose
// entry/bottom is the removed block, or whose sole outside-the-loop entry path
// went through it, are flagged LPFLG_REMOVED. NOTE(review): interior lines are
// elided in this excerpt.
422 void                Compiler::optUpdateLoopsBeforeRemoveBlock(BasicBlock * block,
428     noway_assert(!opts.MinOpts());
430     bool removeLoop = false;
432     /* If an unreachable block was part of a loop entry or bottom then the loop is unreachable */
433     /* Special case: the block was the head of a loop - or pointing to a loop entry */
435     for (unsigned loopNum = 0; loopNum < optLoopCount; loopNum++)
437         /* Some loops may have been already removed by
438          * loop unrolling or conditional folding */
440         if (optLoopTable[loopNum].lpFlags & LPFLG_REMOVED)
443         if (block == optLoopTable[loopNum].lpEntry ||
444             block == optLoopTable[loopNum].lpBottom  )
446             optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED;
453             printf("\nUpdateLoopsBeforeRemoveBlock Before: ");
454             optPrintLoopInfo(loopNum);
458         /* If the loop is still in the table
459          * any block in the loop must be reachable !!! */
461         noway_assert(optLoopTable[loopNum].lpEntry != block);
462         noway_assert(optLoopTable[loopNum].lpBottom != block);
// If the removed block was the (single) recorded exit, the loop no longer
// has a known unique exit. (The double ';' below is harmless.)
464         if (optLoopTable[loopNum].lpExit == block)
466             optLoopTable[loopNum].lpExit   = NULL;
467             optLoopTable[loopNum].lpFlags &= ~LPFLG_ONE_EXIT;;
471         /* If this points to the actual entry in the loop
472          * then the whole loop may become unreachable */
// Determine, per jump kind, whether 'block' flows into the loop entry.
474         switch (block->bbJumpKind)
477             BasicBlock * * jumpTab;
481             if (block->bbNext == optLoopTable[loopNum].lpEntry)
486             if (block->bbJumpKind == BBJ_NONE)
492             noway_assert(block->bbJumpDest);
493             if (block->bbJumpDest == optLoopTable[loopNum].lpEntry)
500             jumpCnt = block->bbJumpSwt->bbsCount;
501             jumpTab = block->bbJumpSwt->bbsDstTab;
504                 noway_assert(*jumpTab);
505                 if ((*jumpTab) == optLoopTable[loopNum].lpEntry)
// Walk all switch targets; comma-expression advances the table pointer.
509             } while (++jumpTab, --jumpCnt);
518             /* Check if the entry has other predecessors outside the loop
519              * TODO: Replace this when predecessors are available */
// Brute-force scan of all blocks for another edge into the entry from
// outside the lexical loop range.
521             BasicBlock      *       auxBlock;
522             for (auxBlock = fgFirstBB; auxBlock; auxBlock = auxBlock->bbNext)
524                 /* Ignore blocks in the loop */
526                 if (auxBlock->bbNum >  optLoopTable[loopNum].lpHead->bbNum &&
527                     auxBlock->bbNum <= optLoopTable[loopNum].lpBottom->bbNum)
530                 switch (auxBlock->bbJumpKind)
533                     BasicBlock * * jumpTab;
537                     if (auxBlock->bbNext == optLoopTable[loopNum].lpEntry)
542                     if (auxBlock->bbJumpKind == BBJ_NONE)
548                     noway_assert(auxBlock->bbJumpDest);
549                     if (auxBlock->bbJumpDest == optLoopTable[loopNum].lpEntry)
556                     jumpCnt = auxBlock->bbJumpSwt->bbsCount;
557                     jumpTab = auxBlock->bbJumpSwt->bbsDstTab;
561                         noway_assert(*jumpTab);
562                         if ((*jumpTab) == optLoopTable[loopNum].lpEntry)
566                     } while (++jumpTab, --jumpCnt);
// No other entry edge found: the loop becomes unreachable.
576                 optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED;
579         else if (optLoopTable[loopNum].lpHead == block)
581             /* The loop has a new head - Just update the loop table */
582             optLoopTable[loopNum].lpHead = block->bbPrev;
587             printf("\nUpdateLoopsBeforeRemoveBlock After: ");
588             optPrintLoopInfo(loopNum);
// If the removed block is itself the source of a loop back edge (and the
// dominator info is current), unmark the loop's blocks as well.
593     if ((skipUnmarkLoop == false)                                              &&
594         ((block->bbJumpKind == BBJ_ALWAYS) || (block->bbJumpKind == BBJ_COND)) &&
595         (block->bbJumpDest->isLoopHead())                                      &&
596         (block->bbJumpDest->bbNum <= block->bbNum)                             &&
598         (fgCurBBEpochSize == fgDomBBcount + 1)                                 &&
599         fgReachable(block->bbJumpDest, block))
601         optUnmarkLoopBlocks(block->bbJumpDest, block);
607 /*****************************************************************************
609 * Given the beginBlock of the loop, return the index of this loop
// optFindLoopNumberFromBeginBlock: Linear search of the loop table for the
// loop whose head's successor is 'begBlk'; asserts if no loop matches.
613 unsigned        Compiler::optFindLoopNumberFromBeginBlock(BasicBlock* begBlk)
617     for (lnum = 0;  lnum < optLoopCount; lnum++)
619         if (optLoopTable[lnum].lpHead->bbNext == begBlk)
// Reaching here means every table entry was checked without a match.
626     noway_assert(!"Loop number not found.");
631 /*****************************************************************************
633 * Print loop info in an uniform way.
// optPrintLoopInfo (verbose overload): Dump one loop's blocks (first/top/
// bottom/head/entry/exit), exit count, and parent-loop index, using the
// caller-supplied values rather than the loop table.
636 void            Compiler::optPrintLoopInfo(unsigned      loopInd,
638                                            BasicBlock *  lpFirst,
640                                            BasicBlock *  lpEntry,
641                                            BasicBlock *  lpBottom,
642                                            unsigned char lpExitCnt,
647     noway_assert(lpHead);
650     // NOTE: we take "loopInd" as an argument instead of using the one
651     // stored in begBlk->bbLoopNum because sometimes begBlk->bbLoopNum
652     // has not be set correctly. For example, in optRecordLoop().
653     // However, in most of the cases, loops should have been recorded.
654     // Therefore the correct way is to call the Compiler::optPrintLoopInfo(unsigned lnum)
655     // version of this method.
657     printf("L%02u, from BB%02u", loopInd, lpFirst->bbNum);
// Only mention the top separately when it differs from the lexical first block.
658     if (lpTop != lpFirst)
660         printf(" (loop top is BB%02u)", lpTop->bbNum);
663     printf(" to BB%02u (Head=BB%02u, Entry=BB%02u, ExitCnt=%d",
// A unique exit block is printed only for single-exit loops.
670     if (lpExitCnt == 1) {
671         printf(" at BB%02u", lpExit->bbNum);
674     if (parentLoop != BasicBlock::NOT_IN_LOOP)
676         printf(", parent loop = L%02u", parentLoop);
681 /*****************************************************************************
683 * Print loop information given the index of the loop in the loop table.
// optPrintLoopInfo (table overload): Look up loop 'lnum' in the loop table
// and forward its fields to the verbose overload above.
686 void                Compiler::optPrintLoopInfo(unsigned lnum)
688     noway_assert(lnum < optLoopCount);
690     LoopDsc* ldsc = &optLoopTable[lnum];    // lnum is the INDEX to the loop table.
692     optPrintLoopInfo(lnum,
707 //------------------------------------------------------------------------
708 // optPopulateInitInfo: Populate loop init info in the loop table.
711 // init - the tree that is supposed to initialize the loop iterator.
712 // iterVar - loop iteration variable.
715 // "false" if the loop table could not be populated with the loop iterVar init info.
718 // The 'init' tree is checked if its lhs is a local and rhs is either
719 // a const or a local.
// optPopulateInitInfo: Validate the loop-iterator initialization tree and
// record it in the loop table. Accepts only "iterVar = <int const>" or
// "iterVar = <local>"; sets LPFLG_CONST_INIT / LPFLG_VAR_INIT accordingly.
// Returns false when the init tree does not match these shapes.
721 bool        Compiler::optPopulateInitInfo(unsigned loopInd, GenTreePtr init, unsigned iterVar)
723     // Operator should be =
724     if (init->gtOper != GT_ASG)
729     GenTreePtr lhs = init->gtOp.gtOp1;
730     GenTreePtr rhs = init->gtOp.gtOp2;
731     // LHS has to be local and should equal iterVar.
732     if (lhs->gtOper != GT_LCL_VAR || lhs->gtLclVarCommon.gtLclNum != iterVar)
737     // RHS can be constant or local var.
738     // TODO-CQ: CLONE: Add arr length for descending loops.
739     if (rhs->gtOper == GT_CNS_INT && rhs->TypeGet() == TYP_INT)
741         optLoopTable[loopInd].lpFlags |= LPFLG_CONST_INIT;
742         optLoopTable[loopInd].lpConstInit = (int) rhs->gtIntCon.gtIconVal;
744     else if (rhs->gtOper == GT_LCL_VAR)
746         optLoopTable[loopInd].lpFlags |= LPFLG_VAR_INIT;
747         optLoopTable[loopInd].lpVarInit = rhs->gtLclVarCommon.gtLclNum;
756 //----------------------------------------------------------------------------------
757 // optCheckIterInLoopTest: Check if iter var is used in loop test.
760 // test "jtrue" tree or an asg of the loop iter termination condition
761 // from/to blocks (beg, end) which are part of the loop.
762 // iterVar loop iteration variable.
763 // loopInd loop index.
766 // The test tree is parsed to check if "iterVar" matches the lhs of the condition
767 // and the rhs limit is extracted from the "test" tree. The limit information is
768 // added to the loop table.
771 // "false" if the loop table could not be populated with the loop test info or
772 // if the test condition doesn't involve iterVar.
// optCheckIterInLoopTest: Verify that the loop-termination condition uses the
// iteration variable, classify the limit (const / loop-invariant local /
// array length) into loop-table flags, and record the relop as lpTestTree.
// NOTE(review): some lines (e.g. the failure returns) are elided here.
774 bool        Compiler::optCheckIterInLoopTest(unsigned loopInd, GenTreePtr test, BasicBlock* from, BasicBlock* to, unsigned iterVar)
776     // Obtain the relop from the "test" tree.
778     if (test->gtOper == GT_JTRUE)
780         relop = test->gtGetOp1();
// Otherwise the test was evaluated into a temp: "tmp = relop".
784         assert(test->gtOper == GT_ASG);
785         relop = test->gtGetOp2();
788     noway_assert(relop->OperKind() & GTK_RELOP);
790     GenTreePtr opr1 = relop->gtOp.gtOp1;
791     GenTreePtr opr2 = relop->gtOp.gtOp2;
796     // Make sure op1 or op2 is the iterVar.
797     if (opr1->gtOper == GT_LCL_VAR && opr1->gtLclVarCommon.gtLclNum == iterVar)
802     else if (opr2->gtOper == GT_LCL_VAR && opr2->gtLclVarCommon.gtLclNum == iterVar)
// Only 32-bit int iterators are handled.
812     if (iterOp->gtType != TYP_INT)
817     // Mark the iterator node.
818     iterOp->gtFlags |= GTF_VAR_ITERATOR;
820     // Check what type of limit we have - constant, variable or arr-len.
821     if (limitOp->gtOper == GT_CNS_INT)
823         optLoopTable[loopInd].lpFlags |= LPFLG_CONST_LIMIT;
// A local limit qualifies only if it is not reassigned anywhere in the loop.
825     else if (limitOp->gtOper == GT_LCL_VAR && !optIsVarAssigned(from, to, nullptr, limitOp->gtLclVarCommon.gtLclNum))
827         optLoopTable[loopInd].lpFlags |= LPFLG_VAR_LIMIT;
829     else if (limitOp->gtOper == GT_ARR_LENGTH)
831         optLoopTable[loopInd].lpFlags |= LPFLG_ARRLEN_LIMIT;
837     // Save the type of the comparison between the iterator and the limit.
838     optLoopTable[loopInd].lpTestTree = relop;
842 //----------------------------------------------------------------------------------
843 // optIsLoopIncrTree: Check if loop is a tree of form v += 1 or v = v + 1
846 // incr The incr tree to be checked. Whether incr tree is
847 // oper-equal(+=, -=...) type nodes or v=v+1 type ASG nodes.
850 // The test tree is parsed to check if "iterVar" matches the lhs of the condition
851 // and the rhs limit is extracted from the "test" tree. The limit information is
852 // added to the loop table.
855 // iterVar local num if the iterVar is found, otherwise BAD_VAR_NUM.
// optIsLoopIncrTree: Recognize "v = v op y" / "v op= y" update trees; returns
// the local number of v, or BAD_VAR_NUM if the tree is not a qualifying
// increment (the increment amount must be an int constant).
857 unsigned        Compiler::optIsLoopIncrTree(GenTreePtr incr)
860     genTreeOps updateOper;
// IsLclVarUpdateTree extracts both the update operand and the operator.
861     unsigned iterVar = incr->IsLclVarUpdateTree(&incrVal, &updateOper);
862     if (iterVar != BAD_VAR_NUM)
864         // We have v = v op y type asg node.
877         // Increment should be by a const int.
878         // TODO-CQ: CLONE: allow variable increments.
879         if ((incrVal->gtOper != GT_CNS_INT) || (incrVal->TypeGet() != TYP_INT))
888 //----------------------------------------------------------------------------------
889 // optComputeIterInfo: Check tree is loop increment of a lcl that is loop-invariant.
892 // from, to - are blocks (beg, end) which are part of the loop.
893 // incr - tree that increments the loop iterator. v+=1 or v=v+1.
894 // pIterVar - see return value.
897 // Returns true if iterVar "v" can be returned in "pIterVar", otherwise returns
901 // Check if the "incr" tree is a "v=v+1 or v+=1" type tree and make sure it is not
902 // assigned in the loop.
// optComputeIterInfo: Confirm 'incr' is a loop-iterator update (via
// optIsLoopIncrTree) and that the iterator is not assigned elsewhere in the
// loop body; on success the iterator's local number is returned via pIterVar.
904 bool        Compiler::optComputeIterInfo(GenTreePtr incr, BasicBlock* from, BasicBlock* to, unsigned* pIterVar)
907     unsigned iterVar = optIsLoopIncrTree(incr);
908     if (iterVar == BAD_VAR_NUM)
// 'incr' itself is excluded from the assignment scan below.
912     if (optIsVarAssigned(from, to, incr, iterVar))
914         JITDUMP("iterVar is assigned in loop\n");
922 //----------------------------------------------------------------------------------
923 // optIsLoopTestEvalIntoTemp:
924 // Pattern match if the test tree is computed into a tmp
925 // and the "tmp" is used as jump condition for loop termination.
928 // testStmt - is the JTRUE statement that is of the form: jmpTrue (Vtmp != 0)
929 // where Vtmp contains the actual loop test result.
930 // newStmt - contains the statement that is the actual test stmt involving
931 // the loop iterator.
934 // Returns true if a new test tree can be obtained.
937 // Scan if the current stmt is a jtrue with (Vtmp != 0) as condition
938 // Then returns the rhs for def of Vtmp as the "test" node.
941 // This method just retrieves what it thinks is the "test" node,
942 // the callers are expected to verify that "iterVar" is used in the test.
// optIsLoopTestEvalIntoTemp: Pattern-match "jmpTrue(Vtmp != 0)" where the
// previous statement is "Vtmp = <relop>"; if matched, the relop assignment is
// returned through newTest as the real loop test.
944 bool Compiler::optIsLoopTestEvalIntoTemp(GenTreePtr testStmt, GenTreePtr* newTest)
946     GenTreePtr test = testStmt->gtStmt.gtStmtExpr;
948     if (test->gtOper != GT_JTRUE)
953     GenTreePtr relop = test->gtGetOp1();
954     noway_assert(relop->OperIsCompare());
956     GenTreePtr opr1 = relop->gtOp.gtOp1;
957     GenTreePtr opr2 = relop->gtOp.gtOp2;
959     // Make sure we have jtrue (vtmp != 0)
960     if ((relop->OperGet() == GT_NE) &&
961         (opr1->OperGet() == GT_LCL_VAR) &&
962         (opr2->OperGet() == GT_CNS_INT) &&
963         opr2->IsIntegralConst(0))
965         // Get the previous statement to get the def (rhs) of Vtmp to see
966         // if the "test" is evaluated into Vtmp.
967         GenTreePtr prevStmt = testStmt->gtPrev;
968         if (prevStmt == nullptr)
973         GenTreePtr tree = prevStmt->gtStmt.gtStmtExpr;
974         if (tree->OperGet() == GT_ASG)
976             GenTreePtr lhs = tree->gtOp.gtOp1;
977             GenTreePtr rhs = tree->gtOp.gtOp2;
979             // Return as the new test node.
// The assigned local must be the same Vtmp that the JTRUE inspects,
// and its definition must itself be a comparison.
980             if (lhs->gtOper == GT_LCL_VAR && lhs->AsLclVarCommon()->GetLclNum() == opr1->AsLclVarCommon()->GetLclNum())
982                 if (rhs->OperIsCompare())
993 //----------------------------------------------------------------------------------
994 // optExtractInitTestIncr:
995 // Extract the "init", "test" and "incr" nodes of the loop.
998 // head - Loop head block
999 // bottom - Loop bottom block
1000 // top - Loop top block
1001 // ppInit - The init stmt of the loop if found.
1002 // ppTest - The test stmt of the loop if found.
1003 // ppIncr - The incr stmt of the loop if found.
1006 // The results are put in "ppInit", "ppTest" and "ppIncr" if the method
1007 // returns true. Returns false if the information can't be extracted.
1010 // Check if the "test" stmt is last stmt in the loop "bottom". If found good,
1011 // "test" stmt is found. Try to find the "incr" stmt. Check previous stmt of
1012 // "test" to get the "incr" stmt. If it is not found it could be a loop of the
1015 // +-------<-----------------<-----------+
1018 // BBinit(head) -> BBcond(top) -> BBLoopBody(bottom) ---^
1020 // Check if the "incr" tree is present in the loop "top" node as the last stmt.
1021 // Also check if the "test" tree is assigned to a tmp node and the tmp is used
1022 // in the jtrue condition.
1025 // This method just retrieves what it thinks is the "test" node,
1026 // the callers are expected to verify that "iterVar" is used in the test.
// optExtractInitTestIncr: Locate the loop's "init", "test" and "incr"
// statements. The test is the last statement of 'bottom'; the incr is either
// the statement before it or the last statement of 'top'; the init is the
// last statement of 'head' (skipping a duplicated loop-condition JTRUE).
// NOTE(review): several failure-return lines are elided in this excerpt.
1028 bool Compiler::optExtractInitTestIncr(BasicBlock* head, BasicBlock* bottom, BasicBlock* top, GenTreePtr* ppInit, GenTreePtr* ppTest, GenTreePtr* ppIncr)
1030     assert(ppInit != nullptr);
1031     assert(ppTest != nullptr);
1032     assert(ppIncr != nullptr);
1034     // Check if last two statements in the loop body are the increment of the iterator
1035     // and the loop termination test.
1036     noway_assert(bottom->bbTreeList != nullptr);
// bbTreeList->gtPrev is the circular-list link to the LAST statement.
1037     GenTreePtr test = bottom->bbTreeList->gtPrev;
1038     noway_assert(test != nullptr && test->gtNext == nullptr);
1041     if (optIsLoopTestEvalIntoTemp(test, &newTest))
1046     // Check if we have the incr tree before the test tree, if we don't,
1047     // check if incr is part of the loop "top".
1048     GenTreePtr incr = test->gtPrev;
1049     if (incr == nullptr || optIsLoopIncrTree(incr->gtStmt.gtStmtExpr) == BAD_VAR_NUM)
1051         if (top == nullptr || top->bbTreeList == nullptr || top->bbTreeList->gtPrev == nullptr)
1056         // If the prev stmt to loop test is not incr, then check if we have loop test evaluated into a tmp.
1057         GenTreePtr topLast = top->bbTreeList->gtPrev;
1058         if (optIsLoopIncrTree(topLast->gtStmt.gtStmtExpr) != BAD_VAR_NUM)
1068     assert(test != incr);
1070     // Find the last statement in the loop pre-header which we expect to be the initialization of
1071     // the loop iterator.
1072     GenTreePtr phdr = head->bbTreeList;
1073     if (phdr == nullptr)
1078     GenTreePtr init = phdr->gtPrev;
1079     noway_assert(init != nullptr && (init->gtNext == 0));
1081     // If it is a duplicated loop condition, skip it.
1082     if (init->gtFlags & GTF_STMT_CMPADD)
1084         // Must be a duplicated loop condition.
1085         noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
1086         init = init->gtPrev;
1087         noway_assert(init != nullptr);
// All three must be statement nodes before unwrapping their expressions.
1090     noway_assert(init->gtOper == GT_STMT);
1091     noway_assert(test->gtOper == GT_STMT);
1092     noway_assert(incr->gtOper == GT_STMT);
1094     *ppInit = init->gtStmt.gtStmtExpr;
1095     *ppTest = test->gtStmt.gtStmtExpr;
1096     *ppIncr = incr->gtStmt.gtStmtExpr;
1101 /*****************************************************************************
1103 * Record the loop in the loop table.
// optRecordLoop: Insert a newly discovered natural loop into optLoopTable,
// keeping the table ordered so that containing loops precede contained ones.
// Also tries to recognize for-like iterator loops (init/test/incr) and tags
// them with LPFLG_ITER / LPFLG_CONST etc. NOTE(review): interior lines are
// elided in this excerpt.
1106 void        Compiler::optRecordLoop(BasicBlock *    head,
1110                                     BasicBlock *    bottom,
1112                                     unsigned char   exitCnt)
1114     // Record this loop in the table, if there's room.
1116     assert(optLoopCount <= MAX_LOOP_NUM);
1117     if (optLoopCount == MAX_LOOP_NUM)
// Table full: remember the overflow so callers can react; loop is dropped.
1120         loopOverflowThisMethod = true;
1125     // Assumed preconditions on the loop we're adding.
1126     assert(first->bbNum <= top->bbNum);
1127     assert(top->bbNum <= entry->bbNum);
1128     assert(entry->bbNum <= bottom->bbNum);
1129     assert(head->bbNum < top->bbNum || head->bbNum > bottom->bbNum);
1131     // If the new loop contains any existing ones, add it in the right place.
1132     unsigned char loopInd = optLoopCount;
1133     for (unsigned char prevPlus1 = optLoopCount; prevPlus1 > 0; prevPlus1--)
1135         unsigned char prev = prevPlus1-1;
1136         if (optLoopTable[prev].lpContainedBy(first, bottom))
1139     // Move up any loops if necessary.
1140     for (unsigned j = optLoopCount; j > loopInd; j--)
1142         optLoopTable[j] = optLoopTable[j-1];
// Sanity-check: every loop after the insertion point is either disjoint
// from, or fully contained in, the new loop.
1146     for (unsigned i = loopInd+1; i < optLoopCount; i++)
1148         // The loop is well-formed.
1149         assert(optLoopTable[i].lpWellFormed());
1150         // Check for disjoint.
1151         if (optLoopTable[i].lpDisjoint(first, bottom)) continue;
1152         // Otherwise, assert complete containment (of optLoopTable[i] in new loop).
1153         assert(optLoopTable[i].lpContainedBy(first, bottom));
// Populate the new entry's block pointers and reset its relationships.
1157     optLoopTable[loopInd].lpHead      = head;
1158     optLoopTable[loopInd].lpFirst     = first;
1159     optLoopTable[loopInd].lpTop       = top;
1160     optLoopTable[loopInd].lpBottom    = bottom;
1161     optLoopTable[loopInd].lpEntry     = entry;
1162     optLoopTable[loopInd].lpExit      = exit;
1163     optLoopTable[loopInd].lpExitCnt   = exitCnt;
1165     optLoopTable[loopInd].lpParent    = BasicBlock::NOT_IN_LOOP;
1166     optLoopTable[loopInd].lpChild     = BasicBlock::NOT_IN_LOOP;
1167     optLoopTable[loopInd].lpSibling   = BasicBlock::NOT_IN_LOOP;
1169     optLoopTable[loopInd].lpFlags     = 0;
1171     // We haven't yet recorded any side effects.
1172     optLoopTable[loopInd].lpLoopHasHeapHavoc = false;
1173     optLoopTable[loopInd].lpFieldsModified = nullptr;
1174     optLoopTable[loopInd].lpArrayElemTypesModified = 0;
1176     // If DO-WHILE loop mark it as such.
1177     if (head->bbNext == entry)
1179         optLoopTable[loopInd].lpFlags |= LPFLG_DO_WHILE;
1182     // If single exit loop mark it as such.
1186         optLoopTable[loopInd].lpFlags |= LPFLG_ONE_EXIT;
1190     // Try to find loops that have an iterator (i.e. for-like loops) "for (init; test; incr){ ... }"
1191     // We have the following restrictions:
1192     //     1. The loop condition must be a simple one i.e. only one JTRUE node
1193     //     2. There must be a loop iterator (a local var) that is
1194     //        incremented (decremented or lsh, rsh, mul) with a constant value
1195     //     3. The iterator is incremented exactly once
1196     //     4. The loop condition must use the iterator.
1198     if (bottom->bbJumpKind == BBJ_COND)
1203         if (!optExtractInitTestIncr(head, bottom, top, &init, &test, &incr))
1208         unsigned iterVar = BAD_VAR_NUM;
1209         if (!optComputeIterInfo(incr, head->bbNext, bottom, &iterVar))
1214         // Make sure the "iterVar" initialization is never skipped,
1215         // i.e. HEAD dominates the ENTRY.
1216         if (!fgDominate(head, entry))
1221         if (!optPopulateInitInfo(loopInd, init, iterVar))
1226         // Check that the iterator is used in the loop condition.
1227         if (!optCheckIterInLoopTest(loopInd, test, head->bbNext, bottom, iterVar))
1232         // We know the loop has an iterator at this point ->flag it as LPFLG_ITER
1233         // Record the iterator, the pointer to the test node
1234         // and the initial value of the iterator (constant or local var)
1235         optLoopTable[loopInd].lpFlags |= LPFLG_ITER;
1238         optLoopTable[loopInd].lpIterTree = incr;
1241         // Save the initial value of the iterator - can be lclVar or constant
1242         // Flag the loop accordingly.
1248         simpleTestLoopCount++;
1251         // Check if a constant iteration loop.
1252         if ((optLoopTable[loopInd].lpFlags & LPFLG_CONST_INIT) &&
1253             (optLoopTable[loopInd].lpFlags & LPFLG_CONST_LIMIT))
1255             // This is a constant loop.
1256             optLoopTable[loopInd].lpFlags |= LPFLG_CONST;
1258             constIterLoopCount++;
// Debug-only dump of the constant loop's initializer and body statements,
// stopping at the incr statement.
1265             printf("\nConstant loop initializer:\n");
1268             printf("\nConstant loop body:\n");
1270             BasicBlock* block = head;
1273                 block = block->bbNext;
1274                 for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
1276                     if (stmt->gtStmt.gtStmtExpr == incr)
1279                     gtDispTree(stmt->gtStmt.gtStmtExpr);
1282             while (block != bottom);
1289     DBEXEC(verbose, optPrintLoopRecording(loopInd));
1294 //------------------------------------------------------------------------
1295 // optPrintLoopRecording: Print a recording of the loop.
1298 // loopInd - loop index.
// optPrintLoopRecording: Debug dump of the loop just recorded at 'loopInd',
// including iterator, init value, test operator and limit when the loop was
// recognized as an iterator loop.
1300 void Compiler::optPrintLoopRecording(unsigned loopInd)
1302     printf("Recorded loop %s", (loopInd != optLoopCount ? "(extended) " : ""));
1303     optPrintLoopInfo(optLoopCount,   // Not necessarily the loop index, but the number of loops that have been added.
1304                      optLoopTable[loopInd].lpHead,
1305                      optLoopTable[loopInd].lpFirst,
1306                      optLoopTable[loopInd].lpTop,
1307                      optLoopTable[loopInd].lpEntry,
1308                      optLoopTable[loopInd].lpBottom,
1309                      optLoopTable[loopInd].lpExitCnt,
1310                      optLoopTable[loopInd].lpExit
1313     // If an iterator loop print the iterator and the initialization.
1314     if  (optLoopTable[loopInd].lpFlags & LPFLG_ITER)
1316         printf(" [over V%02u", optLoopTable[loopInd].lpIterVar());
1318         printf(GenTree::NodeName(optLoopTable[loopInd].lpIterOper()));
1320         printf("%d )", optLoopTable[loopInd].lpIterConst());
1322         if  (optLoopTable[loopInd].lpFlags & LPFLG_CONST_INIT)
1323             printf(" from %d", optLoopTable[loopInd].lpConstInit);
1324         if  (optLoopTable[loopInd].lpFlags & LPFLG_VAR_INIT)
1325             printf(" from V%02u", optLoopTable[loopInd].lpVarInit);
1327         // If a simple test condition print operator and the limits */
1328         printf(GenTree::NodeName(optLoopTable[loopInd].lpTestOper()));
1330         if  (optLoopTable[loopInd].lpFlags & LPFLG_CONST_LIMIT)
1331             printf("%d ", optLoopTable[loopInd].lpConstLimit());
1333         if  (optLoopTable[loopInd].lpFlags & LPFLG_VAR_LIMIT)
1334             printf("V%02u ", optLoopTable[loopInd].lpVarLimit());
// optCheckPreds: Debug-only consistency check -- every predecessor recorded in
// a block's bbPreds list must exist in the block list and must actually jump
// or fall through to that block.
1342 void                Compiler::optCheckPreds()
1345     BasicBlock  *   blockPred;
1348     for (block = fgFirstBB; block; block = block->bbNext)
1350         for (pred = block->bbPreds; pred; pred = pred->flNext)
1352             // make sure this pred is part of the BB list
1353             for (blockPred = fgFirstBB; blockPred; blockPred = blockPred->bbNext)
1355                 if (blockPred == pred->flBlock)
1358             noway_assert(blockPred);
// Verify the edge direction matches the predecessor's jump kind.
1359             switch (blockPred->bbJumpKind)
1362                 if (blockPred->bbJumpDest == block)
// Fall-through kinds must have 'block' as their lexical successor.
1366                 noway_assert(blockPred->bbNext == block);
1368             case BBJ_EHFILTERRET:
1370             case BBJ_EHCATCHRET:
1371                 noway_assert(blockPred->bbJumpDest == block);
1382 /*****************************************************************************
1383 * Find the natural loops, using dominators. Note that the test for
1384 * a loop is slightly different from the standard one, because we have
1385 * not done a depth first reordering of the basic blocks.
1388 void Compiler::optFindNaturalLoops()
// Find the natural loops in the flow graph using dominator information,
// record each in optLoopTable (via optRecordLoop), then compute loop
// parent/child relationships, label every block with its innermost
// enclosing loop, and canonicalize loops so each has a unique "top".
1392 printf("*************** In optFindNaturalLoops()\n");
1397 flowList * predEntry;
1399 noway_assert(fgDomsComputed);
1403 hasMethodLoops = false;
1404 loopsThisMethod = 0;
1405 loopOverflowThisMethod = false;
1408 /* We will use the following terminology:
1409 * HEAD - the basic block that flows into the loop ENTRY block (Currently MUST be lexically before entry).
1410 Not part of the looping of the loop.
1411 * FIRST - the lexically first basic block (in bbNext order) within this loop. (May be part of a nested loop,
1412 * but not the outer loop. ???)
1413 * TOP - the target of the backward edge from BOTTOM. In most cases FIRST and TOP are the same.
1414 * BOTTOM - the lexically last block in the loop (i.e. the block from which we jump to the top)
1415 * EXIT - the loop exit or the block right after the bottom
1416 * ENTRY - the entry in the loop (not necessarly the TOP), but there must be only one entry
1418 * We (currently) require the body of a loop to be a contiguous (in bbNext order) sequence of basic blocks.
1449 BasicBlock * bottom;
1452 unsigned char exitCount;
// Consider each block in bbNext order as a potential loop HEAD.
1455 for (head = fgFirstBB; head->bbNext; head = head->bbNext)
1461 // Blocks that are rarely run have a zero bbWeight and should
1462 // never be optimized here
1464 if (top->bbWeight == BB_ZERO_WEIGHT)
// Look through top's predecessors for a "back edge" (see comment below).
1467 for (pred = top->bbPreds; pred; pred = pred->flNext)
1469 /* Is this a loop candidate? - We look for "back edges", i.e. an edge from BOTTOM
1470 * to TOP (note that this is an abuse of notation since this is not necessarily a back edge
1471 * as the definition says, but merely an indication that we have a loop there).
1472 * Thus, we have to be very careful and after entry discovery check that it is indeed
1473 * the only place we enter the loop (especially for non-reducible flow graphs).
1476 bottom = pred->flBlock;
1479 if (top->bbNum <= bottom->bbNum) // is this a backward edge? (from BOTTOM to TOP)
1481 if ((bottom->bbJumpKind == BBJ_EHFINALLYRET) ||
1482 (bottom->bbJumpKind == BBJ_EHFILTERRET) ||
1483 (bottom->bbJumpKind == BBJ_EHCATCHRET) ||
1484 (bottom->bbJumpKind == BBJ_CALLFINALLY) ||
1485 (bottom->bbJumpKind == BBJ_SWITCH) )
1487 /* BBJ_EHFINALLYRET, BBJ_EHFILTERRET, BBJ_EHCATCHRET, and BBJ_CALLFINALLY can never form a loop.
1488 * BBJ_SWITCH that has a backward jump appears only for labeled break. */
1492 BasicBlock * loopBlock;
1494 /* The presence of a "back edge" is an indication that a loop might be present here
1497 * 1. A collection of STRONGLY CONNECTED nodes i.e. there is a path from any
1498 * node in the loop to any other node in the loop (wholly within the loop)
1499 * 2. The loop has a unique ENTRY, i.e. there is only one way to reach a node
1500 * in the loop from outside the loop, and that is through the ENTRY
1503 /* Let's find the loop ENTRY */
1505 if (head->bbJumpKind == BBJ_ALWAYS)
1507 if (head->bbJumpDest->bbNum <= bottom->bbNum &&
1508 head->bbJumpDest->bbNum >= top->bbNum )
1510 /* OK - we enter somewhere within the loop */
1511 entry = head->bbJumpDest;
1513 /* some useful asserts
1514 * Cannot enter at the top - should have being caught by redundant jumps */
1516 assert ((entry != top) || (head->bbFlags & BBF_KEEP_BBJ_ALWAYS));
1520 /* special case - don't consider now */
1521 //assert (!"Loop entered in weird way!");
1525 // Can we fall through into the loop?
1526 else if (head->bbJumpKind == BBJ_NONE || head->bbJumpKind == BBJ_COND)
1528 /* The ENTRY is at the TOP (a do-while loop) */
1533 goto NO_LOOP; // head does not flow into the loop bail for now
1536 // Now we find the "first" block -- the earliest block reachable within the loop.
1537 // This is usually the same as "top", but can differ in rare cases where "top" is
1538 // the entry block of a nested loop, and that nested loop branches backwards to a
1539 // a block before "top". We find this by searching for such backwards branches
1540 // in the loop known so far.
1541 BasicBlock* first = top;
1542 BasicBlock* newFirst;
1543 bool blocksToSearch = true;
1544 BasicBlock* validatedAfter = bottom->bbNext;
// Fixed-point search: keep extending 'first' backwards until no successor
// of any block in [first, validatedAfter) precedes it.
1545 while (blocksToSearch)
1547 blocksToSearch = false;
1549 blocksToSearch = false;
1550 for (loopBlock = first; loopBlock != validatedAfter; loopBlock = loopBlock->bbNext)
1552 unsigned nSucc = loopBlock->NumSucc();
1553 for (unsigned j = 0; j < nSucc; j++)
1555 BasicBlock* succ = loopBlock->GetSucc(j);
1556 if ( (newFirst == nullptr && succ->bbNum < first->bbNum)
1557 || (newFirst != nullptr && succ->bbNum < newFirst->bbNum))
1563 if (newFirst != nullptr)
1565 validatedAfter = first;
1567 blocksToSearch = true;
1571 // Is "head" still before "first"? If not, we don't have a valid loop...
1572 if (head->bbNum >= first->bbNum)
1574 JITDUMP("Extending loop [BB%02u..BB%02u] 'first' to BB%02u captures head BB%02u. Rejecting loop.\n",
1575 top->bbNum, bottom->bbNum, first->bbNum, head->bbNum);
1579 /* Make sure ENTRY dominates all blocks in the loop
1580 * This is necessary to ensure condition 2. above
1581 * At the same time check if the loop has a single exit
1582 * point - those loops are easier to optimize */
1584 for (loopBlock = top;
1585 loopBlock != bottom->bbNext;
1586 loopBlock = loopBlock->bbNext)
1588 if (!fgDominate(entry, loopBlock))
1593 if (loopBlock == bottom)
1595 if (bottom->bbJumpKind != BBJ_ALWAYS)
1597 /* there is an exit at the bottom */
1599 noway_assert(bottom->bbJumpDest == top);
1606 BasicBlock * exitPoint;
// Classify this block's jump to decide whether it exits the loop.
1608 switch (loopBlock->bbJumpKind)
1611 case BBJ_CALLFINALLY:
1613 case BBJ_EHCATCHRET:
1614 assert (loopBlock->bbJumpDest);
1615 exitPoint = loopBlock->bbJumpDest;
1617 if (exitPoint->bbNum < top->bbNum ||
1618 exitPoint->bbNum > bottom->bbNum )
1620 /* exit from a block other than BOTTOM */
1629 case BBJ_EHFINALLYRET:
1630 case BBJ_EHFILTERRET:
1631 /* The "try" associated with this "finally" must be in the
1632 * same loop, so the finally block will return control inside the loop */
1637 /* those are exits from the loop */
// Switch: check every destination in the jump table for a loop exit.
1644 unsigned jumpCnt; jumpCnt = loopBlock->bbJumpSwt->bbsCount;
1645 BasicBlock * * jumpTab; jumpTab = loopBlock->bbJumpSwt->bbsDstTab;
1649 noway_assert(*jumpTab);
1650 exitPoint = *jumpTab;
1652 if (exitPoint->bbNum < top->bbNum ||
1653 exitPoint->bbNum > bottom->bbNum )
1659 while (++jumpTab, --jumpCnt);
1663 noway_assert(!"Unexpected bbJumpKind");
1668 /* Make sure we can iterate the loop (i.e. there is a way back to ENTRY)
1669 * This is to ensure condition 1. above which prevents marking fake loops
1671 * Below is an example:
1679 * The example above is not a loop since we bail after the first iteration
1681 * The condition we have to check for is
1682 * 1. ENTRY must have at least one predecessor inside the loop. Since we know that that block is
1683 * reachable, it can only be reached through ENTRY, therefore we have a way back to ENTRY
1685 * 2. If we have a GOTO (BBJ_ALWAYS) outside of the loop and that block dominates the
1686 * loop bottom then we cannot iterate
1688 * NOTE that this doesn't entirely satisfy condition 1. since "break" statements are not
1689 * part of the loop nodes (as per definition they are loop exits executed only once),
1690 * but we have no choice but to include them because we consider all blocks within TOP-BOTTOM */
1692 for (loopBlock = top; loopBlock != bottom; loopBlock = loopBlock->bbNext)
1694 switch (loopBlock->bbJumpKind)
1699 if (fgDominate(loopBlock, bottom))
1706 bool canIterateLoop = false;
// ENTRY must have at least one in-loop predecessor (condition 1. above).
1708 for (predEntry = entry->bbPreds; predEntry; predEntry = predEntry->flNext)
1710 if (predEntry->flBlock->bbNum >= top->bbNum &&
1711 predEntry->flBlock->bbNum <= bottom->bbNum )
1713 canIterateLoop = true;
1716 else if (predEntry->flBlock != head)
1718 // The entry block has multiple predecessors outside the loop; the 'head'
1719 // block isn't the only one. We only support a single 'head', so bail.
1724 if (!canIterateLoop)
1727 /* Double check - make sure that all loop blocks except ENTRY
1728 * have no predecessors outside the loop - this ensures only one loop entry and prevents
1729 * us from considering non-loops due to incorrectly assuming that we had a back edge
1732 * Loops of the form "while (a || b)" will be treated as 2 nested loops (with the same header)
1735 for (loopBlock = top;
1736 loopBlock != bottom->bbNext;
1737 loopBlock = loopBlock->bbNext)
1739 if (loopBlock == entry)
1742 for (predTop = loopBlock->bbPreds;
1744 predTop = predTop->flNext)
1746 if (predTop->flBlock->bbNum < top->bbNum ||
1747 predTop->flBlock->bbNum > bottom->bbNum )
1749 //noway_assert(!"Found loop with multiple entries");
1755 // Disqualify loops where the first block of the loop is less nested in EH than
1756 // the bottom block. That is, we don't want to handle loops where the back edge
1757 // goes from within an EH region to a first block that is outside that same EH
1758 // region. Note that we *do* handle loops where the first block is the *first*
1759 // block of a more nested EH region (since it is legal to branch to the first
1760 // block of an immediately more nested EH region). So, for example, disqualify
1767 // BB10 BBJ_COND => BB02
1771 // Here, BB10 is more nested than BB02.
1773 if (bottom->hasTryIndex() &&
1774 !bbInTryRegions(bottom->getTryIndex(), first))
1776 JITDUMP("Loop 'first' BB%02u is in an outer EH region compared to loop 'bottom' BB%02u. Rejecting loop.\n",
1777 first->bbNum, bottom->bbNum);
1781 #if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
1782 // Disqualify loops where the first block of the loop is a finally target.
1783 // The main problem is when multiple loops share a 'first' block that is a finally
1784 // target and we canonicalize the loops by adding a new loop head. In that case, we
1785 // need to update the blocks so the finally target bit is moved to the newly created
1786 // block, and removed from the old 'first' block. This is 'hard', so at this point
1787 // in the RyuJIT codebase (when we don't expect to keep the "old" ARM32 code generator
1788 // long-term), it's easier to disallow the loop than to update the flow graph to
1789 // support this case.
1791 if ((first->bbFlags & BBF_FINALLY_TARGET) != 0)
1793 JITDUMP("Loop 'first' BB%02u is a finally target. Rejecting loop.\n",
1797 #endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
1799 /* At this point we have a loop - record it in the loop table
1800 * If we found only one exit, record it in the table too
1801 * (otherwise an exit = 0 in the loop table means multiple exits) */
1808 optRecordLoop(head, first, top, entry, bottom, exit, exitCount);
1811 if (!hasMethodLoops)
1813 /* mark the method as containing natural loops */
1815 hasMethodLoops = true;
1818 /* increment total number of loops found */
1822 /* keep track of the number of exits */
1823 loopExitCountTable.record(static_cast<unsigned>(exitCount));
1824 #endif // COUNT_LOOPS
1827 /* current predecessor not good for a loop - continue with another one, if any */
// COUNT_LOOPS statistics gathering (only in instrumented builds).
1833 loopCountTable.record(loopsThisMethod);
1834 if (maxLoopsPerMethod < loopsThisMethod)
1836 maxLoopsPerMethod = loopsThisMethod;
1838 if (loopOverflowThisMethod)
1840 totalLoopOverflows++;
1842 #endif // COUNT_LOOPS
1844 // Now the loop indices are stable. We can figure out parent/child relationships
1845 // (using table indices to name loops), and label blocks.
1846 for (unsigned char loopInd = 1; loopInd < optLoopCount; loopInd++)
1848 for (unsigned char possibleParent = loopInd; possibleParent > 0;)
1851 if (optLoopTable[possibleParent].lpContains(optLoopTable[loopInd]))
1853 optLoopTable[loopInd].lpParent = possibleParent;
1854 optLoopTable[loopInd].lpSibling = optLoopTable[possibleParent].lpChild;
1855 optLoopTable[possibleParent].lpChild = loopInd;
1861 // Now label the blocks with the innermost loop to which they belong. Since parents
1862 // precede children in the table, doing the labeling for each loop in order will achieve
1863 // this -- the innermost loop labeling will be done last.
1864 for (unsigned char loopInd = 0; loopInd < optLoopCount; loopInd++)
1866 BasicBlock* first = optLoopTable[loopInd].lpFirst;
1867 BasicBlock* bottom = optLoopTable[loopInd].lpBottom;
1868 for (BasicBlock* blk = first; blk != nullptr; blk = blk->bbNext)
1870 blk->bbNatLoopNum = loopInd;
1871 if (blk == bottom) break;
1872 assert(blk->bbNext != nullptr); // We should never reach nullptr.
1876 // Make sure that loops are canonical: that every loop has a unique "top", by creating an empty "nop"
1877 // one, if necessary, for loops containing others that share a "top."
1879 for (unsigned char loopInd = 0; loopInd < optLoopCount; loopInd++)
1881 // Traverse the outermost loops as entries into the loop nest; so skip non-outermost.
1882 if (optLoopTable[loopInd].lpParent != BasicBlock::NOT_IN_LOOP)
1886 if (optCanonicalizeLoopNest(loopInd))
1893 fgUpdateChangedFlowGraph();
// Dump the final loop table under the JIT's verbose mode.
1897 if (verbose && optLoopCount > 0)
1899 printf("\nFinal natural loop table:\n");
1900 for (unsigned loopInd = 0; loopInd < optLoopCount; loopInd++)
1902 optPrintLoopInfo(loopInd);
1909 void Compiler::optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap)
// Rewrite the jump target(s) of 'blk': any destination that appears as a key
// in 'redirectMap' is replaced with the corresponding mapped block.
// NOTE: this does not update predecessor lists (see optCanonicalizeLoop).
1911 BasicBlock* newJumpDest = nullptr;
1912 switch (blk->bbJumpKind)
1917 case BBJ_EHFILTERRET:
1918 case BBJ_EHFINALLYRET:
1919 case BBJ_EHCATCHRET:
1920 // These have no jump destination to update.
1925 case BBJ_CALLFINALLY:
1927 // All of these have a single jump destination to update.
1928 if (redirectMap->Lookup(blk->bbJumpDest, &newJumpDest))
1930 blk->bbJumpDest = newJumpDest;
// Switch: rewrite each entry of the destination table individually.
1936 bool redirected = false;
1937 for (unsigned i = 0; i < blk->bbJumpSwt->bbsCount; i++)
1939 if (redirectMap->Lookup(blk->bbJumpSwt->bbsDstTab[i], &newJumpDest))
1941 blk->bbJumpSwt->bbsDstTab[i] = newJumpDest;
1945 // If any redirections happened, invalidate the switch table map for the switch.
1947 GetSwitchDescMap()->Remove(blk);
1956 // TODO-Cleanup: This should be a static member of the BasicBlock class.
1957 void Compiler::optCopyBlkDest(BasicBlock* from, BasicBlock* to)
// Copy the jump destination(s) of 'from' onto 'to'. Both blocks must already
// share the same bbJumpKind (asserted below). For switches, a fresh BBswtDesc
// and destination table are allocated so 'to' does not alias 'from'.
1959 assert(from->bbJumpKind == to->bbJumpKind); // Precondition.
1961 // copy the jump destination(s) from "from" to "to".
1962 switch (to->bbJumpKind)
1966 case BBJ_CALLFINALLY:
1968 // All of these have a single jump destination to update.
1969 to->bbJumpDest = from->bbJumpDest;
// Switch: deep-copy the descriptor and its destination table.
1974 to->bbJumpSwt = new (this, CMK_BasicBlock) BBswtDesc();
1975 to->bbJumpSwt->bbsCount = from->bbJumpSwt->bbsCount;
1976 to->bbJumpSwt->bbsDstTab = new (this, CMK_BasicBlock) BasicBlock*[from->bbJumpSwt->bbsCount];
1978 for (unsigned i = 0; i < from->bbJumpSwt->bbsCount; i++)
1980 to->bbJumpSwt->bbsDstTab[i] = from->bbJumpSwt->bbsDstTab[i];
1990 // Canonicalize the loop nest rooted at parent loop 'loopInd'.
1991 // Returns 'true' if the flow graph is modified.
1992 bool Compiler::optCanonicalizeLoopNest(unsigned char loopInd)
1994 bool modified = false;
1996 // Is the top of the current loop not in any nested loop?
1997 if (optLoopTable[loopInd].lpTop->bbNatLoopNum != loopInd)
1999 if (optCanonicalizeLoop(loopInd))
// Recurse into every child of 'loopInd' (walking the sibling chain),
// canonicalizing each nested loop in turn.
2005 for (unsigned char child = optLoopTable[loopInd].lpChild;
2006 child != BasicBlock::NOT_IN_LOOP;
2007 child = optLoopTable[child].lpSibling)
2009 if (optCanonicalizeLoopNest(child))
2018 bool Compiler::optCanonicalizeLoop(unsigned char loopInd)
// Give loop 'loopInd' a unique "top" block: if its current top also belongs
// to a nested loop, insert a fresh empty block 'newT' before it (in the
// correct EH region), redirect the loop's back edge and the non-loop
// predecessors of the old top to 'newT', and update the loop table.
// Per its caller, returns whether the flow graph was modified.
2020 // Is the top uniquely part of the current loop?
2021 BasicBlock* t = optLoopTable[loopInd].lpTop;
2023 if (t->bbNatLoopNum == loopInd)
2026 JITDUMP("in optCanonicalizeLoop: L%02u has top BB%02u (bottom BB%02u) with natural loop number L%02u: need to canonicalize\n",
2027 loopInd, t->bbNum, optLoopTable[loopInd].lpBottom->bbNum, t->bbNatLoopNum);
2029 // Otherwise, the top of this loop is also part of a nested loop.
2031 // Insert a new unique top for this loop. We must be careful to put this new
2032 // block in the correct EH region. Note that f->bbPrev might be in a different
2033 // EH region. For example:
2041 // In this case, first->bbPrev is BB07, which is in a different 'try' region.
2042 // On the other hand, the first block of multiple loops might be the first
2043 // block of a 'try' region that is completely contained in the multiple loops.
2048 // BB10 BBJ_ALWAYS => BB08
2050 // BB12 BBJ_ALWAYS => BB08
2052 // Here, we have two loops, both with BB08 as the "first" block. Block BB08
2053 // is a single-block "try" region. Neither loop "bottom" block is in the same
2054 // "try" region as BB08. This is legal because you can jump to the first block
2055 // of a try region. With EH normalization, no two "try" regions will share
2056 // this block. In this case, we need to insert a new block for the outer loop
2057 // in the same EH region as the branch from the "bottom":
2062 // BB10 BBJ_ALWAYS => BB08
2064 // BB12 BBJ_ALWAYS => BB30
2066 // Another possibility is that the "first" block of the loop nest can be the first block
2067 // of a "try" region that also has other predecessors than those in the loop, or even in
2068 // the "try" region (since blocks can target the first block of a "try" region). For example:
2072 // BB10 BBJ_ALWAYS => BB08
2074 // BB12 BBJ_ALWAYS => BB08
2077 // BB20 BBJ_ALWAYS => BB08
2079 // BB25 BBJ_ALWAYS => BB08
2081 // Here, BB08 has 4 flow graph predecessors: BB10, BB12, BB20, BB25. These are all potential loop
2082 // bottoms, for four possible nested loops. However, we require all the loop bottoms to be in the
2083 // same EH region. For loops BB08..BB10 and BB08..BB12, we need to add a new "top" block within
2084 // the try region, immediately before BB08. The bottom of the loop BB08..BB10 loop will target the
2085 // old BB08, and the bottom of the BB08..BB12 loop will target the new loop header. The other branches
2086 // (BB20, BB25) must target the new loop header, both for correctness, and to avoid the illegal
2087 // situation of branching to a non-first block of a 'try' region.
2089 // We can also have a loop nest where the "first" block is outside of a "try" region
2090 // and the back edges are inside a "try" region, for example:
2094 // BB09 try { BBJ_COND => BB02
2096 // BB15 BBJ_COND => BB02
2098 // BB21 } // end of "try"
2100 // In this case, both loop back edges were formed by "leave" instructions that were
2101 // imported into branches that were later made conditional. In this case, we don't
2102 // want to copy the EH region of the back edge, since that would create a block
2103 // outside of and disjoint with the "try" region of the back edge. However, to
2104 // simplify things, we disqualify this type of loop, so we should never see this here.
2106 BasicBlock* h = optLoopTable[loopInd].lpHead;
2107 BasicBlock* f = optLoopTable[loopInd].lpFirst;
2108 BasicBlock* b = optLoopTable[loopInd].lpBottom;
2110 // The loop must be entirely contained within a single handler region.
2111 assert(BasicBlock::sameHndRegion(f, b));
2113 // If the bottom block is in the same "try" region, then we extend the EH
2114 // region. Otherwise, we add the new block outside the "try" region.
2115 bool extendRegion = BasicBlock::sameTryRegion(f, b);
2116 BasicBlock* newT = fgNewBBbefore(BBJ_NONE, f, extendRegion);
2119 // We need to set the EH region manually. Set it to be the same
2120 // as the bottom block.
2121 newT->copyEHRegion(b);
// The new top has the same reachability set as the old one.
2124 BlockSetOps::Assign(this, newT->bbReach, t->bbReach);
2126 // Redirect the "bottom" of the current loop to "newT".
2127 BlockToBlockMap* blockMap = new (getAllocatorLoopHoist()) BlockToBlockMap(getAllocatorLoopHoist());
2128 blockMap->Set(t, newT);
2129 optRedirectBlock(b, blockMap);
2131 // Redirect non-loop preds of "t" to also go to "newT". Inner loops that also branch to "t" should continue
2132 // to do so. However, there maybe be other predecessors from outside the loop nest that need to be updated
2133 // to point to "newT". This normally wouldn't happen, since they too would be part of the loop nest. However,
2134 // they might have been prevented from participating in the loop nest due to different EH nesting, or some
2137 // Note that optRedirectBlock doesn't update the predecessors list. So, if the same 't' block is processed
2138 // multiple times while canonicalizing multiple loop nests, we'll attempt to redirect a predecessor multiple times.
2139 // This is ok, because after the first redirection, the topPredBlock branch target will no longer match the source
2140 // edge of the blockMap, so nothing will happen.
2141 for (flowList* topPred = t->bbPreds; topPred != nullptr; topPred = topPred->flNext)
2143 BasicBlock* topPredBlock = topPred->flBlock;
2145 // Skip if topPredBlock is in the loop.
2146 // Note that this uses block number to detect membership in the loop. We are adding blocks during
2147 // canonicalization, and those block numbers will be new, and larger than previous blocks. However, we work
2148 // outside-in, so we shouldn't encounter the new blocks at the loop boundaries, or in the predecessor lists.
2149 if (t->bbNum <= topPredBlock->bbNum && topPredBlock->bbNum <= b->bbNum)
2151 JITDUMP("in optCanonicalizeLoop: 'top' predecessor BB%02u is in the range of L%02u (BB%02u..BB%02u); not redirecting its bottom edge\n",
2152 topPredBlock->bbNum, loopInd, t->bbNum, b->bbNum);
2156 JITDUMP("in optCanonicalizeLoop: redirect top predecessor BB%02u to BB%02u\n", topPredBlock->bbNum, newT->bbNum);
2157 optRedirectBlock(topPredBlock, blockMap);
2160 assert(newT->bbNext == f);
// Make newT explicitly jump to the old top and give it an empty (GT_NOP)
// statement list so it is a well-formed block.
2163 newT->bbJumpKind = BBJ_ALWAYS;
2164 newT->bbJumpDest = t;
2165 newT->bbTreeList = nullptr;
2166 fgInsertStmtAtEnd(newT, fgNewStmtFromTree(gtNewOperNode(GT_NOP, TYP_VOID, nullptr)));
2169 // If it had been a do-while loop (top == entry), update entry, as well.
2170 BasicBlock* origE = optLoopTable[loopInd].lpEntry;
2171 if (optLoopTable[loopInd].lpTop == origE)
2172 optLoopTable[loopInd].lpEntry = newT;
2173 optLoopTable[loopInd].lpTop = newT;
2174 optLoopTable[loopInd].lpFirst = newT;
2176 newT->bbNatLoopNum = loopInd;
2178 JITDUMP("in optCanonicalizeLoop: made new block BB%02u [%p] the new unique top of loop %d.\n", newT->bbNum, dspPtr(newT), loopInd);
2180 // Make sure the head block still goes to the entry...
2181 if (h->bbJumpKind == BBJ_NONE &&
2182 h->bbNext != optLoopTable[loopInd].lpEntry)
2184 h->bbJumpKind = BBJ_ALWAYS;
2185 h->bbJumpDest = optLoopTable[loopInd].lpEntry;
2187 else if (h->bbJumpKind == BBJ_COND &&
2188 h->bbNext == newT &&
2189 newT != optLoopTable[loopInd].lpEntry)
// The head conditionally falls into newT, but newT is not the entry:
// insert a new always-jump block 'h2' after 'h' to reach the entry.
2191 BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, h, /*extendRegion*/true);
2192 optLoopTable[loopInd].lpHead = h2;
2193 h2->bbJumpDest = optLoopTable[loopInd].lpEntry;
2194 h2->bbTreeList = nullptr;
2195 fgInsertStmtAtEnd(h2, fgNewStmtFromTree(gtNewOperNode(GT_NOP, TYP_VOID, nullptr)));
2198 // If any loops nested in "loopInd" have the same head and entry as "loopInd",
2199 // it must be the case that they were do-while's (since "h" fell through to the entry).
2200 // The new node "newT" becomes the head of such loops.
2201 for (unsigned char childLoop = optLoopTable[loopInd].lpChild;
2202 childLoop != BasicBlock::NOT_IN_LOOP;
2203 childLoop = optLoopTable[childLoop].lpSibling)
2205 if ( optLoopTable[childLoop].lpEntry == origE
2206 && optLoopTable[childLoop].lpHead == h
2207 && newT->bbJumpKind == BBJ_NONE
2208 && newT->bbNext == origE)
2210 optUpdateLoopHead(childLoop, h, newT);
2216 bool Compiler::optLoopContains(unsigned l1, unsigned l2)
2218 assert(l1 != BasicBlock::NOT_IN_LOOP);
2219 if (l1 == l2) return true;
2220 else if (l2 == BasicBlock::NOT_IN_LOOP) return false;
2221 else return optLoopContains(l1, optLoopTable[l2].lpParent);
2224 void Compiler::optUpdateLoopHead(unsigned loopInd, BasicBlock* from, BasicBlock* to)
2226 assert(optLoopTable[loopInd].lpHead == from);
2227 optLoopTable[loopInd].lpHead = to;
2228 for (unsigned char childLoop = optLoopTable[loopInd].lpChild;
2229 childLoop != BasicBlock::NOT_IN_LOOP;
2230 childLoop = optLoopTable[childLoop].lpSibling)
2232 if (optLoopTable[childLoop].lpHead == from)
2233 optUpdateLoopHead(childLoop, from, to);
2237 /*****************************************************************************
2238 * If the "i += const" will cause an overflow exception for the small types.
2241 bool jitIterSmallOverflow(int iterAtExit, var_types incrType)
// Returns true when 'iterAtExit' exceeds the maximum value representable in
// the small integral type 'incrType', i.e. the increment would overflow.
2247 case TYP_BYTE: type_MAX = SCHAR_MAX; break;
2248 case TYP_UBYTE: type_MAX = UCHAR_MAX; break;
2249 case TYP_SHORT: type_MAX = SHRT_MAX; break;
2250 case TYP_CHAR: type_MAX = USHRT_MAX; break;
// For 32-bit types the overflow check is done by the caller, not here.
2252 case TYP_UINT: // Detected by checking for 32bit ....
2253 case TYP_INT: return false; // ... overflow same as done for TYP_INT
2255 default: NO_WAY("Bad type");
2258 if (iterAtExit > type_MAX)
2264 /*****************************************************************************
2265 * If the "i -= const" will cause an underflow exception for the small types
2268 bool jitIterSmallUnderflow(int iterAtExit, var_types decrType)
// Returns true when 'iterAtExit' is below the minimum value representable in
// the small integral type 'decrType', i.e. the decrement would underflow.
2274 case TYP_BYTE: type_MIN = SCHAR_MIN; break;
2275 case TYP_SHORT: type_MIN = SHRT_MIN; break;
2276 case TYP_UBYTE: type_MIN = 0; break;
2277 case TYP_CHAR: type_MIN = 0; break;
// For 32-bit types the underflow check is done by the caller, not here.
2279 case TYP_UINT: // Detected by checking for 32bit ....
2280 case TYP_INT: return false; // ... underflow same as done for TYP_INT
2282 default: NO_WAY("Bad type");
2285 if (iterAtExit < type_MIN)
2291 /*****************************************************************************
2293 * Helper for unroll loops - Computes the number of repetitions
2294 * in a constant loop. If it cannot prove the number is constant returns false
2297 bool Compiler::optComputeLoopRep(int constInit,
2300 genTreeOps iterOper,
2301 var_types iterOperType,
2302 genTreeOps testOper,
2305 unsigned * iterCount)
// Computes the trip count of a counted loop whose initial value, limit and
// increment are all constants. On success stores the count in *iterCount.
// Returns false when the count cannot be proven constant (zero increment,
// wrap-around arithmetic, or an unsupported iterator/test operator).
2307 noway_assert(genActualType(iterOperType) == TYP_INT);
2310 __int64 constLimitX;
// Widen the limit to 64 bits with the correct signedness up-front.
2315 // Using this, we can just do a signed comparison with other 32 bit values.
2316 if (unsTest) constLimitX = (unsigned int)constLimit;
2317 else constLimitX = ( signed int)constLimit;
2319 switch (iterOperType)
2321 // For small types, the iteration operator will narrow these values if big
2323 #define INIT_ITER_BY_TYPE(type) constInitX = (type)constInit; iterInc = (type)iterInc;
2325 case TYP_BYTE: INIT_ITER_BY_TYPE( signed char ); break;
2326 case TYP_UBYTE: INIT_ITER_BY_TYPE(unsigned char ); break;
2327 case TYP_SHORT: INIT_ITER_BY_TYPE( signed short); break;
2328 case TYP_CHAR: INIT_ITER_BY_TYPE(unsigned short); break;
2330 // For the big types, 32 bit arithmetic is performed
2333 case TYP_UINT: if (unsTest) constInitX = (unsigned int)constInit;
2334 else constInitX = ( signed int)constInit;
2338 noway_assert(!"Bad type");
2342 /* If iterInc is zero we have an infinite loop */
2346 /* Set iterSign to +1 for positive iterInc and -1 for negative iterInc */
2347 iterSign = (iterInc > 0) ? +1 : -1;
2349 /* Initialize loopCount to zero */
2352 // If dupCond is true then the loop head contains a test which skips
2353 // this loop, if the constInit does not pass the loop test
2354 // Such a loop can execute zero times.
2355 // If dupCond is false then we have a true do-while loop which we
2356 // always execute the loop once before performing the loop test
2360 constInitX += iterInc;
2363 // bail if count is based on wrap-around math
2366 if (constLimitX < constInitX)
2369 else if (constLimitX > constInitX)
2372 /* Compute the number of repetitions */
// The sections below handle one loop-test operator each; they differ only
// in the comparison used and in the rounding of the division.
2376 __int64 iterAtExitX;
2379 /* something like "for (i=init; i == lim; i++)" doesn't make any sense */
2383 /* "for (i=init; i != lim; i+=const)" - this is tricky since it may
2384 * have a constant number of iterations or loop forever -
2385 * we have to compute (lim-init) mod iterInc to see if it is zero.
2386 * If mod iterInc is not zero then the limit test will miss and a wrap will occur
2387 * which is probably not what the end user wanted, but it is legal.
2392 /* Stepping by one, i.e. Mod with 1 is always zero */
2395 if (((constLimitX - constInitX) % iterInc) != 0)
2401 noway_assert(iterInc < 0);
2402 /* Stepping by -1, i.e. Mod with 1 is always zero */
2405 if (((constInitX - constLimitX) % (-iterInc)) != 0)
2419 if (constInitX != constLimitX)
2420 loopCount += (unsigned) ((constLimitX - constInitX - iterSign) / iterInc) + 1;
2422 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2425 iterAtExitX = (unsigned)iterAtExitX;
2427 // Check if iteration incr will cause overflow for small types
2428 if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
2431 // iterator with 32bit overflow. Bad for TYP_(U)INT
2432 if (iterAtExitX < constLimitX)
2435 *iterCount = loopCount;
2451 noway_assert(!"Unknown operator for loop iterator");
2465 if (constInitX < constLimitX)
2466 loopCount += (unsigned) ((constLimitX - constInitX - iterSign) / iterInc) + 1;
2468 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2471 iterAtExitX = (unsigned)iterAtExitX;
2473 // Check if iteration incr will cause overflow for small types
2474 if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
2477 // iterator with 32bit overflow. Bad for TYP_(U)INT
2478 if (iterAtExitX < constLimitX)
2481 *iterCount = loopCount;
2497 noway_assert(!"Unknown operator for loop iterator");
2511 if (constInitX <= constLimitX)
2512 loopCount += (unsigned) ((constLimitX - constInitX) / iterInc) + 1;
2514 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2517 iterAtExitX = (unsigned)iterAtExitX;
2519 // Check if iteration incr will cause overflow for small types
2520 if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
2523 // iterator with 32bit overflow. Bad for TYP_(U)INT
2524 if (iterAtExitX <= constLimitX)
2527 *iterCount = loopCount;
2543 noway_assert(!"Unknown operator for loop iterator");
2557 if (constInitX > constLimitX)
2558 loopCount += (unsigned) ((constLimitX - constInitX - iterSign) / iterInc) + 1;
2560 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2563 iterAtExitX = (unsigned)iterAtExitX;
2565 // Check if small types will underflow
2566 if (jitIterSmallUnderflow((int)iterAtExitX, iterOperType))
2569 // iterator with 32bit underflow. Bad for TYP_INT and unsigneds
2570 if (iterAtExitX > constLimitX)
2573 *iterCount = loopCount;
2589 noway_assert(!"Unknown operator for loop iterator");
2603 if (constInitX >= constLimitX)
2604 loopCount += (unsigned) ((constLimitX - constInitX) / iterInc) + 1;
2606 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2609 iterAtExitX = (unsigned)iterAtExitX;
2611 // Check if small types will underflow
2612 if (jitIterSmallUnderflow((int)iterAtExitX, iterOperType))
2615 // iterator with 32bit underflow. Bad for TYP_INT and unsigneds
2616 if (iterAtExitX >= constLimitX)
2619 *iterCount = loopCount;
2635 noway_assert(!"Unknown operator for loop iterator");
2640 noway_assert(!"Unknown operator for loop condition");
2647 /*****************************************************************************
2649 * Look for loop unrolling candidates and unroll them
2653 #pragma warning(push)
2654 #pragma warning(disable:21000) // Suppress PREFast warning about overly large function
2656 void Compiler::optUnrollLoops()
2658 if (compCodeOpt() == SMALL_CODE)
2661 if (optLoopCount == 0)
2665 if (JitConfig.JitNoUnroll())
2671 if (optCanCloneLoops())
2678 printf("*************** In optUnrollLoops()\n");
2680 /* Look for loop unrolling candidates */
2682 /* Double loop so that after unrolling an inner loop we set change to true
2683 * and we then go back over all of the loop candidates and try to unroll
2684 * the next outer loop, until we don't unroll any loops,
2685 * then change will be false and we are done.
2689 bool change = false;
2691 for (unsigned lnum = 0; lnum < optLoopCount; lnum++)
2695 BasicBlock * bottom;
2705 int lbeg; // initial value for iterator
2706 int llim; // limit value for iterator
2707 unsigned lvar; // iterator lclVar #
2708 int iterInc; // value to increment the iterator
2709 genTreeOps iterOper; // type of iterator increment (i.e. ASG_ADD, ASG_SUB, etc.)
2710 var_types iterOperType; // type result of the oper (for overflow instrs)
2711 genTreeOps testOper; // type of loop test (i.e. GT_LE, GT_GE, etc.)
2712 bool unsTest; // Is the comparison u/int
2714 unsigned totalIter; // total number of iterations in the constant loop
2715 unsigned loopCostSz; // Cost is size of one iteration
2716 unsigned loopFlags; // actual lpFlags
2717 unsigned requiredFlags; // required lpFlags
2719 GenTree * loopList; // new stmt list of the unrolled loop
2722 static const int ITER_LIMIT[COUNT_OPT_CODE + 1] =
2730 noway_assert(ITER_LIMIT[ SMALL_CODE] == 0);
2731 noway_assert(ITER_LIMIT[COUNT_OPT_CODE] == 0);
2733 unsigned iterLimit = (unsigned)ITER_LIMIT[compCodeOpt()];
2736 if (compStressCompile(STRESS_UNROLL_LOOPS, 50))
2740 static const int UNROLL_LIMIT_SZ[COUNT_OPT_CODE + 1] =
2748 noway_assert(UNROLL_LIMIT_SZ[ SMALL_CODE] == 0);
2749 noway_assert(UNROLL_LIMIT_SZ[COUNT_OPT_CODE] == 0);
2751 int unrollLimitSz = (unsigned)UNROLL_LIMIT_SZ[compCodeOpt()];
2754 if (compStressCompile(STRESS_UNROLL_LOOPS, 50))
2755 unrollLimitSz *= 10;
2758 loopFlags = optLoopTable[lnum].lpFlags;
2759 requiredFlags = LPFLG_DO_WHILE | LPFLG_ONE_EXIT | LPFLG_CONST;
2762 /* Ignore the loop if we don't have a do-while with a single exit
2763 that has a constant number of iterations */
2765 if ((loopFlags & requiredFlags) != requiredFlags)
2768 /* ignore if removed or marked as not unrollable */
2770 if (optLoopTable[lnum].lpFlags & (LPFLG_DONT_UNROLL | LPFLG_REMOVED))
2773 head = optLoopTable[lnum].lpHead; noway_assert(head);
2774 bottom = optLoopTable[lnum].lpBottom; noway_assert(bottom);
2776 /* The single exit must be at the bottom of the loop */
2777 noway_assert(optLoopTable[lnum].lpExit);
2778 if (optLoopTable[lnum].lpExit != bottom)
2781 /* Unrolling loops with jumps in them is not worth the headache
2782 * Later we might consider unrolling loops after un-switching */
2787 block = block->bbNext; noway_assert(block);
2789 if (block->bbJumpKind != BBJ_NONE)
2791 if (block != bottom)
2795 while (block != bottom);
2797 /* Get the loop data:
2801 - iterator increment
2802 - increment operation type (i.e. ASG_ADD, ASG_SUB, etc...)
2803 - loop test type (i.e. GT_GE, GT_LT, etc...)
2806 lbeg = optLoopTable[lnum].lpConstInit;
2807 llim = optLoopTable[lnum].lpConstLimit();
2808 testOper = optLoopTable[lnum].lpTestOper();
2810 lvar = optLoopTable[lnum].lpIterVar();
2811 iterInc = optLoopTable[lnum].lpIterConst();
2812 iterOper = optLoopTable[lnum].lpIterOper();
2814 iterOperType= optLoopTable[lnum].lpIterOperType();
2815 unsTest =(optLoopTable[lnum].lpTestTree->gtFlags & GTF_UNSIGNED) != 0;
2817 if (lvaTable[lvar].lvAddrExposed) // If the loop iteration variable is address-exposed then bail
2819 if (lvaTable[lvar].lvIsStructField) // If the loop iteration variable is a promoted field from a struct then bail
2822 /* Locate the pre-header and initialization and increment/test statements */
2824 phdr = head->bbTreeList; noway_assert(phdr);
2825 loop = bottom->bbTreeList; noway_assert(loop);
2827 init = head->lastStmt(); noway_assert(init && (init->gtNext == 0));
2828 test = bottom->lastStmt(); noway_assert(test && (test->gtNext == 0));
2829 incr = test->gtPrev; noway_assert(incr);
2831 if (init->gtFlags & GTF_STMT_CMPADD)
2833 /* Must be a duplicated loop condition */
2834 noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
2837 init = init->gtPrev; noway_assert(init);
2842 /* Find the number of iterations - the function returns false if not a constant number */
2844 if (!optComputeLoopRep(lbeg, llim,
2845 iterInc, iterOper, iterOperType,
2846 testOper, unsTest, dupCond,
2850 /* Forget it if there are too many repetitions or not a constant loop */
2852 if (totalIter > iterLimit)
2855 noway_assert(init->gtOper == GT_STMT); init = init->gtStmt.gtStmtExpr;
2856 noway_assert(test->gtOper == GT_STMT); test = test->gtStmt.gtStmtExpr;
2857 noway_assert(incr->gtOper == GT_STMT); incr = incr->gtStmt.gtStmtExpr;
2859 // Don't unroll loops we don't understand.
2860 if (incr->gtOper == GT_ASG)
2865 /* Make sure everything looks ok */
2867 (init->gtOper != GT_ASG) ||
2868 (init->gtOp.gtOp1->gtOper != GT_LCL_VAR) ||
2869 (init->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) ||
2870 (init->gtOp.gtOp2->gtOper != GT_CNS_INT) ||
2871 (init->gtOp.gtOp2->gtIntCon.gtIconVal != lbeg) ||
2873 !((incr->gtOper == GT_ASG_ADD) || (incr->gtOper == GT_ASG_SUB)) ||
2874 (incr->gtOp.gtOp1->gtOper != GT_LCL_VAR) ||
2875 (incr->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) ||
2876 (incr->gtOp.gtOp2->gtOper != GT_CNS_INT) ||
2877 (incr->gtOp.gtOp2->gtIntCon.gtIconVal != iterInc) ||
2879 (test->gtOper != GT_JTRUE) )
2881 noway_assert(!"Bad precondition in Compiler::optUnrollLoops()");
2885 /* heuristic - Estimated cost in code size of the unrolled loop */
2893 block = block->bbNext;
2895 /* Visit all the statements in the block */
2897 for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
2899 /* Get the expression and stop if end reached */
2901 GenTreePtr expr = stmt->gtStmtExpr;
2905 /* Calculate gtCostSz */
2906 gtSetStmtInfo(stmt);
2908 /* Update loopCostSz */
2909 loopCostSz += stmt->gtCostSz;
2912 while (block != bottom);
2914 /* Compute the estimated increase in code size for the unrolled loop */
2916 unsigned int fixedLoopCostSz; fixedLoopCostSz = 8;
2918 int unrollCostSz; unrollCostSz = (loopCostSz * totalIter) - (loopCostSz + fixedLoopCostSz);
2920 /* Don't unroll if too much code duplication would result. */
2922 if (unrollCostSz > unrollLimitSz)
2924 /* prevent this loop from being revisited */
2925 optLoopTable[lnum].lpFlags |= LPFLG_DONT_UNROLL;
2929 /* Looks like a good idea to unroll this loop, let's do it! */
2930 CLANG_FORMAT_COMMENT_ANCHOR;
2935 printf("\nUnrolling loop BB%02u", head->bbNext->bbNum);
2936 if (head->bbNext->bbNum != bottom->bbNum)
2937 printf("..BB%02u", bottom->bbNum);
2938 printf(" over V%02u from %u to %u", lvar, lbeg, llim);
2939 printf(" unrollCostSz = %d\n", unrollCostSz);
2944 /* Create the unrolled loop statement list */
2949 for (lval = lbeg; totalIter; totalIter--)
2958 block = block->bbNext; noway_assert(block);
2960 /* Visit all the statements in the block */
2962 for (stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
2964 /* Stop if we've reached the end of the loop */
2966 if (stmt->gtStmtExpr == incr)
2969 /* Clone/substitute the expression */
2971 expr = gtCloneExpr(stmt, 0, lvar, lval);
2973 // cloneExpr doesn't handle everything
2977 optLoopTable[lnum].lpFlags |= LPFLG_DONT_UNROLL;
2981 /* Append the expression to our list */
2984 loopLast->gtNext = expr;
2988 expr->gtPrev = loopLast;
2992 while (block != bottom);
2994 /* update the new value for the unrolled iterator */
3008 noway_assert(!"Unrolling not implemented for this loop iterator");
3012 noway_assert(!"Unknown operator for constant loop iterator");
3017 /* Finish the linked list */
3021 loopList->gtPrev = loopLast;
3022 loopLast->gtNext = 0;
3025 /* Replace the body with the unrolled one */
3031 block = block->bbNext; noway_assert(block);
3032 block->bbTreeList = 0;
3033 block->bbJumpKind = BBJ_NONE;
3034 block->bbFlags &= ~BBF_NEEDS_GCPOLL;
3036 while (block != bottom);
3038 bottom->bbJumpKind = BBJ_NONE;
3039 bottom->bbTreeList = loopList;
3040 bottom->bbFlags &= ~BBF_NEEDS_GCPOLL;
3041 bottom->modifyBBWeight(bottom->bbWeight / BB_LOOP_WEIGHT);
3045 fgMorphStmts(bottom, &dummy, &dummy, &dummy);
3047 /* Update bbRefs and bbPreds */
3048 /* Here head->bbNext is bottom !!! - Replace it */
3050 fgRemoveRefPred(head->bbNext, bottom);
3052 /* Now change the initialization statement in the HEAD to "lvar = lval;"
3053 * (the last value of the iterator in the loop)
3054 * and drop the jump condition since the unrolled loop will always execute */
3056 init->gtOp.gtOp2->gtIntCon.gtIconVal = lval;
3058 /* if the HEAD is a BBJ_COND drop the condition (and make HEAD a BBJ_NONE block) */
3060 if (head->bbJumpKind == BBJ_COND)
3062 phdr = head->bbTreeList; noway_assert(phdr);
3063 test = phdr->gtPrev;
3065 noway_assert(test && (test->gtNext == 0));
3066 noway_assert(test->gtOper == GT_STMT);
3067 noway_assert(test->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
3069 init = test->gtPrev; noway_assert(init && (init->gtNext == test));
3070 noway_assert(init->gtOper == GT_STMT);
3073 phdr->gtPrev = init;
3074 head->bbJumpKind = BBJ_NONE;
3075 head->bbFlags &= ~BBF_NEEDS_GCPOLL;
3077 /* Update bbRefs and bbPreds */
3079 fgRemoveRefPred(head->bbJumpDest, head);
3083 /* the loop must execute */
3084 noway_assert(head->bbJumpKind == BBJ_NONE);
3090 printf("Whole unrolled loop:\n");
3092 GenTreePtr s = loopList;
3096 noway_assert(s->gtOper == GT_STMT);
3107 /* Remember that something has changed */
3111 /* Make sure to update loop table */
3113 /* Use the LPFLG_REMOVED flag and update the bbLoopMask accordingly
3114 * (also make head and bottom NULL - to hit an assert or GPF) */
3116 optLoopTable[lnum].lpFlags |= LPFLG_REMOVED;
3117 optLoopTable[lnum].lpHead =
3118 optLoopTable[lnum].lpBottom = nullptr;
3128 fgDebugCheckBBlist();
3132 #pragma warning(pop)
3135 /*****************************************************************************
3137 * Return non-zero if there is a code path from 'topBB' to 'botBB' that will
3138 * not execute a method call.
3141 bool Compiler::optReachWithoutCall(BasicBlock *topBB,
3144 // TODO-Cleanup: Currently BBF_GC_SAFE_POINT is not set for helper calls,
3145 // as some helper calls are neither interruptible nor hijackable.
3146 // When we can determine this, then we can set BBF_GC_SAFE_POINT for
3147 // those helpers too.
3149 noway_assert(topBB->bbNum <= botBB->bbNum);
3151 // We can always check topBB and botBB for any gc safe points and early out
3153 if ((topBB->bbFlags | botBB->bbFlags) & BBF_GC_SAFE_POINT)
3156 // Otherwise we will need to rely upon the dominator sets
3158 if (!fgDomsComputed)
3160 // return a conservative answer of true when we don't have the dominator sets
3164 BasicBlock *curBB = topBB;
3167 noway_assert(curBB);
3169 // If we added a loop pre-header block then we will
3170 // have a bbNum greater than fgLastBB, and we won't have
3171 // any dominator information about this block, so skip it.
3173 if (curBB->bbNum <= fgLastBB->bbNum)
3175 noway_assert(curBB->bbNum <= botBB->bbNum);
3177 // Does this block contain a gc safe point?
3179 if (curBB->bbFlags & BBF_GC_SAFE_POINT)
3181 // Will this block always execute on the way to botBB ?
3183 // Since we are checking every block in [topBB .. botBB] and we are using
3184 // a lexical definition of a loop.
3185 // (all that we know is that is that botBB is a back-edge to topBB)
3186 // Thus while walking blocks in this range we may encounter some blocks
3187 // that are not really part of the loop, and so we need to perform
3188 // some additional checks:
3190 // We will check that the current 'curBB' is reachable from 'topBB'
3191 // and that it dominates the block containing the back-edge 'botBB'
3192 // When both of these are true then we know that the gcsafe point in 'curBB'
3193 // will be encountered in the loop and we can return false
3195 if (fgDominate(curBB, botBB) && fgReachable(topBB, curBB))
3200 // If we've reached the destination block, then we're done
3207 curBB = curBB->bbNext;
3210 // If we didn't find any blocks that contained a gc safe point and
3211 // also met the fgDominate and fgReachable criteria then we must return true
3216 /*****************************************************************************
3218 * Find the loop termination test at the bottom of the loop
3222 GenTreePtr optFindLoopTermTest(BasicBlock *bottom)
3224 GenTreePtr testt = bottom->bbTreeList;
3226 assert(testt && testt->gtOper == GT_STMT);
3228 GenTreePtr result = testt->gtPrev;
3231 while (testt->gtNext)
3232 testt = testt->gtNext;
3234 assert(testt == result);
3240 /*****************************************************************************
3241 * Optimize "jmp C; do{} C:while(cond);" loops to "if (cond){ do{}while(cond); }"
3244 void Compiler::fgOptWhileLoop(BasicBlock * block)
3246 noway_assert(!opts.MinOpts() && !opts.compDbgCode);
3247 noway_assert(compCodeOpt() != SMALL_CODE);
3250 Optimize while loops into do { } while loop
3251 Our loop hoisting logic requires do { } while loops.
3252 Specifically, we're looking for the following case:
3263 If we find this, and the condition is simple enough, we change
3264 the loop to the following:
3269 // else fall-through
3280 /* Does the BB end with an unconditional jump? */
3282 if (block->bbJumpKind != BBJ_ALWAYS ||
3283 (block->bbFlags & BBF_KEEP_BBJ_ALWAYS)) // It can't be one of the ones we use for our exception magic
3286 // It has to be a forward jump
3287 // TODO-CQ: Check if we can also optimize the backwards jump as well.
3289 if (fgIsForwardBranch(block) == false)
3292 // Get hold of the jump target
3293 BasicBlock * bTest = block->bbJumpDest;
3295 // Does the block consist of 'jtrue(cond) block' ?
3296 if (bTest->bbJumpKind != BBJ_COND)
3299 // bTest must be a backwards jump to block->bbNext
3300 if (bTest->bbJumpDest != block->bbNext)
3303 // Since test is a BBJ_COND it will have a bbNext
3304 noway_assert(bTest->bbNext);
3306 // 'block' must be in the same try region as the condition, since we're going to insert
3307 // a duplicated condition in 'block', and the condition might include exception throwing code.
3308 if (!BasicBlock::sameTryRegion(block, bTest))
3311 // We're going to change 'block' to branch to bTest->bbNext, so that also better be in the
3312 // same try region (or no try region) to avoid generating illegal flow.
3313 BasicBlock* bTestNext = bTest->bbNext;
3314 if (bTestNext->hasTryIndex() && !BasicBlock::sameTryRegion(block, bTestNext))
3317 GenTreePtr condStmt = optFindLoopTermTest(bTest);
3319 // bTest must only contain only a jtrue with no other stmts, we will only clone
3320 // the conditional, so any other statements will not get cloned
3321 // TODO-CQ: consider cloning the whole bTest block as inserting it after block.
3323 if (bTest->bbTreeList != condStmt)
3326 /* Get to the condition node from the statement tree */
3328 noway_assert(condStmt->gtOper == GT_STMT);
3330 GenTreePtr condTree = condStmt->gtStmt.gtStmtExpr;
3331 noway_assert(condTree->gtOper == GT_JTRUE);
3333 condTree = condTree->gtOp.gtOp1;
3335 // The condTree has to be a RelOp comparison
3336 // TODO-CQ: Check if we can also optimize the backwards jump as well.
3338 if (condTree->OperIsCompare() == false)
3341 /* We call gtPrepareCost to measure the cost of duplicating this tree */
3343 gtPrepareCost(condTree);
3344 unsigned estDupCostSz = condTree->gtCostSz;
3346 double loopIterations = (double) BB_LOOP_WEIGHT;
3348 bool allProfileWeightsAreValid = false;
3349 BasicBlock::weight_t weightBlock = block->bbWeight;
3350 BasicBlock::weight_t weightTest = bTest->bbWeight;
3351 BasicBlock::weight_t weightNext = block->bbNext->bbWeight;
3353 // If we have profile data then we calculate the number of time
3354 // the loop will iterate into loopIterations
3355 if (fgIsUsingProfileWeights())
3357 // Only rely upon the profile weight when all three of these blocks
3358 // have good profile weights
3359 if ((block->bbFlags & BBF_PROF_WEIGHT) &&
3360 (bTest->bbFlags & BBF_PROF_WEIGHT) &&
3361 (block->bbNext->bbFlags & BBF_PROF_WEIGHT))
3363 allProfileWeightsAreValid = true;
3365 // If this while loop never iterates then don't bother transforming
3366 if (weightNext == 0)
3369 // with (weighNext > 0) we should also have (weightTest >= weightBlock)
3370 // if the profile weights are all valid.
3372 // weightNext is the number of time this loop iterates
3373 // weightBlock is the number of times that we enter the while loop
3374 // loopIterations is the average number of times that this loop iterates
3376 if (weightTest >= weightBlock)
3378 loopIterations = (double) block->bbNext->bbWeight / (double) block->bbWeight;
3383 unsigned maxDupCostSz = 32;
3385 // optFastCodeOrBlendedLoop(bTest->bbWeight) does not work here as we have not
3386 // set loop weights yet
3387 if ((compCodeOpt() == FAST_CODE) ||
3388 compStressCompile(STRESS_DO_WHILE_LOOPS, 30))
3393 // If this loop iterates a lot then raise the maxDupCost
3394 if (loopIterations >= 12.0)
3396 if (loopIterations >= 96.0)
3399 // If the loop condition has a shared static helper, we really want this loop converted
3400 // as not converting the loop will disable loop hoisting, meaning the shared helper will
3401 // be executed on every loop iteration.
3402 int countOfHelpers = 0;
3403 fgWalkTreePre(&condTree, CountSharedStaticHelper, &countOfHelpers);
3405 if (countOfHelpers > 0 && compCodeOpt() != SMALL_CODE)
3407 maxDupCostSz += 24 * min(countOfHelpers, (int) (loopIterations + 1.5));
3410 // If the compare has too high cost then we don't want to dup
3412 bool costIsTooHigh = (estDupCostSz > maxDupCostSz);
3417 printf("\nDuplication of loop condition [%06u] is %s, because the cost of duplication (%i) is %s than %i,"
3418 "\n loopIterations = %7.3f, countOfHelpers = %d, validProfileWeights = %s\n",
3420 costIsTooHigh ? "not done" : "performed",
3422 costIsTooHigh ? "greater" : "less or equal",
3424 loopIterations, countOfHelpers,
3425 allProfileWeightsAreValid ? "true" : "false");
3434 /* Looks good - duplicate the condition test */
3436 condTree->gtFlags |= GTF_RELOP_ZTT;
3438 condTree = gtCloneExpr(condTree);
3439 gtReverseCond(condTree);
3441 // Make sure clone expr copied the flag
3442 assert(condTree->gtFlags & GTF_RELOP_ZTT);
3444 condTree = gtNewOperNode(GT_JTRUE, TYP_VOID, condTree);
3446 /* Create a statement entry out of the condition and
3447 append the condition test at the end of 'block' */
3449 GenTreePtr copyOfCondStmt = fgInsertStmtAtEnd(block, condTree);
3451 copyOfCondStmt->gtFlags |= GTF_STMT_CMPADD;
3453 #ifdef DEBUGGING_SUPPORT
3454 if (opts.compDbgInfo)
3455 copyOfCondStmt->gtStmt.gtStmtILoffsx = condStmt->gtStmt.gtStmtILoffsx;
3458 // If we have profile data for all blocks and we know that we are cloning the
3459 // bTest block into block and thus changing the control flow from block so
3460 // that it no longer goes directly to bTest anymore, we have to adjust the
3461 // weight of bTest by subtracting out the weight of block.
3463 if (allProfileWeightsAreValid)
3466 // Some additional sanity checks before adjusting the weight of bTest
3468 if ((weightNext > 0) && (weightTest >= weightBlock) && (weightTest != BB_MAX_WEIGHT))
3470 // Get the two edge that flow out of bTest
3471 flowList * edgeToNext = fgGetPredForBlock(bTest->bbNext, bTest);
3472 flowList * edgeToJump = fgGetPredForBlock(bTest->bbJumpDest, bTest);
3474 // Calculate the new weight for block bTest
3476 BasicBlock::weight_t newWeightTest = (weightTest > weightBlock)
3477 ? (weightTest - weightBlock)
3479 bTest->bbWeight = newWeightTest;
3481 if (newWeightTest == BB_ZERO_WEIGHT)
3483 bTest->bbFlags |= BBF_RUN_RARELY;
3484 // All out edge weights are set to zero
3485 edgeToNext->flEdgeWeightMin = BB_ZERO_WEIGHT;
3486 edgeToNext->flEdgeWeightMax = BB_ZERO_WEIGHT;
3487 edgeToJump->flEdgeWeightMin = BB_ZERO_WEIGHT;
3488 edgeToJump->flEdgeWeightMax = BB_ZERO_WEIGHT;
3492 // Update the our edge weights
3493 edgeToNext->flEdgeWeightMin = BB_ZERO_WEIGHT;
3494 edgeToNext->flEdgeWeightMax = min(edgeToNext->flEdgeWeightMax, newWeightTest);
3495 edgeToJump->flEdgeWeightMin = BB_ZERO_WEIGHT;
3496 edgeToJump->flEdgeWeightMax = min(edgeToJump->flEdgeWeightMax, newWeightTest);
3501 /* Change the block to end with a conditional jump */
3503 block->bbJumpKind = BBJ_COND;
3504 block->bbJumpDest = bTest->bbNext;
3506 /* Mark the jump dest block as being a jump target */
3507 block->bbJumpDest->bbFlags |= BBF_JMP_TARGET|BBF_HAS_LABEL;
3509 /* Update bbRefs and bbPreds for 'block->bbNext' 'bTest' and 'bTest->bbNext' */
3511 fgAddRefPred(block->bbNext, block);
3513 fgRemoveRefPred(bTest, block);
3514 fgAddRefPred(bTest->bbNext, block);
3519 printf("\nDuplicating loop condition in BB%02u for loop (BB%02u - BB%02u)",
3520 block->bbNum, block->bbNext->bbNum, bTest->bbNum);
3521 printf("\nEstimated code size expansion is %d\n ", estDupCostSz);
3523 gtDispTree(copyOfCondStmt);
3529 /*****************************************************************************
3531 * Optimize the BasicBlock layout of the method
3534 void Compiler::optOptimizeLayout()
3536 noway_assert(!opts.MinOpts() && !opts.compDbgCode);
3541 printf("*************** In optOptimizeLayout()\n");
3545 /* Check that the flowgraph data (bbNum, bbRefs, bbPreds) is up-to-date */
3546 fgDebugCheckBBlist();
3549 noway_assert(fgModified == false);
3551 for (BasicBlock *block = fgFirstBB; block; block = block->bbNext)
3553 /* Make sure the appropriate fields are initialized */
3555 if (block->bbWeight == BB_ZERO_WEIGHT)
3557 /* Zero weighted block can't have a LOOP_HEAD flag */
3558 noway_assert(block->isLoopHead() == false);
3562 assert(block->bbLoopNum == 0);
3564 if (compCodeOpt() != SMALL_CODE)
3566 /* Optimize "while(cond){}" loops to "cond; do{}while(cond);" */
3568 fgOptWhileLoop(block);
3574 // Recompute the edge weight if we have modified the flow graph in fgOptWhileLoop
3575 fgComputeEdgeWeights();
3578 fgUpdateFlowGraph(true);
3580 fgUpdateFlowGraph();
3583 /*****************************************************************************
3585 * Perform loop inversion, find and classify natural loops
3588 void Compiler::optOptimizeLoops()
3590 noway_assert(!opts.MinOpts() && !opts.compDbgCode);
3594 printf("*************** In optOptimizeLoops()\n");
3597 optSetBlockWeights();
3599 /* Were there any loops in the flow graph? */
3603 /* now that we have dominator information we can find loops */
3605 optFindNaturalLoops();
3607 unsigned loopNum = 0;
3609 /* Iterate over the flow graph, marking all loops */
3611 /* We will use the following terminology:
3612 * top - the first basic block in the loop (i.e. the head of the backward edge)
3613 * bottom - the last block in the loop (i.e. the block from which we jump to the top)
3614 * lastBottom - used when we have multiple back-edges to the same top
3621 for (top = fgFirstBB; top; top = top->bbNext)
3623 BasicBlock * foundBottom = NULL;
3625 for (pred = top->bbPreds; pred; pred = pred->flNext)
3627 /* Is this a loop candidate? - We look for "back edges" */
3629 BasicBlock * bottom = pred->flBlock;
3631 /* is this a backward edge? (from BOTTOM to TOP) */
3633 if (top->bbNum > bottom->bbNum)
3636 /* 'top' also must have the BBF_LOOP_HEAD flag set */
3638 if (top->isLoopHead() == false)
3641 /* We only consider back-edges that are BBJ_COND or BBJ_ALWAYS for loops */
3643 if ((bottom->bbJumpKind != BBJ_COND) &&
3644 (bottom->bbJumpKind != BBJ_ALWAYS) )
3647 /* the top block must be able to reach the bottom block */
3648 if (!fgReachable(top, bottom))
3651 /* Found a new loop, record the longest backedge in foundBottom */
3653 if ((foundBottom == NULL) || (bottom->bbNum > foundBottom->bbNum))
3655 foundBottom = bottom;
3663 /* Mark the loop header as such */
3664 assert(FitsIn<unsigned char>(loopNum));
3665 top->bbLoopNum = (unsigned char) loopNum;
3668 /* Mark all blocks between 'top' and 'bottom' */
3670 optMarkLoopBlocks(top, foundBottom, false);
3673 // We track at most 255 loops
3677 totalUnnatLoopOverflows++;
3684 totalUnnatLoopCount += loopNum;
3692 printf("\nFound a total of %d loops.", loopNum);
3693 printf("\nAfter loop weight marking:\n");
3694 fgDispBasicBlocks();
3699 optLoopsMarked = true;
3703 //------------------------------------------------------------------------
3704 // optDeriveLoopCloningConditions: Derive loop cloning conditions.
3707 // loopNum - the current loop index for which conditions are derived.
3708 // context - data structure where all loop cloning info is kept.
3711 // "false" if conditions cannot be obtained. "true" otherwise.
3712 // The cloning conditions are updated in the "conditions"[loopNum] field
3713 // of the "context" parameter.
3716 // Inspect the loop cloning optimization candidates and populate the conditions necessary
3717 // for each optimization candidate. Checks if the loop stride is "> 0" if the loop
3718 // condition is "less than". If the initializer is "var" init then adds condition
3719 // "var >= 0", and if the loop is var limit then, "var >= 0" and "var <= a.len"
3720 // are added to "context". These conditions are checked in the pre-header block
3721 // and the cloning choice is made.
3724 // Callers should assume AND operation is used i.e., if all conditions are
3725 // true, then take the fast path.
3727 bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext* context)
3729 JITDUMP("------------------------------------------------------------\n");
3730 JITDUMP("Deriving cloning conditions for L%02u\n", loopNum);
3732 LoopDsc* loop = &optLoopTable[loopNum];
3733 ExpandArrayStack<LcOptInfo*>* optInfos = context->GetLoopOptInfo(loopNum);
3735 if (loop->lpTestOper() == GT_LT)
3737 // Stride conditions
3738 if (loop->lpIterConst() <= 0)
3740 JITDUMP("> Stride %d is invalid\n", loop->lpIterConst());
3745 if (loop->lpFlags & LPFLG_CONST_INIT)
3747 // Only allowing const init at this time.
3748 if (loop->lpConstInit < 0)
3750 JITDUMP("> Init %d is invalid\n", loop->lpConstInit);
3754 else if (loop->lpFlags & LPFLG_VAR_INIT)
3757 LC_Condition geZero(GT_GE,
3758 LC_Expr(LC_Ident(loop->lpVarInit, LC_Ident::Var)),
3759 LC_Expr(LC_Ident(0, LC_Ident::Const)));
3760 context->EnsureConditions(loopNum)->Push(geZero);
3764 JITDUMP("> Not variable init\n");
3770 if (loop->lpFlags & LPFLG_CONST_LIMIT)
3772 int limit = loop->lpConstLimit();
3775 JITDUMP("> limit %d is invalid\n", limit);
3778 ident = LC_Ident(limit, LC_Ident::Const);
3780 else if (loop->lpFlags & LPFLG_VAR_LIMIT)
3782 unsigned limitLcl = loop->lpVarLimit();
3783 ident = LC_Ident(limitLcl, LC_Ident::Var);
3785 LC_Condition geZero(GT_GE,
3787 LC_Expr(LC_Ident(0, LC_Ident::Const)));
3789 context->EnsureConditions(loopNum)->Push(geZero);
3791 else if (loop->lpFlags & LPFLG_ARRLEN_LIMIT)
3793 ArrIndex* index = new (getAllocator()) ArrIndex(getAllocator());
3794 if (!loop->lpArrLenLimit(this, index))
3796 JITDUMP("> ArrLen not matching");
3799 ident = LC_Ident(LC_Array(LC_Array::Jagged, index, LC_Array::ArrLen));
3801 // Ensure that this array must be dereference-able, before executing the actual condition.
3802 LC_Array array(LC_Array::Jagged, index, LC_Array::None);
3803 context->EnsureDerefs(loopNum)->Push(array);
3807 JITDUMP("> Undetected limit\n");
3811 for (unsigned i = 0; i < optInfos->Size(); ++i)
3813 LcOptInfo* optInfo = optInfos->GetRef(i);
3814 switch (optInfo->GetOptType())
3816 case LcOptInfo::LcJaggedArray:
3819 LcJaggedArrayOptInfo* arrIndexInfo = optInfo->AsLcJaggedArrayOptInfo();
3820 LC_Array arrLen(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::ArrLen);
3821 LC_Ident arrLenIdent = LC_Ident(arrLen);
3823 LC_Condition cond(GT_LE, LC_Expr(ident), LC_Expr(arrLenIdent));
3824 context->EnsureConditions(loopNum)->Push(cond);
3826 // Ensure that this array must be dereference-able, before executing the actual condition.
3827 LC_Array array(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::None);
3828 context->EnsureDerefs(loopNum)->Push(array);
3831 case LcOptInfo::LcMdArray:
3833 // limit <= mdArrLen
3834 LcMdArrayOptInfo* mdArrInfo = optInfo->AsLcMdArrayOptInfo();
3835 LC_Condition cond(GT_LE,
3839 LC_Array::MdArray, mdArrInfo->GetArrIndexForDim(getAllocator()), mdArrInfo->dim, LC_Array::None)
3841 context->EnsureConditions(loopNum)->Push(cond);
3846 JITDUMP("Unknown opt\n");
3850 JITDUMP("Conditions: (");
3851 DBEXEC(verbose, context->PrintConditions(loopNum));
3858 //------------------------------------------------------------------------------------
3859 // optComputeDerefConditions: Derive loop cloning conditions for dereferencing arrays.
3862 // loopNum - the current loop index for which conditions are derived.
3863 // context - data structure where all loop cloning info is kept.
3866 // "false" if conditions cannot be obtained. "true" otherwise.
3867 // The deref conditions are updated in the "derefConditions"[loopNum] field
3868 // of the "context" parameter.
3870 // Definition of Deref Conditions:
3871 // To be able to check for the loop cloning condition that (limitVar <= a.len)
3872 // we should first be able to dereference "a". i.e., "a" is non-null.
3878 // for (k in 0..n) // Inner most loop is being cloned. Cloning needs to check if
3879 // // (n <= a[i][j].len) and other safer conditions to take the fast path
3882 // Now, we want to deref a[i][j] to invoke length operator on it to perform the cloning fast path check.
3883 // This involves deref of (a), (a[i]), (a[i][j]), therefore, the following should first
3884 // be true to do the deref.
3886 // (a != null) && (i < a.len) && (a[i] != null) && (j < a[i].len) && (a[i][j] != null) --> (1)
3888 // Note the short circuiting AND. Implication: these conditions should be performed in separate
3889 // blocks each of which will branch to slow path if the condition evaluates to false.
3891 // Now, imagine a situation where we have
3892 // a[x][y][k] = 20 and a[i][j][k] = 0
3893 // also in the inner most loop where x, y are parameters, then our conditions will have
3897 // in addition to the above conditions (1) to get rid of bounds check on index 'k'
3899 // But these conditions can be checked together with conditions
3900 // (i < a.len) without a need for a separate block. In summary, the conditions will be:
3903 // ((i < a.len) & (x < a.len)) && <-- Note the bitwise AND here.
3904 // (a[i] != null & a[x] != null) && <-- Note the bitwise AND here.
3905 // (j < a[i].len & y < a[x].len) && <-- Note the bitwise AND here.
3906 // (a[i][j] != null & a[x][y] != null) <-- Note the bitwise AND here.
3908 // This naturally yields a tree style pattern, where the nodes of the tree are
3909 // the array and indices respectively.
3925 // Notice that the variables in the same levels can have their conditions combined in the
3926 // same block with a bitwise AND. Whereas, the conditions in consecutive levels will be
3927 // combined with a short-circuiting AND (i.e., different basic blocks).
3930 // Construct a tree of array indices and the array which will generate the optimal
3931 // conditions for loop cloning.
3933 // a[i][j][k], b[i] and a[i][y][k] are the occurrences in the loop. Then, the tree should be:
3948 // In this method, we will construct such a tree by descending depth first into the array
3949 // index operation and forming a tree structure as we encounter the array or the index variables.
3951 // This tree structure will then be used to generate conditions like below:
3952 // (a != null) & (b != null) && // from the first level of the tree.
3954 // (i < a.len) & (i < b.len) && // from the second level of the tree. Levels can be combined.
3955 // (a[i] != null) & (b[i] != null) && // from the second level of the tree.
3957 // (j < a[i].len) & (y < a[i].len) && // from the third level.
3958 // (a[i][j] != null) & (a[i][y] != null) && // from the third level.
3963 bool Compiler::optComputeDerefConditions(unsigned loopNum, LoopCloneContext* context)
3965 ExpandArrayStack<LC_Deref*> nodes(getAllocator());
3968 // Get the dereference-able arrays.
3969 ExpandArrayStack<LC_Array>* deref = context->EnsureDerefs(loopNum);
3971 // For each array in the dereference list, construct a tree,
3972 // where the nodes are array and index variables and an edge 'u-v'
3973 // exists if a node 'v' indexes node 'u' directly as in u[v] or an edge
3974 // 'u-v-w' transitively if u[v][w] occurs.
3975 for (unsigned i = 0; i < deref->Size(); ++i)
3977 LC_Array& array = (*deref)[i];
3979 // First populate the array base variable.
3980 LC_Deref* node = LC_Deref::Find(&nodes, array.arrIndex->arrLcl);
3981 if (node == nullptr)
3983 node = new (getAllocator()) LC_Deref(array, 0 /*level*/);
3987 // For each dimension (level) for the array, populate the tree with the variable
3988 // from that dimension.
3989 unsigned rank = (unsigned) array.GetDimRank();
3990 for (unsigned i = 0; i < rank; ++i)
3992 node->EnsureChildren(getAllocator());
3993 LC_Deref* tmp = node->Find(array.arrIndex->indLcls[i]);
3996 tmp = new (getAllocator()) LC_Deref(array, node->level + 1);
3997 node->children->Push(tmp);
4000 // Descend one level down.
4004 // Keep the maxRank of all array dereferences.
4005 maxRank = max((int) rank, maxRank);
4011 for (unsigned i = 0; i < nodes.Size(); ++i)
4013 if (i != 0) printf(",");
4025 // First level will always yield the null-check, since it is made of the array base variables.
4026 // All other levels (dimensions) will yield two conditions ex: (i < a.length && a[i] != null)
4027 // So add 1 after rank * 2.
4028 unsigned condBlocks = (unsigned) maxRank * 2 + 1;
4030 // Heuristic to not create too many blocks;
4036 // Derive conditions into an 'array of level x array of conditions' i.e., levelCond[levels][conds]
4037 ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* levelCond = context->EnsureBlockConditions(loopNum, condBlocks);
4038 for (unsigned i = 0; i < nodes.Size(); ++i)
4040 nodes[i]->DeriveLevelConditions(levelCond);
4043 DBEXEC(verbose, context->PrintBlockConditions(loopNum));
4048 //----------------------------------------------------------------------------
4049 // optDebugLogLoopCloning: Insert a call to jithelper that prints a message.
4052 // block - the block in which the helper call needs to be inserted.
4053 // insertBefore - the tree before which the helper call will be inserted.
// optDebugLogLoopCloning: insert a call to the CORINFO_HELP_DEBUG_LOG_LOOP_CLONING
// JIT helper before 'insertBefore' in 'block', so that executions of cloned-loop
// paths are logged at runtime. No-op when the JitDebugLogLoopCloning config flag is 0.
4055 void Compiler::optDebugLogLoopCloning(BasicBlock* block, GenTreePtr insertBefore)
4057 if (JitConfig.JitDebugLogLoopCloning() == 0)
// Build the helper call, wrap it in a statement, splice it in, and morph it in place.
4061 GenTreePtr logCall = gtNewHelperCallNode(CORINFO_HELP_DEBUG_LOG_LOOP_CLONING, TYP_VOID);
4062 GenTreePtr stmt = fgNewStmtFromTree(logCall);
4063 fgInsertStmtBefore(block, insertBefore, stmt);
4064 fgMorphBlockStmt(block, stmt DEBUGARG("Debug log loop cloning"));
4068 //------------------------------------------------------------------------
4069 // optPerformStaticOptimizations: Perform the optimizations for the optimization
4070 // candidates gathered during the cloning phase.
4073 // loopNum - the current loop index for which the optimizations are performed.
4074 // context - data structure where all loop cloning info is kept.
4075 // dynamicPath - If true, the optimization is performed in the fast path among the
4076 // cloned loops. If false, it means this is the only path (i.e.,
4077 // there is no slow path.)
4080 // Perform the optimizations on the fast path i.e., the path in which the
4081 // optimization candidates were collected at the time of identifying them.
4082 // The candidates store all the information necessary (the tree/stmt/block
4083 // they are from) to perform the optimization.
4086 // The unoptimized path is either already cloned when this method is called or
4087 // there is no unoptimized path (got eliminated statically.) So this method
4088 // performs the optimizations assuming that the path in which the candidates
4089 // were collected is the fast path in which the optimizations will be performed.
// optPerformStaticOptimizations: apply the optimizations recorded for loop 'loopNum'
// in 'context' directly on the fast path. For jagged-array candidates the recorded
// bounds check is removed; MD-array candidates are not yet implemented (see TODO).
4091 void Compiler::optPerformStaticOptimizations(unsigned loopNum, LoopCloneContext* context DEBUGARG(bool dynamicPath))
4093 ExpandArrayStack<LcOptInfo*>* optInfos = context->GetLoopOptInfo(loopNum);
4094 for (unsigned i = 0; i < optInfos->Size(); ++i)
4096 LcOptInfo* optInfo = optInfos->GetRef(i);
4097 switch (optInfo->GetOptType())
4099 case LcOptInfo::LcJaggedArray:
4101 LcJaggedArrayOptInfo* arrIndexInfo = optInfo->AsLcJaggedArrayOptInfo();
// optRemoveRangeCheck needs compCurBB to be the block containing the check.
4102 compCurBB = arrIndexInfo->arrIndex.useBlock;
4103 optRemoveRangeCheck(
4104 arrIndexInfo->arrIndex.bndsChks[arrIndexInfo->dim],
4105 arrIndexInfo->stmt, true, GTF_ASG, true);
// Under dynamicPath (debug), log that the fast (cloned) path was optimized.
4106 DBEXEC(dynamicPath, optDebugLogLoopCloning(arrIndexInfo->arrIndex.useBlock, arrIndexInfo->stmt));
4109 case LcOptInfo::LcMdArray:
4110 // TODO-CQ: CLONE: Implement.
4119 //----------------------------------------------------------------------------
4120 // optCanCloneLoops: Use the environment flag to determine whether loop
4121 // cloning is allowed to be performed.
4124 // Returns true in debug builds if COMPlus_JitCloneLoops flag is set.
4125 // Disabled for retail for now.
// optCanCloneLoops: return whether loop cloning is enabled. Defaults to enabled;
// the JitCloneLoops config value can override this.
4127 bool Compiler::optCanCloneLoops()
4129 // Enabled for retail builds now.
4130 unsigned cloneLoopsFlag = 1;
4132 cloneLoopsFlag = JitConfig.JitCloneLoops();
4134 return (cloneLoopsFlag != 0);
4138 //----------------------------------------------------------------------------
4139 // optIsLoopClonable: Determine whether this loop can be cloned.
4142 // loopInd loop index which needs to be checked if it can be cloned.
4145 // Returns true if the loop can be cloned. If it returns false
4146 // prints a message in debug as why the loop can't be cloned.
// optIsLoopClonable: check whether loop 'loopInd' may be cloned. Rejects loops with
// embedded EH (try begins), handler-start entry blocks, head/entry in different EH
// regions, a handler start immediately after the bottom, or too many return blocks.
// On success, pre-commits the loop's return blocks by bumping fgReturnCount.
4148 bool Compiler::optIsLoopClonable(unsigned loopInd)
4150 // First, for now, make sure the loop doesn't have any embedded exception handling -- I don't want to tackle
4151 // inserting new EH regions in the exception table yet.
4152 BasicBlock* stopAt = optLoopTable[loopInd].lpBottom->bbNext;
4153 unsigned loopRetCount = 0;
4154 for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != stopAt; blk = blk->bbNext)
4156 if (blk->bbJumpKind == BBJ_RETURN) loopRetCount++;
4157 if (bbIsTryBeg(blk))
4159 JITDUMP("Loop cloning: rejecting loop %d in %s, because it has a try begin.\n",
4160 loopInd, info.compFullName);
4165 // Is the entry block a handler or filter start? If so, then if we cloned, we could create a jump
4166 // into the middle of a handler (to go to the cloned copy.) Reject.
4167 if (bbIsHandlerBeg(optLoopTable[loopInd].lpEntry))
4169 JITDUMP("Loop cloning: rejecting loop because entry block is a handler start.\n");
4173 // If the head and entry are in different EH regions, reject.
4174 if (!BasicBlock::sameEHRegion(optLoopTable[loopInd].lpHead, optLoopTable[loopInd].lpEntry))
4176 JITDUMP("Loop cloning: rejecting loop because head and entry blocks are in different EH regions.\n");
4180 // Is the first block after the last block of the loop a handler or filter start?
4181 // Usually, we create a dummy block after the original loop, to skip over the loop clone
4182 // and go to where the original loop did. That raises problems when we don't actually go to
4183 // that block; this is one of those cases. This could be fixed fairly easily; for example,
4184 // we could add a dummy nop block after the (cloned) loop bottom, in the same handler scope as the
4185 // loop. This is just a corner to cut to get this working faster.
4186 BasicBlock* bbAfterLoop = optLoopTable[loopInd].lpBottom->bbNext;
4187 if (bbAfterLoop != nullptr && bbIsHandlerBeg(bbAfterLoop))
4189 JITDUMP("Loop cloning: rejecting loop because next block after bottom is a handler start.\n");
4193 // We've previously made a decision whether to have separate return epilogs, or branch to one.
4194 // There's a GCInfo limitation in the x86 case, so that there can be no more than 4 separate epilogs.
4195 // (I thought this was x86-specific, but it's not if-d. On other architectures, the decision should be made as a
4196 // heuristic tradeoff; perhaps we're just choosing to live with 4 as the limit.)
4197 if (fgReturnCount + loopRetCount > 4)
4199 JITDUMP("Loop cloning: rejecting loop because it has %d returns; if added to previously-existing %d returns, would exceed the limit of 4.\n", loopRetCount, fgReturnCount);
4203 // Otherwise, we're going to add those return blocks.
// NOTE(review): fgReturnCount is mutated by this query function; callers appear to
// rely on this side effect when the loop is subsequently cloned -- confirm.
4204 fgReturnCount += loopRetCount;
4209 /*****************************************************************************
4211 * Identify loop cloning opportunities, derive loop cloning conditions,
4212 * perform loop cloning, use the derived conditions to choose which
// optCloneLoops: driver for the loop-cloning phase. Gathers candidates per loop,
// derives cloning and dereference conditions, statically evaluates them (cancelling
// or optimizing in place when the outcome is known), then clones the remaining loops
// guarded by runtime condition blocks.
4215 void Compiler::optCloneLoops()
4217 JITDUMP("\n*************** In optCloneLoops()\n");
4218 if (optLoopCount == 0 || !optCanCloneLoops())
4226 printf("Blocks/Trees at start of phase\n");
4227 fgDispBasicBlocks(true);
4231 LoopCloneContext context(optLoopCount, getAllocator());
4233 // Obtain array optimization candidates in the context.
4234 optObtainLoopCloningOpts(&context);
4236 // For each loop, derive cloning conditions for the optimization candidates.
4237 for (unsigned i = 0; i < optLoopCount; ++i)
4239 ExpandArrayStack<LcOptInfo*>* optInfos = context.GetLoopOptInfo(i);
4240 if (optInfos == nullptr)
4245 if (!optDeriveLoopCloningConditions(i, &context) || !optComputeDerefConditions(i, &context))
4247 JITDUMP("> Conditions could not be obtained\n");
4248 context.CancelLoopOptInfo(i);
// Statically evaluate the derived conditions where possible.
4252 bool allTrue = false;
4253 bool anyFalse = false;
4254 context.EvaluateConditions(i, &allTrue, &anyFalse DEBUGARG(verbose));
// Some condition is statically false: cloning can never help; cancel.
4257 context.CancelLoopOptInfo(i);
4261 // Perform static optimizations on the fast path since we always
4262 // have to take the cloned path.
4263 optPerformStaticOptimizations(i, &context DEBUGARG(false));
4265 // No need to clone.
4266 context.CancelLoopOptInfo(i);
4273 // The code in this #if has been useful in debugging loop cloning issues, by
4274 // enabling selective enablement of the loop cloning optimization according to
// method-hash range, controlled via environment variables (debug-only aid).
4277 unsigned methHash = info.compMethodHash();
4278 char* lostr = getenv("loopclonehashlo");
4279 unsigned methHashLo = 0;
4282 sscanf_s(lostr, "%x", &methHashLo);
4283 // methHashLo = (unsigned(atoi(lostr)) << 2); // So we don't have to use negative numbers.
4285 char* histr = getenv("loopclonehashhi");
4286 unsigned methHashHi = UINT32_MAX;
4289 sscanf_s(histr, "%x", &methHashHi);
4290 // methHashHi = (unsigned(atoi(histr)) << 2); // So we don't have to use negative numbers.
4292 if (methHash < methHashLo || methHash > methHashHi)
// Clone every loop that still has candidates, after optimizing its conditions.
4297 for (unsigned i = 0; i < optLoopCount; ++i)
4299 if (context.GetLoopOptInfo(i) != nullptr)
4302 context.OptimizeConditions(i DEBUGARG(verbose));
4303 context.OptimizeBlockConditions(i DEBUGARG(verbose));
4304 optCloneLoop(i, &context);
4311 printf("\nAfter loop cloning:\n");
4312 fgDispBasicBlocks(/*dumpTrees*/true);
// optCloneLoop: clone loop 'loopInd'. Ensures a unique head, duplicates the loop
// body blocks, applies the static optimizations to the original (fast) path, and
// wires condition blocks in the head that choose between the fast path and the
// cloned (slow) path at runtime.
4317 void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context)
4319 assert(loopInd < optLoopCount);
4321 JITDUMP("\nCloning loop %d: [h: %d, f: %d, t: %d, e: %d, b: %d].\n",
4323 optLoopTable[loopInd].lpHead->bbNum,
4324 optLoopTable[loopInd].lpFirst->bbNum,
4325 optLoopTable[loopInd].lpTop->bbNum,
4326 optLoopTable[loopInd].lpEntry->bbNum,
4327 optLoopTable[loopInd].lpBottom->bbNum);
4329 // Determine the depth of the loop, so we can properly weight blocks added (outside the cloned loop blocks).
4330 unsigned depth = optLoopDepth(loopInd);
4331 unsigned ambientWeight = 1;
4332 for (unsigned j = 0; j < depth; j++)
4334 unsigned lastWeight = ambientWeight;
4335 ambientWeight *= BB_LOOP_WEIGHT;
4336 // If the multiplication overflowed, stick at max.
4337 // (Strictly speaking, a multiplication could overflow and still have a result
4338 // that is >= lastWeight...but if so, the original weight must be pretty large,
4339 // and it got bigger, so that's OK.)
4340 if (ambientWeight < lastWeight)
4342 ambientWeight = BB_MAX_WEIGHT;
4347 // If we're in a non-natural loop, the ambient weight might be higher than we computed above.
4348 // Be safe by taking the max with the head block's weight.
4349 ambientWeight = max(ambientWeight, optLoopTable[loopInd].lpHead->bbWeight);
4351 // This is the containing loop, if any -- to label any blocks we create that are outside
4352 // the loop being cloned.
4353 unsigned char ambientLoop = optLoopTable[loopInd].lpParent;
4355 // First, make sure that the loop has a unique header block, creating an empty one if necessary.
4356 optEnsureUniqueHead(loopInd, ambientWeight);
4358 // We're going to make
4370 // H2--> E (Optional; if E == T == F, let H fall through to F/T/E)
4382 BasicBlock* h = optLoopTable[loopInd].lpHead;
4383 if (h->bbJumpKind != BBJ_NONE && h->bbJumpKind != BBJ_ALWAYS)
4385 // Make a new block to be the unique entry to the loop.
4386 assert(h->bbJumpKind == BBJ_COND && h->bbNext == optLoopTable[loopInd].lpEntry);
4387 BasicBlock* newH = fgNewBBafter(BBJ_NONE,
4389 /*extendRegion*/true);
4390 newH->bbWeight = (newH->isRunRarely() ? 0 : ambientWeight);
4391 BlockSetOps::Assign(this, newH->bbReach, h->bbReach);
4392 // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
4393 newH->bbNatLoopNum = ambientLoop;
4395 optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h);
4398 // First, make X2 after B, if necessary. (Not necessary if b is a BBJ_ALWAYS.)
4399 // "newPred" will be the predecessor of the blocks of the cloned loop.
4400 BasicBlock* b = optLoopTable[loopInd].lpBottom;
4401 BasicBlock* newPred = b;
4402 if (b->bbJumpKind != BBJ_ALWAYS)
4404 BasicBlock* x = b->bbNext;
4407 BasicBlock* x2 = fgNewBBafter(BBJ_ALWAYS, b, /*extendRegion*/true);
4408 x2->bbWeight = (x2->isRunRarely() ? 0 : ambientWeight);
4410 // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
4411 x2->bbNatLoopNum = ambientLoop;
4414 BlockSetOps::Assign(this, x2->bbReach, h->bbReach);
4419 // Now we'll make "h2", after "h" to go to "e" -- unless the loop is a do-while,
4420 // so that "h" already falls through to "e" (e == t == f).
4421 BasicBlock* h2 = nullptr;
4422 if (optLoopTable[loopInd].lpHead->bbNext != optLoopTable[loopInd].lpEntry)
// NOTE(review): this inner declaration shadows the outer 'h2' declared at 4421,
// which therefore remains nullptr; the assert at 4483 compares h->bbNext against
// the outer (nullptr) 'h2'. Looks unintended -- confirm and consider assigning
// to the outer variable instead of redeclaring.
4424 BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS,
4425 optLoopTable[loopInd].lpHead,
4426 /*extendRegion*/true);
4427 h2->bbWeight = (h2->isRunRarely() ? 0 : ambientWeight);
4429 // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
4430 h2->bbNatLoopNum = ambientLoop;
4432 h2->bbJumpDest = optLoopTable[loopInd].lpEntry;
4433 optUpdateLoopHead(loopInd,optLoopTable[loopInd].lpHead, h2);
4436 // Now we'll clone the blocks of the loop body.
4437 BasicBlock* newFirst = nullptr;
4438 BasicBlock* newBot = nullptr;
4440 BlockToBlockMap* blockMap = new (getAllocator()) BlockToBlockMap(getAllocator());
4441 for (BasicBlock* blk = optLoopTable[loopInd].lpFirst;
4442 blk != optLoopTable[loopInd].lpBottom->bbNext;
4445 BasicBlock* newBlk = fgNewBBafter(blk->bbJumpKind,
4447 /*extendRegion*/true);
4449 BasicBlock::CloneBlockState(this, newBlk, blk);
4450 // TODO-Cleanup: The above clones the bbNatLoopNum, which is incorrect. Eventually, we should probably insert
4451 // the cloned loop in the loop table. For now, however, we'll just make these blocks be part of the surrounding
4452 // loop, if one exists -- the parent of the loop we're cloning.
4453 newBlk->bbNatLoopNum = optLoopTable[loopInd].lpParent;
4455 if (newFirst == nullptr) newFirst = newBlk;
4456 newBot = newBlk; // Continually overwrite to make sure we get the last one.
4458 blockMap->Set(blk, newBlk);
4461 // Perform the static optimizations on the fast path.
4462 optPerformStaticOptimizations(loopInd, context DEBUGARG(true));
4464 // Now go through the new blocks, remapping their jump targets within the loop.
4465 for (BasicBlock* blk = optLoopTable[loopInd].lpFirst;
4466 blk != optLoopTable[loopInd].lpBottom->bbNext;
4470 BasicBlock* newblk = nullptr;
4471 bool b = blockMap->Lookup(blk, &newblk);
4472 assert(b && newblk != nullptr);
4474 assert(blk->bbJumpKind == newblk->bbJumpKind);
4476 // First copy the jump destination(s) from "blk".
4477 optCopyBlkDest(blk, newblk);
4479 // Now redirect the new block according to "blockMap".
4480 optRedirectBlock(newblk, blockMap);
4483 assert((h->bbJumpKind == BBJ_NONE && (h->bbNext == h2 || h->bbNext == optLoopTable[loopInd].lpEntry))
4484 || (h->bbJumpKind == BBJ_ALWAYS));
4486 // If all the conditions are true, go to E2.
4487 BasicBlock* e2 = nullptr;
4488 bool foundIt = blockMap->Lookup(optLoopTable[loopInd].lpEntry, &e2);
// The head becomes the first condition block of the fast/slow choice.
4490 h->bbJumpKind = BBJ_COND;
4492 // We will create the following structure
4494 // cond0 (in h) -?> cond1
4495 // slow --> e2 (slow) always
4502 // We should always have block conditions, at the minimum, the array should be deref-able
4503 assert(context->HasBlockConditions(loopInd));
4505 // Create a unique header for the slow path.
4506 BasicBlock* slowHead = fgNewBBafter(BBJ_ALWAYS, h, true);
4507 slowHead->bbWeight = (h->isRunRarely() ? 0 : ambientWeight);
4508 slowHead->bbNatLoopNum = ambientLoop;
4509 slowHead->bbJumpDest = e2;
4511 BasicBlock* condLast = optInsertLoopChoiceConditions(context, loopInd, h, slowHead);
4512 condLast->bbJumpDest = slowHead;
4514 // If h2 is present it is already the head or replace 'h' by 'condLast'.
4517 optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, condLast);
4519 assert(foundIt && e2 != NULL);
4521 fgUpdateChangedFlowGraph();
4524 //--------------------------------------------------------------------------------------------------
4525 // optInsertLoopChoiceConditions - Insert the loop conditions for a loop between loop head and entry
4528 // context loop cloning context variable
4529 // loopNum the loop index
4530 // head loop head for "loopNum"
4531 // slowHead the slow path loop head
4537 // Create the following structure.
4539 // Note below that the cond0 is inverted in head i.e., if true jump to cond1. This is because
4540 // condn cannot jtrue to loop head h2. It has to be from a direct pred block.
4542 // cond0 (in h) -?> cond1
4543 // slowHead --> e2 (slowHead) always
4544 // !cond1 -?> slowHead
4545 // !cond2 -?> slowHead
4547 // !condn -?> slowHead
4550 // Insert condition 0 in 'h' and create other condition blocks and insert conditions in them.
// optInsertLoopChoiceConditions: materialize the per-level block conditions for
// 'loopNum' as a chain of BBJ_COND blocks between 'head' and the loop entry.
// The condition placed in 'head' itself is inverted (it must jump forward, not
// into the loop); failing conditions in later blocks jump to 'slowHead'.
// Returns the last condition block created (per the header comment above).
4552 BasicBlock* Compiler::optInsertLoopChoiceConditions(LoopCloneContext* context, unsigned loopNum, BasicBlock* head, BasicBlock* slowHead)
4554 JITDUMP("Inserting loop cloning conditions\n");
4555 assert(context->HasBlockConditions(loopNum));
4557 BasicBlock* curCond = head;
4558 ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* levelCond = context->GetBlockConditions(loopNum);
4559 for (unsigned i = 0; i < levelCond->Size(); ++i)
4561 bool isHeaderBlock = (curCond == head);
4563 // Flip the condition if header block.
4564 context->CondToStmtInBlock(this, *((*levelCond)[i]), curCond, isHeaderBlock);
4566 // Create each condition block ensuring wiring between them.
4567 BasicBlock* tmp = fgNewBBafter(BBJ_COND, isHeaderBlock ? slowHead : curCond, true);
4568 curCond->bbJumpDest = isHeaderBlock ? tmp : slowHead;
// New condition blocks inherit the head's weight and natural-loop number.
4571 curCond->inheritWeight(head);
4572 curCond->bbNatLoopNum = head->bbNatLoopNum;
4573 JITDUMP("Created new block %02d for new level\n", curCond->bbNum);
4576 // Finally insert cloning conditions after all deref conditions have been inserted.
4577 context->CondToStmtInBlock(this, *(context->GetConditions(loopNum)), curCond, false);
// optEnsureUniqueHead: make sure loop 'loopInd' has a unique header block that
// dominates its entry. If the current head already dominates the entry, nothing
// to do; otherwise create an empty block 'h2' just before the top, splice it in,
// and redirect all out-of-loop predecessors of the entry to it.
4581 void Compiler::optEnsureUniqueHead(unsigned loopInd, unsigned ambientWeight)
4583 BasicBlock* h = optLoopTable[loopInd].lpHead;
4584 BasicBlock* t = optLoopTable[loopInd].lpTop;
4585 BasicBlock* e = optLoopTable[loopInd].lpEntry;
4586 BasicBlock* b = optLoopTable[loopInd].lpBottom;
4588 // If "h" dominates the entry block, then it is the unique header.
4589 if (fgDominate(h, e)) return;
4591 // Otherwise, create a new empty header block, make it the pred of the entry block,
4592 // and redirect the preds of the entry block to go to this.
4594 BasicBlock* beforeTop = t->bbPrev;
4595 // Make sure that the new block is in the same region as the loop.
4596 // (We will only create loops that are entirely within a region.)
4597 BasicBlock * h2 = fgNewBBafter(BBJ_ALWAYS, beforeTop, true);
4598 // This is in the containing loop.
4599 h2->bbNatLoopNum = optLoopTable[loopInd].lpParent;
4600 h2->bbWeight = (h2->isRunRarely() ? 0 : ambientWeight);
4602 // We don't care where it was put; splice it between beforeTop and top.
4603 if (beforeTop->bbNext != h2)
4605 h2->bbPrev->setNext(h2->bbNext); // Splice h2 out.
4606 beforeTop->setNext(h2); // Splice h2 in, between beforeTop and t.
// If h2 doesn't fall through to the entry, it must jump to it.
4610 if (h2->bbNext != e)
4612 h2->bbJumpKind = BBJ_ALWAYS;
4615 BlockSetOps::Assign(this, h2->bbReach, e->bbReach);
4617 // Redirect paths from preds of "e" to go to "h2" instead of "e".
4618 BlockToBlockMap* blockMap = new (getAllocator()) BlockToBlockMap(getAllocator());
4619 blockMap->Set(e, h2);
4621 for (flowList* predEntry = e->bbPreds; predEntry; predEntry = predEntry->flNext)
4623 BasicBlock* predBlock = predEntry->flBlock;
4625 // Skip if predBlock is in the loop.
4626 if (t->bbNum <= predBlock->bbNum && predBlock->bbNum <= b->bbNum) continue;
4627 optRedirectBlock(predBlock, blockMap);
4630 optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h2);
4633 /*****************************************************************************
4635 * Determine the kind of interference for the call.
// optCallInterf: classify what kinds of indirections a call may interfere with.
// Non-helper calls kill everything; specific set-field/array-store helpers kill
// ref or scalar (or all) indirections; all other helpers kill nothing.
4639 Compiler::callInterf Compiler::optCallInterf(GenTreePtr call)
4641 assert(call->gtOper == GT_CALL);
4643 // if not a helper, kills everything
4644 if (call->gtCall.gtCallType != CT_HELPER)
4647 // setfield and array address store kill all indirections
4648 switch (eeGetHelperNum(call->gtCall.gtCallMethHnd))
4650 case CORINFO_HELP_ASSIGN_REF: // Not strictly needed as we don't make a GT_CALL with this
4651 case CORINFO_HELP_CHECKED_ASSIGN_REF: // Not strictly needed as we don't make a GT_CALL with this
4652 case CORINFO_HELP_ASSIGN_BYREF: // Not strictly needed as we don't make a GT_CALL with this
4653 case CORINFO_HELP_SETFIELDOBJ:
4654 case CORINFO_HELP_ARRADDR_ST:
4656 return CALLINT_REF_INDIRS;
4658 case CORINFO_HELP_SETFIELDFLOAT:
4659 case CORINFO_HELP_SETFIELDDOUBLE:
4660 case CORINFO_HELP_SETFIELD8:
4661 case CORINFO_HELP_SETFIELD16:
4662 case CORINFO_HELP_SETFIELD32:
4663 case CORINFO_HELP_SETFIELD64:
4665 return CALLINT_SCL_INDIRS;
4667 case CORINFO_HELP_ASSIGN_STRUCT: // Not strictly needed as we don't use this in Jit32
4668 case CORINFO_HELP_MEMSET: // Not strictly needed as we don't make a GT_CALL with this
4669 case CORINFO_HELP_MEMCPY: // Not strictly needed as we don't make a GT_CALL with this
4670 case CORINFO_HELP_SETFIELDSTRUCT:
4672 return CALLINT_ALL_INDIRS;
4678 // other helpers kill nothing
4679 return CALLINT_NONE;
4682 /*****************************************************************************
4684 * See if the given tree can be computed in the given precision (which must
4685 * be smaller than the type of the tree for this to make sense). If 'doit'
4686 * is false, we merely check to see whether narrowing is possible; if we
4687 * get called with 'doit' being true, we actually perform the narrowing.
// optNarrowTree: determine whether 'tree' (of integral source type 'srct') can be
// computed in the narrower integral type 'dstt'; when 'doit' is true, actually
// rewrite the tree to the narrow type and install 'vnpNarrow' as its value numbers.
// Returns false when narrowing is not possible; asserts doit==false on such paths
// so a mutating call never partially rewrites.
4690 bool Compiler::optNarrowTree(GenTreePtr tree,
4693 ValueNumPair vnpNarrow,
4700 noway_assert(genActualType(tree->gtType) == genActualType(srct));
4702 /* Assume we're only handling integer types */
4703 noway_assert(varTypeIsIntegral(srct));
4704 noway_assert(varTypeIsIntegral(dstt));
4706 unsigned srcSize = genTypeSize(srct);
4707 unsigned dstSize = genTypeSize(dstt);
4709 /* dstt must be smaller than srct to narrow */
4710 if (dstSize >= srcSize)
4713 /* Figure out what kind of a node we have */
4714 oper = tree->OperGet();
4715 kind = tree->OperKind();
4717 if (kind & GTK_ASGOP)
4719 noway_assert(doit == false);
4723 ValueNumPair NoVNPair = ValueNumPair();
4725 if (kind & GTK_LEAF)
4729 /* Constants can usually be narrowed by changing their value */
4730 CLANG_FORMAT_COMMENT_ANCHOR;
4732 #ifndef _TARGET_64BIT_
// 32-bit target: long constants are GT_CNS_LNG; check the value fits the mask.
4737 lval = tree->gtIntConCommon.LngValue();
4742 case TYP_BYTE : lmask = 0x0000007F; break;
4744 case TYP_UBYTE: lmask = 0x000000FF; break;
4745 case TYP_SHORT: lmask = 0x00007FFF; break;
4746 case TYP_CHAR : lmask = 0x0000FFFF; break;
4747 case TYP_INT : lmask = 0x7FFFFFFF; break;
4748 case TYP_UINT : lmask = 0xFFFFFFFF; break;
4750 default: return false;
4753 if ((lval & lmask) != lval)
// Value fits: retype the long constant as an int constant.
4758 tree->ChangeOperConst (GT_CNS_INT);
4759 tree->gtType = TYP_INT;
4760 tree->gtIntCon.gtIconVal = (int) lval;
4761 if (vnStore != nullptr)
4763 fgValueNumberTreeConst(tree);
// GT_CNS_INT case: check the int-sized constant against the destination mask.
4772 ssize_t ival; ival = tree->gtIntCon.gtIconVal;
4773 ssize_t imask; imask = 0;
4777 case TYP_BYTE : imask = 0x0000007F; break;
4779 case TYP_UBYTE: imask = 0x000000FF; break;
4780 case TYP_SHORT: imask = 0x00007FFF; break;
4781 case TYP_CHAR : imask = 0x0000FFFF; break;
4782 #ifdef _TARGET_64BIT_
4783 case TYP_INT : imask = 0x7FFFFFFF; break;
4784 case TYP_UINT : imask = 0xFFFFFFFF; break;
4785 #endif // _TARGET_64BIT_
4786 default: return false;
4789 if ((ival & imask) != ival)
4792 #ifdef _TARGET_64BIT_
4795 tree->gtType = TYP_INT;
4796 tree->gtIntCon.gtIconVal = (int) ival;
4797 if (vnStore != nullptr)
4799 fgValueNumberTreeConst(tree);
4802 #endif // _TARGET_64BIT_
4806 /* Operands that are in memory can usually be narrowed
4807 simply by changing their gtType */
4810 /* We only allow narrowing long -> int for a GT_LCL_VAR */
4811 if (dstSize == sizeof(int))
4822 noway_assert(doit == false);
4827 if (kind & (GTK_BINOP|GTK_UNOP))
4829 GenTreePtr op1; op1 = tree->gtOp.gtOp1;
4830 GenTreePtr op2; op2 = tree->gtOp.gtOp2;
4832 switch (tree->gtOper)
4835 noway_assert(genActualType(tree->gtType) == genActualType(op2->gtType));
4837 // Is op2 a small constant that can be narrowed into dstt?
4838 // if so the result of the GT_AND will also fit into 'dstt' and can be narrowed
4839 if ((op2->gtOper == GT_CNS_INT) && optNarrowTree(op2, srct, dstt, NoVNPair, false))
4841 // We will change the type of the tree and narrow op2
4845 tree->gtType = genActualType(dstt);
4846 tree->SetVNs(vnpNarrow);
4848 optNarrowTree(op2, srct, dstt, NoVNPair, true);
4849 // We may also need to cast away the upper bits of op1
4852 assert(tree->gtType == TYP_INT);
4853 op1 = gtNewCastNode(TYP_INT, op1, TYP_INT);
4855 op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
4857 tree->gtOp.gtOp1 = op1;
// Arithmetic case: cannot narrow overflow-checked ops or to small types here.
4868 if (tree->gtOverflow() || varTypeIsSmall(dstt))
4870 noway_assert(doit == false);
4878 noway_assert(genActualType(tree->gtType) == genActualType(op1->gtType));
4879 noway_assert(genActualType(tree->gtType) == genActualType(op2->gtType));
// Both operands must narrow, and neither may be a live CSE candidate.
4881 if (gtIsActiveCSE_Candidate(op1) ||
4882 gtIsActiveCSE_Candidate(op2) ||
4883 !optNarrowTree(op1, srct, dstt, NoVNPair, doit) ||
4884 !optNarrowTree(op2, srct, dstt, NoVNPair, doit) )
4886 noway_assert(doit == false);
4890 /* Simply change the type of the tree */
4894 if (tree->gtOper == GT_MUL && (tree->gtFlags & GTF_MUL_64RSLT))
4895 tree->gtFlags &= ~GTF_MUL_64RSLT;
4897 tree->gtType = genActualType(dstt);
4898 tree->SetVNs(vnpNarrow);
4906 /* Simply change the type of the tree */
4908 if (doit && (dstSize <= genTypeSize(tree->gtType)))
4910 tree->gtType = genSignedType(dstt);
4911 tree->SetVNs(vnpNarrow);
4913 /* Make sure we don't mess up the variable type */
4914 if ((oper == GT_LCL_VAR) || (oper == GT_LCL_FLD))
4915 tree->gtFlags |= GTF_VAR_CAST;
4927 /* These can always be narrowed since they only represent 0 or 1 */
// GT_CAST case: try to absorb the narrowing into the cast itself.
4932 var_types cast = tree->CastToType();
4933 var_types oprt = op1->TypeGet();
4934 unsigned oprSize = genTypeSize(oprt);
4939 if (varTypeIsIntegralOrI(dstt) != varTypeIsIntegralOrI(oprt))
4942 if (tree->gtOverflow())
4945 /* Is this a cast from the type we're narrowing to or a smaller one? */
4947 if (oprSize <= dstSize)
4949 /* Bash the target type of the cast */
4953 dstt = genSignedType(dstt);
4955 if (oprSize == dstSize)
4957 // Same size: change the CAST into a NOP
4958 tree->ChangeOper (GT_NOP);
4959 tree->gtType = dstt;
4960 tree->gtOp.gtOp2 = nullptr;
4961 tree->gtVNPair = op1->gtVNPair; // Set to op1's ValueNumber
4965 // oprSize is smaller
4966 assert(oprSize < dstSize);
4968 // Change the CastToType in the GT_CAST node
4969 tree->CastToType() = dstt;
4971 // The result type of a GT_CAST is never a small type.
4972 // Use genActualType to widen dstt when it is a small types.
4973 tree->gtType = genActualType(dstt);
4974 tree->SetVNs(vnpNarrow);
// Comma-like case: narrow the value operand (op2) only.
4984 if (!gtIsActiveCSE_Candidate(op2) &&
4985 optNarrowTree(op2, srct, dstt, vnpNarrow, doit))
4987 /* Simply change the type of the tree */
4991 tree->gtType = genActualType(dstt);
4992 tree->SetVNs(vnpNarrow);
4999 noway_assert(doit == false);
5009 /*****************************************************************************
5011 * The following logic figures out whether the given variable is assigned
5012 * somewhere in a list of basic blocks (or in an entire loop).
// optIsVarAssgCB: fgWalkTreePre callback that records, into the isVarAssgDsc
// passed via pCallbackData, which locals are assigned and what kinds of
// indirections / calls occur. Aborts the walk when the tracked variable
// (ivaVar) is assigned outside the skip tree.
5016 Compiler::fgWalkResult Compiler::optIsVarAssgCB(GenTreePtr *pTree, fgWalkData *data)
5018 GenTreePtr tree = *pTree;
5020 if (tree->OperKind() & GTK_ASGOP)
5022 GenTreePtr dest = tree->gtOp.gtOp1;
5023 genTreeOps destOper = dest->OperGet();
5025 isVarAssgDsc * desc = (isVarAssgDsc*)data->pCallbackData;
5026 assert(desc && desc->ivaSelf == desc);
5028 if (destOper == GT_LCL_VAR)
5030 unsigned tvar = dest->gtLclVarCommon.gtLclNum;
// Track assigned locals up to the all-set limit; beyond it the mask is incomplete.
5031 if (tvar < lclMAX_ALLSET_TRACKED)
5032 AllVarSetOps::AddElemD(data->compiler, desc->ivaMaskVal, tvar);
5034 desc->ivaMaskIncomplete = true;
5036 if (tvar == desc->ivaVar)
5038 if (tree != desc->ivaSkip)
5042 else if (destOper == GT_LCL_FLD)
5044 /* We can't track every field of every var. Moreover, indirections
5045 may access different parts of the var as different (but
5046 overlapping) fields. So just treat them as indirect accesses */
5048 // unsigned lclNum = dest->gtLclFld.gtLclNum;
5049 // noway_assert(lvaTable[lclNum].lvAddrTaken);
5051 varRefKinds refs = varTypeIsGC(tree->TypeGet()) ? VR_IND_REF
5053 desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | refs);
5055 else if (destOper == GT_CLS_VAR)
5057 desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | VR_GLB_VAR);
5059 else if (destOper == GT_IND)
5061 /* Set the proper indirection bits */
5063 varRefKinds refs = varTypeIsGC(tree->TypeGet()) ? VR_IND_REF
5065 desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | refs);
5068 else if (tree->gtOper == GT_CALL)
5070 isVarAssgDsc * desc = (isVarAssgDsc*)data->pCallbackData;
5071 assert(desc && desc->ivaSelf == desc);
// Record the strongest interference kind seen for calls.
5073 desc->ivaMaskCall = optCallInterf(tree);
5076 return WALK_CONTINUE;
5079 /*****************************************************************************/
// optIsVarAssigned: walk the statements of a range of blocks starting at 'beg'
// (further parameters not visible in this view -- presumably an end block, a
// tree to skip, and the variable number; confirm against the declaration) and
// report whether the tracked variable is assigned, via optIsVarAssgCB.
5081 bool Compiler::optIsVarAssigned(BasicBlock * beg,
5089 desc.ivaSkip = skip;
5091 desc.ivaSelf = &desc;
5094 desc.ivaMaskCall = CALLINT_NONE;
5095 AllVarSetOps::AssignNoCopy(this, desc.ivaMaskVal, AllVarSetOps::MakeEmpty(this));
// Walk each statement's tree; the callback aborts the walk on a hit.
5101 for (GenTreeStmt* stmt = beg->firstStmt(); stmt; stmt = stmt->gtNextStmt)
5103 noway_assert(stmt->gtOper == GT_STMT);
5104 if (fgWalkTreePre(&stmt->gtStmtExpr, optIsVarAssgCB, &desc))
5124 /*****************************************************************************/
// optIsSetAssgLoop: test whether loop 'lnum' assigns any variable in 'vars' or
// performs any indirection in 'inds'. The per-loop assigned-variable summary
// (lpAsgVars/lpAsgInds/lpAsgCall) is computed lazily on first query and cached
// in the loop descriptor (LPFLG_ASGVARS_YES).
5125 int Compiler::optIsSetAssgLoop(unsigned lnum,
5126 ALLVARSET_VALARG_TP vars,
5131 /* Get hold of the loop descriptor */
5133 noway_assert(lnum < optLoopCount);
5134 loop = optLoopTable + lnum;
5136 /* Do we already know what variables are assigned within this loop? */
5138 if (!(loop->lpFlags & LPFLG_ASGVARS_YES))
5145 /* Prepare the descriptor used by the tree walker call-back */
5147 desc.ivaVar = (unsigned)-1;
5148 desc.ivaSkip = NULL;
5150 desc.ivaSelf = &desc;
5152 AllVarSetOps::AssignNoCopy(this, desc.ivaMaskVal, AllVarSetOps::MakeEmpty(this));
5153 desc.ivaMaskInd = VR_NONE;
5154 desc.ivaMaskCall = CALLINT_NONE;
5155 desc.ivaMaskIncomplete = false;
5157 /* Now walk all the statements of the loop */
5159 beg = loop->lpHead->bbNext;
5160 end = loop->lpBottom;
5162 for (/**/; /**/; beg = beg->bbNext)
5166 for (GenTreeStmt* stmt = beg->FirstNonPhiDef(); stmt; stmt = stmt->gtNextStmt)
5168 noway_assert(stmt->gtOper == GT_STMT);
5169 fgWalkTreePre(&stmt->gtStmtExpr, optIsVarAssgCB, &desc);
5171 if (desc.ivaMaskIncomplete)
5173 loop->lpFlags |= LPFLG_ASGVARS_INC;
// Cache the computed summary in the loop descriptor.
5181 AllVarSetOps::Assign(this, loop->lpAsgVars, desc.ivaMaskVal);
5182 loop->lpAsgInds = desc.ivaMaskInd;
5183 loop->lpAsgCall = desc.ivaMaskCall;
5185 /* Now we know what variables are assigned in the loop */
5187 loop->lpFlags |= LPFLG_ASGVARS_YES;
5190 /* Now we can finally test the caller's mask against the loop's */
5191 if (!AllVarSetOps::IsEmptyIntersection(this, loop->lpAsgVars, vars) ||
5192 (loop->lpAsgInds & inds))
// Also account for interference from calls inside the loop.
5197 switch (loop->lpAsgCall)
5201 /* Can't hoist if the call might have side effect on an indirection. */
5203 if (loop->lpAsgInds != VR_NONE)
5208 case CALLINT_REF_INDIRS:
5210 /* Can't hoist if the call might have side effect on an ref indirection. */
5212 if (loop->lpAsgInds & VR_IND_REF)
5217 case CALLINT_SCL_INDIRS:
5219 /* Can't hoist if the call might have side effect on an non-ref indirection. */
5221 if (loop->lpAsgInds & VR_IND_SCL)
5226 case CALLINT_ALL_INDIRS:
5228 /* Can't hoist if the call might have side effect on any indirection. */
5230 if (loop->lpAsgInds & (VR_IND_REF|VR_IND_SCL))
5237 /* Other helpers kill nothing */
5242 noway_assert(!"Unexpected lpAsgCall value");
// Hoist a copy of "origExpr" out of loop "lnum" into the loop's pre-header block.
// The original expression stays in the loop body; the hoisted clone is flagged
// GTF_MAKE_CSE so the later CSE phase can commonize the two occurrences.
// A pre-header is created on demand (fgCreateLoopPreHeader is a no-op when the
// loop already has one, per LPFLG_HAS_PREHEAD).
5248 void Compiler::optPerformHoistExpr(GenTreePtr origExpr, unsigned lnum)
5253 printf("\nHoisting a copy of ");
5254 printTreeID(origExpr);
5255 printf(" into PreHeader for loop L%02u <BB%02u..BB%02u>:\n",
5256 lnum, optLoopTable[lnum].lpFirst->bbNum, optLoopTable[lnum].lpBottom->bbNum);
5257 gtDispTree(origExpr);
5262 // This loop has to be in a form that is approved for hoisting.
5263 assert (optLoopTable[lnum].lpFlags & LPFLG_HOISTABLE);
5265 // Create a copy of the expression and mark it for CSE's.
5266 GenTreePtr hoistExpr = gtCloneExpr(origExpr, GTF_MAKE_CSE);
5268 // At this point we should have a cloned expression, marked with the GTF_MAKE_CSE flag
5269 assert(hoistExpr != origExpr);
5270 assert(hoistExpr->gtFlags & GTF_MAKE_CSE);
5272 GenTreePtr hoist = hoistExpr;
5273 // The value of the expression isn't used (unless it's an assignment).
// Wrap non-assignment expressions in an "unused value" node so the statement is
// well-formed even though nothing consumes the computed value in the pre-header.
5274 if (hoistExpr->OperGet() != GT_ASG)
5276 hoist = gtUnusedValNode(hoistExpr);
5279 /* Put the statement in the preheader */
5281 fgCreateLoopPreHeader(lnum);
// After fgCreateLoopPreHeader, lpHead is the (BBJ_NONE) pre-header block.
5283 BasicBlock * preHead = optLoopTable[lnum].lpHead;
5284 assert (preHead->bbJumpKind == BBJ_NONE);
5286 // fgMorphTree and lvaRecursiveIncRefCounts requires that compCurBB be the block that contains
5287 // (or in this case, will contain) the expression.
5288 compCurBB = preHead;
5290 // Increment the ref counts of any local vars appearing in "hoist".
5291 // Note that we need to do this before fgMorphTree() as fgMorph() could constant
5292 // fold away some of the lcl vars referenced by "hoist".
5293 lvaRecursiveIncRefCounts(hoist);
5295 hoist = fgMorphTree(hoist);
5297 GenTreePtr hoistStmt = gtNewStmt(hoist);
5298 hoistStmt->gtFlags |= GTF_STMT_CMPADD;
5300 /* simply append the statement at the end of the preHead's list */
5302 GenTreePtr treeList = preHead->bbTreeList;
5306 /* append after last statement */
// bbTreeList is a circular-in-gtPrev list: the head's gtPrev points at the last
// statement, whose gtNext is null.
5308 GenTreePtr last = treeList->gtPrev;
5309 assert (last->gtNext == 0);
5311 last->gtNext = hoistStmt;
5312 hoistStmt->gtPrev = last;
5313 treeList->gtPrev = hoistStmt;
5317 /* Empty pre-header - store the single statement in the block */
5319 preHead->bbTreeList = hoistStmt;
5320 hoistStmt->gtPrev = hoistStmt;
5323 hoistStmt->gtNext = nullptr;
5328 printf("This hoisted copy placed in PreHeader (BB%02u):\n", preHead->bbNum);
// Re-thread costs/ordering for the new statement if the statement list is threaded.
5333 if (fgStmtListThreaded)
5335 gtSetStmtInfo(hoistStmt);
5336 fgSetStmtSeq(hoistStmt);
// Test-data bookkeeping (JIT self-checks): validate TL_LoopHoist annotations on
// "origExpr" against the actual hoist, then re-label the clone as a CSE definition.
5340 if (m_nodeTestData != NULL)
5343 // What is the depth of the loop "lnum"?
5345 unsigned lnumIter = lnum;
5346 while (optLoopTable[lnumIter].lpParent != BasicBlock::NOT_IN_LOOP)
5349 lnumIter = optLoopTable[lnumIter].lpParent;
5352 NodeToTestDataMap* testData = GetNodeTestData();
5354 TestLabelAndNum tlAndN;
5355 if (testData->Lookup(origExpr, &tlAndN) && tlAndN.m_tl == TL_LoopHoist)
// m_num == -1 is the "do not hoist" marker; any other value is the expected depth.
5357 if (tlAndN.m_num == -1)
5360 printTreeID(origExpr);
5361 printf(" was declared 'do not hoist', but is being hoisted.\n");
5364 else if (tlAndN.m_num != depth)
5367 printTreeID(origExpr);
5368 printf(" was declared as hoistable from loop at nesting depth %d; actually hoisted from loop at depth %d.\n",
5369 tlAndN.m_num, depth);
5374 // We've correctly hoisted this, so remove the annotation. Later, we'll check for any remaining "must hoist" annotations.
5375 testData->Remove(origExpr);
5376 // Now we insert an annotation to make sure that "hoistExpr" is actually CSE'd.
5377 tlAndN.m_tl = TL_CSE_Def;
5378 tlAndN.m_num = m_loopHoistCSEClass++;
5379 testData->Set(hoistExpr, tlAndN);
5385 #if LOOP_HOIST_STATS
// Count at most one "loop with hoisted expressions" per loop, plus a running
// total of all hoisted expressions.
5386 if (!m_curLoopHasHoistedExpression)
5388 m_loopsWithHoistedExpressions++;
5389 m_curLoopHasHoistedExpression = true;
5391 m_totalHoistedExpressions++;
5392 #endif // LOOP_HOIST_STATS
// Phase driver for loop hoisting: walks the loop table and runs optHoistLoopNest
// on every outermost, non-removed loop (nested loops are handled recursively by
// optHoistLoopNest). The trailing section is debug/test-data verification.
5395 void Compiler::optHoistLoopCode()
5397 // If we don't have any loops in the method then take an early out now.
5398 if (optLoopCount == 0)
// JitNoHoist config knob; its consumer is outside this sampled view —
// presumably it disables the phase when set (TODO confirm against full source).
5402 unsigned jitNoHoist = JitConfig.JitNoHoist();
5410 // The code in this #if has been useful in debugging loop cloning issues, by
5411 // enabling selective enablement of the loop cloning optimization according to
// Method-hash range filter (debug-only, normally compiled out): only methods whose
// hash falls in [loophoisthashlo, loophoisthashhi] get hoisting.
5414 unsigned methHash = info.compMethodHash();
5415 char* lostr = getenv("loophoisthashlo");
5416 unsigned methHashLo = 0;
5419 sscanf_s(lostr, "%x", &methHashLo);
5420 // methHashLo = (unsigned(atoi(lostr)) << 2); // So we don't have to use negative numbers.
5422 char* histr = getenv("loophoisthashhi");
5423 unsigned methHashHi = UINT32_MAX;
5426 sscanf_s(histr, "%x", &methHashHi);
5427 // methHashHi = (unsigned(atoi(histr)) << 2); // So we don't have to use negative numbers.
5429 if (methHash < methHashLo || methHash > methHashHi)
5431 printf("Doing loop hoisting in %s (0x%x).\n", info.compFullName, methHash);
5433 #endif // 0 -- debugging loop cloning issues
5438 printf("\n*************** In optHoistLoopCode()\n");
5439 printf("Blocks/Trees before phase\n");
5440 fgDispBasicBlocks(true);
5445 // Consider all the loop nests, in outer-to-inner order (thus hoisting expressions outside the largest loop in which
5446 // they are invariant.)
5447 LoopHoistContext hoistCtxt(this);
5448 for (unsigned lnum = 0; lnum < optLoopCount; lnum++)
// Skip loops that earlier phases removed from the table.
5450 if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED)
// Only start recursion at outermost loops; children are visited by optHoistLoopNest.
5453 if (optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP)
5455 optHoistLoopNest(lnum, &hoistCtxt);
5464 printf("Blocks/Trees after optHoistLoopCode() modified flowgraph\n");
5465 fgDispBasicBlocks(true);
5469 // Make sure that the predecessor lists are accurate
5470 fgDebugCheckBBlist();
5475 // Test Data stuff..
5476 // If we have no test data, early out.
5477 if (m_nodeTestData == NULL) return;
// Any remaining TL_LoopHoist annotation with m_num >= 0 marks an expression that
// was declared "must hoist" but was never hoisted (hoisting removes the annotation).
5478 NodeToTestDataMap* testData = GetNodeTestData();
5479 for (NodeToTestDataMap::KeyIterator ki = testData->Begin(); !ki.Equal(testData->End()); ++ki)
5481 TestLabelAndNum tlAndN;
5482 GenTreePtr node = ki.Get();
5483 bool b = testData->Lookup(node, &tlAndN);
5485 if (tlAndN.m_tl != TL_LoopHoist) continue;
5486 // Otherwise, it is a loop hoist annotation.
5487 assert(tlAndN.m_num < 100); // >= 100 indicates nested static field address, should already have been moved.
5488 if (tlAndN.m_num >= 0)
5492 printf(" was declared 'must hoist', but has not been hoisted.\n");
// Hoist from loop "lnum" and then, recursively, from each of its child loops.
// Between the parent's hoist and the children's, the VNs hoisted in "lnum" are
// temporarily added to hoistCtxt->m_hoistedInParentLoops so children do not
// re-hoist the same values; they are removed again after the children finish.
5499 void Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt)
5501 // Do this loop, then recursively do all nested loops.
5502 CLANG_FORMAT_COMMENT_ANCHOR;
5504 #if LOOP_HOIST_STATS
5506 m_curLoopHasHoistedExpression = false;
5507 m_loopsConsidered++;
5508 #endif // LOOP_HOIST_STATS
5510 optHoistThisLoop(lnum, hoistCtxt);
// Take ownership of the per-loop "hoisted in current loop" VN set; the context's
// current-loop slot is reset for the recursive calls below.
5512 VNSet* hoistedInCurLoop = hoistCtxt->ExtractHoistedInCurLoop();
5514 if (optLoopTable[lnum].lpChild != BasicBlock::NOT_IN_LOOP)
5516 // Add the ones hoisted in "lnum" to "hoistedInParents" for any nested loops.
5517 // TODO-Cleanup: we should have a set abstraction for loops.
5518 if (hoistedInCurLoop != nullptr)
5520 for (VNSet::KeyIterator keys = hoistedInCurLoop->Begin(); !keys.Equal(hoistedInCurLoop->End()); ++keys)
5524 assert(!hoistCtxt->m_hoistedInParentLoops.Lookup(keys.Get(), &b));
5526 hoistCtxt->m_hoistedInParentLoops.Set(keys.Get(), true);
// Recurse over the sibling-linked list of child loops.
5530 for (unsigned child = optLoopTable[lnum].lpChild; child != BasicBlock::NOT_IN_LOOP; child = optLoopTable[child].lpSibling)
5532 optHoistLoopNest(child, hoistCtxt);
5536 // TODO-Cleanup: we should have a set abstraction for loops.
5537 if (hoistedInCurLoop != nullptr)
5539 for (VNSet::KeyIterator keys = hoistedInCurLoop->Begin(); !keys.Equal(hoistedInCurLoop->End()); ++keys)
5541 // Note that we asserted when we added these that they hadn't been members, so removing is appropriate.
5542 hoistCtxt->m_hoistedInParentLoops.Remove(keys.Get());
// Hoist invariant expressions out of the single loop "lnum" (not its children).
// Performs legality screening (do-while shape, dominance, EH regions), computes
// register-pressure inputs (var counts, split by FP and, on 32-bit, doubled for
// TYP_LONG), then visits the definitely-executed blocks of the loop and hoists
// candidates found in them.
5548 void Compiler::optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt)
5550 LoopDsc* pLoopDsc = &optLoopTable[lnum];
5552 /* If loop was removed continue */
5554 if (pLoopDsc->lpFlags & LPFLG_REMOVED)
5557 /* Get the head and tail of the loop */
5559 BasicBlock* head = pLoopDsc->lpHead;
5560 BasicBlock* tail = pLoopDsc->lpBottom;
5561 BasicBlock* lbeg = pLoopDsc->lpEntry;
5564 // We must have a do-while loop
5565 if ((pLoopDsc->lpFlags & LPFLG_DO_WHILE) == 0)
5568 // The loop-head must dominate the loop-entry.
5569 // TODO-CQ: Couldn't we make this true if it's not?
5570 if (!fgDominate(head, lbeg))
5573 // if lbeg is the start of a new try block then we won't be able to hoist
5574 if (!BasicBlock::sameTryRegion(head, lbeg))
5577 // We don't bother hoisting when inside of a catch block
5578 if ((lbeg->bbCatchTyp != BBCT_NONE) && (lbeg->bbCatchTyp != BBCT_FINALLY))
// All legality checks passed: mark the loop as a valid hoist target; this flag is
// asserted later by optPerformHoistExpr and checked by optHoistCandidate.
5581 pLoopDsc->lpFlags |= LPFLG_HOISTABLE;
5583 unsigned begn = lbeg->bbNum;
5584 unsigned endn = tail->bbNum;
5586 // Ensure the per-loop sets/tables are empty.
5587 hoistCtxt->m_curLoopVnInvariantCache.RemoveAll();
5592 printf("optHoistLoopCode for loop L%02u <BB%02u..BB%02u>:\n", lnum, begn, endn);
5593 printf(" Loop body %s a call\n", pLoopDsc->lpContainsCall ? "contains" : "does not contain");
// loopVars = variables both live across the loop boundary (in/out) and used or
// defined inside it; these counts feed the profitability heuristic.
5597 VARSET_TP VARSET_INIT_NOCOPY(loopVars, VarSetOps::Intersection(this, pLoopDsc->lpVarInOut,
5598 pLoopDsc->lpVarUseDef));
5600 pLoopDsc->lpVarInOutCount = VarSetOps::Count(this, pLoopDsc->lpVarInOut);
5601 pLoopDsc->lpLoopVarCount = VarSetOps::Count(this, loopVars);
5602 pLoopDsc->lpHoistedExprCount = 0;
5604 #ifndef _TARGET_64BIT_
5605 unsigned longVarsCount = VarSetOps::Count(this, lvaLongVars);
5607 if (longVarsCount > 0)
5609 // Since 64-bit variables take up two registers on 32-bit targets, we increase
5610 // the Counts such that each TYP_LONG variable counts twice.
5612 VARSET_TP VARSET_INIT_NOCOPY(loopLongVars, VarSetOps::Intersection(this, loopVars, lvaLongVars));
5613 VARSET_TP VARSET_INIT_NOCOPY(inOutLongVars, VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, lvaLongVars));
5618 printf("\n LONGVARS(%d)=", VarSetOps::Count(this, lvaLongVars));
5619 lvaDispVarSet(lvaLongVars);
5622 pLoopDsc->lpLoopVarCount += VarSetOps::Count(this, loopLongVars);
5623 pLoopDsc->lpVarInOutCount += VarSetOps::Count(this, inOutLongVars);
5625 #endif // !_TARGET_64BIT_
5630 printf("\n USEDEF (%d)=", VarSetOps::Count(this, pLoopDsc->lpVarUseDef));
5631 lvaDispVarSet(pLoopDsc->lpVarUseDef);
5633 printf("\n INOUT (%d)=", pLoopDsc->lpVarInOutCount);
5634 lvaDispVarSet(pLoopDsc->lpVarInOut);
5636 printf("\n LOOPVARS(%d)=", pLoopDsc->lpLoopVarCount);
5637 lvaDispVarSet(loopVars);
// Split the counts into FP and integer parts: FP values live in a separate
// register file, so they get their own pressure estimate.
5642 unsigned floatVarsCount = VarSetOps::Count(this, lvaFloatVars);
5644 if (floatVarsCount > 0)
5646 VARSET_TP VARSET_INIT_NOCOPY(loopFPVars, VarSetOps::Intersection(this, loopVars, lvaFloatVars));
5647 VARSET_TP VARSET_INIT_NOCOPY(inOutFPVars, VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, lvaFloatVars));
5649 pLoopDsc->lpLoopVarFPCount = VarSetOps::Count(this, loopFPVars);
5650 pLoopDsc->lpVarInOutFPCount = VarSetOps::Count(this, inOutFPVars);
5651 pLoopDsc->lpHoistedFPExprCount = 0;
5653 pLoopDsc->lpLoopVarCount -= pLoopDsc->lpLoopVarFPCount;
5654 pLoopDsc->lpVarInOutCount -= pLoopDsc->lpVarInOutFPCount;
5659 printf( " INOUT-FP(%d)=", pLoopDsc->lpVarInOutFPCount);
5660 lvaDispVarSet(inOutFPVars);
5662 printf("\n LOOPV-FP(%d)=", pLoopDsc->lpLoopVarFPCount);
5663 lvaDispVarSet(loopFPVars);
5667 else // (floatVarsCount == 0)
5669 pLoopDsc->lpLoopVarFPCount = 0;
5670 pLoopDsc->lpVarInOutFPCount = 0;
5671 pLoopDsc->lpHoistedFPExprCount = 0;
5674 // Find the set of definitely-executed blocks.
5675 // Ideally, the definitely-executed blocks are the ones that post-dominate the entry block.
5676 // Until we have post-dominators, we'll special-case for single-exit blocks.
5677 ExpandArrayStack<BasicBlock*> defExec(getAllocatorLoopHoist());
5678 if (pLoopDsc->lpFlags & LPFLG_ONE_EXIT)
5680 assert(pLoopDsc->lpExit != nullptr);
5681 BasicBlock* cur = pLoopDsc->lpExit;
5682 // Push dominators, until we reach "entry" or exit the loop.
// NOTE(review): the dominator-walk step (advancing "cur" and pushing blocks) is
// elided in this sampled view; the loop body is not visible here.
5683 while ( cur != nullptr
5684 && pLoopDsc->lpContains(cur)
5685 && cur != pLoopDsc->lpEntry)
5690 // If we didn't reach the entry block, give up and *just* push the entry block.
5691 if (cur != pLoopDsc->lpEntry)
5695 defExec.Push(pLoopDsc->lpEntry);
5697 else // More than one exit
5699 // We'll assume that only the entry block is definitely executed.
5700 // We could in the future do better.
5701 defExec.Push(pLoopDsc->lpEntry);
// Pop in reverse push order so a dominator is processed before its dominatee.
5704 while (defExec.Size() > 0)
5706 // Consider in reverse order: dominator before dominatee.
5707 BasicBlock* blk = defExec.Pop();
5708 optHoistLoopExprsForBlock(blk, lnum, hoistCtxt);
5712 // Hoist any expressions in "blk" that are invariant in loop "lnum" outside of "blk" and into a PreHead for loop "lnum".
// Walks the statements of "blk"; optHoistLoopExprsForTree hoists hoistable
// sub-trees, and if a whole statement tree is hoistable it is tried here too.
// Blocks with very small weight (< BB_UNITY_WEIGHT / 10) are skipped entirely.
5713 void Compiler::optHoistLoopExprsForBlock(BasicBlock* blk,
5715 LoopHoistContext* hoistCtxt)
5717 LoopDsc* pLoopDsc = &optLoopTable[lnum];
// "Before side effect" only holds while we are in the loop's entry block and have
// not yet seen a call or a global store; optHoistLoopExprsForTree clears it.
5718 bool firstBlockAndBeforeSideEffect = (blk == pLoopDsc->lpEntry);
5719 unsigned blkWeight = blk->getBBWeight(this);
5724 printf(" optHoistLoopExprsForBlock BB%02u (weight=%6s) of loop L%02u <BB%02u..BB%02u>, firstBlock is %s\n",
5726 refCntWtd2str(blkWeight),
5728 pLoopDsc->lpFirst->bbNum,
5729 pLoopDsc->lpBottom->bbNum,
5730 firstBlockAndBeforeSideEffect ? "true" : "false");
5731 if (blkWeight < (BB_UNITY_WEIGHT / 10))
5733 printf(" block weight is too small to perform hoisting.\n");
5738 if (blkWeight < (BB_UNITY_WEIGHT / 10))
5740 // Block weight is too small to perform hoisting.
5744 for (GenTreeStmt* stmt = blk->FirstNonPhiDef(); stmt; stmt = stmt->gtNextStmt)
5746 GenTreePtr stmtTree = stmt->gtStmtExpr;
// Recursive walk: hoists hoistable children and reports whether the whole
// statement tree is itself hoistable (result used via "hoistable", whose
// declaration is elided in this sampled view).
5748 (void)optHoistLoopExprsForTree(stmtTree, lnum, hoistCtxt, &firstBlockAndBeforeSideEffect, &hoistable);
5751 // we will try to hoist the top-level stmtTree
5752 optHoistCandidate(stmtTree, lnum, hoistCtxt);
// Register-pressure profitability test for hoisting "tree" out of loop "lnum".
// Estimates the callee-saved (plus, when the loop has no calls, callee-trash)
// registers available, reduced by expressions already hoisted, and compares
// against the loop's live-variable counts; cheap trees (low gtCostEx) are
// rejected when pressure is high. FP and integer trees use separate counts.
5757 bool Compiler::optIsProfitableToHoistableTree(GenTreePtr tree, unsigned lnum)
5759 LoopDsc* pLoopDsc = &optLoopTable[lnum];
5761 bool loopContainsCall = pLoopDsc->lpContainsCall;
5764 int hoistedExprCount;
5768 if (varTypeIsFloating(tree->TypeGet()))
5770 hoistedExprCount = pLoopDsc->lpHoistedFPExprCount;
5771 loopVarCount = pLoopDsc->lpLoopVarFPCount;
5772 varInOutCount = pLoopDsc->lpVarInOutFPCount;
5774 availRegCount = CNT_CALLEE_SAVED_FLOAT;
5775 if (!loopContainsCall)
5777 availRegCount += CNT_CALLEE_TRASH_FLOAT-1;
5780 // For ARM each double takes two FP registers
5781 // For now on ARM we won't track singles/doubles
5782 // and instead just assume that we always have doubles.
5789 hoistedExprCount = pLoopDsc->lpHoistedExprCount;
5790 loopVarCount = pLoopDsc->lpLoopVarCount;
5791 varInOutCount = pLoopDsc->lpVarInOutCount;
5793 availRegCount = CNT_CALLEE_SAVED-1;
5794 if (!loopContainsCall)
5796 availRegCount += CNT_CALLEE_TRASH-1;
5798 #ifndef _TARGET_64BIT_
5799 // For our 32-bit targets Long types take two registers.
5800 if (varTypeIsLong(tree->TypeGet()))
// Halve (rounding up) the register estimate since each value needs a pair.
5802 availRegCount = (availRegCount+1) / 2;
5807 // decrement the availRegCount by the count of expression that we have already hoisted.
5808 availRegCount -= hoistedExprCount;
5810 // the variables that are read/written inside the loop should
5811 // always be a subset of the InOut variables for the loop
5812 assert(loopVarCount <= varInOutCount);
5814 // When loopVarCount >= availRegCount we believe that all of the
5815 // available registers will get used to hold LclVars inside the loop.
5816 // This pessimistically assumes that each loopVar has a conflicting
5817 // lifetime with every other loopVar.
5818 // For this case we will hoist the expression only if is profitable
5819 // to place it in a stack home location (gtCostEx >= 2*IND_COST_EX)
5820 // as we believe it will be placed in the stack or one of the other
5821 // loopVars will be spilled into the stack
5823 if (loopVarCount >= availRegCount)
5825 // Don't hoist expressions that are not heavy: tree->gtCostEx < (2*IND_COST_EX)
5826 if (tree->gtCostEx < (2*IND_COST_EX))
5830 // When varInOutCount < availRegCount we are know that there are
5831 // some available register(s) when we enter the loop body.
5832 // When varInOutCount == availRegCount there often will be a register
5833 // available when we enter the loop body, since a loop often defines a
5834 // LclVar on exit or there is often at least one LclVar that is worth
5835 // spilling to the stack to make way for this hoisted expression.
5836 // So we are willing hoist an expression with gtCostEx == MIN_CSE_COST
5838 if (varInOutCount > availRegCount)
5840 // Don't hoist expressions that barely meet CSE cost requirements: tree->gtCostEx == MIN_CSE_COST
5841 if (tree->gtCostEx <= MIN_CSE_COST+1)
5849 // This function returns true if 'tree' is a loop invariant expression.
5850 // It also sets '*pHoistable' to true if 'tree' can be hoisted into a loop PreHeader block
// Post-order recursion: children first, so a tree is only hoistable if all its
// children are invariant. When the tree itself is NOT hoistable, its hoistable
// children are submitted individually via optHoistCandidate. The walk also
// maintains '*pFirstBlockAndBeforeSideEffect' (cleared on calls / global stores).
5852 bool Compiler::optHoistLoopExprsForTree(GenTreePtr tree,
5854 LoopHoistContext* hoistCtxt,
5855 bool* pFirstBlockAndBeforeSideEffect,
5858 // First do the children.
5859 // We must keep track of whether each child node was hoistable or not
5861 unsigned nChildren = tree->NumChildren();
5862 bool childrenHoistable[GenTree::MAX_CHILDREN];
5864 // Initialize the array elements for childrenHoistable[] to false
5865 for (unsigned i = 0; i < nChildren; i++)
5867 childrenHoistable[i] = false;
5870 bool treeIsInvariant = true;
5871 for (unsigned childNum = 0; childNum < nChildren; childNum++)
5873 if (!optHoistLoopExprsForTree(tree->GetChild(childNum), lnum, hoistCtxt, pFirstBlockAndBeforeSideEffect, &childrenHoistable[childNum]))
5875 treeIsInvariant = false;
5879 // If all the children of "tree" are hoistable, then "tree" itself can be hoisted
5881 bool treeIsHoistable = treeIsInvariant;
5883 // But we must see if anything else prevents "tree" from being hoisted.
5885 if (treeIsInvariant)
5887 // Tree must be a suitable CSE candidate for us to be able to hoist it.
5888 treeIsHoistable = optIsCSEcandidate(tree);
5890 // If it's a call, it must be a helper call, and be pure.
5891 // Further, if it may run a cctor, it must be labeled as "Hoistable"
5892 // (meaning it won't run a cctor because the class is not precise-init).
5893 if (treeIsHoistable && tree->OperGet() == GT_CALL)
5895 GenTreeCall* call = tree->AsCall();
5896 if (call->gtCallType != CT_HELPER)
5898 treeIsHoistable = false;
5902 CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
5903 if (!s_helperCallProperties.IsPure(helpFunc))
5905 treeIsHoistable = false;
5907 else if ( s_helperCallProperties.MayRunCctor(helpFunc)
5908 && (call->gtFlags & GTF_CALL_HOISTABLE) == 0)
5910 treeIsHoistable = false;
5915 if (treeIsHoistable)
5917 if (!(*pFirstBlockAndBeforeSideEffect))
5919 // For now, we give up on an expression that might raise an exception if it is after the
5920 // first possible global side effect (and we assume we're after that if we're not in the first block).
5921 //TODO-CQ: this is when we might do loop cloning.
5923 if ((tree->gtFlags & GTF_EXCEPT) != 0)
5925 treeIsHoistable = false;
5928 // Currently we must give up on reads from static variables (even if we are in the first block).
5930 if (tree->OperGet() == GT_CLS_VAR)
5932 // TODO-CQ: test that fails if we hoist GT_CLS_VAR: JIT\Directed\Languages\ComponentPascal\pi_r.exe method Main
5933 treeIsHoistable = false;
5937 // Is the value of the whole tree loop invariant?
// Value-number-based invariance check (liberal VN), memoized per loop in
// m_curLoopVnInvariantCache.
5938 treeIsInvariant = optVNIsLoopInvariant(tree->gtVNPair.GetLiberal(), lnum, &hoistCtxt->m_curLoopVnInvariantCache);
5940 // Is the value of the whole tree loop invariant?
5941 if (!treeIsInvariant)
5943 treeIsHoistable = false;
5947 // Check if we need to set '*pFirstBlockAndBeforeSideEffect' to false.
5948 // If we encounter a tree with a call in it
5949 // or if we see an assignment to global we set it to false.
5951 // If we are already set to false then we can skip these checks
5953 if (*pFirstBlockAndBeforeSideEffect)
5955 // For this purpose, we only care about memory side effects. We assume that expressions will
5956 // be hoisted so that they are evaluated in the same order as they would have been in the loop,
5957 // and therefore throw exceptions in the same order. (So we don't use GTF_GLOBALLY_VISIBLE_SIDE_EFFECTS
5958 // here, since that includes exceptions.)
5959 if (tree->gtFlags & GTF_CALL)
5961 *pFirstBlockAndBeforeSideEffect = false;
5963 else if (tree->OperIsAssignment())
5965 // If the LHS of the assignment has a global reference, then assume it's a global side effect.
5966 GenTreePtr lhs = tree->gtOp.gtOp1;
5967 if (lhs->gtFlags & GTF_GLOB_REF)
5969 *pFirstBlockAndBeforeSideEffect = false;
5971 } else if (tree->OperIsCopyBlkOp())
5973 GenTreePtr args = tree->gtOp.gtOp1;
5974 assert(args->OperGet() == GT_LIST);
5975 if (args->gtOp.gtOp1->gtFlags & GTF_GLOB_REF)
5977 *pFirstBlockAndBeforeSideEffect = false;
5982 // If this 'tree' is hoistable then we return and the caller will
5983 // decide to hoist it as part of larger hoistable expression.
5985 if (!treeIsHoistable)
5987 // We are not hoistable so we will now hoist any hoistable children.
5989 for (unsigned childNum = 0; childNum < nChildren; childNum++)
5991 if (childrenHoistable[childNum])
5993 // We can't hoist the LHS of an assignment, isn't a real use.
5994 if (childNum == 0 && (tree->OperIsAssignment()))
5997 GenTreePtr child = tree->GetChild(childNum);
5999 // We try to hoist this 'child' tree
6000 optHoistCandidate(child, lnum, hoistCtxt);
6005 *pHoistable = treeIsHoistable;
6006 return treeIsInvariant;
// Final gatekeeper for one hoist candidate: checks loop validity (LPFLG_HOISTABLE),
// SSA-validity of the tree at the loop head, profitability, and duplicate
// suppression (already hoisted in this loop or a parent), then performs the
// hoist and updates the per-loop hoisted-expression counters and the VN set.
6009 void Compiler::optHoistCandidate(GenTreePtr tree, unsigned lnum, LoopHoistContext* hoistCtxt)
6011 if (lnum == BasicBlock::NOT_IN_LOOP)
6013 // The hoisted expression isn't valid at any loop head so don't hoist this expression.
6017 // The outer loop also must be suitable for hoisting...
6018 if ((optLoopTable[lnum].lpFlags & LPFLG_HOISTABLE) == 0)
6021 // If the hoisted expression isn't valid at this loop head then break
6022 if (!optTreeIsValidAtLoopHead(tree, lnum))
6025 // It must pass the hoistable profitablity tests for this loop level
6026 if (!optIsProfitableToHoistableTree(tree, lnum))
// Duplicate suppression is keyed on the tree's liberal value number.
6031 if (hoistCtxt->m_hoistedInParentLoops.Lookup(tree->gtVNPair.GetLiberal(), &b))
6033 // already hoisted in a parent loop, so don't hoist this expression.
6037 if (hoistCtxt->GetHoistedInCurLoop(this)->Lookup(tree->gtVNPair.GetLiberal(), &b))
6039 // already hoisted this expression in the current loop, so don't hoist this expression.
6043 // Expression can be hoisted
6044 optPerformHoistExpr(tree, lnum);
6046 // Increment lpHoistedExprCount or lpHoistedFPExprCount
6047 if (!varTypeIsFloating(tree->TypeGet()))
6049 optLoopTable[lnum].lpHoistedExprCount++;
6050 #ifndef _TARGET_64BIT_
6051 // For our 32-bit targets Long types take two registers.
// Count a TYP_LONG hoist twice so the profitability estimate accounts for the pair.
6052 if (varTypeIsLong(tree->TypeGet()))
6054 optLoopTable[lnum].lpHoistedExprCount++;
6058 else // Floating point expr hoisted
6060 optLoopTable[lnum].lpHoistedFPExprCount++;
6063 // Record the hoisted expression in hoistCtxt
6064 hoistCtxt->GetHoistedInCurLoop(this)->Set(tree->gtVNPair.GetLiberal(), true);
// Returns true if value number "vn" is invariant with respect to loop "lnum".
// Constants are trivially invariant; phi definitions are invariant iff defined
// outside the loop; other VN functions are invariant iff all their arguments are.
// Results are memoized in "loopVnInvariantCache" (cleared per loop).
6068 bool Compiler::optVNIsLoopInvariant(ValueNum vn, unsigned lnum, VNToBoolMap* loopVnInvariantCache)
6070 // If it is not a VN, is not loop-invariant.
6071 if (vn == ValueNumStore::NoVN) return false;
6073 // We'll always short-circuit constants.
6074 if (vnStore->IsVNConstant(vn) || vn == vnStore->VNForVoid())
6077 // If we've done this query previously, don't repeat.
6078 bool previousRes = false;
6079 if (loopVnInvariantCache->Lookup(vn, &previousRes))
6084 if (vnStore->GetVNFunc(vn, &funcApp))
6086 if (funcApp.m_func == VNF_PhiDef)
6088 // First, make sure it's a "proper" phi -- the definition is a Phi application.
6089 VNFuncApp phiDefValFuncApp;
6090 if ( !vnStore->GetVNFunc(funcApp.m_args[2], &phiDefValFuncApp)
6091 || phiDefValFuncApp.m_func != VNF_Phi)
6093 // It's not *really* a definition, rather a pass-through of some other VN.
6094 // (This could occur, say if both sides of an if-then-else diamond made the
6095 // same assignment to a variable.)
6096 res = optVNIsLoopInvariant(funcApp.m_args[2], lnum, loopVnInvariantCache);
6100 // Is the definition within the loop? If so, is not loop-invariant.
6101 unsigned lclNum = funcApp.m_args[0];
6102 unsigned ssaNum = funcApp.m_args[1];
6103 LclSsaVarDsc* ssaDef = lvaTable[lclNum].GetPerSsaData(ssaNum);
6104 res = !optLoopContains(lnum, ssaDef->m_defLoc.m_blk->bbNatLoopNum);
6107 else if (funcApp.m_func == VNF_PhiHeapDef)
// Heap phi: invariant iff the defining block is outside the loop.
6109 BasicBlock* defnBlk = reinterpret_cast<BasicBlock*>(vnStore->ConstantValue<ssize_t>(funcApp.m_args[0]));
6110 res = !optLoopContains(lnum, defnBlk->bbNatLoopNum);
// Ordinary VN function: recurse over all arguments; any variant arg makes the
// whole VN variant.
6114 for (unsigned i = 0; i < funcApp.m_arity; i++)
6116 // TODO-CQ: We need to either make sure that *all* VN functions
6117 // always take VN args, or else have a list of arg positions to exempt, as implicitly
6119 if (!optVNIsLoopInvariant(funcApp.m_args[i], lnum, loopVnInvariantCache))
6129 // Otherwise, assume non-function "new, unique" VN's are not loop invariant.
6133 loopVnInvariantCache->Set(vn, res);
// Returns true if "tree" could be evaluated at the head of loop "lnum":
// constants are always valid; SSA-tracked locals are valid iff their SSA def is
// outside the loop; composite trees require every child to be valid.
6137 bool Compiler::optTreeIsValidAtLoopHead(GenTreePtr tree, unsigned lnum)
6139 if (tree->OperIsLocal())
6141 GenTreeLclVarCommon* lclVar = tree->AsLclVarCommon();
6142 unsigned lclNum = lclVar->gtLclNum;
6144 // The lvlVar must be have an Ssa tracked lifetime
6145 if (fgExcludeFromSsa(lclNum))
6148 // If the loop does not contains the SSA def we can hoist it.
6149 if (!optLoopTable[lnum].lpContains(lvaTable[lclNum].GetPerSsaData(lclVar->GetSsaNum())->m_defLoc.m_blk))
6152 else if (tree->OperIsConst())
6156 else // If every one of the children nodes are valid at this Loop's Head.
6158 unsigned nChildren = tree->NumChildren();
6159 for (unsigned childNum = 0; childNum < nChildren; childNum++)
6161 if (!optTreeIsValidAtLoopHead(tree->GetChild(childNum), lnum))
6170 /*****************************************************************************
6172  * Creates a pre-header block for the given loop - a preheader is a BBJ_NONE
6173  * header. The pre-header will replace the current lpHead in the loop table.
6174  * The loop has to be a do-while loop. Thus, all blocks dominated by lpHead
6175  * will also be dominated by the loop-top, lpHead->bbNext.
6179 void Compiler::fgCreateLoopPreHeader(unsigned lnum)
6181 LoopDsc* pLoopDsc = &optLoopTable[lnum];
6183 /* This loop has to be a "do-while" loop */
6185 assert (pLoopDsc->lpFlags & LPFLG_DO_WHILE);
6187 /* Have we already created a loop-preheader block? */
// Idempotent: a shared pre-header may already exist (possibly created for a
// sibling loop with the same entry -- see the nested-loop fixup at the bottom).
6189 if (pLoopDsc->lpFlags & LPFLG_HAS_PREHEAD)
6192 BasicBlock* head = pLoopDsc->lpHead;
6193 BasicBlock* top = pLoopDsc->lpTop;
6194 BasicBlock* entry = pLoopDsc->lpEntry;
6196 // if 'entry' and 'head' are in different try regions then we won't be able to hoist
6197 if (!BasicBlock::sameTryRegion(head, entry))
6200 // Ensure that lpHead always dominates lpEntry
6202 noway_assert(fgDominate(head, entry));
6204 /* Get hold of the first block of the loop body */
6206 assert (top == entry);
6208 /* Allocate a new basic block */
6210 BasicBlock * preHead = bbNewBasicBlock(BBJ_NONE);
6211 preHead->bbFlags |= BBF_INTERNAL | BBF_LOOP_PREHEADER;
6213 // Must set IL code offset
6214 preHead->bbCodeOffs = top->bbCodeOffs;
6216 // Set the default value of the preHead weight in case we don't have
6217 // valid profile data and since this blocks weight is just an estimate
6218 // we clear any BBF_PROF_WEIGHT flag that we may have picked up from head.
6220 preHead->inheritWeight(head);
6221 preHead->bbFlags &= ~BBF_PROF_WEIGHT;
6225 printf("\nCreated PreHeader (BB%02u) for loop L%02u (BB%02u - BB%02u), with weight = %s\n",
6226 preHead->bbNum, lnum, top->bbNum, pLoopDsc->lpBottom->bbNum,
6227 refCntWtd2str(preHead->getBBWeight(this)));
6230 // The preheader block is part of the containing loop (if any).
6231 preHead->bbNatLoopNum = pLoopDsc->lpParent;
// With profile data and a conditional head, refine the pre-header weight from
// the fraction of head's flow that actually enters the loop.
6233 if (fgIsUsingProfileWeights() && (head->bbJumpKind == BBJ_COND))
6235 if ((head->bbWeight == 0) || (head->bbNext->bbWeight == 0))
6237 preHead->bbWeight = 0;
6238 preHead->bbFlags |= BBF_RUN_RARELY;
6242 bool allValidProfileWeights = ((head->bbFlags & BBF_PROF_WEIGHT) != 0)
6243 && ((head->bbJumpDest->bbFlags & BBF_PROF_WEIGHT) != 0)
6244 && ((head->bbNext->bbFlags & BBF_PROF_WEIGHT) != 0);
6246 if (allValidProfileWeights)
6248 double loopEnteredCount;
6249 double loopSkippedCount;
6251 if (fgHaveValidEdgeWeights)
// Use the midpoint of each edge-weight range as the estimate.
6253 flowList * edgeToNext = fgGetPredForBlock(head->bbNext, head);
6254 flowList * edgeToJump = fgGetPredForBlock(head->bbJumpDest, head);
6255 noway_assert(edgeToNext != NULL);
6256 noway_assert(edgeToJump != NULL);
6258 loopEnteredCount = ((double) edgeToNext->flEdgeWeightMin + (double) edgeToNext->flEdgeWeightMax) / 2.0;
6259 loopSkippedCount = ((double) edgeToJump->flEdgeWeightMin + (double) edgeToJump->flEdgeWeightMax) / 2.0;
6263 loopEnteredCount = (double) head->bbNext->bbWeight;
6264 loopSkippedCount = (double) head->bbJumpDest->bbWeight;
6267 double loopTakenRatio = loopEnteredCount / (loopEnteredCount + loopSkippedCount);
6269 // Calculate a good approximation of the preHead's block weight
6270 unsigned preHeadWeight = (unsigned) (((double) head->bbWeight * loopTakenRatio) + 0.5);
6271 preHead->setBBWeight(max(preHeadWeight, 1));
6272 noway_assert(!preHead->isRunRarely());
6277 // Link in the preHead block.
6278 fgInsertBBbefore(top, preHead);
6280 // Ideally we would re-run SSA and VN if we optimized by doing loop hoisting.
6281 // However, that is too expensive at this point. Instead, we update the phi
6282 // node block references, if we created pre-header block due to hoisting.
6283 // This is sufficient because any definition participating in SSA that flowed
6284 // into the phi via the loop header block will now flow through the preheader
6285 // block from the header block.
6287 for (GenTreePtr stmt = top->bbTreeList; stmt; stmt = stmt->gtNext)
6289 GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
6290 if (tree->OperGet() != GT_ASG)
6294 GenTreePtr op2 = tree->gtGetOp2();
6295 if (op2->OperGet() != GT_PHI)
// Retarget phi args that named 'head' as their predecessor to 'preHead'.
6299 GenTreeArgList* args = op2->gtGetOp1()->AsArgList();
6300 while (args != nullptr)
6302 GenTreePhiArg* phiArg = args->Current()->AsPhiArg();
6303 if (phiArg->gtPredBB == head)
6305 phiArg->gtPredBB = preHead;
6307 args = args->Rest();
6311 // The handler can't begin at the top of the loop. If it did, it would be incorrect
6312 // to set the handler index on the pre header without updating the exception table.
6313 noway_assert(!top->hasHndIndex() || fgFirstBlockOfHandler(top) != top);
6315 // Update the EH table to make the hoisted block part of the loop's EH block.
6316 fgExtendEHRegionBefore(top);
6318 // TODO-CQ: set dominators for this block, to allow loop optimizations requiring them
6319 // (e.g: hoisting expression in a loop with the same 'head' as this one)
6321 /* Update the loop entry */
6323 pLoopDsc->lpHead = preHead;
6324 pLoopDsc->lpFlags |= LPFLG_HAS_PREHEAD;
6326 /* The new block becomes the 'head' of the loop - update bbRefs and bbPreds
6327 All predecessors of 'beg', (which is the entry in the loop)
6328 now have to jump to 'preHead', unless they are dominated by 'head' */
6330 preHead->bbRefs = 0;
6331 fgAddRefPred(preHead, head);
6332 bool checkNestedLoops = false;
// Redirect every predecessor of 'top' that is not a back-edge into the new
// pre-header, fixing bbJumpDest / switch tables and pred lists as we go.
6334 for (flowList * pred = top->bbPreds; pred; pred = pred->flNext)
6336 BasicBlock * predBlock = pred->flBlock;
6338 if (fgDominate(top, predBlock))
6340 // note: if 'top' dominates predBlock, 'head' dominates predBlock too
6341 // (we know that 'head' dominates 'top'), but using 'top' instead of
6342 // 'head' in the test allows us to not enter here if 'predBlock == head'
6344 if (predBlock != pLoopDsc->lpBottom)
// A back-edge from a block other than this loop's bottom implies another
// (nested) loop sharing this entry; fix those loops up after the walk.
6346 noway_assert(predBlock != head);
6347 checkNestedLoops = true;
6352 switch (predBlock->bbJumpKind)
6355 noway_assert(predBlock == head);
6359 if (predBlock == head)
6361 noway_assert(predBlock->bbJumpDest != top);
6367 case BBJ_EHCATCHRET:
6368 noway_assert(predBlock->bbJumpDest == top);
6369 predBlock->bbJumpDest = preHead;
6370 preHead->bbFlags |= BBF_JMP_TARGET|BBF_HAS_LABEL;
6372 if (predBlock == head)
6374 // This is essentially the same case of predBlock being a BBJ_NONE. We may not be
6375 // able to make this a BBJ_NONE if it's an internal block (for example, a leave).
6376 // Just break, pred will be removed after switch.
6380 fgRemoveRefPred(top, predBlock);
6381 fgAddRefPred(preHead, predBlock);
6386 unsigned jumpCnt; jumpCnt = predBlock->bbJumpSwt->bbsCount;
6387 BasicBlock * * jumpTab; jumpTab = predBlock->bbJumpSwt->bbsDstTab;
6392 if ((*jumpTab) == top)
6394 (*jumpTab) = preHead;
6396 fgRemoveRefPred(top, predBlock);
6397 fgAddRefPred(preHead, predBlock);
6398 preHead->bbFlags |= BBF_JMP_TARGET|BBF_HAS_LABEL;
6401 while (++jumpTab, --jumpCnt);
6404 noway_assert(!"Unexpected bbJumpKind");
6409 noway_assert(!fgGetPredForBlock(top, preHead));
6410 fgRemoveRefPred(top, head);
6411 fgAddRefPred(top, preHead);
6414 If we found at least one back-edge in the flowgraph pointing to the top/entry of the loop
6415 (other than the back-edge of the loop we are considering) then we likely have nested
6416 do-while loops with the same entry block and inserting the preheader block changes the head
6417 of all the nested loops. Now we will update this piece of information in the loop table, and
6418 mark all nested loops as having a preheader (the preheader block can be shared among all nested
6419 do-while loops with the same entry block).
6421 if (checkNestedLoops)
6423 for (unsigned l = 0; l < optLoopCount; l++)
6425 if (optLoopTable[l].lpHead == head)
6427 noway_assert(l != lnum); // pLoopDsc->lpHead was already changed from 'head' to 'preHead'
6428 noway_assert(optLoopTable[l].lpEntry == top);
6429 optUpdateLoopHead(l, optLoopTable[l].lpHead, preHead);
6430 optLoopTable[l].lpFlags |= LPFLG_HAS_PREHEAD;
6433 printf("Same PreHeader (BB%02u) can be used for loop L%02u (BB%02u - BB%02u)\n\n",
6434 preHead->bbNum, l, top->bbNum, optLoopTable[l].lpBottom->bbNum);
6441 bool Compiler::optBlockIsLoopEntry(BasicBlock* blk, unsigned* pLnum)
6443 unsigned lnum = blk->bbNatLoopNum;
6444 while (lnum != BasicBlock::NOT_IN_LOOP)
6446 if (optLoopTable[lnum].lpEntry == blk)
6451 lnum = optLoopTable[lnum].lpParent;
6456 void Compiler::optComputeLoopSideEffects()
6459 for (lnum = 0; lnum < optLoopCount; lnum++)
6461 VarSetOps::AssignNoCopy(this, optLoopTable[lnum].lpVarInOut, VarSetOps::MakeEmpty(this));
6462 VarSetOps::AssignNoCopy(this, optLoopTable[lnum].lpVarUseDef, VarSetOps::MakeEmpty(this));
6463 optLoopTable[lnum].lpContainsCall = false;
6466 for (lnum = 0; lnum < optLoopCount; lnum++)
6468 if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED)
6471 if (optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP) // Is outermost...
6472 optComputeLoopNestSideEffects(lnum);
6475 VarSetOps::AssignNoCopy(this, lvaFloatVars, VarSetOps::MakeEmpty(this));
6476 #ifndef _TARGET_64BIT_
6477 VarSetOps::AssignNoCopy(this, lvaLongVars, VarSetOps::MakeEmpty(this));
6480 for (unsigned i = 0; i < lvaCount; i++)
6482 LclVarDsc* varDsc = &lvaTable[i];
6483 if (varDsc->lvTracked)
6485 if (varTypeIsFloating(varDsc->lvType))
6487 VarSetOps::AddElemD(this, lvaFloatVars, varDsc->lvVarIndex);
6489 #ifndef _TARGET_64BIT_
6490 else if (varTypeIsLong(varDsc->lvType))
6492 VarSetOps::AddElemD(this, lvaLongVars, varDsc->lvVarIndex);
6499 void Compiler::optComputeLoopNestSideEffects(unsigned lnum)
6501 assert(optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP); // Requires: lnum is outermost.
6502 BasicBlock* botNext = optLoopTable[lnum].lpBottom->bbNext;
6503 for (BasicBlock* bbInLoop = optLoopTable[lnum].lpFirst; bbInLoop != botNext; bbInLoop = bbInLoop->bbNext)
6505 optComputeLoopSideEffectsOfBlock(bbInLoop);
6509 void Compiler::optComputeLoopSideEffectsOfBlock(BasicBlock* blk)
6511 unsigned mostNestedLoop = blk->bbNatLoopNum;
6512 assert(mostNestedLoop != BasicBlock::NOT_IN_LOOP);
6514 AddVariableLivenessAllContainingLoops(mostNestedLoop, blk);
6516 bool heapHavoc = false; // True ==> there's a call or a memory store that has arbitrary heap effects.
6518 // Now iterate over the remaining statements, and their trees.
6519 for (GenTreePtr stmts = blk->FirstNonPhiDef(); (stmts != nullptr); stmts = stmts->gtNext)
6521 for (GenTreePtr tree = stmts->gtStmt.gtStmtList; (tree != nullptr); tree = tree->gtNext)
6523 genTreeOps oper = tree->OperGet();
6525 // Even after we set heapHavoc we still may want to know if a loop contains calls
6528 if (oper == GT_CALL)
6530 // Record that this loop contains a call
6531 AddContainsCallAllContainingLoops(mostNestedLoop);
6534 // If we just set lpContainsCall or it was previously set
6535 if (optLoopTable[mostNestedLoop].lpContainsCall)
6537 // We can early exit after both heapHavoc and lpContainsCall are both set to true.
6541 // We are just looking for GT_CALL nodes after heapHavoc was set.
6545 // otherwise heapHavoc is not set
6548 // This body is a distillation of the heap-side effect code of value numbering.
6549 // We also do a very limited analysis if byref PtrTo values, to cover some cases
6550 // that the compiler creates.
6552 if (GenTree::OperIsAssignment(oper))
6554 GenTreePtr lhs = tree->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/true);
6556 if (lhs->OperGet() == GT_IND)
6558 GenTreePtr arg = lhs->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/true);
6559 FieldSeqNode* fldSeqArrElem = nullptr;
6561 if ((tree->gtFlags & GTF_IND_VOLATILE) != 0)
6569 if (arg->TypeGet() == TYP_BYREF && arg->OperGet() == GT_LCL_VAR)
6571 // If it's a local byref for which we recorded a value number, use that...
6572 GenTreeLclVar* argLcl = arg->AsLclVar();
6573 if (!fgExcludeFromSsa(argLcl->GetLclNum()))
6575 ValueNum argVN = lvaTable[argLcl->GetLclNum()].GetPerSsaData(argLcl->GetSsaNum())->m_vnPair.GetLiberal();
6577 if (argVN != ValueNumStore::NoVN && vnStore->GetVNFunc(argVN, &funcApp) && funcApp.m_func == VNF_PtrToArrElem)
6579 assert(vnStore->IsVNHandle(funcApp.m_args[0]));
6580 CORINFO_CLASS_HANDLE elemType = CORINFO_CLASS_HANDLE(vnStore->ConstantValue<size_t>(funcApp.m_args[0]));
6581 AddModifiedElemTypeAllContainingLoops(mostNestedLoop, elemType);
6582 // Don't set heapHavoc below.
6589 // Is the LHS an array index expression?
6590 else if (lhs->ParseArrayElemForm(this, &arrInfo, &fldSeqArrElem))
6592 // We actually ignore "fldSeq" -- any modification to an S[], at any
6593 // field of "S", will lose all information about the array type.
6594 CORINFO_CLASS_HANDLE elemTypeEq = EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType);
6595 AddModifiedElemTypeAllContainingLoops(mostNestedLoop, elemTypeEq);
6599 // We are only interested in IsFieldAddr()'s fldSeq out parameter.
6601 GenTreePtr obj = nullptr; // unused
6602 GenTreePtr staticOffset = nullptr; // unused
6603 FieldSeqNode* fldSeq = nullptr;
6605 if (arg->IsFieldAddr(this, &obj, &staticOffset, &fldSeq) &&
6606 (fldSeq != FieldSeqStore::NotAField()))
6608 // Get the first (object) field from field seq. Heap[field] will yield the "field map".
6609 assert(fldSeq != nullptr);
6610 if (fldSeq->IsFirstElemFieldSeq())
6612 fldSeq = fldSeq->m_next;
6613 assert(fldSeq != nullptr);
6616 AddModifiedFieldAllContainingLoops(mostNestedLoop, fldSeq->m_fieldHnd);
6624 else if (lhs->OperGet() == GT_CLS_VAR)
6626 AddModifiedFieldAllContainingLoops(mostNestedLoop, lhs->gtClsVar.gtClsVarHnd);
6628 // Otherwise, must be local lhs form. I should assert that.
6629 else if (lhs->OperGet() == GT_LCL_VAR)
6631 GenTreeLclVar* lhsLcl = lhs->AsLclVar();
6632 GenTreePtr rhs = tree->gtOp.gtOp2;
6633 ValueNum rhsVN = rhs->gtVNPair.GetLiberal();
6634 // If we gave the RHS a value number, propagate it.
6635 if (rhsVN != ValueNumStore::NoVN)
6637 rhsVN = vnStore->VNNormVal(rhsVN);
6638 if (!fgExcludeFromSsa(lhsLcl->GetLclNum()))
6640 lvaTable[lhsLcl->GetLclNum()].GetPerSsaData(lhsLcl->GetSsaNum())->m_vnPair.SetLiberal(rhsVN);
6645 else // not GenTree::OperIsAssignment(oper)
6650 tree->gtVNPair = tree->gtOp.gtOp2->gtVNPair;
6654 // Is it an addr of a array index expression?
6656 GenTreePtr addrArg = tree->gtOp.gtOp1;
6657 if (addrArg->OperGet() == GT_IND)
6659 // Is the LHS an array index expression?
6660 if (addrArg->gtFlags & GTF_IND_ARR_INDEX)
6663 bool b = GetArrayInfoMap()->Lookup(addrArg, &arrInfo);
6665 CORINFO_CLASS_HANDLE elemType = EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType);
6666 tree->gtVNPair.SetBoth(vnStore->VNForFunc(TYP_BYREF, VNF_PtrToArrElem,
6667 vnStore->VNForHandle(ssize_t(elemType), GTF_ICON_CLASS_HDL),
6668 // The rest are dummy arguments.
6669 vnStore->VNForNull(),
6670 vnStore->VNForNull(),
6671 vnStore->VNForNull()));
6681 GenTreeLclVarCommon* lclVarTree;
6683 if (!tree->DefinesLocal(this, &lclVarTree, &isEntire))
6685 // For now, assume arbitrary side effects on the heap...
6686 // TODO-CQ: Why not be complete, and get this case right?
6692 case GT_LOCKADD: // Binop
6693 case GT_XADD: // Binop
6694 case GT_XCHG: // Binop
6695 case GT_CMPXCHG: // Specialop
6703 GenTreeCall* call = tree->AsCall();
6705 // Record that this loop contains a call
6706 AddContainsCallAllContainingLoops(mostNestedLoop);
6708 if (call->gtCallType == CT_HELPER)
6710 CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
6711 if (s_helperCallProperties.MutatesHeap(helpFunc))
6715 else if (s_helperCallProperties.MayRunCctor(helpFunc))
6717 // If the call is labeled as "Hoistable", then we've checked the
6718 // class that would be constructed, and it is not precise-init, so
6719 // the cctor will not be run by this call. Otherwise, it might be,
6720 // and might have arbitrary side effects.
6721 if ((tree->gtFlags & GTF_CALL_HOISTABLE) == 0)
6735 // All other gtOper node kinds, leave 'heapHavoc' unchanged (i.e. false)
6744 // Record that all loops containing this block have heap havoc effects.
6745 unsigned lnum = mostNestedLoop;
6746 while (lnum != BasicBlock::NOT_IN_LOOP)
6748 optLoopTable[lnum].lpLoopHasHeapHavoc = true;
6749 lnum = optLoopTable[lnum].lpParent;
6754 // Marks the containsCall information to "lnum" and any parent loops.
6755 void Compiler::AddContainsCallAllContainingLoops(unsigned lnum)
6757 assert(0 <= lnum && lnum < optLoopCount);
6758 while (lnum != BasicBlock::NOT_IN_LOOP)
6760 optLoopTable[lnum].lpContainsCall = true;
6761 lnum = optLoopTable[lnum].lpParent;
6765 // Adds the variable liveness information for 'blk' to 'this' LoopDsc
6766 void Compiler::LoopDsc::AddVariableLiveness(Compiler* comp, BasicBlock * blk)
6768 VarSetOps::UnionD(comp, this->lpVarInOut, blk->bbLiveIn);
6769 VarSetOps::UnionD(comp, this->lpVarInOut, blk->bbLiveOut);
6771 VarSetOps::UnionD(comp, this->lpVarUseDef, blk->bbVarUse);
6772 VarSetOps::UnionD(comp, this->lpVarUseDef, blk->bbVarDef);
6775 // Adds the variable liveness information for 'blk' to "lnum" and any parent loops.
6776 void Compiler::AddVariableLivenessAllContainingLoops(unsigned lnum, BasicBlock * blk)
6778 assert(0 <= lnum && lnum < optLoopCount);
6779 while (lnum != BasicBlock::NOT_IN_LOOP)
6781 optLoopTable[lnum].AddVariableLiveness(this, blk);
6782 lnum = optLoopTable[lnum].lpParent;
6786 // Adds "fldHnd" to the set of modified fields of "lnum" and any parent loops.
6787 void Compiler::AddModifiedFieldAllContainingLoops(unsigned lnum, CORINFO_FIELD_HANDLE fldHnd)
6789 assert(0 <= lnum && lnum < optLoopCount);
6790 while (lnum != BasicBlock::NOT_IN_LOOP)
6792 optLoopTable[lnum].AddModifiedField(this, fldHnd);
6793 lnum = optLoopTable[lnum].lpParent;
6797 // Adds "elemType" to the set of modified array element types of "lnum" and any parent loops.
6798 void Compiler::AddModifiedElemTypeAllContainingLoops(unsigned lnum, CORINFO_CLASS_HANDLE elemClsHnd)
6800 assert(0 <= lnum && lnum < optLoopCount);
6801 while (lnum != BasicBlock::NOT_IN_LOOP)
6803 optLoopTable[lnum].AddModifiedElemType(this, elemClsHnd);
6804 lnum = optLoopTable[lnum].lpParent;
6808 /*****************************************************************************
6810 * Helper passed to Compiler::fgWalkAllTreesPre() to decrement the LclVar usage counts
6811 * The 'keepList'is either a single tree or a list of trees that are formed by
6812 * one or more GT_COMMA nodes. It is the kept side-effects as returned by the
6813 * gtExtractSideEffList method.
6817 Compiler::fgWalkResult Compiler::optRemoveTreeVisitor(GenTreePtr *pTree, fgWalkData *data)
6819 GenTreePtr tree = *pTree;
6820 Compiler * comp = data->compiler;
6821 GenTreePtr keepList = (GenTreePtr)(data->pCallbackData);
6823 // We may have a non-NULL side effect list that is being kept
6827 GenTreePtr keptTree = keepList;
6828 while (keptTree->OperGet() == GT_COMMA)
6830 assert(keptTree->OperKind() & GTK_SMPOP);
6831 GenTreePtr op1 = keptTree->gtOp.gtOp1;
6832 GenTreePtr op2 = keptTree->gtGetOp2();
6834 // For the GT_COMMA case the op1 is part of the orginal CSE tree
6835 // that is being kept because it contains some side-effect
6839 // This tree and all of its sub trees are being kept.
6840 return WALK_SKIP_SUBTREES;
6843 // For the GT_COMMA case the op2 are the remaining side-effects of the orginal CSE tree
6844 // which can again be another GT_COMMA or the final side-effect part
6848 if (tree == keptTree)
6850 // This tree and all of its sub trees are being kept.
6851 return WALK_SKIP_SUBTREES;
6855 // This node is being removed from the graph of GenTreePtr
6857 // Look for any local variable references
6859 if (tree->gtOper == GT_LCL_VAR && comp->lvaLocalVarRefCounted)
6864 /* This variable ref is going away, decrease its ref counts */
6866 lclNum = tree->gtLclVarCommon.gtLclNum;
6867 assert(lclNum < comp->lvaCount);
6868 varDsc = comp->lvaTable + lclNum;
6870 // make sure it's been initialized
6871 assert(comp->compCurBB != nullptr);
6872 assert(comp->compCurBB->bbWeight <= BB_MAX_WEIGHT);
6874 /* Decrement its lvRefCnt and lvRefCntWtd */
6876 // Use getBBWeight to determine the proper block weight.
6877 // This impacts the block weights when we have IBC data.
6878 varDsc->decRefCnts(comp->compCurBB->getBBWeight(comp), comp);
6881 return WALK_CONTINUE;
6884 /*****************************************************************************
6886 * Routine called to decrement the LclVar ref counts when removing a tree
6887 * during the remove RangeCheck phase.
6888 * This method will decrement the refcounts for any LclVars used below 'deadTree',
6889 * unless the node is found in the 'keepList' (which are saved side effects)
6890 * The keepList is communicated using the walkData.pCallbackData field
6891 * Also the compCurBB must be set to the current BasicBlock which contains
6892 * 'deadTree' as we need to fetch the block weight when decrementing the ref counts.
6895 void Compiler::optRemoveTree(GenTreePtr deadTree, GenTreePtr keepList)
6897 // We communicate this value using the walkData.pCallbackData field
6899 fgWalkTreePre(&deadTree, optRemoveTreeVisitor, (void*)keepList);
6902 /*****************************************************************************
6904 * Given an array index node, mark it as not needing a range check.
6907 void Compiler::optRemoveRangeCheck(GenTreePtr tree,
6909 bool updateCSEcounts,
6910 unsigned sideEffFlags,
6927 noway_assert(!"can't remove range checks without REARRANGE_ADDS right now");
6930 noway_assert(stmt->gtOper == GT_STMT);
6931 noway_assert(tree->gtOper == GT_COMMA);
6932 noway_assert(tree->gtOp.gtOp1->gtOper == GT_ARR_BOUNDS_CHECK);
6933 noway_assert(forceRemove || optIsRangeCheckRemovable(tree->gtOp.gtOp1));
6935 GenTreeBoundsChk* bndsChk = tree->gtOp.gtOp1->AsBoundsChk();
6940 printf("Before optRemoveRangeCheck:\n");
6945 GenTreePtr sideEffList = nullptr;
6948 gtExtractSideEffList(tree->gtOp.gtOp1, &sideEffList, sideEffFlags);
6951 // Decrement the ref counts for any LclVars that are being deleted
6953 optRemoveTree(tree->gtOp.gtOp1, sideEffList);
6955 // Just replace the bndsChk with a NOP as an operand to the GT_COMMA, if there are no side effects.
6956 tree->gtOp.gtOp1 = (sideEffList != NULL) ? sideEffList : gtNewNothingNode();
6958 // TODO-CQ: We should also remove the GT_COMMA, but in any case we can no longer CSE the GT_COMMA.
6959 tree->gtFlags |= GTF_DONT_CSE;
6961 /* Recalculate the gtCostSz, etc... */
6962 gtSetStmtInfo(stmt);
6964 /* Re-thread the nodes if necessary */
6965 if (fgStmtListThreaded)
6973 printf("After optRemoveRangeCheck:\n");
6980 /*****************************************************************************
6981 * Return the scale in an array reference, given a pointer to the
6982 * multiplication node.
6985 ssize_t Compiler::optGetArrayRefScaleAndIndex(GenTreePtr mul, GenTreePtr *pIndex DEBUGARG(bool bRngChk))
6988 assert (mul->gtOper == GT_MUL || mul->gtOper == GT_LSH);
6989 assert (mul->gtOp.gtOp2->IsCnsIntOrI());
6991 ssize_t scale = mul->gtOp.gtOp2->gtIntConCommon.IconValue();
6993 if (mul->gtOper == GT_LSH)
6994 scale = ((ssize_t)1) << scale;
6996 GenTreePtr index = mul->gtOp.gtOp1;
6998 if (index->gtOper == GT_MUL && index->gtOp.gtOp2->IsCnsIntOrI())
7000 // case of two cascading multiplications for constant int (e.g. * 20 morphed to * 5 * 4):
7001 // When index->gtOper is GT_MUL and index->gtOp.gtOp2->gtOper is GT_CNS_INT (i.e. * 5),
7002 // we can bump up the scale from 4 to 5*4, and then change index to index->gtOp.gtOp1.
7003 // Otherwise, we cannot optimize it. We will simply keep the original scale and index.
7004 scale *= index->gtOp.gtOp2->gtIntConCommon.IconValue();
7005 index = index->gtOp.gtOp1;
7008 assert (!bRngChk || index->gtOper != GT_COMMA);
7016 /*****************************************************************************
7017 * Find the last assignment to of the local variable in the block. Return
7018 * RHS or NULL. If any local variable in the RHS has been killed in
7019 * intervening code, return NULL. If the variable being searched for is killed
7020 * in the intervening code, return NULL.
7024 GenTreePtr Compiler::optFindLocalInit(BasicBlock *block, GenTreePtr local, VARSET_TP* pKilledInOut, bool* pLhsRhsKilledAfterInit)
7026 assert(pKilledInOut);
7027 assert(pLhsRhsKilledAfterInit);
7029 *pLhsRhsKilledAfterInit = false;
7031 unsigned LclNum = local->gtLclVarCommon.gtLclNum;
7033 GenTreePtr list = block->bbTreeList;
7039 GenTreePtr rhs = NULL;
7040 GenTreePtr stmt = list;
7043 stmt = stmt->gtPrev;
7049 GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
7050 // If we encounter an assignment to a local variable,
7051 if ((tree->OperKind() & GTK_ASGOP) && tree->gtOp.gtOp1->gtOper == GT_LCL_VAR)
7053 // And the assigned variable equals the input local,
7054 if (tree->gtOp.gtOp1->gtLclVarCommon.gtLclNum == LclNum)
7056 // If the assignment is '=' and it is not a conditional, then return rhs.
7057 if (tree->gtOper == GT_ASG && !(tree->gtFlags & GTF_COLON_COND))
7059 rhs = tree->gtOp.gtOp2;
7061 // If the assignment is 'op=' or a conditional equal, then the search ends here,
7062 // as we found a kill to the input local.
7065 *pLhsRhsKilledAfterInit = true;
7066 assert(rhs == NULL);
7072 LclVarDsc* varDsc = optIsTrackedLocal(tree->gtOp.gtOp1);
7077 VarSetOps::AddElemD(this, *pKilledInOut, varDsc->lvVarIndex);
7081 while (stmt != list);
7088 // If any local in the RHS is killed in intervening code, or RHS has an indirection, return NULL.
7089 varRefKinds rhsRefs = VR_NONE;
7090 VARSET_TP VARSET_INIT_NOCOPY(rhsLocals, VarSetOps::UninitVal());
7091 bool b = lvaLclVarRefs(rhs, NULL, &rhsRefs, &rhsLocals);
7093 !VarSetOps::IsEmptyIntersection(this, rhsLocals, *pKilledInOut) ||
7094 (rhsRefs != VR_NONE))
7096 // If RHS has been indirectly referenced, consider it a write and a kill.
7097 *pLhsRhsKilledAfterInit = true;
7104 /*****************************************************************************
7106 * Return true if "op1" is guaranteed to be less then or equal to "op2".
7111 bool Compiler::optIsNoMore(GenTreePtr op1, GenTreePtr op2,
7114 if (op1->gtOper == GT_CNS_INT &&
7115 op2->gtOper == GT_CNS_INT)
7117 add1 += op1->gtIntCon.gtIconVal;
7118 add2 += op2->gtIntCon.gtIconVal;
7122 /* Check for +/- constant on either operand */
7124 if (op1->gtOper == GT_ADD && op1->gtOp.gtOp2->gtOper == GT_CNS_INT)
7126 add1 += op1->gtOp.gtOp2->gtIntCon.gtIconVal;
7127 op1 = op1->gtOp.gtOp1;
7130 if (op2->gtOper == GT_ADD && op2->gtOp.gtOp2->gtOper == GT_CNS_INT)
7132 add2 += op2->gtOp.gtOp2->gtIntCon.gtIconVal;
7133 op2 = op2->gtOp.gtOp1;
7136 /* We only allow local variable references */
7138 if (op1->gtOper != GT_LCL_VAR)
7140 if (op2->gtOper != GT_LCL_VAR)
7142 if (op1->gtLclVarCommon.gtLclNum != op2->gtLclVarCommon.gtLclNum)
7145 /* NOTE: Caller ensures that this variable has only one def */
7147 // printf("limit [%d]:\n", add1); gtDispTree(op1);
7148 // printf("size [%d]:\n", add2); gtDispTree(op2);
7153 return (bool)(add1 <= add2);
7158 //------------------------------------------------------------------------------
7159 // optObtainLoopCloningOpts: Identify optimization candidates and update
7160 // the "context" for array optimizations.
7163 // context - data structure where all loop cloning info is kept. The
7164 // optInfo fields of the context are updated with the
7165 // identified optimization candidates.
7167 void Compiler::optObtainLoopCloningOpts(LoopCloneContext* context)
7169 for (unsigned i = 0; i < optLoopCount; i++)
7171 JITDUMP("Considering loop %d to clone for optimizations.\n", i);
7172 if (optIsLoopClonable(i))
7174 if (!(optLoopTable[i].lpFlags & LPFLG_REMOVED))
7176 optIdentifyLoopOptInfo(i, context);
7179 JITDUMP("------------------------------------------------------------\n");
7184 //------------------------------------------------------------------------
7185 // optIdentifyLoopOptInfo: Identify loop optimization candidates an also
7186 // check if the loop is suitable for the optimizations performed.
7189 // loopNum - the current loop index for which conditions are derived.
7190 // context - data structure where all loop cloning candidates will be
7194 // If the loop is not suitable for the optimizations, return false - context
7195 // should not contain any optimization candidate for the loop if false.
7196 // Else return true.
7199 // Check if the loop is well formed for this optimization and identify the
7200 // optimization candidates and update the "context" parameter with all the
7201 // contextual information necessary to perform the optimization later.
7203 bool Compiler::optIdentifyLoopOptInfo(unsigned loopNum, LoopCloneContext* context)
7205 noway_assert(loopNum < optLoopCount);
7207 LoopDsc* pLoop = &optLoopTable[loopNum];
7209 if (!(pLoop->lpFlags & LPFLG_ITER))
7211 JITDUMP("> No iter flag on loop %d.\n", loopNum);
7215 unsigned ivLclNum = pLoop->lpIterVar();
7216 if (lvaVarAddrExposed(ivLclNum))
7218 JITDUMP("> Rejected V%02u as iter var because is address-exposed.\n", ivLclNum);
7222 BasicBlock* head = pLoop->lpHead;
7223 BasicBlock* end = pLoop->lpBottom;
7224 BasicBlock* beg = head->bbNext;
7226 if (end->bbJumpKind != BBJ_COND)
7228 JITDUMP("> Couldn't find termination test.\n");
7232 if (end->bbJumpDest != beg)
7234 JITDUMP("> Branch at loop 'end' not looping to 'begin'.\n");
7238 // TODO-CQ: CLONE: Mark increasing or decreasing loops.
7239 if ((pLoop->lpIterOper() != GT_ASG_ADD && pLoop->lpIterOper() != GT_ADD) || (pLoop->lpIterConst() != 1))
7241 JITDUMP("> Loop iteration operator not matching\n");
7245 if ((pLoop->lpFlags & LPFLG_CONST_LIMIT) == 0 &&
7246 (pLoop->lpFlags & LPFLG_VAR_LIMIT) == 0 &&
7247 (pLoop->lpFlags & LPFLG_ARRLEN_LIMIT) == 0)
7249 JITDUMP("> Loop limit is neither constant, variable or array length\n");
7253 if (!(((pLoop->lpTestOper() == GT_LT || pLoop->lpTestOper() == GT_LE) && (pLoop->lpIterOper() == GT_ADD || pLoop->lpIterOper() == GT_ASG_ADD)) ||
7254 ((pLoop->lpTestOper() == GT_GT || pLoop->lpTestOper() == GT_GE) && (pLoop->lpIterOper() == GT_SUB || pLoop->lpIterOper() == GT_ASG_SUB))))
7256 JITDUMP("> Loop test (%s) doesn't agree with the direction (%s) of the pLoop->\n",
7257 GenTree::NodeName(pLoop->lpTestOper()), GenTree::NodeName(pLoop->lpIterOper()));
7261 if (!(pLoop->lpTestTree->OperKind() & GTK_RELOP) || !(pLoop->lpTestTree->gtFlags & GTF_RELOP_ZTT))
7263 JITDUMP("> Loop inversion NOT present, loop test [%06u] may not protect entry from head.\n", pLoop->lpTestTree->gtTreeID);
7268 GenTreePtr op1 = pLoop->lpIterator();
7269 noway_assert((op1->gtOper == GT_LCL_VAR) && (op1->gtLclVarCommon.gtLclNum == ivLclNum));
7272 JITDUMP("Checking blocks BB%02d..BB%02d for optimization candidates\n", beg->bbNum, end->bbNext ? end->bbNext->bbNum : 0);
7274 LoopCloneVisitorInfo info(context, loopNum, nullptr);
7275 for (BasicBlock* block = beg; block != end->bbNext; block = block->bbNext)
7278 for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
7281 fgWalkTreePre(&stmt->gtStmt.gtStmtExpr, optCanOptimizeByLoopCloningVisitor, &info, false, false);
7288 //---------------------------------------------------------------------------------------------------------------
7289 // optExtractArrIndex: Try to extract the array index from "tree".
7292 // tree the tree to be checked if it is the array [] operation.
7293 // result the extracted GT_INDEX information is updated in result.
7294 // lhsNum for the root level (function is recursive) callers should be BAD_VAR_NUM.
7297 // Returns true if array index can be extracted, else, return false. See assumption about
7298 // what will be extracted. The "result" variable's rank parameter is advanced for every
7299 // dimension of [] encountered.
7302 // Given a "tree" extract the GT_INDEX node in "result" as ArrIndex. In FlowGraph morph
7303 // we have converted a GT_INDEX tree into a scaled index base offset expression. We need
7304 // to reconstruct this to be able to know if this is an array access.
7307 // The method extracts only if the array base and indices are GT_LCL_VAR.
7309 // TODO-CQ: CLONE: After morph make sure this method extracts values before morph.
7311 // [000000001AF828D8] ---XG------- indir int
7312 // [000000001AF872C8] ------------ const long 16 Fseq[#FirstElem]
7313 // [000000001AF87340] ------------ + byref
7314 // [000000001AF87160] -------N---- const long 2
7315 // [000000001AF871D8] ------------ << long
7316 // [000000001AF870C0] ------------ cast long <- int
7317 // [000000001AF86F30] i----------- lclVar int V04 loc0
7318 // [000000001AF87250] ------------ + byref
7319 // [000000001AF86EB8] ------------ lclVar ref V01 arg1
7320 // [000000001AF87468] ---XG------- comma int
7321 // [000000001AF87020] ---X-------- arrBndsChk void
7322 // [000000001AF86FA8] ---X-------- arrLen int
7323 // [000000001AF827E8] ------------ lclVar ref V01 arg1
7324 // [000000001AF82860] ------------ lclVar int V04 loc0
7325 // [000000001AF829F0] -A-XG------- = int
7326 // [000000001AF82978] D------N---- lclVar int V06 tmp0
7328 bool Compiler::optExtractArrIndex(GenTreePtr tree, ArrIndex* result, unsigned lhsNum)
7330 if (tree->gtOper != GT_COMMA)
7334 GenTreePtr before = tree->gtGetOp1();
7335 if (before->gtOper != GT_ARR_BOUNDS_CHECK)
7339 GenTreeBoundsChk* arrBndsChk = before->AsBoundsChk();
7340 if (arrBndsChk->gtArrLen->gtGetOp1()->gtOper != GT_LCL_VAR)
7344 if (arrBndsChk->gtIndex->gtOper != GT_LCL_VAR)
7348 unsigned arrLcl = arrBndsChk->gtArrLen->gtGetOp1()->gtLclVarCommon.gtLclNum;
7349 if (lhsNum != BAD_VAR_NUM && arrLcl != lhsNum)
7354 unsigned indLcl = arrBndsChk->gtIndex->gtLclVarCommon.gtLclNum;
7356 GenTreePtr after = tree->gtGetOp2();
7358 if (after->gtOper != GT_IND)
7362 GenTreePtr sibo = after->gtGetOp1();
7363 if (sibo->gtOper != GT_ADD)
7367 GenTreePtr sib = sibo->gtGetOp1();
7368 GenTreePtr ofs = sibo->gtGetOp2();
7369 if (ofs->gtOper != GT_CNS_INT)
7373 if (sib->gtOper != GT_ADD)
7377 GenTreePtr si = sib->gtGetOp2();
7378 GenTreePtr base = sib->gtGetOp1();
7379 if (si->gtOper != GT_LSH)
7383 if (base->OperGet() != GT_LCL_VAR || base->gtLclVarCommon.gtLclNum != arrLcl)
7387 GenTreePtr scale = si->gtGetOp2();
7388 GenTreePtr index = si->gtGetOp1();
7389 if (scale->gtOper != GT_CNS_INT)
7393 #ifdef _TARGET_AMD64_
7394 if (index->gtOper != GT_CAST)
7398 GenTreePtr indexVar = index->gtGetOp1();
7400 GenTreePtr indexVar = index;
7402 if (indexVar->gtOper != GT_LCL_VAR || indexVar->gtLclVarCommon.gtLclNum != indLcl)
7406 if (lhsNum == BAD_VAR_NUM)
7408 result->arrLcl = arrLcl;
7410 result->indLcls.Push(indLcl);
7411 result->bndsChks.Push(tree);
7412 result->useBlock = compCurBB;
7418 //---------------------------------------------------------------------------------------------------------------
7419 // optReconstructArrIndex: Reconstruct array index.
7422 // tree the tree to be checked if it is an array [][][] operation.
7423 // result the extracted GT_INDEX information.
7424 // lhsNum for the root level (function is recursive) callers should be BAD_VAR_NUM.
7427 // Returns true if array index can be extracted, else, return false. "rank" field in
7428 // "result" contains the array access depth. The "indLcls" fields contain the indices.
7431 // Recursively look for a list of array indices. In the example below, we encounter,
7432 // V03 = ((V05 = V00[V01]), (V05[V02])) which corresponds to access of V00[V01][V02]
7433 // The return value would then be:
7434 // ArrIndex result { arrLcl: V00, indLcls: [V01, V02], rank: 2 }
7436 // V00[V01][V02] would be morphed as:
7438 // [000000001B366848] ---XG------- indir int
7439 // [000000001B36BC50] ------------ V05 + (V02 << 2) + 16
7440 // [000000001B36C200] ---XG------- comma int
7441 // [000000001B36BDB8] ---X-------- arrBndsChk(V05, V02)
7442 // [000000001B36C278] -A-XG------- comma int
7443 // [000000001B366730] R--XG------- indir ref
7444 // [000000001B36C2F0] ------------ V00 + (V01 << 3) + 24
7445 // [000000001B36C818] ---XG------- comma ref
7446 // [000000001B36C458] ---X-------- arrBndsChk(V00, V01)
7447 // [000000001B36BB60] -A-XG------- = ref
7448 // [000000001B36BAE8] D------N---- lclVar ref V05 tmp2
7449 // [000000001B36A668] -A-XG------- = int
7450 // [000000001B36A5F0] D------N---- lclVar int V03 tmp0
7453 // The method extracts only if the array base and indices are GT_LCL_VAR.
7455 bool Compiler::optReconstructArrIndex(GenTreePtr tree, ArrIndex* result, unsigned lhsNum)
7457 // If we can extract "tree" (which is a top level comma) return.
7458 if (optExtractArrIndex(tree, result, lhsNum))
7462 // We have a comma (check if array base expr is computed in "before"), descend further.
7463 else if (tree->OperGet() == GT_COMMA)
7465 GenTreePtr before = tree->gtGetOp1();
7466 // "before" should evaluate an array base for the "after" indexing.
7467 if (before->OperGet() != GT_ASG)
7471 GenTreePtr lhs = before->gtGetOp1();
7472 GenTreePtr rhs = before->gtGetOp2();
7474 // "rhs" should contain an GT_INDEX
7475 if (!lhs->IsLocal() || !optReconstructArrIndex(rhs, result, lhsNum))
7479 unsigned lhsNum = lhs->gtLclVarCommon.gtLclNum;
7480 GenTreePtr after = tree->gtGetOp2();
7481 // Pass the "lhsNum", so we can verify if indeed it is used as the array base.
7482 return optExtractArrIndex(after, result, lhsNum);
7488 Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloningVisitor(GenTreePtr* pTree, Compiler::fgWalkData* data)
7490 return data->compiler->optCanOptimizeByLoopCloning(*pTree, (LoopCloneVisitorInfo*) data->pCallbackData);
7494 //-------------------------------------------------------------------------
7495 // optIsStackLocalInvariant: Is stack local invariant in loop.
7498 // loopNum The loop in which the variable is tested for invariance.
7499 // lclNum The local that is tested for invariance in the loop.
7502 // Returns true if the variable is loop invariant in loopNum.
7504 bool Compiler::optIsStackLocalInvariant(unsigned loopNum, unsigned lclNum)
7506 if (lvaVarAddrExposed(lclNum))
7510 if (optIsVarAssgLoop(loopNum, lclNum))
// NOTE(review): this listing is an elided excerpt (original line numbers are
// embedded and some lines are missing); the comments below describe only the
// logic visible here.
7517 //----------------------------------------------------------------------------------------------
7518 // optCanOptimizeByLoopCloning: Check if the tree can be optimized by loop cloning and if so,
7519 // identify as potential candidate and update the loop context.
7522 // tree The tree encountered during the tree walk.
7523 // info Supplies information about the current block or stmt in which the tree is.
7524 // Also supplies the "context" pointer for updating with loop cloning
7525 // candidates. Also supplies loopNum.
7528 // If array index can be reconstructed, check if the iter var of the loop matches the
7529 // array index var in some dim. Also ensure other index vars before the identified
7530 // dim are loop invariant.
7533 // Skip sub trees if the optimization candidate is identified or else continue walking
7535 Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTreePtr tree, LoopCloneVisitorInfo* info)
7537 ArrIndex arrIndex(getAllocator());
7539 // Check if array index can be optimized.
// Try to recognize this tree as a (possibly multi-dimensional jagged) array
// index expression rooted at a GT_COMMA produced by morph.
7540 if (optReconstructArrIndex(tree, &arrIndex, BAD_VAR_NUM))
7542 assert(tree->gtOper == GT_COMMA);
7546 JITDUMP("Found ArrIndex at tree ");
7548 printf(" which is equivalent to: ");
// The array object itself must be loop invariant, otherwise the cloned
// bounds checks could be defeated by reassignment of the array local.
7553 if (!optIsStackLocalInvariant(info->loopNum, arrIndex.arrLcl))
7555 return WALK_SKIP_SUBTREES;
7558 // Walk the dimensions and see if iterVar of the loop is used as index.
7559 for (unsigned dim = 0; dim < arrIndex.rank; ++dim)
7561 // Is index variable also used as the loop iter var.
7562 if (arrIndex.indLcls[dim] == optLoopTable[info->loopNum].lpIterVar())
7564 // Check the previous indices are all loop invariant.
7565 for (unsigned dim2 = 0; dim2 < dim; ++dim2)
7567 if (optIsVarAssgLoop(info->loopNum, arrIndex.indLcls[dim2]))
7569 JITDUMP("V%02d is assigned in loop\n", arrIndex.indLcls[dim2]);
7570 return WALK_SKIP_SUBTREES;
7576 JITDUMP("Loop %d can be cloned for ArrIndex ", info->loopNum);
7578 JITDUMP(" on dim %d\n", dim);
7581 // Update the loop context.
// Record this candidate so the loop-cloning phase can emit the fast/slow
// path versions of the loop later.
7582 info->context->EnsureLoopOptInfo(info->loopNum)->Push(new (this, CMK_LoopOpt) LcJaggedArrayOptInfo(arrIndex, dim, info->stmt));
7586 JITDUMP("Induction V%02d is not used as index on dim %d\n", optLoopTable[info->loopNum].lpIterVar(), dim);
// Once an array index has been analyzed (candidate or not), there is no
// need to walk into its sub-trees again.
7589 return WALK_SKIP_SUBTREES;
7591 else if (tree->gtOper == GT_ARR_ELEM)
7593 // TODO-CQ: CLONE: Implement.
7594 return WALK_SKIP_SUBTREES;
7596 return WALK_CONTINUE;
7599 struct optRangeCheckDsc
7601 Compiler* pCompiler;
7605 Walk to make sure that only locals and constants are contained in the index
7608 Compiler::fgWalkResult Compiler::optValidRangeCheckIndex(GenTreePtr * pTree, fgWalkData *data)
7610 GenTreePtr tree = *pTree;
7611 optRangeCheckDsc* pData= (optRangeCheckDsc*) data->pCallbackData;
7613 if (tree->gtOper == GT_IND || tree->gtOper == GT_CLS_VAR ||
7614 tree->gtOper == GT_FIELD || tree->gtOper == GT_LCL_FLD)
7616 pData->bValidIndex = false;
7620 if (tree->gtOper == GT_LCL_VAR)
7622 if (pData->pCompiler->lvaTable[tree->gtLclVarCommon.gtLclNum].lvAddrExposed)
7624 pData->bValidIndex = false;
7629 return WALK_CONTINUE;
// NOTE(review): elided listing - several early-return braces and DEBUG-only
// lines are missing from this excerpt; comments describe the visible logic.
7633 returns true if a range check can legally be removed (for the moment it checks
7634 that the array is a local array (non subject to racing conditions) and that the
7635 index is either a constant or a local
7637 bool Compiler::optIsRangeCheckRemovable(GenTreePtr tree)
7639 noway_assert(tree->gtOper == GT_ARR_BOUNDS_CHECK);
7640 GenTreeBoundsChk* bndsChk = tree->AsBoundsChk();
7641 GenTreePtr pArray = bndsChk->GetArray();
// If there is no array node, the length must be a compile-time constant,
// otherwise the check cannot be reasoned about.
7642 if (pArray == NULL && !bndsChk->gtArrLen->IsCnsIntOrI())
7646 GenTreePtr pIndex = bndsChk->gtIndex;
7648 // The length must be a constant (the pArray == NULL case) or the array reference must be a local.
7649 // Otherwise we can be targeted by malicious race-conditions.
7652 if ( pArray->gtOper != GT_LCL_VAR )
7658 printf("Can't remove range check if the array isn't referenced with a local\n");
7666 noway_assert(pArray->gtType == TYP_REF);
7667 noway_assert(pArray->gtLclVarCommon.gtLclNum < lvaCount);
7669 if (lvaTable[pArray->gtLclVarCommon.gtLclNum].lvAddrExposed)
7671 // If the array address has been taken, don't do the optimization
7672 // (this restriction can be lowered a bit, but i don't think it's worth it)
7673 CLANG_FORMAT_COMMENT_ANCHOR;
7677 printf("Can't remove range check if the array has its address taken\n");
// Walk the index expression and make sure it contains only constants and
// non-address-exposed locals (see optValidRangeCheckIndex).
7687 optRangeCheckDsc Data;
7688 Data.pCompiler =this;
7689 Data.bValidIndex=true;
7691 fgWalkTreePre(&pIndex, optValidRangeCheckIndex, &Data);
7693 if (!Data.bValidIndex)
7698 printf("Can't remove range check with this index");
7710 /******************************************************************************
7712 * Replace x==null with (x|x)==0 if x is a GC-type.
7713 * This will stress code-gen and the emitter to make sure they support such trees.
// NOTE(review): elided listing - the early-return lines after the guards
// below are missing from this excerpt.
7718 void Compiler::optOptimizeBoolsGcStress(BasicBlock * condBlock)
// Only transform under the STRESS_OPT_BOOLS_GC stress mode (20% weight).
7720 if (!compStressCompile(STRESS_OPT_BOOLS_GC, 20))
7723 noway_assert(condBlock->bbJumpKind == BBJ_COND);
// The conditional jump is the last statement of the block.
7724 GenTreePtr condStmt = condBlock->bbTreeList->gtPrev->gtStmt.gtStmtExpr;
7726 noway_assert(condStmt->gtOper == GT_JTRUE);
7731 GenTreePtr comparand = optIsBoolCond(condStmt, &relop, &isBool);
// Bail unless this compares a GC-typed value (object/byref) against 0/1.
7733 if (comparand == NULL || !varTypeIsGC(comparand->TypeGet()))
// Duplicating a tree with side effects would change behavior - skip.
7736 if (comparand->gtFlags & (GTF_ASG|GTF_CALL|GTF_ORDER_SIDEEFF))
7739 GenTreePtr comparandClone = gtCloneExpr(comparand);
7741 // Bump up the ref-counts of any variables in 'comparandClone'
7742 compCurBB = condBlock;
7743 fgWalkTreePre(&comparandClone, Compiler::lvaIncRefCntsCB, (void *)this, true);
// Rewrite "x ==/!= 0" as "(x|x) ==/!= 0" (or "(x&x)" half the time) to
// stress codegen handling of GC pointers under logical operators.
7745 noway_assert(relop->gtOp.gtOp1 == comparand);
7746 genTreeOps oper = compStressCompile(STRESS_OPT_BOOLS_GC, 50) ? GT_OR : GT_AND;
7747 relop->gtOp.gtOp1 = gtNewOperNode(oper, TYP_I_IMPL, comparand, comparandClone);
7749 // Comparand type is already checked, and we have const int, there is no harm
7750 // morphing it into a TYP_I_IMPL.
7751 noway_assert(relop->gtOp.gtOp2->gtOper == GT_CNS_INT);
7752 relop->gtOp.gtOp2->gtType = TYP_I_IMPL;
7757 /******************************************************************************
7758 * Function used by folding of boolean conditionals
7759 * Given a GT_JTRUE node, checks that it is a boolean comparison of the form
7760 * "if (boolVal ==/!= 0/1)". This is translated into a GT_EQ node with "op1"
7761 * being a boolean lclVar and "op2" the const 0/1.
7762 * On success, the comparand (ie. boolVal) is returned. Else NULL.
7763 * compPtr returns the compare node (i.e. GT_EQ or GT_NE node)
7764 * boolPtr returns whether the comparand is a boolean value (must be 0 or 1).
7765 * When return boolPtr equal to true, if the comparison was against a 1 (i.e true)
7766 * value then we morph the tree by reversing the GT_EQ/GT_NE and change the 1 to 0.
// NOTE(review): elided listing - the NULL-return lines, the stores through
// compPtr/boolPtr and the final return are missing from this excerpt.
7769 GenTree * Compiler::optIsBoolCond(GenTree * condBranch,
7770 GenTree * * compPtr,
7773 bool isBool = false;
7775 noway_assert(condBranch->gtOper == GT_JTRUE);
7776 GenTree * cond = condBranch->gtOp.gtOp1;
7778 /* The condition must be "!= 0" or "== 0" */
7780 if ((cond->gtOper != GT_EQ) && (cond->gtOper != GT_NE))
7783 /* Return the compare node to the caller */
7787 /* Get hold of the comparands */
7789 GenTree * opr1 = cond->gtOp.gtOp1;
7790 GenTree * opr2 = cond->gtOp.gtOp2;
// Only integral-constant right operands of value 0 or 1 qualify.
7792 if (opr2->gtOper != GT_CNS_INT)
7795 if (!opr2->IsIntegralConst(0) && !opr2->IsIntegralConst(1))
7798 ssize_t ival2 = opr2->gtIntCon.gtIconVal;
7800 /* Is the value a boolean?
7801 * We can either have a boolean expression (marked GTF_BOOLEAN) or
7802 * a local variable that is marked as being boolean (lvIsBoolean) */
7804 if (opr1->gtFlags & GTF_BOOLEAN)
7808 else if ((opr1->gtOper == GT_CNS_INT) &&
7809 (opr1->IsIntegralConst(0) || opr1->IsIntegralConst(1)))
7813 else if (opr1->gtOper == GT_LCL_VAR)
7815 /* is it a boolean local variable */
7817 unsigned lclNum = opr1->gtLclVarCommon.gtLclNum;
7818 noway_assert(lclNum < lvaCount);
7820 if (lvaTable[lclNum].lvIsBoolean)
7824 /* Was our comparison against the constant 1 (i.e. true) */
7827 // If this is a boolean expression tree we can reverse the relop
7828 // and change the true to false.
// Normalize "boolVal ==/!= 1" into the equivalent "!=/== 0" form so that
// callers only have to reason about comparisons against zero.
7831 gtReverseCond(cond);
7832 opr2->gtIntCon.gtIconVal = 0;
// Folds pairs of adjacent BBJ_COND blocks whose conditions are boolean
// comparisons against zero into a single conditional block using GT_OR /
// GT_AND, then removes the second block and repairs flow/pred/EH/loop data.
// NOTE(review): elided listing - many control-flow lines (continue/goto,
// braces, DEBUG sections) are missing from this excerpt; comments describe
// only the visible logic.
7843 void Compiler::optOptimizeBools()
7848 printf("*************** In optOptimizeBools()\n");
7851 printf("Blocks/Trees before phase\n");
7852 fgDispBasicBlocks(true);
// Scan consecutive block pairs (b1, b2 = b1->bbNext) for foldable shapes.
7862 for (BasicBlock * b1 = fgFirstBB; b1; b1 = b1->bbNext)
7864 /* We're only interested in conditional jumps here */
7866 if (b1->bbJumpKind != BBJ_COND)
7869 /* If there is no next block, we're done */
7871 BasicBlock * b2 = b1->bbNext;
7875 /* The next block must not be marked as BBF_DONT_REMOVE */
7876 if (b2->bbFlags & BBF_DONT_REMOVE)
7879 /* The next block also needs to be a condition */
7881 if (b2->bbJumpKind != BBJ_COND)
7884 optOptimizeBoolsGcStress(b1);
7889 bool sameTarget; // Do b1 and b2 have the same bbJumpDest?
7891 if (b1->bbJumpDest == b2->bbJumpDest)
7893 /* Given the following sequence of blocks :
7897 we wil try to fold it to :
7898 B1: brtrue(t1|t2, BX)
7904 else if (b1->bbJumpDest == b2->bbNext) /*b1->bbJumpDest->bbNum == n1+2*/
7906 /* Given the following sequence of blocks :
7910 we will try to fold it to :
7911 B1: brtrue((!t1)&&t2, B3)
7922 /* The second block must contain a single statement */
7924 GenTreePtr s2 = b2->bbTreeList;
7925 if (s2->gtPrev != s2)
7928 noway_assert(s2->gtOper == GT_STMT);
7929 GenTreePtr t2 = s2->gtStmt.gtStmtExpr;
7930 noway_assert(t2->gtOper == GT_JTRUE);
7932 /* Find the condition for the first block */
7934 GenTreePtr s1 = b1->bbTreeList->gtPrev;
7936 noway_assert(s1->gtOper == GT_STMT);
7937 GenTreePtr t1 = s1->gtStmt.gtStmtExpr;
7938 noway_assert(t1->gtOper == GT_JTRUE);
// b2 must have a single predecessor (b1); otherwise removing it would
// change the behavior of the other paths that reach it.
7940 if (b2->countOfInEdges() > 1)
7943 /* Find the branch conditions of b1 and b2 */
7947 GenTreePtr c1 = optIsBoolCond(t1, &t1, &bool1);
7950 GenTreePtr c2 = optIsBoolCond(t2, &t2, &bool2);
// NOTE(review): '&&' binds tighter than '||', so these asserts parse as
// "GT_EQ || (GT_NE && op1 == c1)" - the op1 operand is not checked in the
// GT_EQ case. Likely intended: (t1->gtOper == GT_EQ || t1->gtOper == GT_NE)
// && t1->gtOp.gtOp1 == c1. Harmless at runtime (assert is weaker than
// intended), but worth confirming/fixing upstream.
7953 noway_assert(t1->gtOper == GT_EQ || t1->gtOper == GT_NE && t1->gtOp.gtOp1 == c1);
7954 noway_assert(t2->gtOper == GT_EQ || t2->gtOper == GT_NE && t2->gtOp.gtOp1 == c2);
7956 // Leave out floats where the bit-representation is more complicated
7957 // - there are two representations for 0.
7959 if (varTypeIsFloating(c1->TypeGet()) || varTypeIsFloating(c2->TypeGet()))
7962 // Make sure the types involved are of the same sizes
7963 if (genTypeSize(c1->TypeGet()) != genTypeSize(c2->TypeGet()))
7965 if (genTypeSize(t1->TypeGet()) != genTypeSize(t2->TypeGet()))
7967 #ifdef _TARGET_ARMARCH_
7968 // Skip the small operand which we cannot encode.
7969 if (varTypeIsSmall(c1->TypeGet()))
7972 /* The second condition must not contain side effects */
// c2 will be evaluated unconditionally after folding, so it must be
// side-effect free and cheap.
7974 if (c2->gtFlags & GTF_GLOB_EFFECT)
7977 /* The second condition must not be too expensive */
7981 if (c2->gtCostEx > 12)
7986 var_types foldType = c1->TypeGet();
7987 if (varTypeIsGC(foldType))
7989 foldType = TYP_I_IMPL;
7994 /* Both conditions must be the same */
7996 if (t1->gtOper != t2->gtOper)
7999 if (t1->gtOper == GT_EQ)
8001 /* t1:c1==0 t2:c2==0 ==> Branch to BX if either value is 0
8002 So we will branch to BX if (c1&c2)==0 */
8009 /* t1:c1!=0 t2:c2!=0 ==> Branch to BX if either value is non-0
8010 So we will branch to BX if (c1|c2)!=0 */
8018 /* The b1 condition must be the reverse of the b2 condition */
8020 if (t1->gtOper == t2->gtOper)
8023 if (t1->gtOper == GT_EQ)
8025 /* t1:c1==0 t2:c2!=0 ==> Branch to BX if both values are non-0
8026 So we will branch to BX if (c1&c2)!=0 */
8033 /* t1:c1!=0 t2:c2==0 ==> Branch to BX if both values are 0
8034 So we will branch to BX if (c1|c2)==0 */
8041 // Anding requires both values to be 0 or 1
8043 if ((foldOp == GT_AND) && (!bool1 || !bool2))
8047 // Now update the trees
8049 GenTreePtr cmpOp1 = gtNewOperNode(foldOp, foldType, c1, c2);
8052 /* When we 'OR'/'AND' two booleans, the result is boolean as well */
8053 cmpOp1->gtFlags |= GTF_BOOLEAN;
8057 t1->gtOp.gtOp1 = cmpOp1;
8058 t1->gtOp.gtOp2->gtType = foldType; // Could have been varTypeIsGC()
8060 #if FEATURE_SET_FLAGS
8061 // For comparisons against zero we will have the GTF_SET_FLAGS set
8062 // and this can cause an assert to fire in fgMoveOpsLeft(GenTreePtr tree)
8063 // during the CSE phase.
8065 // So make sure to clear any GTF_SET_FLAGS bit on these operations
8066 // as they are no longer feeding directly into a comparisons against zero
8068 // Make sure that the GTF_SET_FLAGS bit is cleared.
8069 // Fix 388436 ARM JitStress WP7
8070 c1->gtFlags &= ~GTF_SET_FLAGS;
8071 c2->gtFlags &= ~GTF_SET_FLAGS;
8073 // The new top level node that we just created does feed directly into
8074 // a comparison against zero, so set the GTF_SET_FLAGS bit so that
8075 // we generate an instuction that sets the flags, which allows us
8076 // to omit the cmp with zero instruction.
8078 // Request that the codegen for cmpOp1 sets the condition flags
8079 // when it generates the code for cmpOp1.
8081 cmpOp1->gtRequestSetFlags();
// Merge the two outgoing edges' weights and retarget b1's jump.
8084 flowList * edge1 = fgGetPredForBlock(b1->bbJumpDest, b1);
8087 /* Modify the target of the conditional jump and update bbRefs and bbPreds */
8091 edge2 = fgGetPredForBlock(b2->bbJumpDest, b2);
8095 edge2 = fgGetPredForBlock(b2->bbNext, b2);
8097 fgRemoveRefPred(b1->bbJumpDest, b1);
8099 b1->bbJumpDest = b2->bbJumpDest;
8101 fgAddRefPred(b2->bbJumpDest, b1);
8104 noway_assert(edge1 != NULL);
8105 noway_assert(edge2 != NULL);
// If the summed edge weights would overflow, fall back to the widest
// possible [BB_ZERO_WEIGHT, BB_MAX_WEIGHT] range.
8107 BasicBlock::weight_t edgeSumMin = edge1->flEdgeWeightMin + edge2->flEdgeWeightMin;
8108 BasicBlock::weight_t edgeSumMax = edge1->flEdgeWeightMax + edge2->flEdgeWeightMax;
8109 if ((edgeSumMax >= edge1->flEdgeWeightMax) && (edgeSumMax >= edge2->flEdgeWeightMax))
8111 edge1->flEdgeWeightMin = edgeSumMin;
8112 edge1->flEdgeWeightMax = edgeSumMax;
8116 edge1->flEdgeWeightMin = BB_ZERO_WEIGHT;
8117 edge1->flEdgeWeightMax = BB_MAX_WEIGHT;
8120 /* Get rid of the second block (which is a BBJ_COND) */
8122 noway_assert(b1->bbJumpKind == BBJ_COND);
8123 noway_assert(b2->bbJumpKind == BBJ_COND);
8124 noway_assert(b1->bbJumpDest == b2->bbJumpDest);
8125 noway_assert(b1->bbNext == b2); noway_assert(b2->bbNext);
8128 b2->bbFlags |= BBF_REMOVED;
8130 // If b2 was the last block of a try or handler, update the EH table.
8132 ehUpdateForDeletedBlock(b2);
8134 /* Update bbRefs and bbPreds */
8136 /* Replace pred 'b2' for 'b2->bbNext' with 'b1'
8137 * Remove pred 'b2' for 'b2->bbJumpDest' */
8139 fgReplacePred(b2->bbNext, b2, b1);
8141 fgRemoveRefPred(b2->bbJumpDest, b2);
8143 /* Update the block numbers and try again */
8155 // Update loop table
8156 fgUpdateLoopsAfterCompacting(b1, b2);
8161 printf("Folded %sboolean conditions of BB%02u and BB%02u to :\n",
8162 c2->OperIsLeaf() ? "" : "non-leaf ",
8163 b1->bbNum, b2->bbNum);
8164 gtDispTree(s1); printf("\n");
8172 fgDebugCheckBBlist();