1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
10 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
11 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
17 #pragma warning ( disable : 4701 )
20 /*****************************************************************************/
// Compiler-wide static counters for range-check optimization statistics.
// Names suggest "range checks removed" vs. "range checks seen in total" —
// presumably used for JIT measurement/dump output; confirm against usage sites.
24 unsigned Compiler::optRangeChkRmv = 0;
26 unsigned Compiler::optRangeChkAll = 0;
29 /*****************************************************************************/
// optInit: Reset the optimizer's per-method state before the optimization
// phases run: loop-marking flag, call counters, assertion-prop state, and
// CSE bookkeeping. No return value; mutates Compiler members only.
// NOTE(review): this excerpt elides lines from the original file (gaps in the
// embedded line numbers) — braces and some statements are missing here only.
31 void Compiler::optInit()
33 optLoopsMarked = false;
36 /* Initialize the # of tracked loops to 0 */
38 /* Keep track of the number of calls and indirect calls made by this method */
40 optIndirectCallCount = 0;
41 optNativeCallCount = 0;
// Assertion propagation starts with no assertions and no dependency table.
42 optAssertionCount = 0;
43 optAssertionDep = NULL;
// CSE bookkeeping: UINT_MAX marks "CSE numbering not started yet".
45 optCSECandidateTotal = 0;
46 optCSEstart = UINT_MAX;
48 #endif // FEATURE_ANYCSE
// DataFlow constructor: simply captures the owning Compiler instance; the
// (elided) body is presumably empty — all work happens in later calls.
52 DataFlow::DataFlow(Compiler* pCompiler)
53 : m_pCompiler(pCompiler)
57 /*****************************************************************************
// optSetBlockWeights: Adjust static bbWeight estimates for every basic block.
// Blocks unreachable from the first block are marked run-rarely; blocks that
// do not dominate all return blocks get their weight halved (only when real
// profile data is NOT in use). Requires dominators to be computed and is not
// run under MinOpts/debuggable code (asserted below).
// NOTE(review): excerpt elides lines (gaps in embedded line numbers); loop
// braces, 'changed' tracking, and some declarations are missing here only.
61 void Compiler::optSetBlockWeights()
63 noway_assert(!opts.MinOpts() && !opts.compDbgCode);
64 assert(fgDomsComputed);
70 bool firstBBdomsRets = true;
74 for (block = fgFirstBB; (block != NULL); block = block->bbNext)
76 /* Blocks that can't be reached via the first block are rarely executed */
77 if (!fgReachable(fgFirstBB, block))
78 block->bbSetRunRarely();
80 if (block->bbWeight != BB_ZERO_WEIGHT)
82 // Calculate our bbWeight:
84 // o BB_UNITY_WEIGHT if we dominate all BBJ_RETURN blocks
85 // o otherwise BB_UNITY_WEIGHT / 2
87 bool domsRets = true; // Assume that we will dominate
89 for (BasicBlockList* retBlocks = fgReturnBlocks; retBlocks != nullptr; retBlocks = retBlocks->next)
91 if (!fgDominate(block, retBlocks->block))
// Remember whether the method entry dominates all returns; used below so the
// halving heuristic is only applied when the entry block itself qualifies.
98 if (block == fgFirstBB)
100 firstBBdomsRets = domsRets;
103 // If we are not using profile weight then we lower the weight
104 // of blocks that do not dominate a return block
106 if (firstBBdomsRets && (fgIsUsingProfileWeights() == false) && (domsRets == false))
111 block->modifyBBWeight(block->bbWeight / 2);
// Halving must never drive a nonzero weight to zero.
112 noway_assert(block->bbWeight);
118 if (changed && verbose)
120 printf("\nAfter optSetBlockWeights:\n");
125 /* Check that the flowgraph data (bbNum, bbRefs, bbPreds) is up-to-date */
126 fgDebugCheckBBlist();
130 /*****************************************************************************
132 * Marks the blocks between 'begBlk' and 'endBlk' as part of a loop.
// optMarkLoopBlocks: Scale up the bbWeight of every block in the lexical range
// [begBlk, endBlk] that is genuinely inside the loop headed by begBlk (i.e.
// mutually reachable with begBlk). The multiplier is BB_LOOP_WEIGHT (or half
// that when the block only reaches, but does not dominate, a backedge), with
// overflow clamped to BB_MAX_WEIGHT. Blocks with real profile weights are
// left unchanged. An (elided) excludeEndBlk parameter controls whether endBlk
// itself is weighted — see the tail of the walk below.
// NOTE(review): excerpt elides lines (gaps in embedded line numbers); loop
// bodies, braces, and some declarations are missing here only.
135 void Compiler::optMarkLoopBlocks(BasicBlock *begBlk,
139 /* Calculate the 'loopWeight',
140 this is the amount to increase each block in the loop
141 Our heuristic is that loops are weighted eight times more
142 than straight line code.
143 Thus we increase each block by 7 times the weight of
144 the loop header block,
145 if the loops are all properly formed gives us:
146 (assuming that BB_LOOP_WEIGHT is 8)
148 1 -- non loop basic block
149 8 -- single loop nesting
150 64 -- double loop nesting
151 512 -- triple loop nesting
// Preconditions: begBlk lexically precedes endBlk, is a loop head, and can
// reach endBlk; only run when optimizing.
155 noway_assert(begBlk->bbNum <= endBlk->bbNum);
156 noway_assert(begBlk->isLoopHead());
157 noway_assert(fgReachable(begBlk, endBlk));
161 printf("\nMarking loop L%02u", begBlk->bbLoopNum);
165 noway_assert(!opts.MinOpts());
167 /* Build list of backedges for block begBlk */
168 flowList * backedgeList = NULL;
170 for (flowList* pred = begBlk->bbPreds;
174 /* Is this a backedge? */
// A predecessor with a bbNum >= begBlk's lies lexically at/after the head,
// so its edge into the head is treated as a backedge.
175 if (pred->flBlock->bbNum >= begBlk->bbNum)
177 flowList* flow = new (this, CMK_FlowList) flowList();
179 #if MEASURE_BLOCK_SIZE
181 genFlowNodeSize += sizeof(flowList);
182 #endif // MEASURE_BLOCK_SIZE
// Prepend to the backedge list (order is irrelevant to the scan below).
184 flow->flNext = backedgeList;
185 flow->flBlock = pred->flBlock;
190 /* At least one backedge must have been found (the one from endBlk) */
191 noway_assert(backedgeList);
// Walk the lexical range from the loop head, weighting qualifying blocks.
193 BasicBlock * curBlk = begBlk;
197 noway_assert(curBlk);
199 // For curBlk to be part of a loop that starts at begBlk
200 // curBlk must be reachable from begBlk and (since this is a loop)
201 // likewise begBlk must be reachable from curBlk.
204 if (fgReachable(curBlk, begBlk) && fgReachable(begBlk, curBlk))
206 /* If this block reaches any of the backedge blocks we set reachable */
207 /* If this block dominates any of the backedge blocks we set dominates */
208 bool reachable = false;
209 bool dominates = false;
211 for (flowList* tmp = backedgeList;
215 BasicBlock * backedge = tmp->flBlock;
// Run-rarely blocks are skipped; otherwise accumulate reach/dominate facts
// against each backedge source (early exit once both are established).
217 if (!curBlk->isRunRarely())
219 reachable |= fgReachable(curBlk, backedge);
220 dominates |= fgDominate (curBlk, backedge);
222 if (dominates && reachable)
229 noway_assert(curBlk->bbWeight > BB_ZERO_WEIGHT);
233 if ((curBlk->bbFlags & BBF_PROF_WEIGHT) != 0)
235 // We have real profile weights, so we aren't going to change this blocks weight
236 weight = curBlk->bbWeight;
// (Elided branches) full multiplier when dominating a backedge, half when
// only reachable — see the heuristic comment at the top.
241 weight = curBlk->bbWeight * BB_LOOP_WEIGHT;
244 weight = curBlk->bbWeight * (BB_LOOP_WEIGHT/2);
248 // The multiplication may have caused us to overflow
250 if (weight < curBlk->bbWeight)
252 // The multiplication caused us to overflow
253 weight = BB_MAX_WEIGHT;
256 // Set the new weight
258 curBlk->modifyBBWeight(weight);
263 printf("\n BB%02u(wt=%s)",
265 refCntWtd2str(curBlk->getBBWeight(this)));
271 /* Stop if we've reached the last block in the loop */
273 if (curBlk == endBlk)
276 curBlk = curBlk->bbNext;
278 /* If we are excluding the endBlk then stop if we've reached endBlk */
280 if (excludeEndBlk && (curBlk == endBlk))
285 /*****************************************************************************
287 * Unmark the blocks between 'begBlk' and 'endBlk' as part of a loop.
// optUnmarkLoopBlocks: Inverse of optMarkLoopBlocks — divide the bbWeight of
// the blocks in [begBlk, endBlk] back down by BB_LOOP_WEIGHT when a loop is
// being dissolved. Bails out (with a JITDUMP-style message) unless there is
// exactly one BBJ_COND/BBJ_ALWAYS back edge into begBlk. Max-weight and
// profile-weighted blocks are left untouched.
// NOTE(review): excerpt elides lines (gaps in embedded line numbers); braces,
// the backEdgeCount increment, and some declarations are missing here only.
290 void Compiler::optUnmarkLoopBlocks(BasicBlock *begBlk,
293 /* A set of blocks that were previously marked as a loop are now
294 to be unmarked, since we have decided that for some reason this
295 loop no longer exists.
296 Basically we are just reseting the blocks bbWeight to their
300 noway_assert(begBlk->bbNum <= endBlk->bbNum);
301 noway_assert(begBlk->isLoopHead());
303 noway_assert(!opts.MinOpts());
// Count qualifying back edges into the loop head.
306 unsigned backEdgeCount = 0;
308 for (flowList * pred = begBlk->bbPreds;
312 curBlk = pred->flBlock;
314 /* is this a backward edge? (from curBlk to begBlk) */
316 if (begBlk->bbNum > curBlk->bbNum)
319 /* We only consider back-edges that are BBJ_COND or BBJ_ALWAYS for loops */
321 if ((curBlk->bbJumpKind != BBJ_COND) &&
322 (curBlk->bbJumpKind != BBJ_ALWAYS) )
328 /* Only unmark the loop blocks if we have exactly one loop back edge */
329 if (backEdgeCount != 1)
334 if (backEdgeCount > 0)
336 printf("\nNot removing loop L%02u, due to an additional back edge",
339 else if (backEdgeCount == 0)
341 printf("\nNot removing loop L%02u, due to no back edge",
345 noway_assert(backEdgeCount == 1);
349 noway_assert(fgReachable(begBlk, endBlk));
353 printf("\nUnmarking loop L%02u", begBlk->bbLoopNum);
// Lexical walk mirroring optMarkLoopBlocks: only mutually-reachable,
// non-run-rarely blocks are rescaled.
359 noway_assert(curBlk);
361 // For curBlk to be part of a loop that starts at begBlk
362 // curBlk must be reachable from begBlk and (since this is a loop)
363 // likewise begBlk must be reachable from curBlk.
365 if (!curBlk->isRunRarely() && fgReachable(curBlk, begBlk) && fgReachable(begBlk, curBlk))
367 unsigned weight = curBlk->bbWeight;
369 // Don't unmark blocks that are set to BB_MAX_WEIGHT
370 // Don't unmark blocks when we are using profile weights
372 if (!curBlk->isMaxBBWeight() && ((curBlk->bbFlags & BBF_PROF_WEIGHT) == 0))
// If curBlk doesn't dominate the loop bottom it got the half multiplier when
// marked; the (elided) arm compensates before dividing back down.
374 if (!fgDominate(curBlk, endBlk))
380 /* Merging of blocks can disturb the Dominates
381 information (see RAID #46649) */
382 if (weight < BB_LOOP_WEIGHT)
386 // We can overflow here so check for it
387 if (weight < curBlk->bbWeight)
388 weight = BB_MAX_WEIGHT;
390 assert (weight >= BB_LOOP_WEIGHT);
392 curBlk->modifyBBWeight(weight / BB_LOOP_WEIGHT);
398 printf("\n BB%02u(wt=%s)",
400 refCntWtd2str(curBlk->getBBWeight(this)));
404 /* Stop if we've reached the last block in the loop */
406 if (curBlk == endBlk)
409 curBlk = curBlk->bbNext;
411 /* Stop if we go past the last block in the loop, as it may have been deleted */
412 if (curBlk->bbNum > endBlk->bbNum)
417 /*****************************************************************************************************
419 * Function called to update the loop table and bbWeight before removing a block
// optUpdateLoopsBeforeRemoveBlock: Fix up the loop table (and possibly block
// weights) before 'block' is removed from the flowgraph. Loops whose entry or
// bottom is the removed block are flagged LPFLG_REMOVED; loops whose unique
// exit was the block lose LPFLG_ONE_EXIT; loops headed by the block get a new
// head. Finally, if the block itself carried a loop back edge (and
// skipUnmarkLoop — an elided parameter — is false), the loop blocks are
// unweighted via optUnmarkLoopBlocks.
// NOTE(review): excerpt elides lines (gaps in embedded line numbers); braces,
// 'continue'/'break' statements, and case labels are missing here only.
422 void Compiler::optUpdateLoopsBeforeRemoveBlock(BasicBlock * block,
428 noway_assert(!opts.MinOpts());
430 bool removeLoop = false;
432 /* If an unreachable block was part of a loop entry or bottom then the loop is unreachable */
433 /* Special case: the block was the head of a loop - or pointing to a loop entry */
435 for (unsigned loopNum = 0; loopNum < optLoopCount; loopNum++)
437 /* Some loops may have been already removed by
438 * loop unrolling or conditional folding */
440 if (optLoopTable[loopNum].lpFlags & LPFLG_REMOVED)
// Removing the loop's entry or bottom dissolves the loop entirely.
443 if (block == optLoopTable[loopNum].lpEntry ||
444 block == optLoopTable[loopNum].lpBottom )
446 optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED;
453 printf("\nUpdateLoopsBeforeRemoveBlock Before: ");
454 optPrintLoopInfo(loopNum);
458 /* If the loop is still in the table
459 * any block in the loop must be reachable !!! */
461 noway_assert(optLoopTable[loopNum].lpEntry != block);
462 noway_assert(optLoopTable[loopNum].lpBottom != block);
// NOTE(review): stray double semicolon below — harmless empty statement,
// worth cleaning up in a code change (not done here to keep bytes identical).
464 if (optLoopTable[loopNum].lpExit == block)
466 optLoopTable[loopNum].lpExit = NULL;
467 optLoopTable[loopNum].lpFlags &= ~LPFLG_ONE_EXIT;;
471 /* If this points to the actual entry in the loop
472 * then the whole loop may become unreachable */
// Determine whether the removed block was a predecessor of the loop entry,
// per jump kind (fall-through, direct jump, or switch targets).
474 switch (block->bbJumpKind)
477 BasicBlock * * jumpTab;
481 if (block->bbNext == optLoopTable[loopNum].lpEntry)
486 if (block->bbJumpKind == BBJ_NONE)
492 noway_assert(block->bbJumpDest);
493 if (block->bbJumpDest == optLoopTable[loopNum].lpEntry)
500 jumpCnt = block->bbJumpSwt->bbsCount;
501 jumpTab = block->bbJumpSwt->bbsDstTab;
504 noway_assert(*jumpTab);
505 if ((*jumpTab) == optLoopTable[loopNum].lpEntry)
509 } while (++jumpTab, --jumpCnt);
518 /* Check if the entry has other predecessors outside the loop
519 * TODO: Replace this when predecessors are available */
// Linear scan of all blocks to find another outside-the-loop predecessor of
// the entry; if one exists the loop stays alive after the removal.
521 BasicBlock * auxBlock;
522 for (auxBlock = fgFirstBB; auxBlock; auxBlock = auxBlock->bbNext)
524 /* Ignore blocks in the loop */
526 if (auxBlock->bbNum > optLoopTable[loopNum].lpHead->bbNum &&
527 auxBlock->bbNum <= optLoopTable[loopNum].lpBottom->bbNum)
530 switch (auxBlock->bbJumpKind)
533 BasicBlock * * jumpTab;
537 if (auxBlock->bbNext == optLoopTable[loopNum].lpEntry)
542 if (auxBlock->bbJumpKind == BBJ_NONE)
548 noway_assert(auxBlock->bbJumpDest);
549 if (auxBlock->bbJumpDest == optLoopTable[loopNum].lpEntry)
556 jumpCnt = auxBlock->bbJumpSwt->bbsCount;
557 jumpTab = auxBlock->bbJumpSwt->bbsDstTab;
561 noway_assert(*jumpTab);
562 if ((*jumpTab) == optLoopTable[loopNum].lpEntry)
566 } while (++jumpTab, --jumpCnt);
576 optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED;
579 else if (optLoopTable[loopNum].lpHead == block)
581 /* The loop has a new head - Just update the loop table */
582 optLoopTable[loopNum].lpHead = block->bbPrev;
587 printf("\nUpdateLoopsBeforeRemoveBlock After: ");
588 optPrintLoopInfo(loopNum);
// If the block being removed carried the (backward) jump into a loop head,
// unmark the loop's blocks — guarded so dominator info is current
// (fgCurBBEpochSize check) and the target really is reached back from here.
593 if ((skipUnmarkLoop == false) &&
594 ((block->bbJumpKind == BBJ_ALWAYS) || (block->bbJumpKind == BBJ_COND)) &&
595 (block->bbJumpDest->isLoopHead()) &&
596 (block->bbJumpDest->bbNum <= block->bbNum) &&
598 (fgCurBBEpochSize == fgDomBBcount + 1) &&
599 fgReachable(block->bbJumpDest, block))
601 optUnmarkLoopBlocks(block->bbJumpDest, block);
607 /*****************************************************************************
609 * Given the beginBlock of the loop, return the index of this loop
// optFindLoopNumberFromBeginBlock: Return the loop-table index of the loop
// whose first block (the block after the loop head) is 'begBlk'. Asserts if
// no loop in the table matches.
613 unsigned Compiler::optFindLoopNumberFromBeginBlock(BasicBlock* begBlk)
617 for (lnum = 0; lnum < optLoopCount; lnum++)
// The begin block is the lexical successor of the recorded loop head.
619 if (optLoopTable[lnum].lpHead->bbNext == begBlk)
626 noway_assert(!"Loop number not found.");
631 /*****************************************************************************
633 * Print loop info in an uniform way.
// optPrintLoopInfo (explicit-fields overload): Debug-dump one loop given its
// index and its blocks (first/top/entry/bottom/exit/head — some parameters
// elided from this excerpt), exit count, and parent-loop index. Takes the
// fields explicitly because begBlk->bbLoopNum may not be set yet (see note).
636 void Compiler::optPrintLoopInfo(unsigned loopInd,
638 BasicBlock * lpFirst,
640 BasicBlock * lpEntry,
641 BasicBlock * lpBottom,
642 unsigned char lpExitCnt,
647 noway_assert(lpHead);
650 // NOTE: we take "loopInd" as an argument instead of using the one
651 // stored in begBlk->bbLoopNum because sometimes begBlk->bbLoopNum
652 // has not be set correctly. For example, in optRecordLoop().
653 // However, in most of the cases, loops should have been recorded.
654 // Therefore the correct way is to call the Compiler::optPrintLoopInfo(unsigned lnum)
655 // version of this method.
657 printf("L%02u, from BB%02u", loopInd, lpFirst->bbNum);
// Only mention TOP separately when it differs from FIRST.
658 if (lpTop != lpFirst)
660 printf(" (loop top is BB%02u)", lpTop->bbNum);
663 printf(" to BB%02u (Head=BB%02u, Entry=BB%02u, ExitCnt=%d",
// A unique exit gets printed explicitly.
670 if (lpExitCnt == 1) {
671 printf(" at BB%02u", lpExit->bbNum);
674 if (parentLoop != BasicBlock::NOT_IN_LOOP)
676 printf(", parent loop = L%02u", parentLoop);
681 /*****************************************************************************
683 * Print loop information given the index of the loop in the loop table.
// optPrintLoopInfo (index overload): Debug-dump the loop at table index
// 'lnum' by forwarding its recorded fields to the explicit-fields overload.
686 void Compiler::optPrintLoopInfo(unsigned lnum)
688 noway_assert(lnum < optLoopCount);
690 LoopDsc* ldsc = &optLoopTable[lnum]; // lnum is the INDEX to the loop table.
692 optPrintLoopInfo(lnum,
707 //------------------------------------------------------------------------
708 // optPopulateInitInfo: Populate loop init info in the loop table.
711 // init - the tree that is supposed to initialize the loop iterator.
712 // iterVar - loop iteration variable.
715 // "false" if the loop table could not be populated with the loop iterVar init info.
718 // The 'init' tree is checked if its lhs is a local and rhs is either
719 // a const or a local.
// optPopulateInitInfo: Record the loop iterator's initialization in the loop
// table entry 'loopInd'. The 'init' tree must be "iterVar = <const int>" or
// "iterVar = <lclVar>"; sets LPFLG_CONST_INIT/lpConstInit or
// LPFLG_VAR_INIT/lpVarInit accordingly. Returns false (in elided arms) when
// the pattern doesn't match.
721 bool Compiler::optPopulateInitInfo(unsigned loopInd, GenTreePtr init, unsigned iterVar)
723 // Operator should be =
724 if (init->gtOper != GT_ASG)
729 GenTreePtr lhs = init->gtOp.gtOp1;
730 GenTreePtr rhs = init->gtOp.gtOp2;
731 // LHS has to be local and should equal iterVar.
732 if (lhs->gtOper != GT_LCL_VAR || lhs->gtLclVarCommon.gtLclNum != iterVar)
737 // RHS can be constant or local var.
738 // TODO-CQ: CLONE: Add arr length for descending loops.
739 if (rhs->gtOper == GT_CNS_INT && rhs->TypeGet() == TYP_INT)
741 optLoopTable[loopInd].lpFlags |= LPFLG_CONST_INIT;
742 optLoopTable[loopInd].lpConstInit = (int)rhs->gtIntCon.gtIconVal;
744 else if (rhs->gtOper == GT_LCL_VAR)
746 optLoopTable[loopInd].lpFlags |= LPFLG_VAR_INIT;
747 optLoopTable[loopInd].lpVarInit = rhs->gtLclVarCommon.gtLclNum;
756 //----------------------------------------------------------------------------------
757 // optCheckIterInLoopTest: Check if iter var is used in loop test.
760 // test "jtrue" tree or an asg of the loop iter termination condition
761 // from/to blocks (beg, end) which are part of the loop.
762 // iterVar loop iteration variable.
763 // loopInd loop index.
766 // The test tree is parsed to check if "iterVar" matches the lhs of the condition
767 // and the rhs limit is extracted from the "test" tree. The limit information is
768 // added to the loop table.
771 // "false" if the loop table could not be populated with the loop test info or
772 // if the test condition doesn't involve iterVar.
// optCheckIterInLoopTest: Verify that the loop's termination condition uses
// the iteration variable, and classify the limit operand (const / invariant
// local / array length) into the loop table's flags, saving the relop in
// lpTestTree. 'test' is either the JTRUE or the assignment holding the relop;
// (from, to) bound the loop for the invariance check on a local-var limit.
774 bool Compiler::optCheckIterInLoopTest(unsigned loopInd, GenTreePtr test, BasicBlock* from, BasicBlock* to, unsigned iterVar)
776 // Obtain the relop from the "test" tree.
778 if (test->gtOper == GT_JTRUE)
780 relop = test->gtGetOp1();
784 assert(test->gtOper == GT_ASG);
785 relop = test->gtGetOp2();
788 noway_assert(relop->OperKind() & GTK_RELOP);
790 GenTreePtr opr1 = relop->gtOp.gtOp1;
791 GenTreePtr opr2 = relop->gtOp.gtOp2;
// Either side of the comparison may be the iterator; the other is the limit
// (iterOp/limitOp assignments are in elided lines).
796 // Make sure op1 or op2 is the iterVar.
797 if (opr1->gtOper == GT_LCL_VAR && opr1->gtLclVarCommon.gtLclNum == iterVar)
802 else if (opr2->gtOper == GT_LCL_VAR && opr2->gtLclVarCommon.gtLclNum == iterVar)
// Only TYP_INT iterators are supported.
812 if (iterOp->gtType != TYP_INT)
817 // Mark the iterator node.
818 iterOp->gtFlags |= GTF_VAR_ITERATOR;
820 // Check what type of limit we have - constant, variable or arr-len.
821 if (limitOp->gtOper == GT_CNS_INT)
823 optLoopTable[loopInd].lpFlags |= LPFLG_CONST_LIMIT;
// A local-var limit only counts if it is not assigned anywhere in the loop.
825 else if (limitOp->gtOper == GT_LCL_VAR && !optIsVarAssigned(from, to, nullptr, limitOp->gtLclVarCommon.gtLclNum))
827 optLoopTable[loopInd].lpFlags |= LPFLG_VAR_LIMIT;
829 else if (limitOp->gtOper == GT_ARR_LENGTH)
831 optLoopTable[loopInd].lpFlags |= LPFLG_ARRLEN_LIMIT;
837 // Save the type of the comparison between the iterator and the limit.
838 optLoopTable[loopInd].lpTestTree = relop;
842 //----------------------------------------------------------------------------------
843 // optIsLoopIncrTree: Check if loop is a tree of form v += 1 or v = v + 1
846 // incr The incr tree to be checked. Whether incr tree is
847 // oper-equal(+=, -=...) type nodes or v=v+1 type ASG nodes.
850 // The test tree is parsed to check if "iterVar" matches the lhs of the condition
851 // and the rhs limit is extracted from the "test" tree. The limit information is
852 // added to the loop table.
855 // iterVar local num if the iterVar is found, otherwise BAD_VAR_NUM.
// optIsLoopIncrTree: Pattern-match 'incr' as a loop-iterator update of the
// form "v += c" / "v = v op c" (via IsLclVarUpdateTree) with a constant
// TYP_INT increment. Returns the iterator's local number on a match,
// otherwise BAD_VAR_NUM (return statements are in elided lines).
857 unsigned Compiler::optIsLoopIncrTree(GenTreePtr incr)
860 genTreeOps updateOper;
861 unsigned iterVar = incr->IsLclVarUpdateTree(&incrVal, &updateOper);
862 if (iterVar != BAD_VAR_NUM)
864 // We have v = v op y type asg node.
877 // Increment should be by a const int.
878 // TODO-CQ: CLONE: allow variable increments.
879 if ((incrVal->gtOper != GT_CNS_INT) || (incrVal->TypeGet() != TYP_INT))
888 //----------------------------------------------------------------------------------
889 // optComputeIterInfo: Check tree is loop increment of a lcl that is loop-invariant.
892 // from, to - are blocks (beg, end) which are part of the loop.
893 // incr - tree that increments the loop iterator. v+=1 or v=v+1.
894 // pIterVar - see return value.
897 // Returns true if iterVar "v" can be returned in "pIterVar", otherwise returns
901 // Check if the "incr" tree is a "v=v+1 or v+=1" type tree and make sure it is not
902 // assigned in the loop.
// optComputeIterInfo: Confirm that 'incr' is a valid iterator-update tree and
// that the iterator is not assigned elsewhere within the loop blocks
// (from, to). On success the iterator's local number is returned via
// *pIterVar (in an elided line); returns false otherwise.
904 bool Compiler::optComputeIterInfo(GenTreePtr incr, BasicBlock* from, BasicBlock* to, unsigned* pIterVar)
907 unsigned iterVar = optIsLoopIncrTree(incr);
908 if (iterVar == BAD_VAR_NUM)
// The update in 'incr' itself is excluded from the assignment scan.
912 if (optIsVarAssigned(from, to, incr, iterVar))
914 JITDUMP("iterVar is assigned in loop\n");
922 //----------------------------------------------------------------------------------
923 // optIsLoopTestEvalIntoTemp:
924 // Pattern match if the test tree is computed into a tmp
925 // and the "tmp" is used as jump condition for loop termination.
928 // testStmt - is the JTRUE statement that is of the form: jmpTrue (Vtmp != 0)
929 // where Vtmp contains the actual loop test result.
930 // newStmt - contains the statement that is the actual test stmt involving
931 // the loop iterator.
934 // Returns true if a new test tree can be obtained.
937 // Scan if the current stmt is a jtrue with (Vtmp != 0) as condition
938 // Then returns the rhs for def of Vtmp as the "test" node.
941 // This method just retrieves what it thinks is the "test" node,
942 // the callers are expected to verify that "iterVar" is used in the test.
// optIsLoopTestEvalIntoTemp: Recognize the two-statement pattern where the
// loop test is first evaluated into a temp and then consumed as
// "jmpTrue (Vtmp != 0)". If the preceding statement assigns a comparison to
// that same temp, the comparison's assignment statement is returned via
// *newTest (in an elided line). Callers still verify iterVar usage.
944 bool Compiler::optIsLoopTestEvalIntoTemp(GenTreePtr testStmt, GenTreePtr* newTest)
946 GenTreePtr test = testStmt->gtStmt.gtStmtExpr;
948 if (test->gtOper != GT_JTRUE)
953 GenTreePtr relop = test->gtGetOp1();
954 noway_assert(relop->OperIsCompare());
956 GenTreePtr opr1 = relop->gtOp.gtOp1;
957 GenTreePtr opr2 = relop->gtOp.gtOp2;
959 // Make sure we have jtrue (vtmp != 0)
960 if ((relop->OperGet() == GT_NE) &&
961 (opr1->OperGet() == GT_LCL_VAR) &&
962 (opr2->OperGet() == GT_CNS_INT) &&
963 opr2->IsIntegralConst(0))
965 // Get the previous statement to get the def (rhs) of Vtmp to see
966 // if the "test" is evaluated into Vtmp.
967 GenTreePtr prevStmt = testStmt->gtPrev;
968 if (prevStmt == nullptr)
973 GenTreePtr tree = prevStmt->gtStmt.gtStmtExpr;
974 if (tree->OperGet() == GT_ASG)
976 GenTreePtr lhs = tree->gtOp.gtOp1;
977 GenTreePtr rhs = tree->gtOp.gtOp2;
979 // Return as the new test node.
// The assignment's target must be the same temp the JTRUE reads, and its
// source must itself be a comparison.
980 if (lhs->gtOper == GT_LCL_VAR && lhs->AsLclVarCommon()->GetLclNum() == opr1->AsLclVarCommon()->GetLclNum())
982 if (rhs->OperIsCompare())
993 //----------------------------------------------------------------------------------
994 // optExtractInitTestIncr:
995 // Extract the "init", "test" and "incr" nodes of the loop.
998 // head - Loop head block
999 // bottom - Loop bottom block
1000 // top - Loop top block
1001 // ppInit - The init stmt of the loop if found.
1002 // ppTest - The test stmt of the loop if found.
1003 // ppIncr - The incr stmt of the loop if found.
1006 // The results are put in "ppInit", "ppTest" and "ppIncr" if the method
1007 // returns true. Returns false if the information can't be extracted.
1010 // Check if the "test" stmt is last stmt in the loop "bottom". If found good,
1011 // "test" stmt is found. Try to find the "incr" stmt. Check previous stmt of
1012 // "test" to get the "incr" stmt. If it is not found it could be a loop of the
1015 // +-------<-----------------<-----------+
1018 // BBinit(head) -> BBcond(top) -> BBLoopBody(bottom) ---^
1020 // Check if the "incr" tree is present in the loop "top" node as the last stmt.
1021 // Also check if the "test" tree is assigned to a tmp node and the tmp is used
1022 // in the jtrue condition.
1025 // This method just retrieves what it thinks is the "test" node,
1026 // the callers are expected to verify that "iterVar" is used in the test.
// optExtractInitTestIncr: Locate the loop's "init", "test", and "incr" trees.
// The test is expected as the last statement of 'bottom'; the incr is either
// the statement before it or the last statement of 'top' (handling the
// rotated do-while shape described below); the init is the last statement of
// 'head', skipping a duplicated (GTF_STMT_CMPADD) loop-condition statement.
// Results go out via ppInit/ppTest/ppIncr; returns false when a piece cannot
// be found (returns are in elided lines).
1028 bool Compiler::optExtractInitTestIncr(BasicBlock* head, BasicBlock* bottom, BasicBlock* top, GenTreePtr* ppInit, GenTreePtr* ppTest, GenTreePtr* ppIncr)
1030 assert(ppInit != nullptr);
1031 assert(ppTest != nullptr);
1032 assert(ppIncr != nullptr);
1034 // Check if last two statements in the loop body are the increment of the iterator
1035 // and the loop termination test.
1036 noway_assert(bottom->bbTreeList != nullptr);
// bbTreeList->gtPrev is the last statement (the list is circular in gtPrev).
1037 GenTreePtr test = bottom->bbTreeList->gtPrev;
1038 noway_assert(test != nullptr && test->gtNext == nullptr);
// If the test was evaluated into a temp, swap in the real comparison stmt.
1041 if (optIsLoopTestEvalIntoTemp(test, &newTest))
1046 // Check if we have the incr tree before the test tree, if we don't,
1047 // check if incr is part of the loop "top".
1048 GenTreePtr incr = test->gtPrev;
1049 if (incr == nullptr || optIsLoopIncrTree(incr->gtStmt.gtStmtExpr) == BAD_VAR_NUM)
1051 if (top == nullptr || top->bbTreeList == nullptr || top->bbTreeList->gtPrev == nullptr)
1056 // If the prev stmt to loop test is not incr, then check if we have loop test evaluated into a tmp.
1057 GenTreePtr topLast = top->bbTreeList->gtPrev;
1058 if (optIsLoopIncrTree(topLast->gtStmt.gtStmtExpr) != BAD_VAR_NUM)
1068 assert(test != incr);
1070 // Find the last statement in the loop pre-header which we expect to be the initialization of
1071 // the loop iterator.
1072 GenTreePtr phdr = head->bbTreeList;
1073 if (phdr == nullptr)
1078 GenTreePtr init = phdr->gtPrev;
1079 noway_assert(init != nullptr && (init->gtNext == 0));
1081 // If it is a duplicated loop condition, skip it.
1082 if (init->gtFlags & GTF_STMT_CMPADD)
1084 // Must be a duplicated loop condition.
1085 noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
1086 init = init->gtPrev;
1087 noway_assert(init != nullptr);
// All three pieces must be statement nodes before unwrapping their exprs.
1090 noway_assert(init->gtOper == GT_STMT);
1091 noway_assert(test->gtOper == GT_STMT);
1092 noway_assert(incr->gtOper == GT_STMT);
1094 *ppInit = init->gtStmt.gtStmtExpr;
1095 *ppTest = test->gtStmt.gtStmtExpr;
1096 *ppIncr = incr->gtStmt.gtStmtExpr;
1101 /*****************************************************************************
1103 * Record the loop in the loop table.
// optRecordLoop: Add a loop (head/first/top/entry/bottom/exit blocks + exit
// count — some parameters elided from this excerpt) to optLoopTable, keeping
// the table ordered so containing loops precede contained ones. Also
// classifies the loop: DO-WHILE, single-exit, iterator ("for"-like) loops via
// optExtractInitTestIncr/optComputeIterInfo, and constant-trip loops.
// Sets loopOverflowThisMethod and bails when the table is full.
// NOTE(review): excerpt elides lines (gaps in embedded line numbers); braces,
// 'return's, and several declarations are missing here only.
1106 void Compiler::optRecordLoop(BasicBlock * head,
1110 BasicBlock * bottom,
1112 unsigned char exitCnt)
1114 // Record this loop in the table, if there's room.
1116 assert(optLoopCount <= MAX_LOOP_NUM);
1117 if (optLoopCount == MAX_LOOP_NUM)
1120 loopOverflowThisMethod = true;
1125 // Assumed preconditions on the loop we're adding.
1126 assert(first->bbNum <= top->bbNum);
1127 assert(top->bbNum <= entry->bbNum);
1128 assert(entry->bbNum <= bottom->bbNum);
1129 assert(head->bbNum < top->bbNum || head->bbNum > bottom->bbNum);
1131 // If the new loop contains any existing ones, add it in the right place.
1132 unsigned char loopInd = optLoopCount;
1133 for (unsigned char prevPlus1 = optLoopCount; prevPlus1 > 0; prevPlus1--)
1135 unsigned char prev = prevPlus1 - 1;
1136 if (optLoopTable[prev].lpContainedBy(first, bottom))
1139 // Move up any loops if necessary.
1140 for (unsigned j = optLoopCount; j > loopInd; j--)
1142 optLoopTable[j] = optLoopTable[j-1];
// Debug-only sanity check: every loop after the insertion point is either
// disjoint from, or fully contained in, the new loop.
1146 for (unsigned i = loopInd+1; i < optLoopCount; i++)
1148 // The loop is well-formed.
1149 assert(optLoopTable[i].lpWellFormed());
1150 // Check for disjoint.
1151 if (optLoopTable[i].lpDisjoint(first, bottom)) continue;
1152 // Otherwise, assert complete containment (of optLoopTable[i] in new loop).
1153 assert(optLoopTable[i].lpContainedBy(first, bottom));
// Populate the new table entry.
1157 optLoopTable[loopInd].lpHead = head;
1158 optLoopTable[loopInd].lpFirst = first;
1159 optLoopTable[loopInd].lpTop = top;
1160 optLoopTable[loopInd].lpBottom = bottom;
1161 optLoopTable[loopInd].lpEntry = entry;
1162 optLoopTable[loopInd].lpExit = exit;
1163 optLoopTable[loopInd].lpExitCnt = exitCnt;
1165 optLoopTable[loopInd].lpParent = BasicBlock::NOT_IN_LOOP;
1166 optLoopTable[loopInd].lpChild = BasicBlock::NOT_IN_LOOP;
1167 optLoopTable[loopInd].lpSibling = BasicBlock::NOT_IN_LOOP;
1169 optLoopTable[loopInd].lpFlags = 0;
1171 // We haven't yet recorded any side effects.
1172 optLoopTable[loopInd].lpLoopHasHeapHavoc = false;
1173 optLoopTable[loopInd].lpFieldsModified = nullptr;
1174 optLoopTable[loopInd].lpArrayElemTypesModified = 0;
1176 // If DO-WHILE loop mark it as such.
1177 if (head->bbNext == entry)
1179 optLoopTable[loopInd].lpFlags |= LPFLG_DO_WHILE;
1182 // If single exit loop mark it as such.
1186 optLoopTable[loopInd].lpFlags |= LPFLG_ONE_EXIT;
1190 // Try to find loops that have an iterator (i.e. for-like loops) "for (init; test; incr){ ... }"
1191 // We have the following restrictions:
1192 // 1. The loop condition must be a simple one i.e. only one JTRUE node
1193 // 2. There must be a loop iterator (a local var) that is
1194 // incremented (decremented or lsh, rsh, mul) with a constant value
1195 // 3. The iterator is incremented exactly once
1196 // 4. The loop condition must use the iterator.
1198 if (bottom->bbJumpKind == BBJ_COND)
1203 if (!optExtractInitTestIncr(head, bottom, top, &init, &test, &incr))
1208 unsigned iterVar = BAD_VAR_NUM;
1209 if (!optComputeIterInfo(incr, head->bbNext, bottom, &iterVar))
1214 // Make sure the "iterVar" initialization is never skipped,
1215 // i.e. HEAD dominates the ENTRY.
1216 if (!fgDominate(head, entry))
1221 if (!optPopulateInitInfo(loopInd, init, iterVar))
1226 // Check that the iterator is used in the loop condition.
1227 if (!optCheckIterInLoopTest(loopInd, test, head->bbNext, bottom, iterVar))
1232 // We know the loop has an iterator at this point ->flag it as LPFLG_ITER
1233 // Record the iterator, the pointer to the test node
1234 // and the initial value of the iterator (constant or local var)
1235 optLoopTable[loopInd].lpFlags |= LPFLG_ITER;
1238 optLoopTable[loopInd].lpIterTree = incr;
1241 // Save the initial value of the iterator - can be lclVar or constant
1242 // Flag the loop accordingly.
1248 simpleTestLoopCount++;
1251 // Check if a constant iteration loop.
1252 if ((optLoopTable[loopInd].lpFlags & LPFLG_CONST_INIT) &&
1253 (optLoopTable[loopInd].lpFlags & LPFLG_CONST_LIMIT))
1255 // This is a constant loop.
1256 optLoopTable[loopInd].lpFlags |= LPFLG_CONST;
1258 constIterLoopCount++;
// Verbose-dump of the constant loop's initializer and body statements
// (the incr statement terminates the per-block statement walk).
1265 printf("\nConstant loop initializer:\n");
1268 printf("\nConstant loop body:\n");
1270 BasicBlock* block = head;
1273 block = block->bbNext;
1274 for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
1276 if (stmt->gtStmt.gtStmtExpr == incr)
1279 gtDispTree(stmt->gtStmt.gtStmtExpr);
1282 while (block != bottom);
1289 DBEXEC(verbose, optPrintLoopRecording(loopInd));
1294 //------------------------------------------------------------------------
1295 // optPrintLoopRecording: Print a recording of the loop.
1298 // loopInd - loop index.
// optPrintLoopRecording: Debug-dump the just-recorded loop at table index
// 'loopInd' — its blocks (via optPrintLoopInfo) plus, for iterator loops, the
// iterator variable, increment operator/constant, init value, and the test
// operator with its limit.
1300 void Compiler::optPrintLoopRecording(unsigned loopInd)
// "(extended)" marks an insertion in the middle of the table (containing
// loop added before existing contained loops).
1302 printf("Recorded loop %s", (loopInd != optLoopCount ? "(extended) " : ""));
1303 optPrintLoopInfo(optLoopCount, // Not necessarily the loop index, but the number of loops that have been added.
1304 optLoopTable[loopInd].lpHead,
1305 optLoopTable[loopInd].lpFirst,
1306 optLoopTable[loopInd].lpTop,
1307 optLoopTable[loopInd].lpEntry,
1308 optLoopTable[loopInd].lpBottom,
1309 optLoopTable[loopInd].lpExitCnt,
1310 optLoopTable[loopInd].lpExit
1313 // If an iterator loop print the iterator and the initialization.
1314 if (optLoopTable[loopInd].lpFlags & LPFLG_ITER)
1316 printf(" [over V%02u", optLoopTable[loopInd].lpIterVar());
1318 printf(GenTree::NodeName(optLoopTable[loopInd].lpIterOper()));
1320 printf("%d )", optLoopTable[loopInd].lpIterConst());
1322 if (optLoopTable[loopInd].lpFlags & LPFLG_CONST_INIT)
1323 printf(" from %d", optLoopTable[loopInd].lpConstInit);
1324 if (optLoopTable[loopInd].lpFlags & LPFLG_VAR_INIT)
1325 printf(" from V%02u", optLoopTable[loopInd].lpVarInit);
1327 // If a simple test condition print operator and the limits */
1328 printf(GenTree::NodeName(optLoopTable[loopInd].lpTestOper()));
1330 if (optLoopTable[loopInd].lpFlags & LPFLG_CONST_LIMIT)
1331 printf("%d ", optLoopTable[loopInd].lpConstLimit());
1333 if (optLoopTable[loopInd].lpFlags & LPFLG_VAR_LIMIT)
1334 printf("V%02u ", optLoopTable[loopInd].lpVarLimit());
// optCheckPreds: Debug-only consistency check of the predecessor lists —
// every recorded pred must actually exist in the block list and must really
// transfer control to the block (by jump target or fall-through, per its
// bbJumpKind). O(blocks^2) scan; presumably only invoked in checked builds.
1342 void Compiler::optCheckPreds()
1345 BasicBlock * blockPred;
1348 for (block = fgFirstBB; block; block = block->bbNext)
1350 for (pred = block->bbPreds; pred; pred = pred->flNext)
1352 // make sure this pred is part of the BB list
1353 for (blockPred = fgFirstBB; blockPred; blockPred = blockPred->bbNext)
1355 if (blockPred == pred->flBlock)
1358 noway_assert(blockPred);
1359 switch (blockPred->bbJumpKind)
// (Case labels are in elided lines.) A conditional pred may reach us either
// by its jump target or by falling through to bbNext.
1362 if (blockPred->bbJumpDest == block)
1366 noway_assert(blockPred->bbNext == block);
1368 case BBJ_EHFILTERRET:
1370 case BBJ_EHCATCHRET:
1371 noway_assert(blockPred->bbJumpDest == block);
1382 /*****************************************************************************
1383  * Find the natural loops, using dominators. Note that the test for
1384  * a loop is slightly different from the standard one, because we have
1385  * not done a depth first reordering of the basic blocks.
// Records each discovered loop via optRecordLoop, then computes the loop nest
// (parent/child/sibling links), labels every block with its innermost loop via
// bbNatLoopNum, and finally canonicalizes loop tops (optCanonicalizeLoopNest).
1388 void Compiler::optFindNaturalLoops()
1392         printf("*************** In optFindNaturalLoops()\n");
1397     flowList   *  predEntry;
1399 noway_assert(fgDomsComputed);
1403 hasMethodLoops = false;
1404 loopsThisMethod = 0;
1405 loopOverflowThisMethod = false;
1408 /* We will use the following terminology:
1409 * HEAD - the basic block that flows into the loop ENTRY block (Currently MUST be lexically before entry).
1410 Not part of the looping of the loop.
1411 * FIRST - the lexically first basic block (in bbNext order) within this loop. (May be part of a nested loop,
1412 * but not the outer loop. ???)
1413 * TOP - the target of the backward edge from BOTTOM. In most cases FIRST and TOP are the same.
1414 * BOTTOM - the lexically last block in the loop (i.e. the block from which we jump to the top)
1415 * EXIT - the loop exit or the block right after the bottom
1416 * ENTRY - the entry in the loop (not necessarly the TOP), but there must be only one entry
1418 * We (currently) require the body of a loop to be a contiguous (in bbNext order) sequence of basic blocks.
1449 BasicBlock * bottom;
1452 unsigned char exitCount;
// Outer scan: every block is considered as a potential loop HEAD; candidates are
// found from back edges among the predecessors of the block that follows it
// (the "top" setup itself is elided in this extract -- confirm in full source).
1455 for (head = fgFirstBB; head->bbNext; head = head->bbNext)
1461 // Blocks that are rarely run have a zero bbWeight and should
1462 // never be optimized here
1464 if (top->bbWeight == BB_ZERO_WEIGHT)
1467 for (pred = top->bbPreds; pred; pred = pred->flNext)
1469 /* Is this a loop candidate? - We look for "back edges", i.e. an edge from BOTTOM
1470 * to TOP (note that this is an abuse of notation since this is not necessarily a back edge
1471 * as the definition says, but merely an indication that we have a loop there).
1472 * Thus, we have to be very careful and after entry discovery check that it is indeed
1473 * the only place we enter the loop (especially for non-reducible flow graphs).
1476 bottom = pred->flBlock;
1479 if (top->bbNum <= bottom->bbNum) // is this a backward edge? (from BOTTOM to TOP)
1481 if ((bottom->bbJumpKind == BBJ_EHFINALLYRET) ||
1482 (bottom->bbJumpKind == BBJ_EHFILTERRET) ||
1483 (bottom->bbJumpKind == BBJ_EHCATCHRET) ||
1484 (bottom->bbJumpKind == BBJ_CALLFINALLY) ||
1485 (bottom->bbJumpKind == BBJ_SWITCH) )
1487 /* BBJ_EHFINALLYRET, BBJ_EHFILTERRET, BBJ_EHCATCHRET, and BBJ_CALLFINALLY can never form a loop.
1488 * BBJ_SWITCH that has a backward jump appears only for labeled break. */
1492 BasicBlock * loopBlock;
1494 /* The presence of a "back edge" is an indication that a loop might be present here
1497 * 1. A collection of STRONGLY CONNECTED nodes i.e. there is a path from any
1498 * node in the loop to any other node in the loop (wholly within the loop)
1499 * 2. The loop has a unique ENTRY, i.e. there is only one way to reach a node
1500 * in the loop from outside the loop, and that is through the ENTRY
1503 /* Let's find the loop ENTRY */
1505 if (head->bbJumpKind == BBJ_ALWAYS)
1507 if (head->bbJumpDest->bbNum <= bottom->bbNum &&
1508 head->bbJumpDest->bbNum >= top->bbNum )
1510 /* OK - we enter somewhere within the loop */
1511 entry = head->bbJumpDest;
1513 /* some useful asserts
1514 * Cannot enter at the top - should have being caught by redundant jumps */
1516 assert ((entry != top) || (head->bbFlags & BBF_KEEP_BBJ_ALWAYS));
1520 /* special case - don't consider now */
1521 //assert (!"Loop entered in weird way!");
1525 // Can we fall through into the loop?
1526 else if (head->bbJumpKind == BBJ_NONE || head->bbJumpKind == BBJ_COND)
1528 /* The ENTRY is at the TOP (a do-while loop) */
1533 goto NO_LOOP; // head does not flow into the loop bail for now
1536 // Now we find the "first" block -- the earliest block reachable within the loop.
1537 // This is usually the same as "top", but can differ in rare cases where "top" is
1538 // the entry block of a nested loop, and that nested loop branches backwards to a
1539 // a block before "top". We find this by searching for such backwards branches
1540 // in the loop known so far.
// Fixed-point iteration: re-scan the (growing) [first..bottom] range until no
// successor lexically earlier than "first" is found.
1541 BasicBlock* first = top;
1542 BasicBlock* newFirst;
1543 bool blocksToSearch = true;
1544 BasicBlock* validatedAfter = bottom->bbNext;
1545 while (blocksToSearch)
1547 blocksToSearch = false;
1549 blocksToSearch = false;
1550 for (loopBlock = first; loopBlock != validatedAfter; loopBlock = loopBlock->bbNext)
1552 unsigned nSucc = loopBlock->NumSucc();
1553 for (unsigned j = 0; j < nSucc; j++)
1555 BasicBlock* succ = loopBlock->GetSucc(j);
1556 if ( (newFirst == nullptr && succ->bbNum < first->bbNum)
1557 || (newFirst != nullptr && succ->bbNum < newFirst->bbNum))
1563 if (newFirst != nullptr)
1565 validatedAfter = first;
1567 blocksToSearch = true;
1571 // Is "head" still before "first"? If not, we don't have a valid loop...
1572 if (head->bbNum >= first->bbNum)
1574 JITDUMP("Extending loop [BB%02u..BB%02u] 'first' to BB%02u captures head BB%02u. Rejecting loop.\n",
1575 top->bbNum, bottom->bbNum, first->bbNum, head->bbNum);
1579 /* Make sure ENTRY dominates all blocks in the loop
1580 * This is necessary to ensure condition 2. above
1581 * At the same time check if the loop has a single exit
1582 * point - those loops are easier to optimize */
1584 for (loopBlock = top;
1585 loopBlock != bottom->bbNext;
1586 loopBlock = loopBlock->bbNext)
1588 if (!fgDominate(entry, loopBlock))
1593 if (loopBlock == bottom)
1595 if (bottom->bbJumpKind != BBJ_ALWAYS)
1597 /* there is an exit at the bottom */
1599 noway_assert(bottom->bbJumpDest == top);
// Classify each remaining loop block's jump kind to count/locate exits;
// an exit edge leaving the [top..bottom] range from a block other than
// BOTTOM is handled in the cases below.
1606 BasicBlock * exitPoint;
1608 switch (loopBlock->bbJumpKind)
1611 case BBJ_CALLFINALLY:
1613 case BBJ_EHCATCHRET:
1614 assert (loopBlock->bbJumpDest);
1615 exitPoint = loopBlock->bbJumpDest;
1617 if (exitPoint->bbNum < top->bbNum ||
1618 exitPoint->bbNum > bottom->bbNum )
1620 /* exit from a block other than BOTTOM */
1629 case BBJ_EHFINALLYRET:
1630 case BBJ_EHFILTERRET:
1631 /* The "try" associated with this "finally" must be in the
1632 * same loop, so the finally block will return control inside the loop */
1637 /* those are exits from the loop */
1644 unsigned jumpCnt; jumpCnt = loopBlock->bbJumpSwt->bbsCount;
1645 BasicBlock * * jumpTab; jumpTab = loopBlock->bbJumpSwt->bbsDstTab;
1649 noway_assert(*jumpTab);
1650 exitPoint = *jumpTab;
1652 if (exitPoint->bbNum < top->bbNum ||
1653 exitPoint->bbNum > bottom->bbNum )
1659 while (++jumpTab, --jumpCnt);
1663 noway_assert(!"Unexpected bbJumpKind");
1668 /* Make sure we can iterate the loop (i.e. there is a way back to ENTRY)
1669 * This is to ensure condition 1. above which prevents marking fake loops
1671 * Below is an example:
1679 * The example above is not a loop since we bail after the first iteration
1681 * The condition we have to check for is
1682 * 1. ENTRY must have at least one predecessor inside the loop. Since we know that that block is
1683 * reachable, it can only be reached through ENTRY, therefore we have a way back to ENTRY
1685 * 2. If we have a GOTO (BBJ_ALWAYS) outside of the loop and that block dominates the
1686 * loop bottom then we cannot iterate
1688 * NOTE that this doesn't entirely satisfy condition 1. since "break" statements are not
1689 * part of the loop nodes (as per definition they are loop exits executed only once),
1690 * but we have no choice but to include them because we consider all blocks within TOP-BOTTOM */
1692 for (loopBlock = top; loopBlock != bottom; loopBlock = loopBlock->bbNext)
1694 switch (loopBlock->bbJumpKind)
1699 if (fgDominate(loopBlock, bottom))
1706 bool canIterateLoop = false;
1708 for (predEntry = entry->bbPreds; predEntry; predEntry = predEntry->flNext)
1710 if (predEntry->flBlock->bbNum >= top->bbNum &&
1711 predEntry->flBlock->bbNum <= bottom->bbNum )
1713 canIterateLoop = true;
1716 else if (predEntry->flBlock != head)
1718 // The entry block has multiple predecessors outside the loop; the 'head'
1719 // block isn't the only one. We only support a single 'head', so bail.
1724 if (!canIterateLoop)
1727 /* Double check - make sure that all loop blocks except ENTRY
1728 * have no predecessors outside the loop - this ensures only one loop entry and prevents
1729 * us from considering non-loops due to incorrectly assuming that we had a back edge
1732 * Loops of the form "while (a || b)" will be treated as 2 nested loops (with the same header)
1735 for (loopBlock = top;
1736 loopBlock != bottom->bbNext;
1737 loopBlock = loopBlock->bbNext)
1739 if (loopBlock == entry)
1742 for (predTop = loopBlock->bbPreds;
1744 predTop = predTop->flNext)
1746 if (predTop->flBlock->bbNum < top->bbNum ||
1747 predTop->flBlock->bbNum > bottom->bbNum )
1749 //noway_assert(!"Found loop with multiple entries");
1755 // Disqualify loops where the first block of the loop is less nested in EH than
1756 // the bottom block. That is, we don't want to handle loops where the back edge
1757 // goes from within an EH region to a first block that is outside that same EH
1758 // region. Note that we *do* handle loops where the first block is the *first*
1759 // block of a more nested EH region (since it is legal to branch to the first
1760 // block of an immediately more nested EH region). So, for example, disqualify
1767 // BB10 BBJ_COND => BB02
1771 // Here, BB10 is more nested than BB02.
1773 if (bottom->hasTryIndex() &&
1774 !bbInTryRegions(bottom->getTryIndex(), first))
1776 JITDUMP("Loop 'first' BB%02u is in an outer EH region compared to loop 'bottom' BB%02u. Rejecting loop.\n",
1777 first->bbNum, bottom->bbNum);
1781 #if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
1782 // Disqualify loops where the first block of the loop is a finally target.
1783 // The main problem is when multiple loops share a 'first' block that is a finally
1784 // target and we canonicalize the loops by adding a new loop head. In that case, we
1785 // need to update the blocks so the finally target bit is moved to the newly created
1786 // block, and removed from the old 'first' block. This is 'hard', so at this point
1787 // in the RyuJIT codebase (when we don't expect to keep the "old" ARM32 code generator
1788 // long-term), it's easier to disallow the loop than to update the flow graph to
1789 // support this case.
1791 if ((first->bbFlags & BBF_FINALLY_TARGET) != 0)
1793 JITDUMP("Loop 'first' BB%02u is a finally target. Rejecting loop.\n",
1797 #endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
1799 /* At this point we have a loop - record it in the loop table
1800 * If we found only one exit, record it in the table too
1801 * (otherwise an exit = 0 in the loop table means multiple exits) */
1808 optRecordLoop(head, first, top, entry, bottom, exit, exitCount);
1811 if (!hasMethodLoops)
1813 /* mark the method as containing natural loops */
1815 hasMethodLoops = true;
1818 /* increment total number of loops found */
1822 /* keep track of the number of exits */
1823 loopExitCountTable.record(static_cast<unsigned>(exitCount));
1824 #endif // COUNT_LOOPS
1827 /* current predecessor not good for a loop - continue with another one, if any */
1833 loopCountTable.record(loopsThisMethod);
1834 if (maxLoopsPerMethod < loopsThisMethod)
1836 maxLoopsPerMethod = loopsThisMethod;
1838 if (loopOverflowThisMethod)
1840 totalLoopOverflows++;
1842 #endif // COUNT_LOOPS
1844 // Now the loop indices are stable. We can figure out parent/child relationships
1845 // (using table indices to name loops), and label blocks.
// A loop's parent is the nearest earlier table entry that lpContains() it;
// the loop is then threaded onto that parent's child/sibling list.
1846 for (unsigned char loopInd = 1; loopInd < optLoopCount; loopInd++)
1848 for (unsigned char possibleParent = loopInd; possibleParent > 0;)
1851 if (optLoopTable[possibleParent].lpContains(optLoopTable[loopInd]))
1853 optLoopTable[loopInd].lpParent = possibleParent;
1854 optLoopTable[loopInd].lpSibling = optLoopTable[possibleParent].lpChild;
1855 optLoopTable[possibleParent].lpChild = loopInd;
1861 // Now label the blocks with the innermost loop to which they belong. Since parents
1862 // precede children in the table, doing the labeling for each loop in order will achieve
1863 // this -- the innermost loop labeling will be done last.
1864 for (unsigned char loopInd = 0; loopInd < optLoopCount; loopInd++)
1866 BasicBlock* first = optLoopTable[loopInd].lpFirst;
1867 BasicBlock* bottom = optLoopTable[loopInd].lpBottom;
1868 for (BasicBlock* blk = first; blk != nullptr; blk = blk->bbNext)
1870 blk->bbNatLoopNum = loopInd;
1871 if (blk == bottom) break;
1872 assert(blk->bbNext != nullptr); // We should never reach nullptr.
1876 // Make sure that loops are canonical: that every loop has a unique "top", by creating an empty "nop"
1877 // one, if necessary, for loops containing others that share a "top."
1879 for (unsigned char loopInd = 0; loopInd < optLoopCount; loopInd++)
1881 // Traverse the outermost loops as entries into the loop nest; so skip non-outermost.
1882 if (optLoopTable[loopInd].lpParent != BasicBlock::NOT_IN_LOOP)
1886 if (optCanonicalizeLoopNest(loopInd))
1893 fgUpdateChangedFlowGraph();
1897 if (verbose && optLoopCount > 0)
1899 printf("\nFinal natural loop table:\n");
1900 for (unsigned loopInd = 0; loopInd < optLoopCount; loopInd++)
1902 optPrintLoopInfo(loopInd);
// Retarget the jump destination(s) of 'blk' using 'redirectMap': any destination
// that appears as a key in the map is replaced by the block it maps to. For
// switch blocks every target slot is checked, and if any target changed the
// cached switch descriptor for 'blk' is invalidated.
1909 void Compiler::optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap)
1911 BasicBlock* newJumpDest = nullptr;
1912 switch (blk->bbJumpKind)
1917 case BBJ_EHFILTERRET:
1918 case BBJ_EHFINALLYRET:
1919 case BBJ_EHCATCHRET:
1920 // These have no jump destination to update.
1925 case BBJ_CALLFINALLY:
1927 // All of these have a single jump destination to update.
1928 if (redirectMap->Lookup(blk->bbJumpDest, &newJumpDest))
1930 blk->bbJumpDest = newJumpDest;
// Switch case: rewrite each entry of the target table that has a mapping.
1936 bool redirected = false;
1937 for (unsigned i = 0; i < blk->bbJumpSwt->bbsCount; i++)
1939 if (redirectMap->Lookup(blk->bbJumpSwt->bbsDstTab[i], &newJumpDest))
1941 blk->bbJumpSwt->bbsDstTab[i] = newJumpDest;
1945 // If any redirections happened, invalidate the switch table map for the switch.
1947 GetSwitchDescMap()->Remove(blk);
1956 // TODO-Cleanup: This should be a static member of the BasicBlock class.
// Copy the jump destination(s) of 'from' into 'to'. Both blocks must already
// have the same bbJumpKind (asserted below).
1957 void Compiler::optCopyBlkDest(BasicBlock* from, BasicBlock* to)
1959 assert(from->bbJumpKind == to->bbJumpKind); // Precondition.
1961 // copy the jump destination(s) from "from" to "to".
1962 switch (to->bbJumpKind)
1966 case BBJ_CALLFINALLY:
1968 // All of these have a single jump destination to update.
1969 to->bbJumpDest = from->bbJumpDest;
// Switch case: make a deep copy of the BBswtDesc and its destination table,
// so 'to' does not alias 'from's target array.
1974 to->bbJumpSwt = new (this, CMK_BasicBlock) BBswtDesc();
1975 to->bbJumpSwt->bbsCount = from->bbJumpSwt->bbsCount;
1976 to->bbJumpSwt->bbsDstTab = new (this, CMK_BasicBlock) BasicBlock*[from->bbJumpSwt->bbsCount];
1978 for (unsigned i = 0; i < from->bbJumpSwt->bbsCount; i++)
1980 to->bbJumpSwt->bbsDstTab[i] = from->bbJumpSwt->bbsDstTab[i];
1990 // Canonicalize the loop nest rooted at parent loop 'loopInd'.
1991 // Returns 'true' if the flow graph is modified.
1992 bool Compiler::optCanonicalizeLoopNest(unsigned char loopInd)
1994 bool modified = false;
1996 // Is the top of the current loop not in any nested loop?
1997 if (optLoopTable[loopInd].lpTop->bbNatLoopNum != loopInd)
1999 if (optCanonicalizeLoop(loopInd))
2005 for (unsigned char child = optLoopTable[loopInd].lpChild;
2006 child != BasicBlock::NOT_IN_LOOP;
2007 child = optLoopTable[child].lpSibling)
2009 if (optCanonicalizeLoopNest(child))
// Give loop 'loopInd' a unique "top" block: when the current top 't' also belongs
// to a more deeply nested loop, insert a fresh empty block 'newT' immediately
// before 'first', redirect the loop's bottom and the non-loop predecessors of 't'
// to it, and update the loop-table links (top/first/entry, plus the heads of
// child loops that shared 'h' as head). Returns a bool (presumably "flow graph
// modified", matching optCanonicalizeLoopNest's use of the result -- confirm in
// the full source).
2018 bool Compiler::optCanonicalizeLoop(unsigned char loopInd)
2020 // Is the top uniquely part of the current loop?
2021 BasicBlock* t = optLoopTable[loopInd].lpTop;
2023 if (t->bbNatLoopNum == loopInd)
2026 JITDUMP("in optCanonicalizeLoop: L%02u has top BB%02u (bottom BB%02u) with natural loop number L%02u: need to canonicalize\n",
2027 loopInd, t->bbNum, optLoopTable[loopInd].lpBottom->bbNum, t->bbNatLoopNum);
2029 // Otherwise, the top of this loop is also part of a nested loop.
2031 // Insert a new unique top for this loop. We must be careful to put this new
2032 // block in the correct EH region. Note that f->bbPrev might be in a different
2033 // EH region. For example:
2041 // In this case, first->bbPrev is BB07, which is in a different 'try' region.
2042 // On the other hand, the first block of multiple loops might be the first
2043 // block of a 'try' region that is completely contained in the multiple loops.
2048 // BB10 BBJ_ALWAYS => BB08
2050 // BB12 BBJ_ALWAYS => BB08
2052 // Here, we have two loops, both with BB08 as the "first" block. Block BB08
2053 // is a single-block "try" region. Neither loop "bottom" block is in the same
2054 // "try" region as BB08. This is legal because you can jump to the first block
2055 // of a try region. With EH normalization, no two "try" regions will share
2056 // this block. In this case, we need to insert a new block for the outer loop
2057 // in the same EH region as the branch from the "bottom":
2062 // BB10 BBJ_ALWAYS => BB08
2064 // BB12 BBJ_ALWAYS => BB30
2066 // Another possibility is that the "first" block of the loop nest can be the first block
2067 // of a "try" region that also has other predecessors than those in the loop, or even in
2068 // the "try" region (since blocks can target the first block of a "try" region). For example:
2072 // BB10 BBJ_ALWAYS => BB08
2074 // BB12 BBJ_ALWAYS => BB08
2077 // BB20 BBJ_ALWAYS => BB08
2079 // BB25 BBJ_ALWAYS => BB08
2081 // Here, BB08 has 4 flow graph predecessors: BB10, BB12, BB20, BB25. These are all potential loop
2082 // bottoms, for four possible nested loops. However, we require all the loop bottoms to be in the
2083 // same EH region. For loops BB08..BB10 and BB08..BB12, we need to add a new "top" block within
2084 // the try region, immediately before BB08. The bottom of the loop BB08..BB10 loop will target the
2085 // old BB08, and the bottom of the BB08..BB12 loop will target the new loop header. The other branches
2086 // (BB20, BB25) must target the new loop header, both for correctness, and to avoid the illegal
2087 // situation of branching to a non-first block of a 'try' region.
2089 // We can also have a loop nest where the "first" block is outside of a "try" region
2090 // and the back edges are inside a "try" region, for example:
2094 // BB09 try { BBJ_COND => BB02
2096 // BB15 BBJ_COND => BB02
2098 // BB21 } // end of "try"
2100 // In this case, both loop back edges were formed by "leave" instructions that were
2101 // imported into branches that were later made conditional. In this case, we don't
2102 // want to copy the EH region of the back edge, since that would create a block
2103 // outside of and disjoint with the "try" region of the back edge. However, to
2104 // simplify things, we disqualify this type of loop, so we should never see this here.
2106 BasicBlock* h = optLoopTable[loopInd].lpHead;
2107 BasicBlock* f = optLoopTable[loopInd].lpFirst;
2108 BasicBlock* b = optLoopTable[loopInd].lpBottom;
2110 // The loop must be entirely contained within a single handler region.
2111 assert(BasicBlock::sameHndRegion(f, b));
2113 // If the bottom block is in the same "try" region, then we extend the EH
2114 // region. Otherwise, we add the new block outside the "try" region.
2115 bool extendRegion = BasicBlock::sameTryRegion(f, b);
2116 BasicBlock* newT = fgNewBBbefore(BBJ_NONE, f, extendRegion);
2119 // We need to set the EH region manually. Set it to be the same
2120 // as the bottom block.
2121 newT->copyEHRegion(b);
// Give the new block the same reachability set as the old top.
2124 BlockSetOps::Assign(this, newT->bbReach, t->bbReach);
2126 // Redirect the "bottom" of the current loop to "newT".
2127 BlockToBlockMap* blockMap = new (getAllocatorLoopHoist()) BlockToBlockMap(getAllocatorLoopHoist());
2128 blockMap->Set(t, newT);
2129 optRedirectBlock(b, blockMap);
2131 // Redirect non-loop preds of "t" to also go to "newT". Inner loops that also branch to "t" should continue
2132 // to do so. However, there may be other predecessors from outside the loop nest that need to be updated
2133 // to point to "newT". This normally wouldn't happen, since they too would be part of the loop nest. However,
2134 // they might have been prevented from participating in the loop nest due to different EH nesting, or some
2137 // Note that optRedirectBlock doesn't update the predecessors list. So, if the same 't' block is processed
2138 // multiple times while canonicalizing multiple loop nests, we'll attempt to redirect a predecessor multiple times.
2139 // This is ok, because after the first redirection, the topPredBlock branch target will no longer match the source
2140 // edge of the blockMap, so nothing will happen.
2141 for (flowList* topPred = t->bbPreds; topPred != nullptr; topPred = topPred->flNext)
2143 BasicBlock* topPredBlock = topPred->flBlock;
2145 // Skip if topPredBlock is in the loop.
2146 // Note that this uses block number to detect membership in the loop. We are adding blocks during
2147 // canonicalization, and those block numbers will be new, and larger than previous blocks. However, we work
2148 // outside-in, so we shouldn't encounter the new blocks at the loop boundaries, or in the predecessor lists.
2149 if (t->bbNum <= topPredBlock->bbNum && topPredBlock->bbNum <= b->bbNum)
2151 JITDUMP("in optCanonicalizeLoop: 'top' predecessor BB%02u is in the range of L%02u (BB%02u..BB%02u); not redirecting its bottom edge\n",
2152 topPredBlock->bbNum, loopInd, t->bbNum, b->bbNum);
2156 JITDUMP("in optCanonicalizeLoop: redirect top predecessor BB%02u to BB%02u\n", topPredBlock->bbNum, newT->bbNum);
2157 optRedirectBlock(topPredBlock, blockMap);
2160 assert(newT->bbNext == f);
// Make the new top an empty BBJ_ALWAYS -> old top, holding a single GT_NOP
// statement so it is a non-empty, valid block.
2163 newT->bbJumpKind = BBJ_ALWAYS;
2164 newT->bbJumpDest = t;
2165 newT->bbTreeList = nullptr;
2166 fgInsertStmtAtEnd(newT, fgNewStmtFromTree(gtNewOperNode(GT_NOP, TYP_VOID, nullptr)));
2169 // If it had been a do-while loop (top == entry), update entry, as well.
2170 BasicBlock* origE = optLoopTable[loopInd].lpEntry;
2171 if (optLoopTable[loopInd].lpTop == origE)
2172 optLoopTable[loopInd].lpEntry = newT;
2173 optLoopTable[loopInd].lpTop = newT;
2174 optLoopTable[loopInd].lpFirst = newT;
2176 newT->bbNatLoopNum = loopInd;
2178 JITDUMP("in optCanonicalizeLoop: made new block BB%02u [%p] the new unique top of loop %d.\n", newT->bbNum, dspPtr(newT), loopInd);
2180 // Make sure the head block still goes to the entry...
2181 if (h->bbJumpKind == BBJ_NONE &&
2182 h->bbNext != optLoopTable[loopInd].lpEntry)
2184 h->bbJumpKind = BBJ_ALWAYS;
2185 h->bbJumpDest = optLoopTable[loopInd].lpEntry;
2187 else if (h->bbJumpKind == BBJ_COND &&
2188 h->bbNext == newT &&
2189 newT != optLoopTable[loopInd].lpEntry)
// The head conditionally fell through into newT but the entry moved: insert a
// new always-jump block h2 after the head to carry flow to the entry.
2191 BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, h, /*extendRegion*/true);
2192 optLoopTable[loopInd].lpHead = h2;
2193 h2->bbJumpDest = optLoopTable[loopInd].lpEntry;
2194 h2->bbTreeList = nullptr;
2195 fgInsertStmtAtEnd(h2, fgNewStmtFromTree(gtNewOperNode(GT_NOP, TYP_VOID, nullptr)));
2198 // If any loops nested in "loopInd" have the same head and entry as "loopInd",
2199 // it must be the case that they were do-while's (since "h" fell through to the entry).
2200 // The new node "newT" becomes the head of such loops.
2201 for (unsigned char childLoop = optLoopTable[loopInd].lpChild;
2202 childLoop != BasicBlock::NOT_IN_LOOP;
2203 childLoop = optLoopTable[childLoop].lpSibling)
2205 if ( optLoopTable[childLoop].lpEntry == origE
2206 && optLoopTable[childLoop].lpHead == h
2207 && newT->bbJumpKind == BBJ_NONE
2208 && newT->bbNext == origE)
2210 optUpdateLoopHead(childLoop, h, newT);
2216 bool Compiler::optLoopContains(unsigned l1, unsigned l2)
2218 assert(l1 != BasicBlock::NOT_IN_LOOP);
2219 if (l1 == l2) return true;
2220 else if (l2 == BasicBlock::NOT_IN_LOOP) return false;
2221 else return optLoopContains(l1, optLoopTable[l2].lpParent);
2224 void Compiler::optUpdateLoopHead(unsigned loopInd, BasicBlock* from, BasicBlock* to)
2226 assert(optLoopTable[loopInd].lpHead == from);
2227 optLoopTable[loopInd].lpHead = to;
2228 for (unsigned char childLoop = optLoopTable[loopInd].lpChild;
2229 childLoop != BasicBlock::NOT_IN_LOOP;
2230 childLoop = optLoopTable[childLoop].lpSibling)
2232 if (optLoopTable[childLoop].lpHead == from)
2233 optUpdateLoopHead(childLoop, from, to);
2237 /*****************************************************************************
2238 * If the : i += const" will cause an overflow exception for the small types.
2241 bool jitIterSmallOverflow(int iterAtExit, var_types incrType)
2247 case TYP_BYTE: type_MAX = SCHAR_MAX; break;
2248 case TYP_UBYTE: type_MAX = UCHAR_MAX; break;
2249 case TYP_SHORT: type_MAX = SHRT_MAX; break;
2250 case TYP_CHAR: type_MAX = USHRT_MAX; break;
2252 case TYP_UINT: // Detected by checking for 32bit ....
2253 case TYP_INT: return false; // ... overflow same as done for TYP_INT
2255 default: NO_WAY("Bad type");
2258 if (iterAtExit > type_MAX)
2264 /*****************************************************************************
2265 * If the "i -= const" will cause an underflow exception for the small types
2268 bool jitIterSmallUnderflow(int iterAtExit, var_types decrType)
2274 case TYP_BYTE: type_MIN = SCHAR_MIN; break;
2275 case TYP_SHORT: type_MIN = SHRT_MIN; break;
2276 case TYP_UBYTE: type_MIN = 0; break;
2277 case TYP_CHAR: type_MIN = 0; break;
2279 case TYP_UINT: // Detected by checking for 32bit ....
2280 case TYP_INT: return false; // ... underflow same as done for TYP_INT
2282 default: NO_WAY("Bad type");
2285 if (iterAtExit < type_MIN)
2291 /*****************************************************************************
2293 * Helper for unroll loops - Computes the number of repetitions
2294 * in a constant loop. If it cannot prove the number is constant returns false
// On success stores the trip count in '*iterCount' and (per the header above)
// returns true; returns false for unprovable/infinite/wrap-around counts or
// when small-type overflow/underflow of the iterator is detected. The switch
// over 'testOper' below repeats the same counting pattern for each comparison
// operator (==/!=, <, <=, >, >=); only selected lines of it survive in this
// extract.
2297 bool Compiler::optComputeLoopRep(int constInit,
2300 genTreeOps iterOper,
2301 var_types iterOperType,
2302 genTreeOps testOper,
2305 unsigned * iterCount)
2307 noway_assert(genActualType(iterOperType) == TYP_INT);
2310 __int64 constLimitX;
// Widen the 32-bit limit into a 64-bit value, sign- or zero-extended per the
// signedness of the loop test.
2315 // Using this, we can just do a signed comparison with other 32 bit values.
2316 if (unsTest) constLimitX = (unsigned int)constLimit;
2317 else constLimitX = ( signed int)constLimit;
2319 switch (iterOperType)
2321 // For small types, the iteration operator will narrow these values if big
2323 #define INIT_ITER_BY_TYPE(type) constInitX = (type)constInit; iterInc = (type)iterInc;
2325 case TYP_BYTE: INIT_ITER_BY_TYPE( signed char ); break;
2326 case TYP_UBYTE: INIT_ITER_BY_TYPE(unsigned char ); break;
2327 case TYP_SHORT: INIT_ITER_BY_TYPE( signed short); break;
2328 case TYP_CHAR: INIT_ITER_BY_TYPE(unsigned short); break;
2330 // For the big types, 32 bit arithmetic is performed
2333 case TYP_UINT: if (unsTest) constInitX = (unsigned int)constInit;
2334 else constInitX = ( signed int)constInit;
2338 noway_assert(!"Bad type");
2342 /* If iterInc is zero we have an infinite loop */
2346 /* Set iterSign to +1 for positive iterInc and -1 for negative iterInc */
2347 iterSign = (iterInc > 0) ? +1 : -1;
2349 /* Initialize loopCount to zero */
2352 // If dupCond is true then the loop head contains a test which skips
2353 // this loop, if the constInit does not pass the loop test
2354 // Such a loop can execute zero times.
2355 // If dupCond is false then we have a true do-while loop which we
2356 // always execute the loop once before performing the loop test
2360 constInitX += iterInc;
2363 // bail if count is based on wrap-around math
2366 if (constLimitX < constInitX)
2369 else if (constLimitX > constInitX)
2372 /* Compute the number of repetitions */
2376 __int64 iterAtExitX;
2379 /* something like "for (i=init; i == lim; i++)" doesn't make any sense */
2383 /* "for (i=init; i != lim; i+=const)" - this is tricky since it may
2384 * have a constant number of iterations or loop forever -
2385 * we have to compute (lim-init) mod iterInc to see if it is zero.
2386 * If mod iterInc is not zero then the limit test will miss and a wrap will occur
2387 * which is probably not what the end user wanted, but it is legal.
2392 /* Stepping by one, i.e. Mod with 1 is always zero */
2395 if (((constLimitX - constInitX) % iterInc) != 0)
2401 noway_assert(iterInc < 0);
2402 /* Stepping by -1, i.e. Mod with 1 is always zero */
2405 if (((constInitX - constLimitX) % (-iterInc)) != 0)
// GT_NE case: trip count, exit value, and small-type overflow check.
2419 if (constInitX != constLimitX)
2420 loopCount += (unsigned) ((constLimitX - constInitX - iterSign) / iterInc) + 1;
2422 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2425 iterAtExitX = (unsigned)iterAtExitX;
2427 // Check if iteration incr will cause overflow for small types
2428 if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
2431 // iterator with 32bit overflow. Bad for TYP_(U)INT
2432 if (iterAtExitX < constLimitX)
2435 *iterCount = loopCount;
2451 noway_assert(!"Unknown operator for loop iterator");
// GT_LT case.
2465 if (constInitX < constLimitX)
2466 loopCount += (unsigned) ((constLimitX - constInitX - iterSign) / iterInc) + 1;
2468 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2471 iterAtExitX = (unsigned)iterAtExitX;
2473 // Check if iteration incr will cause overflow for small types
2474 if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
2477 // iterator with 32bit overflow. Bad for TYP_(U)INT
2478 if (iterAtExitX < constLimitX)
2481 *iterCount = loopCount;
2497 noway_assert(!"Unknown operator for loop iterator");
// GT_LE case.
2511 if (constInitX <= constLimitX)
2512 loopCount += (unsigned) ((constLimitX - constInitX) / iterInc) + 1;
2514 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2517 iterAtExitX = (unsigned)iterAtExitX;
2519 // Check if iteration incr will cause overflow for small types
2520 if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
2523 // iterator with 32bit overflow. Bad for TYP_(U)INT
2524 if (iterAtExitX <= constLimitX)
2527 *iterCount = loopCount;
2543 noway_assert(!"Unknown operator for loop iterator");
// GT_GT case (decrementing loop: underflow check instead of overflow).
2557 if (constInitX > constLimitX)
2558 loopCount += (unsigned) ((constLimitX - constInitX - iterSign) / iterInc) + 1;
2560 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2563 iterAtExitX = (unsigned)iterAtExitX;
2565 // Check if small types will underflow
2566 if (jitIterSmallUnderflow((int)iterAtExitX, iterOperType))
2569 // iterator with 32bit underflow. Bad for TYP_INT and unsigneds
2570 if (iterAtExitX > constLimitX)
2573 *iterCount = loopCount;
2589 noway_assert(!"Unknown operator for loop iterator");
// GT_GE case.
2603 if (constInitX >= constLimitX)
2604 loopCount += (unsigned) ((constLimitX - constInitX) / iterInc) + 1;
2606 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2609 iterAtExitX = (unsigned)iterAtExitX;
2611 // Check if small types will underflow
2612 if (jitIterSmallUnderflow((int)iterAtExitX, iterOperType))
2615 // iterator with 32bit underflow. Bad for TYP_INT and unsigneds
2616 if (iterAtExitX >= constLimitX)
2619 *iterCount = loopCount;
2635 noway_assert(!"Unknown operator for loop iterator");
2640 noway_assert(!"Unknown operator for loop condition");
2647 /*****************************************************************************
2649 * Look for loop unrolling candidates and unroll them
2653 #pragma warning(push)
2654 #pragma warning(disable:21000) // Suppress PREFast warning about overly large function
2656 void Compiler::optUnrollLoops()
2658 if (compCodeOpt() == SMALL_CODE)
2661 if (optLoopCount == 0)
2665 if (JitConfig.JitNoUnroll())
2671 if (optCanCloneLoops())
2678 printf("*************** In optUnrollLoops()\n");
2680 /* Look for loop unrolling candidates */
2682 /* Double loop so that after unrolling an inner loop we set change to true
2683 * and we then go back over all of the loop candidates and try to unroll
2684 * the next outer loop, until we don't unroll any loops,
2685 * then change will be false and we are done.
2689 bool change = false;
2691 for (unsigned lnum = 0; lnum < optLoopCount; lnum++)
2695 BasicBlock * bottom;
2705 int lbeg; // initial value for iterator
2706 int llim; // limit value for iterator
2707 unsigned lvar; // iterator lclVar #
2708 int iterInc; // value to increment the iterator
2709 genTreeOps iterOper; // type of iterator increment (i.e. ASG_ADD, ASG_SUB, etc.)
2710 var_types iterOperType; // type result of the oper (for overflow instrs)
2711 genTreeOps testOper; // type of loop test (i.e. GT_LE, GT_GE, etc.)
2712 bool unsTest; // Is the comparison u/int
2714 unsigned totalIter; // total number of iterations in the constant loop
2715 unsigned loopCostSz; // Cost is size of one iteration
2716 unsigned loopFlags; // actual lpFlags
2717 unsigned requiredFlags; // required lpFlags
2719 GenTree * loopList; // new stmt list of the unrolled loop
2722 static const int ITER_LIMIT[COUNT_OPT_CODE + 1] =
2730 noway_assert(ITER_LIMIT[ SMALL_CODE] == 0);
2731 noway_assert(ITER_LIMIT[COUNT_OPT_CODE] == 0);
2733 unsigned iterLimit = (unsigned)ITER_LIMIT[compCodeOpt()];
2736 if (compStressCompile(STRESS_UNROLL_LOOPS, 50))
2740 static const int UNROLL_LIMIT_SZ[COUNT_OPT_CODE + 1] =
2748 noway_assert(UNROLL_LIMIT_SZ[ SMALL_CODE] == 0);
2749 noway_assert(UNROLL_LIMIT_SZ[COUNT_OPT_CODE] == 0);
2751 int unrollLimitSz = (unsigned)UNROLL_LIMIT_SZ[compCodeOpt()];
2754 if (compStressCompile(STRESS_UNROLL_LOOPS, 50))
2755 unrollLimitSz *= 10;
2758 loopFlags = optLoopTable[lnum].lpFlags;
2759 requiredFlags = LPFLG_DO_WHILE | LPFLG_ONE_EXIT | LPFLG_CONST;
2762 /* Ignore the loop if we don't have a do-while with a single exit
2763 that has a constant number of iterations */
2765 if ((loopFlags & requiredFlags) != requiredFlags)
2768 /* ignore if removed or marked as not unrollable */
2770 if (optLoopTable[lnum].lpFlags & (LPFLG_DONT_UNROLL | LPFLG_REMOVED))
2773 head = optLoopTable[lnum].lpHead; noway_assert(head);
2774 bottom = optLoopTable[lnum].lpBottom; noway_assert(bottom);
2776 /* The single exit must be at the bottom of the loop */
2777 noway_assert(optLoopTable[lnum].lpExit);
2778 if (optLoopTable[lnum].lpExit != bottom)
2781 /* Unrolling loops with jumps in them is not worth the headache
2782 * Later we might consider unrolling loops after un-switching */
2787 block = block->bbNext; noway_assert(block);
2789 if (block->bbJumpKind != BBJ_NONE)
2791 if (block != bottom)
2795 while (block != bottom);
2797 /* Get the loop data:
2801 - iterator increment
2802 - increment operation type (i.e. ASG_ADD, ASG_SUB, etc...)
2803 - loop test type (i.e. GT_GE, GT_LT, etc...)
2806 lbeg = optLoopTable[lnum].lpConstInit;
2807 llim = optLoopTable[lnum].lpConstLimit();
2808 testOper = optLoopTable[lnum].lpTestOper();
2810 lvar = optLoopTable[lnum].lpIterVar();
2811 iterInc = optLoopTable[lnum].lpIterConst();
2812 iterOper = optLoopTable[lnum].lpIterOper();
2814 iterOperType= optLoopTable[lnum].lpIterOperType();
2815 unsTest =(optLoopTable[lnum].lpTestTree->gtFlags & GTF_UNSIGNED) != 0;
2817 if (lvaTable[lvar].lvAddrExposed) // If the loop iteration variable is address-exposed then bail
2819 if (lvaTable[lvar].lvIsStructField) // If the loop iteration variable is a promoted field from a struct then bail
2822 /* Locate the pre-header and initialization and increment/test statements */
2824 phdr = head->bbTreeList; noway_assert(phdr);
2825 loop = bottom->bbTreeList; noway_assert(loop);
2827 init = head->lastStmt(); noway_assert(init && (init->gtNext == 0));
2828 test = bottom->lastStmt(); noway_assert(test && (test->gtNext == 0));
2829 incr = test->gtPrev; noway_assert(incr);
2831 if (init->gtFlags & GTF_STMT_CMPADD)
2833 /* Must be a duplicated loop condition */
2834 noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
2837 init = init->gtPrev; noway_assert(init);
2842 /* Find the number of iterations - the function returns false if not a constant number */
2844 if (!optComputeLoopRep(lbeg, llim,
2845 iterInc, iterOper, iterOperType,
2846 testOper, unsTest, dupCond,
2850 /* Forget it if there are too many repetitions or not a constant loop */
2852 if (totalIter > iterLimit)
2855 noway_assert(init->gtOper == GT_STMT); init = init->gtStmt.gtStmtExpr;
2856 noway_assert(test->gtOper == GT_STMT); test = test->gtStmt.gtStmtExpr;
2857 noway_assert(incr->gtOper == GT_STMT); incr = incr->gtStmt.gtStmtExpr;
2859 // Don't unroll loops we don't understand.
2860 if (incr->gtOper == GT_ASG)
2865 /* Make sure everything looks ok */
2867 (init->gtOper != GT_ASG) ||
2868 (init->gtOp.gtOp1->gtOper != GT_LCL_VAR) ||
2869 (init->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) ||
2870 (init->gtOp.gtOp2->gtOper != GT_CNS_INT) ||
2871 (init->gtOp.gtOp2->gtIntCon.gtIconVal != lbeg) ||
2873 !((incr->gtOper == GT_ASG_ADD) || (incr->gtOper == GT_ASG_SUB)) ||
2874 (incr->gtOp.gtOp1->gtOper != GT_LCL_VAR) ||
2875 (incr->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) ||
2876 (incr->gtOp.gtOp2->gtOper != GT_CNS_INT) ||
2877 (incr->gtOp.gtOp2->gtIntCon.gtIconVal != iterInc) ||
2879 (test->gtOper != GT_JTRUE) )
2881 noway_assert(!"Bad precondition in Compiler::optUnrollLoops()");
2885 /* heuristic - Estimated cost in code size of the unrolled loop */
2893 block = block->bbNext;
2895 /* Visit all the statements in the block */
2897 for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
2899 /* Get the expression and stop if end reached */
2901 GenTreePtr expr = stmt->gtStmtExpr;
2905 /* Calculate gtCostSz */
2906 gtSetStmtInfo(stmt);
2908 /* Update loopCostSz */
2909 loopCostSz += stmt->gtCostSz;
2912 while (block != bottom);
2914 /* Compute the estimated increase in code size for the unrolled loop */
2916 unsigned int fixedLoopCostSz; fixedLoopCostSz = 8;
2918 int unrollCostSz; unrollCostSz = (loopCostSz * totalIter) - (loopCostSz + fixedLoopCostSz);
2920 /* Don't unroll if too much code duplication would result. */
2922 if (unrollCostSz > unrollLimitSz)
2924 /* prevent this loop from being revisited */
2925 optLoopTable[lnum].lpFlags |= LPFLG_DONT_UNROLL;
2929 /* Looks like a good idea to unroll this loop, let's do it! */
2930 CLANG_FORMAT_COMMENT_ANCHOR;
2935 printf("\nUnrolling loop BB%02u", head->bbNext->bbNum);
2936 if (head->bbNext->bbNum != bottom->bbNum)
2937 printf("..BB%02u", bottom->bbNum);
2938 printf(" over V%02u from %u to %u", lvar, lbeg, llim);
2939 printf(" unrollCostSz = %d\n", unrollCostSz);
2944 /* Create the unrolled loop statement list */
2949 for (lval = lbeg; totalIter; totalIter--)
2958 block = block->bbNext; noway_assert(block);
2960 /* Visit all the statements in the block */
2962 for (stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
2964 /* Stop if we've reached the end of the loop */
2966 if (stmt->gtStmtExpr == incr)
2969 /* Clone/substitute the expression */
2971 expr = gtCloneExpr(stmt, 0, lvar, lval);
2973 // cloneExpr doesn't handle everything
2977 optLoopTable[lnum].lpFlags |= LPFLG_DONT_UNROLL;
2981 /* Append the expression to our list */
2984 loopLast->gtNext = expr;
2988 expr->gtPrev = loopLast;
2992 while (block != bottom);
2994 /* update the new value for the unrolled iterator */
3008 noway_assert(!"Unrolling not implemented for this loop iterator");
3012 noway_assert(!"Unknown operator for constant loop iterator");
3017 /* Finish the linked list */
3021 loopList->gtPrev = loopLast;
3022 loopLast->gtNext = 0;
3025 /* Replace the body with the unrolled one */
3031 block = block->bbNext; noway_assert(block);
3032 block->bbTreeList = 0;
3033 block->bbJumpKind = BBJ_NONE;
3034 block->bbFlags &= ~BBF_NEEDS_GCPOLL;
3036 while (block != bottom);
3038 bottom->bbJumpKind = BBJ_NONE;
3039 bottom->bbTreeList = loopList;
3040 bottom->bbFlags &= ~BBF_NEEDS_GCPOLL;
3041 bottom->modifyBBWeight(bottom->bbWeight / BB_LOOP_WEIGHT);
3045 fgMorphStmts(bottom, &dummy, &dummy, &dummy);
3047 /* Update bbRefs and bbPreds */
3048 /* Here head->bbNext is bottom !!! - Replace it */
3050 fgRemoveRefPred(head->bbNext, bottom);
3052 /* Now change the initialization statement in the HEAD to "lvar = lval;"
3053 * (the last value of the iterator in the loop)
3054 * and drop the jump condition since the unrolled loop will always execute */
3056 init->gtOp.gtOp2->gtIntCon.gtIconVal = lval;
3058 /* if the HEAD is a BBJ_COND drop the condition (and make HEAD a BBJ_NONE block) */
3060 if (head->bbJumpKind == BBJ_COND)
3062 phdr = head->bbTreeList; noway_assert(phdr);
3063 test = phdr->gtPrev;
3065 noway_assert(test && (test->gtNext == 0));
3066 noway_assert(test->gtOper == GT_STMT);
3067 noway_assert(test->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
3069 init = test->gtPrev; noway_assert(init && (init->gtNext == test));
3070 noway_assert(init->gtOper == GT_STMT);
3073 phdr->gtPrev = init;
3074 head->bbJumpKind = BBJ_NONE;
3075 head->bbFlags &= ~BBF_NEEDS_GCPOLL;
3077 /* Update bbRefs and bbPreds */
3079 fgRemoveRefPred(head->bbJumpDest, head);
3083 /* the loop must execute */
3084 noway_assert(head->bbJumpKind == BBJ_NONE);
3090 printf("Whole unrolled loop:\n");
3092 GenTreePtr s = loopList;
3096 noway_assert(s->gtOper == GT_STMT);
3107 /* Remember that something has changed */
3111 /* Make sure to update loop table */
3113 /* Use the LPFLG_REMOVED flag and update the bbLoopMask accordingly
3114 * (also make head and bottom NULL - to hit an assert or GPF) */
3116 optLoopTable[lnum].lpFlags |= LPFLG_REMOVED;
3117 optLoopTable[lnum].lpHead =
3118 optLoopTable[lnum].lpBottom = nullptr;
3128 fgDebugCheckBBlist();
3132 #pragma warning(pop)
3135 /*****************************************************************************
3137 * Return non-zero if there is a code path from 'topBB' to 'botBB' that will
3138 * not execute a method call.
3141 bool Compiler::optReachWithoutCall(BasicBlock *topBB,
     // Returns true if some code path from 'topBB' down to 'botBB' may execute
     // without passing a GC safe point; returns false only when we can prove
     // that a block flagged BBF_GC_SAFE_POINT must execute on the way.
3144 // TODO-Cleanup: Currently BBF_GC_SAFE_POINT is not set for helper calls,
3145 // as some helper calls are neither interruptible nor hijackable.
3146 // When we can determine this, then we can set BBF_GC_SAFE_POINT for
3147 // those helpers too.
3149 noway_assert(topBB->bbNum <= botBB->bbNum);
3151 // We can always check topBB and botBB for any gc safe points and early out
     // (a bitwise OR of the two flag words tests either block in one branch)
3153 if ((topBB->bbFlags | botBB->bbFlags) & BBF_GC_SAFE_POINT)
3156 // Otherwise we will need to rely upon the dominator sets
3158 if (!fgDomsComputed)
3160 // return a conservative answer of true when we don't have the dominator sets
3164 BasicBlock *curBB = topBB;
3167 noway_assert(curBB)
3169 // If we added a loop pre-header block then we will
3170 // have a bbNum greater than fgLastBB, and we won't have
3171 // any dominator information about this block, so skip it.
3173 if (curBB->bbNum <= fgLastBB->bbNum)
3175 noway_assert(curBB->bbNum <= botBB->bbNum);
3177 // Does this block contain a gc safe point?
3179 if (curBB->bbFlags & BBF_GC_SAFE_POINT)
3181 // Will this block always execute on the way to botBB ?
3183 // Since we are checking every block in [topBB .. botBB] and we are using
3184 // a lexical definition of a loop.
3185 // (all that we know is that botBB is a back-edge to topBB)
3186 // Thus while walking blocks in this range we may encounter some blocks
3187 // that are not really part of the loop, and so we need to perform
3188 // some additional checks:
3190 // We will check that the current 'curBB' is reachable from 'topBB'
3191 // and that it dominates the block containing the back-edge 'botBB'
3192 // When both of these are true then we know that the gcsafe point in 'curBB'
3193 // will be encountered in the loop and we can return false
3195 if (fgDominate(curBB, botBB) && fgReachable(topBB, curBB))
3200 // If we've reached the destination block, then we're done
3207 curBB = curBB->bbNext;
3210 // If we didn't find any blocks that contained a gc safe point and
3211 // also met the fgDominate and fgReachable criteria then we must return true
3216 /*****************************************************************************
3218 * Find the loop termination test at the bottom of the loop
3222 GenTreePtr optFindLoopTermTest(BasicBlock *bottom)
     // Returns the last statement of 'bottom', which is expected to hold the
     // loop termination test. The statement list head's gtPrev link points at
     // the last statement, so no forward walk is needed to fetch it.
3224 GenTreePtr testt = bottom->bbTreeList;
3226 assert(testt && testt->gtOper == GT_STMT);
3228 GenTreePtr result = testt->gtPrev;
     // Debug-only verification: walk forward to the end of the statement list
     // and confirm it is the same node the gtPrev shortcut produced.
3231 while (testt->gtNext)
3232 testt = testt->gtNext;
3234 assert(testt == result);
3240 /*****************************************************************************
3241 * Optimize "jmp C; do{} C:while(cond);" loops to "if (cond){ do{}while(cond); }"
3244 void Compiler::fgOptWhileLoop(BasicBlock * block)
     // Attempts the while -> do-while loop inversion described below: if
     // 'block' jumps forward over the loop body to a single-statement
     // condition block 'bTest' that jumps back, duplicate (and reverse) the
     // condition at the end of 'block' so the loop body becomes a do-while.
3246 noway_assert(!opts.MinOpts() && !opts.compDbgCode);
3247 noway_assert(compCodeOpt() != SMALL_CODE);
3250 Optimize while loops into do { } while loop
3251 Our loop hoisting logic requires do { } while loops.
3252 Specifically, we're looking for the following case:
3263 If we find this, and the condition is simple enough, we change
3264 the loop to the following:
3269 // else fall-through
3280 /* Does the BB end with an unconditional jump? */
3282 if (block->bbJumpKind != BBJ_ALWAYS ||
3283 (block->bbFlags & BBF_KEEP_BBJ_ALWAYS)) // It can't be one of the ones we use for our exception magic
3286 // It has to be a forward jump
3287 // TODO-CQ: Check if we can also optimize the backwards jump as well.
3289 if (fgIsForwardBranch(block) == false)
3292 // Get hold of the jump target
3293 BasicBlock * bTest = block->bbJumpDest;
3295 // Does the block consist of 'jtrue(cond) block' ?
3296 if (bTest->bbJumpKind != BBJ_COND)
3299 // bTest must be a backwards jump to block->bbNext
3300 if (bTest->bbJumpDest != block->bbNext)
3303 // Since test is a BBJ_COND it will have a bbNext
3304 noway_assert(bTest->bbNext);
3306 // 'block' must be in the same try region as the condition, since we're going to insert
3307 // a duplicated condition in 'block', and the condition might include exception throwing code.
3308 if (!BasicBlock::sameTryRegion(block, bTest))
3311 // We're going to change 'block' to branch to bTest->bbNext, so that also better be in the
3312 // same try region (or no try region) to avoid generating illegal flow.
3313 BasicBlock* bTestNext = bTest->bbNext;
3314 if (bTestNext->hasTryIndex() && !BasicBlock::sameTryRegion(block, bTestNext))
3317 GenTreePtr condStmt = optFindLoopTermTest(bTest);
3319 // bTest must contain only a jtrue with no other stmts, we will only clone
3320 // the conditional, so any other statements will not get cloned
3321 // TODO-CQ: consider cloning the whole bTest block as inserting it after block.
3323 if (bTest->bbTreeList != condStmt)
3326 /* Get to the condition node from the statement tree */
3328 noway_assert(condStmt->gtOper == GT_STMT);
3330 GenTreePtr condTree = condStmt->gtStmt.gtStmtExpr;
3331 noway_assert(condTree->gtOper == GT_JTRUE);
3333 condTree = condTree->gtOp.gtOp1;
3335 // The condTree has to be a RelOp comparison
3336 // TODO-CQ: Check if we can also optimize the backwards jump as well.
3338 if (condTree->OperIsCompare() == false)
3341 /* We call gtPrepareCost to measure the cost of duplicating this tree */
3343 gtPrepareCost(condTree);
3344 unsigned estDupCostSz = condTree->gtCostSz;
3346 double loopIterations = (double) BB_LOOP_WEIGHT;
3348 bool allProfileWeightsAreValid = false;
3349 BasicBlock::weight_t weightBlock = block->bbWeight;
3350 BasicBlock::weight_t weightTest = bTest->bbWeight;
3351 BasicBlock::weight_t weightNext = block->bbNext->bbWeight;
3353 // If we have profile data then we calculate the number of times
3354 // the loop will iterate into loopIterations
3355 if (fgIsUsingProfileWeights())
3357 // Only rely upon the profile weight when all three of these blocks
3358 // have good profile weights
3359 if ((block->bbFlags & BBF_PROF_WEIGHT) &&
3360 (bTest->bbFlags & BBF_PROF_WEIGHT) &&
3361 (block->bbNext->bbFlags & BBF_PROF_WEIGHT))
3363 allProfileWeightsAreValid = true;
3365 // If this while loop never iterates then don't bother transforming
3366 if (weightNext == 0)
3369 // with (weightNext > 0) we should also have (weightTest >= weightBlock)
3370 // if the profile weights are all valid.
3372 // weightNext is the number of times this loop iterates
3373 // weightBlock is the number of times that we enter the while loop
3374 // loopIterations is the average number of times that this loop iterates
3376 if (weightTest >= weightBlock)
3378 loopIterations = (double) block->bbNext->bbWeight / (double) block->bbWeight;
3383 unsigned maxDupCostSz = 32;
3385 // optFastCodeOrBlendedLoop(bTest->bbWeight) does not work here as we have not
3386 // set loop weights yet
3387 if ((compCodeOpt() == FAST_CODE) ||
3388 compStressCompile(STRESS_DO_WHILE_LOOPS, 30))
3393 // If this loop iterates a lot then raise the maxDupCost
3394 if (loopIterations >= 12.0)
3396 if (loopIterations >= 96.0)
3399 // If the loop condition has a shared static helper, we really want this loop converted
3400 // as not converting the loop will disable loop hoisting, meaning the shared helper will
3401 // be executed on every loop iteration.
3402 int countOfHelpers = 0;
3403 fgWalkTreePre(&condTree, CountSharedStaticHelper, &countOfHelpers);
3405 if (countOfHelpers > 0 && compCodeOpt() != SMALL_CODE)
3407 maxDupCostSz += 24 * min(countOfHelpers, (int) (loopIterations + 1.5));
3410 // If the compare has too high cost then we don't want to dup
3412 bool costIsTooHigh = (estDupCostSz > maxDupCostSz);
3417 printf("\nDuplication of loop condition [%06u] is %s, because the cost of duplication (%i) is %s than %i,"
3418 "\n loopIterations = %7.3f, countOfHelpers = %d, validProfileWeights = %s\n",
3420 costIsTooHigh ? "not done" : "performed",
3422 costIsTooHigh ? "greater" : "less or equal",
3424 loopIterations, countOfHelpers,
3425 allProfileWeightsAreValid ? "true" : "false");
3434 /* Looks good - duplicate the condition test */
3436 condTree->gtFlags |= GTF_RELOP_ZTT;
3438 condTree = gtCloneExpr(condTree);
3439 gtReverseCond(condTree);
3441 // Make sure clone expr copied the flag
3442 assert(condTree->gtFlags & GTF_RELOP_ZTT);
3444 condTree = gtNewOperNode(GT_JTRUE, TYP_VOID, condTree);
3446 /* Create a statement entry out of the condition and
3447 append the condition test at the end of 'block' */
3449 GenTreePtr copyOfCondStmt = fgInsertStmtAtEnd(block, condTree);
     // GTF_STMT_CMPADD marks this as a compiler-added (duplicated) statement
3451 copyOfCondStmt->gtFlags |= GTF_STMT_CMPADD;
3453 #ifdef DEBUGGING_SUPPORT
3454 if (opts.compDbgInfo)
3455 copyOfCondStmt->gtStmt.gtStmtILoffsx = condStmt->gtStmt.gtStmtILoffsx;
3458 // Flag the block that received the copy as potentially having an array/vtable
3459 // reference if the block copied from did; this is a conservative guess.
3460 if (auto copyFlags = bTest->bbFlags & (BBF_HAS_VTABREF | BBF_HAS_IDX_LEN))
3462 block->bbFlags |= copyFlags;
3465 // If we have profile data for all blocks and we know that we are cloning the
3466 // bTest block into block and thus changing the control flow from block so
3467 // that it no longer goes directly to bTest anymore, we have to adjust the
3468 // weight of bTest by subtracting out the weight of block.
3470 if (allProfileWeightsAreValid)
3473 // Some additional sanity checks before adjusting the weight of bTest
3475 if ((weightNext > 0) && (weightTest >= weightBlock) && (weightTest != BB_MAX_WEIGHT))
3477 // Get the two edges that flow out of bTest
3478 flowList * edgeToNext = fgGetPredForBlock(bTest->bbNext, bTest);
3479 flowList * edgeToJump = fgGetPredForBlock(bTest->bbJumpDest, bTest);
3481 // Calculate the new weight for block bTest
3483 BasicBlock::weight_t newWeightTest = (weightTest > weightBlock)
3484 ? (weightTest - weightBlock)
3486 bTest->bbWeight = newWeightTest;
3488 if (newWeightTest == BB_ZERO_WEIGHT)
3490 bTest->bbFlags |= BBF_RUN_RARELY;
3491 // All out edge weights are set to zero
3492 edgeToNext->flEdgeWeightMin = BB_ZERO_WEIGHT;
3493 edgeToNext->flEdgeWeightMax = BB_ZERO_WEIGHT;
3494 edgeToJump->flEdgeWeightMin = BB_ZERO_WEIGHT;
3495 edgeToJump->flEdgeWeightMax = BB_ZERO_WEIGHT;
3499 // Update our edge weights
3500 edgeToNext->flEdgeWeightMin = BB_ZERO_WEIGHT;
3501 edgeToNext->flEdgeWeightMax = min(edgeToNext->flEdgeWeightMax, newWeightTest);
3502 edgeToJump->flEdgeWeightMin = BB_ZERO_WEIGHT;
3503 edgeToJump->flEdgeWeightMax = min(edgeToJump->flEdgeWeightMax, newWeightTest);
3508 /* Change the block to end with a conditional jump */
3510 block->bbJumpKind = BBJ_COND;
3511 block->bbJumpDest = bTest->bbNext;
3513 /* Mark the jump dest block as being a jump target */
3514 block->bbJumpDest->bbFlags |= BBF_JMP_TARGET|BBF_HAS_LABEL;
3516 /* Update bbRefs and bbPreds for 'block->bbNext' 'bTest' and 'bTest->bbNext' */
3518 fgAddRefPred(block->bbNext, block);
3520 fgRemoveRefPred(bTest, block);
3521 fgAddRefPred(bTest->bbNext, block);
3526 printf("\nDuplicating loop condition in BB%02u for loop (BB%02u - BB%02u)",
3527 block->bbNum, block->bbNext->bbNum, bTest->bbNum);
3528 printf("\nEstimated code size expansion is %d\n ", estDupCostSz);
3530 gtDispTree(copyOfCondStmt);
3536 /*****************************************************************************
3538 * Optimize the BasicBlock layout of the method
3541 void Compiler::optOptimizeLayout()
     // Optimize the basic-block layout: run the while->do-while inversion
     // (fgOptWhileLoop) over every block, then update the flow graph.
3543 noway_assert(!opts.MinOpts() && !opts.compDbgCode);
3548 printf("*************** In optOptimizeLayout()\n");
3552 /* Check that the flowgraph data (bbNum, bbRefs, bbPreds) is up-to-date */
3553 fgDebugCheckBBlist();
3556 noway_assert(fgModified == false);
3558 for (BasicBlock *block = fgFirstBB; block; block = block->bbNext)
3560 /* Make sure the appropriate fields are initialized */
3562 if (block->bbWeight == BB_ZERO_WEIGHT)
3564 /* Zero weighted block can't have a LOOP_HEAD flag */
3565 noway_assert(block->isLoopHead() == false);
3569 assert(block->bbLoopNum == 0);
3571 if (compCodeOpt() != SMALL_CODE)
3573 /* Optimize "while(cond){}" loops to "cond; do{}while(cond);" */
3575 fgOptWhileLoop(block);
3581 // Recompute the edge weight if we have modified the flow graph in fgOptWhileLoop
3582 fgComputeEdgeWeights();
     // NOTE(review): the two fgUpdateFlowGraph calls below appear to come from
     // alternate preprocessor/conditional branches not visible here — confirm.
3585 fgUpdateFlowGraph(true);
3587 fgUpdateFlowGraph();
3590 /*****************************************************************************
3592 * Perform loop inversion, find and classify natural loops
3595 void Compiler::optOptimizeLoops()
     // Find and mark natural loops: set block weights, run
     // optFindNaturalLoops, then walk the flow graph looking for back-edges
     // and mark every loop's blocks via optMarkLoopBlocks.
3597 noway_assert(!opts.MinOpts() && !opts.compDbgCode);
3601 printf("*************** In optOptimizeLoops()\n");
3604 optSetBlockWeights();
3606 /* Were there any loops in the flow graph? */
3610 /* now that we have dominator information we can find loops */
3612 optFindNaturalLoops();
3614 unsigned loopNum = 0;
3616 /* Iterate over the flow graph, marking all loops */
3618 /* We will use the following terminology:
3619 * top - the first basic block in the loop (i.e. the head of the backward edge)
3620 * bottom - the last block in the loop (i.e. the block from which we jump to the top)
3621 * lastBottom - used when we have multiple back-edges to the same top
3628 for (top = fgFirstBB; top; top = top->bbNext)
3630 BasicBlock * foundBottom = NULL;
3632 for (pred = top->bbPreds; pred; pred = pred->flNext)
3634 /* Is this a loop candidate? - We look for "back edges" */
3636 BasicBlock * bottom = pred->flBlock;
3638 /* is this a backward edge? (from BOTTOM to TOP) */
3640 if (top->bbNum > bottom->bbNum)
3643 /* 'top' also must have the BBF_LOOP_HEAD flag set */
3645 if (top->isLoopHead() == false)
3648 /* We only consider back-edges that are BBJ_COND or BBJ_ALWAYS for loops */
3650 if ((bottom->bbJumpKind != BBJ_COND) &&
3651 (bottom->bbJumpKind != BBJ_ALWAYS) )
3654 /* the top block must be able to reach the bottom block */
3655 if (!fgReachable(top, bottom))
3658 /* Found a new loop, record the longest backedge in foundBottom */
3660 if ((foundBottom == NULL) || (bottom->bbNum > foundBottom->bbNum))
3662 foundBottom = bottom;
3670 /* Mark the loop header as such */
     // bbLoopNum is an unsigned char, hence the FitsIn check (max 255 loops)
3671 assert(FitsIn<unsigned char>(loopNum));
3672 top->bbLoopNum = (unsigned char) loopNum;
3675 /* Mark all blocks between 'top' and 'bottom' */
3677 optMarkLoopBlocks(top, foundBottom, false);
3680 // We track at most 255 loops
3684 totalUnnatLoopOverflows++;
3691 totalUnnatLoopCount += loopNum;
3699 printf("\nFound a total of %d loops.", loopNum);
3700 printf("\nAfter loop weight marking:\n");
3701 fgDispBasicBlocks();
3706 optLoopsMarked = true;
3710 //------------------------------------------------------------------------
3711 // optDeriveLoopCloningConditions: Derive loop cloning conditions.
3714 // loopNum - the current loop index for which conditions are derived.
3715 // context - data structure where all loop cloning info is kept.
3718 // "false" if conditions cannot be obtained. "true" otherwise.
3719 // The cloning conditions are updated in the "conditions"[loopNum] field
3720 // of the "context" parameter.
3723 // Inspect the loop cloning optimization candidates and populate the conditions necessary
3724 // for each optimization candidate. Checks if the loop stride is "> 0" if the loop
3725 // condition is "less than". If the initializer is "var" init then adds condition
3726 // "var >= 0", and if the loop is var limit then, "var >= 0" and "var <= a.len"
3727 // are added to "context". These conditions are checked in the pre-header block
3728 // and the cloning choice is made.
3731 // Callers should assume AND operation is used i.e., if all conditions are
3732 // true, then take the fast path.
3734 bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext* context)
     // Derive the cloning conditions for loop 'loopNum' and push them into
     // 'context' (see the header comment above). Returns false when a usable
     // set of conditions cannot be derived.
3736 JITDUMP("------------------------------------------------------------\n");
3737 JITDUMP("Deriving cloning conditions for L%02u\n", loopNum);
3739 LoopDsc* loop = &optLoopTable[loopNum];
3740 ExpandArrayStack<LcOptInfo*>* optInfos = context->GetLoopOptInfo(loopNum);
     // Only "i < limit" style loops are handled here; the stride must be
     // positive for the derived "limit <= a.len" conditions to be sound.
3742 if (loop->lpTestOper() == GT_LT)
3744 // Stride conditions
3745 if (loop->lpIterConst() <= 0)
3747 JITDUMP("> Stride %d is invalid\n", loop->lpIterConst());
3752 if (loop->lpFlags & LPFLG_CONST_INIT)
3754 // Only allowing const init at this time.
3755 if (loop->lpConstInit < 0)
3757 JITDUMP("> Init %d is invalid\n", loop->lpConstInit);
3761 else if (loop->lpFlags & LPFLG_VAR_INIT)
     // Variable init: require "initVar >= 0" at runtime.
3764 LC_Condition geZero(GT_GE,
3765 LC_Expr(LC_Ident(loop->lpVarInit, LC_Ident::Var)),
3766 LC_Expr(LC_Ident(0, LC_Ident::Const)));
3767 context->EnsureConditions(loopNum)->Push(geZero);
3771 JITDUMP("> Not variable init\n");
     // Build 'ident' for the loop limit: constant, variable, or array length.
3777 if (loop->lpFlags & LPFLG_CONST_LIMIT)
3779 int limit = loop->lpConstLimit();
3782 JITDUMP("> limit %d is invalid\n", limit);
3785 ident = LC_Ident(limit, LC_Ident::Const);
3787 else if (loop->lpFlags & LPFLG_VAR_LIMIT)
3789 unsigned limitLcl = loop->lpVarLimit();
3790 ident = LC_Ident(limitLcl, LC_Ident::Var);
3792 LC_Condition geZero(GT_GE,
3794 LC_Expr(LC_Ident(0, LC_Ident::Const)));
3796 context->EnsureConditions(loopNum)->Push(geZero);
3798 else if (loop->lpFlags & LPFLG_ARRLEN_LIMIT)
3800 ArrIndex* index = new (getAllocator()) ArrIndex(getAllocator());
3801 if (!loop->lpArrLenLimit(this, index))
3803 JITDUMP("> ArrLen not matching");
3806 ident = LC_Ident(LC_Array(LC_Array::Jagged, index, LC_Array::ArrLen));
3808 // Ensure that this array must be dereference-able, before executing the actual condition.
3809 LC_Array array(LC_Array::Jagged, index, LC_Array::None);
3810 context->EnsureDerefs(loopNum)->Push(array);
3814 JITDUMP("> Undetected limit\n");
     // For each optimization candidate, add "limit <= arrLen" conditions.
3818 for (unsigned i = 0; i < optInfos->Size(); ++i)
3820 LcOptInfo* optInfo = optInfos->GetRef(i);
3821 switch (optInfo->GetOptType())
3823 case LcOptInfo::LcJaggedArray:
3826 LcJaggedArrayOptInfo* arrIndexInfo = optInfo->AsLcJaggedArrayOptInfo();
3827 LC_Array arrLen(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::ArrLen);
3828 LC_Ident arrLenIdent = LC_Ident(arrLen);
3830 LC_Condition cond(GT_LE, LC_Expr(ident), LC_Expr(arrLenIdent));
3831 context->EnsureConditions(loopNum)->Push(cond);
3833 // Ensure that this array must be dereference-able, before executing the actual condition.
3834 LC_Array array(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::None);
3835 context->EnsureDerefs(loopNum)->Push(array);
3838 case LcOptInfo::LcMdArray:
3840 // limit <= mdArrLen
3841 LcMdArrayOptInfo* mdArrInfo = optInfo->AsLcMdArrayOptInfo();
3842 LC_Condition cond(GT_LE,
3846 LC_Array::MdArray, mdArrInfo->GetArrIndexForDim(getAllocator()), mdArrInfo->dim, LC_Array::None)
3848 context->EnsureConditions(loopNum)->Push(cond);
3853 JITDUMP("Unknown opt\n");
3857 JITDUMP("Conditions: (");
3858 DBEXEC(verbose, context->PrintConditions(loopNum));
3865 //------------------------------------------------------------------------------------
3866 // optComputeDerefConditions: Derive loop cloning conditions for dereferencing arrays.
3869 // loopNum - the current loop index for which conditions are derived.
3870 // context - data structure where all loop cloning info is kept.
3873 // "false" if conditions cannot be obtained. "true" otherwise.
3874 // The deref conditions are updated in the "derefConditions"[loopNum] field
3875 // of the "context" parameter.
3877 // Definition of Deref Conditions:
3878 // To be able to check for the loop cloning condition that (limitVar <= a.len)
3879 // we should first be able to dereference "a". i.e., "a" is non-null.
3885 // for (k in 0..n) // Inner most loop is being cloned. Cloning needs to check if
3886 // // (n <= a[i][j].len) and other safer conditions to take the fast path
3889 // Now, we want to deref a[i][j] to invoke length operator on it to perform the cloning fast path check.
3890 // This involves deref of (a), (a[i]), (a[i][j]), therefore, the following should first
3891 // be true to do the deref.
3893 // (a != null) && (i < a.len) && (a[i] != null) && (j < a[i].len) && (a[i][j] != null) --> (1)
3895 // Note the short circuiting AND. Implication: these conditions should be performed in separate
3896 // blocks each of which will branch to slow path if the condition evaluates to false.
3898 // Now, imagine a situation where we have
3899 // a[x][y][k] = 20 and a[i][j][k] = 0
3900 // also in the inner most loop where x, y are parameters, then our conditions will have
3904 // in addition to the above conditions (1) to get rid of bounds check on index 'k'
3906 // But these conditions can be checked together with conditions
3907 // (i < a.len) without a need for a separate block. In summary, the conditions will be:
3910 // ((i < a.len) & (x < a.len)) && <-- Note the bitwise AND here.
3911 // (a[i] != null & a[x] != null) && <-- Note the bitwise AND here.
3912 // (j < a[i].len & y < a[x].len) && <-- Note the bitwise AND here.
3913 // (a[i][j] != null & a[x][y] != null) <-- Note the bitwise AND here.
3915 // This naturally yields a tree style pattern, where the nodes of the tree are
3916 // the array and indices respectively.
3932 // Notice that the variables in the same levels can have their conditions combined in the
3933 // same block with a bitwise AND. Whereas, the conditions in consecutive levels will be
3934 // combined with a short-circuiting AND (i.e., different basic blocks).
3937 // Construct a tree of array indices and the array which will generate the optimal
3938 // conditions for loop cloning.
3940 // a[i][j][k], b[i] and a[i][y][k] are the occurrences in the loop. Then, the tree should be:
3955 // In this method, we will construct such a tree by descending depth first into the array
3956 // index operation and forming a tree structure as we encounter the array or the index variables.
3958 // This tree structure will then be used to generate conditions like below:
3959 // (a != null) & (b != null) && // from the first level of the tree.
3961 // (i < a.len) & (i < b.len) && // from the second level of the tree. Levels can be combined.
3962 // (a[i] != null) & (b[i] != null) && // from the second level of the tree.
3964 // (j < a[i].len) & (y < a[i].len) && // from the third level.
3965 // (a[i][j] != null) & (a[i][y] != null) && // from the third level.
3970 bool Compiler::optComputeDerefConditions(unsigned loopNum, LoopCloneContext* context)
     // Build the dereference tree (see the header comment above) from the
     // arrays recorded in the loop's deref list, then derive per-level block
     // conditions into 'context'. Returns false if no conditions are derived.
3972 ExpandArrayStack<LC_Deref*> nodes(getAllocator());
3975 // Get the dereference-able arrays.
3976 ExpandArrayStack<LC_Array>* deref = context->EnsureDerefs(loopNum);
3978 // For each array in the dereference list, construct a tree,
3979 // where the nodes are array and index variables and an edge 'u-v'
3980 // exists if a node 'v' indexes node 'u' directly as in u[v] or an edge
3981 // 'u-v-w' transitively if u[v][w] occurs.
3982 for (unsigned i = 0; i < deref->Size(); ++i)
3984 LC_Array& array = (*deref)[i];
3986 // First populate the array base variable.
3987 LC_Deref* node = LC_Deref::Find(&nodes, array.arrIndex->arrLcl);
3988 if (node == nullptr)
3990 node = new (getAllocator()) LC_Deref(array, 0 /*level*/);
3994 // For each dimension (level) for the array, populate the tree with the variable
3995 // from that dimension.
3996 unsigned rank = (unsigned) array.GetDimRank();
     // NOTE(review): this inner loop index shadows the outer 'i'; behavior is
     // unaffected (the outer 'i' is not used inside), but a distinct name
     // would be clearer.
3997 for (unsigned i = 0; i < rank; ++i)
3999 node->EnsureChildren(getAllocator());
4000 LC_Deref* tmp = node->Find(array.arrIndex->indLcls[i]);
4003 tmp = new (getAllocator()) LC_Deref(array, node->level + 1);
4004 node->children->Push(tmp);
4007 // Descend one level down.
4011 // Keep the maxRank of all array dereferences.
4012 maxRank = max((int) rank, maxRank);
4018 for (unsigned i = 0; i < nodes.Size(); ++i)
4020 if (i != 0) printf(",");
4032 // First level will always yield the null-check, since it is made of the array base variables.
4033 // All other levels (dimensions) will yield two conditions ex: (i < a.length && a[i] != null)
4034 // So add 1 after rank * 2.
4035 unsigned condBlocks = (unsigned) maxRank * 2 + 1;
4037 // Heuristic to not create too many blocks;
4043 // Derive conditions into an 'array of level x array of conditions' i.e., levelCond[levels][conds]
4044 ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* levelCond = context->EnsureBlockConditions(loopNum, condBlocks);
4045 for (unsigned i = 0; i < nodes.Size(); ++i)
4047 nodes[i]->DeriveLevelConditions(levelCond);
4050 DBEXEC(verbose, context->PrintBlockConditions(loopNum));
4055 //----------------------------------------------------------------------------
4056 //  optDebugLogLoopCloning: Insert a call to jithelper that prints a message.
4059 //      block        - the block in which the helper call needs to be inserted.
4060 //      insertBefore - the tree before which the helper call will be inserted.
// No-op unless the COMPlus/JitConfig "JitDebugLogLoopCloning" knob is set; the
// early-return body is elided from this listing.
4062 void Compiler::optDebugLogLoopCloning(BasicBlock* block, GenTreePtr insertBefore)
4064     if (JitConfig.JitDebugLogLoopCloning() == 0)
// Build the TYP_VOID helper call, wrap it in a statement, insert it before
// 'insertBefore' in 'block', then morph the new statement in place.
4068     GenTreePtr logCall = gtNewHelperCallNode(CORINFO_HELP_DEBUG_LOG_LOOP_CLONING, TYP_VOID);
4069     GenTreePtr stmt = fgNewStmtFromTree(logCall);
4070     fgInsertStmtBefore(block, insertBefore, stmt);
4071     fgMorphBlockStmt(block, stmt DEBUGARG("Debug log loop cloning"));
4075 //------------------------------------------------------------------------
4076 // optPerformStaticOptimizations: Perform the optimizations for the optimization
4077 //      candidates gathered during the cloning phase.
4080 //     loopNum     -  the current loop index for which the optimizations are performed.
4081 //     context     -  data structure where all loop cloning info is kept.
4082 //     dynamicPath -  If true, the optimization is performed in the fast path among the
4083 //                    cloned loops. If false, it means this is the only path (i.e.,
4084 //                    there is no slow path.)
4087 //     Perform the optimizations on the fast path i.e., the path in which the
4088 //     optimization candidates were collected at the time of identifying them.
4089 //     The candidates store all the information necessary (the tree/stmt/block
4090 //     they are from) to perform the optimization.
4093 //     The unoptimized path is either already cloned when this method is called or
4094 //     there is no unoptimized path (got eliminated statically.) So this method
4095 //     performs the optimizations assuming that the path in which the candidates
4096 //     were collected is the fast path in which the optimizations will be performed.
4098 void Compiler::optPerformStaticOptimizations(unsigned loopNum, LoopCloneContext* context DEBUGARG(bool dynamicPath))
4100     ExpandArrayStack<LcOptInfo*>* optInfos = context->GetLoopOptInfo(loopNum);
4101     for (unsigned i = 0; i < optInfos->Size(); ++i)
4103         LcOptInfo* optInfo = optInfos->GetRef(i);
4104         switch (optInfo->GetOptType())
// Jagged-array candidate: remove the range check for the recorded dimension.
4106         case LcOptInfo::LcJaggedArray:
4108                 LcJaggedArrayOptInfo* arrIndexInfo = optInfo->AsLcJaggedArrayOptInfo();
// optRemoveRangeCheck relies on compCurBB being the block containing the check.
4109                 compCurBB = arrIndexInfo->arrIndex.useBlock;
4110                 optRemoveRangeCheck(
4111                     arrIndexInfo->arrIndex.bndsChks[arrIndexInfo->dim],
4112                     arrIndexInfo->stmt, true, GTF_ASG, true);
// Debug-only: log the cloning decision on the dynamic (fast) path.
4113                 DBEXEC(dynamicPath, optDebugLogLoopCloning(arrIndexInfo->arrIndex.useBlock, arrIndexInfo->stmt));
4116         case LcOptInfo::LcMdArray:
4117             // TODO-CQ: CLONE: Implement.
4126 //----------------------------------------------------------------------------
4127 //  optCanCloneLoops: Use the environment flag to determine whether loop
4128 //      cloning is allowed to be performed.
4131 //      Returns true in debug builds if COMPlus_JitCloneLoops flag is set.
4132 //      Disabled for retail for now.
// NOTE(review): the line reading JitConfig.JitCloneLoops() is presumably wrapped in
// an elided #ifdef DEBUG; in non-debug builds the default of 1 (enabled) stands.
4134 bool Compiler::optCanCloneLoops()
4136     // Enabled for retail builds now.
4137     unsigned cloneLoopsFlag = 1;
4139     cloneLoopsFlag = JitConfig.JitCloneLoops();
4141     return (cloneLoopsFlag != 0);
4145 //----------------------------------------------------------------------------
4146 //  optIsLoopClonable: Determine whether this loop can be cloned.
4149 //      loopInd     loop index which needs to be checked if it can be cloned.
4152 //      Returns true if the loop can be cloned. If it returns false
4153 //      prints a message in debug as why the loop can't be cloned.
// Each rejection below is followed by an elided 'return false;' in the full source.
4155 bool Compiler::optIsLoopClonable(unsigned loopInd)
4157     // First, for now, make sure the loop doesn't have any embedded exception handling -- I don't want to tackle
4158     // inserting new EH regions in the exception table yet.
4159     BasicBlock* stopAt = optLoopTable[loopInd].lpBottom->bbNext;
4160     unsigned loopRetCount = 0;
4161     for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != stopAt; blk = blk->bbNext)
// Count BBJ_RETURN blocks; used below for the per-method return-block budget.
4163         if (blk->bbJumpKind == BBJ_RETURN) loopRetCount++;
4164         if (bbIsTryBeg(blk))
4166             JITDUMP("Loop cloning: rejecting loop %d in %s, because it has a try begin.\n",
4167                     loopInd, info.compFullName);
4172     // Is the entry block a handler or filter start?  If so, then if we cloned, we could create a jump
4173     // into the middle of a handler (to go to the cloned copy.)  Reject.
4174     if (bbIsHandlerBeg(optLoopTable[loopInd].lpEntry))
4176         JITDUMP("Loop cloning: rejecting loop because entry block is a handler start.\n");
4180     // If the head and entry are in different EH regions, reject.
4181     if (!BasicBlock::sameEHRegion(optLoopTable[loopInd].lpHead, optLoopTable[loopInd].lpEntry))
4183         JITDUMP("Loop cloning: rejecting loop because head and entry blocks are in different EH regions.\n");
4187     // Is the first block after the last block of the loop a handler or filter start?
4188     // Usually, we create a dummy block after the orginal loop, to skip over the loop clone
4189     // and go to where the original loop did.  That raises problems when we don't actually go to
4190     // that block; this is one of those cases.  This could be fixed fairly easily; for example,
4191     // we could add a dummy nop block after the (cloned) loop bottom, in the same handler scope as the
4192     // loop.  This is just a corner to cut to get this working faster.
4193     BasicBlock* bbAfterLoop = optLoopTable[loopInd].lpBottom->bbNext;
4194     if (bbAfterLoop != nullptr && bbIsHandlerBeg(bbAfterLoop))
4196         JITDUMP("Loop cloning: rejecting loop because next block after bottom is a handler start.\n");
4200     // We've previously made a decision whether to have separate return epilogs, or branch to one.
4201     // There's a GCInfo limitation in the x86 case, so that there can be no more than 4 separate epilogs.
4202     // (I thought this was x86-specific, but it's not if-d.  On other architectures, the decision should be made as a
4203     // heuristic tradeoff; perhaps we're just choosing to live with 4 as the limit.)
4204     if (fgReturnCount + loopRetCount > 4)
4206         JITDUMP("Loop cloning: rejecting loop because it has %d returns; if added to previously-existing %d returns, would exceed the limit of 4.\n", loopRetCount, fgReturnCount);
4210     // Otherwise, we're going to add those return blocks.
// Side effect on acceptance: the method-wide return-block count is bumped here.
4211     fgReturnCount += loopRetCount;
4216 /*****************************************************************************
4218  *  Identify loop cloning opportunities, derive loop cloning conditions,
4219  *  perform loop cloning, use the derived conditions to choose which
// Driver for the loop-cloning phase: gathers candidates per loop, derives/evaluates
// conditions, statically optimizes loops whose conditions are decidable, and clones
// the rest. Returns early if there are no loops or cloning is disabled.
4222 void Compiler::optCloneLoops()
4224     JITDUMP("\n*************** In optCloneLoops()\n");
4225     if (optLoopCount == 0 || !optCanCloneLoops())
// Debug-only dump of blocks/trees before the phase runs.
4233         printf("Blocks/Trees at start of phase\n");
4234         fgDispBasicBlocks(true);
4238     LoopCloneContext context(optLoopCount, getAllocator());
4240     // Obtain array optimization candidates in the context.
4241     optObtainLoopCloningOpts(&context);
4243     // For each loop, derive cloning conditions for the optimization candidates.
4244     for (unsigned i = 0; i < optLoopCount; ++i)
4246         ExpandArrayStack<LcOptInfo*>* optInfos = context.GetLoopOptInfo(i);
4247         if (optInfos == nullptr)
// No candidates for this loop; skip it (continue elided).
4252         if (!optDeriveLoopCloningConditions(i, &context) || !optComputeDerefConditions(i, &context))
4254             JITDUMP("> Conditions could not be obtained\n");
4255             context.CancelLoopOptInfo(i);
4259         bool allTrue = false;
4260         bool anyFalse = false;
4261         context.EvaluateConditions(i, &allTrue, &anyFalse DEBUGARG(verbose));
// If any condition is statically false, cloning can never help: cancel.
4264             context.CancelLoopOptInfo(i);
4268             // Perform static optimizations on the fast path since we always
4269             // have to take the cloned path.
4270             optPerformStaticOptimizations(i, &context DEBUGARG(false));
4272             // No need to clone.
4273             context.CancelLoopOptInfo(i);
4280     // The code in this #if has been useful in debugging loop cloning issues, by
4281     // enabling selective enablement of the loop cloning optimization according to
// Debug-only method-hash filter: clone only methods whose hash falls in
// [loopclonehashlo, loopclonehashhi] read from the environment.
4284     unsigned methHash = info.compMethodHash();
4285     char* lostr = getenv("loopclonehashlo");
4286     unsigned methHashLo = 0;
4289         sscanf_s(lostr, "%x", &methHashLo);
4290         // methHashLo = (unsigned(atoi(lostr)) << 2);  // So we don't have to use negative numbers.
4292     char* histr = getenv("loopclonehashhi");
4293     unsigned methHashHi = UINT32_MAX;
4296         sscanf_s(histr, "%x", &methHashHi);
4297         // methHashHi = (unsigned(atoi(histr)) << 2);  // So we don't have to use negative numbers.
4299     if (methHash < methHashLo || methHash > methHashHi)
// Clone every loop that still has surviving opt info, after condition optimization.
4304     for (unsigned i = 0; i < optLoopCount; ++i)
4306         if (context.GetLoopOptInfo(i) != nullptr)
4309             context.OptimizeConditions(i DEBUGARG(verbose));
4310             context.OptimizeBlockConditions(i DEBUGARG(verbose));
4311             optCloneLoop(i, &context);
// Debug-only dump of the flow graph after cloning.
4318         printf("\nAfter loop cloning:\n");
4319         fgDispBasicBlocks(/*dumpTrees*/true);
// optCloneLoop: Make a copy of the blocks of loop 'loopInd', wire up the fast
// (original) and slow (cloned) paths, and insert the choice-condition blocks that
// select between them at runtime. Uses 'context' for the derived block conditions.
4324 void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context)
4326     assert(loopInd < optLoopCount);
4328     JITDUMP("\nCloning loop %d: [h: %d, f: %d, t: %d, e: %d, b: %d].\n",
4330             optLoopTable[loopInd].lpHead->bbNum,
4331             optLoopTable[loopInd].lpFirst->bbNum,
4332             optLoopTable[loopInd].lpTop->bbNum,
4333             optLoopTable[loopInd].lpEntry->bbNum,
4334             optLoopTable[loopInd].lpBottom->bbNum);
4336     // Determine the depth of the loop, so we can properly weight blocks added (outside the cloned loop blocks).
4337     unsigned depth = optLoopDepth(loopInd);
4338     unsigned ambientWeight = 1;
4339     for (unsigned j = 0; j < depth; j++)
4341         unsigned lastWeight = ambientWeight;
4342         ambientWeight *= BB_LOOP_WEIGHT;
4343         // If the multiplication overflowed, stick at max.
4344         // (Strictly speaking, a multiplication could overflow and still have a result
4345         // that is >= lastWeight...but if so, the original weight must be pretty large,
4346         // and it got bigger, so that's OK.)
4347         if (ambientWeight < lastWeight)
4349             ambientWeight = BB_MAX_WEIGHT;
4354     // If we're in a non-natural loop, the ambient weight might be higher than we computed above.
4355     // Be safe by taking the max with the head block's weight.
4356     ambientWeight = max(ambientWeight, optLoopTable[loopInd].lpHead->bbWeight);
4358     // This is the containing loop, if any -- to label any blocks we create that are outside
4359     // the loop being cloned.
4360     unsigned char ambientLoop = optLoopTable[loopInd].lpParent;
4362     // First, make sure that the loop has a unique header block, creating an empty one if necessary.
4363     optEnsureUniqueHead(loopInd, ambientWeight);
4365     // We're going to make
4377     // H2--> E    (Optional; if E == T == F, let H fall through to F/T/E)
4389     BasicBlock* h = optLoopTable[loopInd].lpHead;
4390     if (h->bbJumpKind != BBJ_NONE && h->bbJumpKind != BBJ_ALWAYS)
4392         // Make a new block to be the unique entry to the loop.
4393         assert(h->bbJumpKind == BBJ_COND && h->bbNext == optLoopTable[loopInd].lpEntry);
4394         BasicBlock* newH = fgNewBBafter(BBJ_NONE,
4396                                         /*extendRegion*/true);
4397         newH->bbWeight = (newH->isRunRarely() ? 0 : ambientWeight);
4398         BlockSetOps::Assign(this, newH->bbReach, h->bbReach);
4399         // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
4400         newH->bbNatLoopNum = ambientLoop;
4402         optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h);
4405     // First, make X2 after B, if necessary.  (Not necessary if b is a BBJ_ALWAYS.)
4406     // "newPred" will be the predecessor of the blocks of the cloned loop.
4407     BasicBlock* b = optLoopTable[loopInd].lpBottom;
4408     BasicBlock* newPred = b;
4409     if (b->bbJumpKind != BBJ_ALWAYS)
4411         BasicBlock* x = b->bbNext;
4414             BasicBlock* x2 = fgNewBBafter(BBJ_ALWAYS, b, /*extendRegion*/true);
4415             x2->bbWeight = (x2->isRunRarely() ? 0 : ambientWeight);
4417             // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
4418             x2->bbNatLoopNum = ambientLoop;
4421             BlockSetOps::Assign(this, x2->bbReach, h->bbReach);
4426     // Now we'll make "h2", after "h" to go to "e" -- unless the loop is a do-while,
4427     // so that "h" already falls through to "e" (e == t == f).
4428     BasicBlock* h2 = nullptr;
4429     if (optLoopTable[loopInd].lpHead->bbNext != optLoopTable[loopInd].lpEntry)
// NOTE(review): this declaration shadows the outer 'h2' (which therefore stays
// nullptr); the outer 'h2' is later compared against in the assert below. Verify
// against the full source whether this shadowing is intentional.
4431         BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS,
4432                                       optLoopTable[loopInd].lpHead,
4433                                       /*extendRegion*/true);
4434         h2->bbWeight = (h2->isRunRarely() ? 0 : ambientWeight);
4436         // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
4437         h2->bbNatLoopNum = ambientLoop;
4439         h2->bbJumpDest = optLoopTable[loopInd].lpEntry;
4440         optUpdateLoopHead(loopInd,optLoopTable[loopInd].lpHead, h2);
4443     // Now we'll clone the blocks of the loop body.
4444     BasicBlock* newFirst = nullptr;
4445     BasicBlock* newBot = nullptr;
4447     BlockToBlockMap* blockMap = new (getAllocator()) BlockToBlockMap(getAllocator());
4448     for (BasicBlock* blk = optLoopTable[loopInd].lpFirst;
4449          blk != optLoopTable[loopInd].lpBottom->bbNext;
4452         BasicBlock* newBlk = fgNewBBafter(blk->bbJumpKind,
4454                                           /*extendRegion*/true);
4456         BasicBlock::CloneBlockState(this, newBlk, blk);
4457         // TODO-Cleanup: The above clones the bbNatLoopNum, which is incorrect.  Eventually, we should probably insert
4458         // the cloned loop in the loop table.  For now, however, we'll just make these blocks be part of the surrounding
4459         // loop, if one exists -- the parent of the loop we're cloning.
4460         newBlk->bbNatLoopNum = optLoopTable[loopInd].lpParent;
4462         if (newFirst == nullptr) newFirst = newBlk;
4463         newBot = newBlk; // Continually overwrite to make sure we get the last one.
4465         blockMap->Set(blk, newBlk);
4468     // Perform the static optimizations on the fast path.
4469     optPerformStaticOptimizations(loopInd, context DEBUGARG(true));
4471     // Now go through the new blocks, remapping their jump targets within the loop.
4472     for (BasicBlock* blk = optLoopTable[loopInd].lpFirst;
4473          blk != optLoopTable[loopInd].lpBottom->bbNext;
4477         BasicBlock* newblk = nullptr;
4478         bool b = blockMap->Lookup(blk, &newblk);
4479         assert(b && newblk != nullptr);
4481         assert(blk->bbJumpKind == newblk->bbJumpKind);
4483         // First copy the jump destination(s) from "blk".
4484         optCopyBlkDest(blk, newblk);
4486         // Now redirect the new block according to "blockMap".
4487         optRedirectBlock(newblk, blockMap);
4490     assert((h->bbJumpKind == BBJ_NONE && (h->bbNext == h2 || h->bbNext == optLoopTable[loopInd].lpEntry))
4491            || (h->bbJumpKind == BBJ_ALWAYS));
4493     // If all the conditions are true, go to E2.
4494     BasicBlock* e2 = nullptr;
4495     bool foundIt = blockMap->Lookup(optLoopTable[loopInd].lpEntry, &e2);
// 'h' becomes the first condition block of the runtime choice structure.
4497     h->bbJumpKind = BBJ_COND;
4499     // We will create the following structure
4501     // cond0 (in h)  -?> cond1
4502     // slow          --> e2 (slow) always
4509     // We should always have block conditions, at the minimum, the array should be deref-able
4510     assert(context->HasBlockConditions(loopInd));
4512     // Create a unique header for the slow path.
4513     BasicBlock* slowHead = fgNewBBafter(BBJ_ALWAYS, h, true);
4514     slowHead->bbWeight = (h->isRunRarely() ? 0 : ambientWeight);
4515     slowHead->bbNatLoopNum = ambientLoop;
4516     slowHead->bbJumpDest = e2;
4518     BasicBlock* condLast = optInsertLoopChoiceConditions(context, loopInd, h, slowHead);
4519     condLast->bbJumpDest = slowHead;
4521     // If h2 is present it is already the head or replace 'h' by 'condLast'.
4524         optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, condLast);
4526     assert(foundIt && e2 != NULL);
4528     fgUpdateChangedFlowGraph();
4531 //--------------------------------------------------------------------------------------------------
4532 // optInsertLoopChoiceConditions - Insert the loop conditions for a loop between loop head and entry
4535 //      context     loop cloning context variable
4536 //      loopNum     the loop index
4537 //      head        loop head for "loopNum"
4538 //      slowHead    the slow path loop head
4544 // Create the following structure.
4546 // Note below that the cond0 is inverted in head i.e., if true jump to cond1. This is because
4547 // condn cannot jtrue to loop head h2. It has to be from a direct pred block.
4549 // cond0 (in h)  -?> cond1
4550 // slowHead      --> e2 (slowHead) always
4551 // !cond1        -?> slowHead
4552 // !cond2        -?> slowHead
4554 // !condn        -?> slowHead
4557 // Insert condition 0 in 'h' and create other condition blocks and insert conditions in them.
// Returns the last condition block created ('curCond' after the loop; return elided).
4559 BasicBlock* Compiler::optInsertLoopChoiceConditions(LoopCloneContext* context, unsigned loopNum, BasicBlock* head, BasicBlock* slowHead)
4561     JITDUMP("Inserting loop cloning conditions\n");
4562     assert(context->HasBlockConditions(loopNum));
4564     BasicBlock* curCond = head;
4565     ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* levelCond = context->GetBlockConditions(loopNum);
4566     for (unsigned i = 0; i < levelCond->Size(); ++i)
4568         bool isHeaderBlock = (curCond == head);
4570         // Flip the condition if header block.
4571         context->CondToStmtInBlock(this, *((*levelCond)[i]), curCond, isHeaderBlock);
4573         // Create each condition block ensuring wiring between them.
4574         BasicBlock* tmp = fgNewBBafter(BBJ_COND, isHeaderBlock ? slowHead : curCond, true);
4575         curCond->bbJumpDest = isHeaderBlock ? tmp : slowHead;
// Advance to the newly created block (assignment elided) and give it the
// header's weight and natural-loop number.
4578         curCond->inheritWeight(head);
4579         curCond->bbNatLoopNum = head->bbNatLoopNum;
4580         JITDUMP("Created new block %02d for new level\n", curCond->bbNum);
4583     // Finally insert cloning conditions after all deref conditions have been inserted.
4584     context->CondToStmtInBlock(this, *(context->GetConditions(loopNum)), curCond, false);
// optEnsureUniqueHead: Guarantee that loop 'loopInd' has a unique header block that
// dominates the loop entry; if the current head doesn't dominate the entry, create an
// empty header 'h2' just before the top and retarget all non-loop preds of the entry to it.
4588 void Compiler::optEnsureUniqueHead(unsigned loopInd, unsigned ambientWeight)
4590     BasicBlock* h = optLoopTable[loopInd].lpHead;
4591     BasicBlock* t = optLoopTable[loopInd].lpTop;
4592     BasicBlock* e = optLoopTable[loopInd].lpEntry;
4593     BasicBlock* b = optLoopTable[loopInd].lpBottom;
4595     // If "h" dominates the entry block, then it is the unique header.
4596     if (fgDominate(h, e)) return;
4598     // Otherwise, create a new empty header block, make it the pred of the entry block,
4599     // and redirect the preds of the entry block to go to this.
4601     BasicBlock* beforeTop = t->bbPrev;
4602     // Make sure that the new block is in the same region as the loop.
4603     // (We will only create loops that are entirely within a region.)
4604     BasicBlock * h2 = fgNewBBafter(BBJ_ALWAYS, beforeTop, true);
4605     // This is in the containing loop.
4606     h2->bbNatLoopNum = optLoopTable[loopInd].lpParent;
4607     h2->bbWeight = (h2->isRunRarely() ? 0 : ambientWeight);
4609     // We don't care where it was put; splice it between beforeTop and top.
4610     if (beforeTop->bbNext != h2)
4612         h2->bbPrev->setNext(h2->bbNext); // Splice h2 out.
4613         beforeTop->setNext(h2);          // Splice h2 in, between beforeTop and t.
// If h2 does not fall through to the entry, it must jump to it.
4617     if (h2->bbNext != e)
4619         h2->bbJumpKind = BBJ_ALWAYS;
4622     BlockSetOps::Assign(this, h2->bbReach, e->bbReach);
4624     // Redirect paths from preds of "e" to go to "h2" instead of "e".
4625     BlockToBlockMap* blockMap = new (getAllocator()) BlockToBlockMap(getAllocator());
4626     blockMap->Set(e, h2);
4628     for (flowList* predEntry = e->bbPreds; predEntry; predEntry = predEntry->flNext)
4630         BasicBlock* predBlock = predEntry->flBlock;
4632         // Skip if predBlock is in the loop.
// Loop membership is tested by bbNum range [top, bottom]; in-loop preds keep
// branching to the entry 'e'.
4633         if (t->bbNum <= predBlock->bbNum && predBlock->bbNum <= b->bbNum) continue;
4634         optRedirectBlock(predBlock, blockMap);
4637     optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h2);
4640 /*****************************************************************************
4642  *  Determine the kind of interference for the call.
// Classifies a GT_CALL by which indirections its side effects can kill:
// non-helper calls kill everything; specific write helpers kill ref, scalar,
// or all indirections; all other helpers kill nothing.
4646 Compiler::callInterf    Compiler::optCallInterf(GenTreePtr call)
4648     assert(call->gtOper == GT_CALL);
4650     // if not a helper, kills everything
// (The CALLINT_ALL return for the non-helper case is elided from this listing.)
4651     if  (call->gtCall.gtCallType != CT_HELPER)
4654     // setfield and array address store kill all indirections
4655     switch (eeGetHelperNum(call->gtCall.gtCallMethHnd))
4657     case CORINFO_HELP_ASSIGN_REF:         // Not strictly needed as we don't make a GT_CALL with this
4658     case CORINFO_HELP_CHECKED_ASSIGN_REF: // Not strictly needed as we don't make a GT_CALL with this
4659     case CORINFO_HELP_ASSIGN_BYREF:       // Not strictly needed as we don't make a GT_CALL with this
4660     case CORINFO_HELP_SETFIELDOBJ:
4661     case CORINFO_HELP_ARRADDR_ST:
4663         return CALLINT_REF_INDIRS;
4665     case CORINFO_HELP_SETFIELDFLOAT:
4666     case CORINFO_HELP_SETFIELDDOUBLE:
4667     case CORINFO_HELP_SETFIELD8:
4668     case CORINFO_HELP_SETFIELD16:
4669     case CORINFO_HELP_SETFIELD32:
4670     case CORINFO_HELP_SETFIELD64:
4672         return CALLINT_SCL_INDIRS;
4674     case CORINFO_HELP_ASSIGN_STRUCT:      // Not strictly needed as we don't use this in Jit32
4675     case CORINFO_HELP_MEMSET:             // Not strictly needed as we don't make a GT_CALL with this
4676     case CORINFO_HELP_MEMCPY:             // Not strictly needed as we don't make a GT_CALL with this
4677     case CORINFO_HELP_SETFIELDSTRUCT:
4679         return CALLINT_ALL_INDIRS;
4685     // other helpers kill nothing
4686     return CALLINT_NONE;
4689 /*****************************************************************************
4691  *  See if the given tree can be computed in the given precision (which must
4692  *  be smaller than the type of the tree for this to make sense). If 'doit'
4693  *  is false, we merely check to see whether narrowing is possible; if we
4694  *  get called with 'doit' being true, we actually perform the narrowing.
// Two-phase contract: callers first probe with doit == false; only when the probe
// succeeds do they call again with doit == true to mutate the tree in place.
// Parameters visible here: tree, source type 'srct', destination type 'dstt',
// 'vnpNarrow' (value numbers for the narrowed tree), and 'doit'.
4697 bool                Compiler::optNarrowTree(GenTreePtr     tree,
4700                                             ValueNumPair   vnpNarrow,
4707     noway_assert(genActualType(tree->gtType) == genActualType(srct));
4709     /* Assume we're only handling integer types */
4710     noway_assert(varTypeIsIntegral(srct));
4711     noway_assert(varTypeIsIntegral(dstt));
4713     unsigned srcSize      = genTypeSize(srct);
4714     unsigned dstSize      = genTypeSize(dstt);
4716     /* dstt must be smaller than srct to narrow */
4717     if (dstSize >= srcSize)
4720     /* Figure out what kind of a node we have */
4721     oper = tree->OperGet();
4722     kind = tree->OperKind();
// Assignment operators are never narrowed; the probe must have rejected them.
4724     if  (kind & GTK_ASGOP)
4726         noway_assert(doit == false);
4730     ValueNumPair NoVNPair = ValueNumPair();
4732     if  (kind & GTK_LEAF)
4736         /* Constants can usually be narrowed by changing their value */
4737         CLANG_FORMAT_COMMENT_ANCHOR;
// 32-bit targets: a long constant can be narrowed if its value fits the mask
// of the destination type.
4739 #ifndef _TARGET_64BIT_
4744             lval  = tree->gtIntConCommon.LngValue();
4749             case TYP_BYTE : lmask = 0x0000007F; break;
4751             case TYP_UBYTE: lmask = 0x000000FF; break;
4752             case TYP_SHORT: lmask = 0x00007FFF; break;
4753             case TYP_CHAR : lmask = 0x0000FFFF; break;
4754             case TYP_INT  : lmask = 0x7FFFFFFF; break;
4755             case TYP_UINT : lmask = 0xFFFFFFFF; break;
4757             default:        return false;
4760             if  ((lval & lmask) != lval)
// Value fits: rewrite the long constant as an int constant and re-number it.
4765                 tree->ChangeOperConst     (GT_CNS_INT);
4766                 tree->gtType             = TYP_INT;
4767                 tree->gtIntCon.gtIconVal = (int) lval;
4768                 if (vnStore != nullptr)
4770                     fgValueNumberTreeConst(tree);
// Int constants: same masking scheme; on 64-bit targets TYP_INT/TYP_UINT
// are also narrowing candidates.
4779             ssize_t  ival;  ival  = tree->gtIntCon.gtIconVal;
4780             ssize_t  imask; imask = 0;
4784             case TYP_BYTE : imask = 0x0000007F; break;
4786             case TYP_UBYTE: imask = 0x000000FF; break;
4787             case TYP_SHORT: imask = 0x00007FFF; break;
4788             case TYP_CHAR : imask = 0x0000FFFF; break;
4789 #ifdef _TARGET_64BIT_
4790             case TYP_INT  : imask = 0x7FFFFFFF; break;
4791             case TYP_UINT : imask = 0xFFFFFFFF; break;
4792 #endif // _TARGET_64BIT_
4793             default:        return false;
4796             if  ((ival & imask) != ival)
4799 #ifdef _TARGET_64BIT_
4802                 tree->gtType             = TYP_INT;
4803                 tree->gtIntCon.gtIconVal = (int) ival;
4804                 if (vnStore != nullptr)
4806                     fgValueNumberTreeConst(tree);
4809 #endif // _TARGET_64BIT_
4813             /* Operands that are in memory can usually be narrowed
4814                simply by changing their gtType */
4817             /* We only allow narrowing long -> int for a GT_LCL_VAR */
4818             if (dstSize == sizeof(int))
4829             noway_assert(doit == false);
4834     if (kind & (GTK_BINOP|GTK_UNOP))
4836         GenTreePtr      op1; op1 = tree->gtOp.gtOp1;
4837         GenTreePtr      op2; op2 = tree->gtOp.gtOp2;
4838 
4839         switch (tree->gtOper)
// GT_AND: the result can be narrowed if the constant operand already fits 'dstt'.
4842             noway_assert(genActualType(tree->gtType) == genActualType(op2->gtType));
4844             // Is op2 a small constant than can be narrowed into dstt?
4845             // if so the result of the GT_AND will also fit into 'dstt' and can be narrowed
4846             if ((op2->gtOper == GT_CNS_INT) &&  optNarrowTree(op2, srct, dstt, NoVNPair, false))
4848                 // We will change the type of the tree and narrow op2
4852                     tree->gtType = genActualType(dstt);
4853                     tree->SetVNs(vnpNarrow);
4855                     optNarrowTree(op2, srct, dstt, NoVNPair, true);
4856                     // We may also need to cast away the upper bits of op1
// Insert an explicit int cast over op1 and mark it morphed so it is not re-morphed.
4859                         assert(tree->gtType == TYP_INT);
4860                         op1 = gtNewCastNode(TYP_INT, op1, TYP_INT);
4862                         op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
4864                         tree->gtOp.gtOp1 = op1;
// Overflow-checked ops and small destination types cannot be narrowed here.
4875             if (tree->gtOverflow() || varTypeIsSmall(dstt))
4877                 noway_assert(doit == false);
// Binary arithmetic: both operands must narrow and neither may be a live CSE candidate.
4885             noway_assert(genActualType(tree->gtType) == genActualType(op1->gtType));
4886             noway_assert(genActualType(tree->gtType) == genActualType(op2->gtType));
4888             if (gtIsActiveCSE_Candidate(op1)          ||
4889                 gtIsActiveCSE_Candidate(op2)          ||
4890                 !optNarrowTree(op1, srct, dstt, NoVNPair, doit) ||
4891                 !optNarrowTree(op2, srct, dstt, NoVNPair, doit)    )
4893                 noway_assert(doit == false);
4897             /* Simply change the type of the tree */
4901                 if (tree->gtOper == GT_MUL && (tree->gtFlags & GTF_MUL_64RSLT))
4902                     tree->gtFlags &= ~GTF_MUL_64RSLT;
4904                 tree->gtType = genActualType(dstt);
4905                 tree->SetVNs(vnpNarrow);
// Memory leaf (e.g. indirection/local): retype in place; flag casted locals.
4913             /* Simply change the type of the tree */
4915             if (doit && (dstSize <= genTypeSize(tree->gtType)))
4917                 tree->gtType = genSignedType(dstt);
4918                 tree->SetVNs(vnpNarrow);
4920                 /* Make sure we don't mess up the variable type */
4921                 if ((oper == GT_LCL_VAR) || (oper == GT_LCL_FLD))
4922                     tree->gtFlags |= GTF_VAR_CAST;
4934             /* These can always be narrowed since they only represent 0 or 1 */
// GT_CAST: can narrow when the cast's source is no wider than the destination.
4939                 var_types       cast    = tree->CastToType();
4940                 var_types       oprt    = op1->TypeGet();
4941                 unsigned        oprSize = genTypeSize(oprt);
4946                 if (varTypeIsIntegralOrI(dstt) != varTypeIsIntegralOrI(oprt))
4949                 if (tree->gtOverflow())
4952                 /* Is this a cast from the type we're narrowing to or a smaller one? */
4954                 if  (oprSize <= dstSize)
4956                     /* Bash the target type of the cast */
4960                         dstt = genSignedType(dstt);
4962                         if (oprSize == dstSize)
4964                             // Same size: change the CAST into a NOP
4965                             tree->ChangeOper         (GT_NOP);
4966                             tree->gtType             = dstt;
4967                             tree->gtOp.gtOp2         = nullptr;
4968                             tree->gtVNPair           = op1->gtVNPair;   // Set to op1's ValueNumber
4972                             // oprSize is smaller
4973                             assert(oprSize < dstSize);
4975                             // Change the CastToType in the GT_CAST node
4976                             tree->CastToType() = dstt;
4978                             // The result type of a GT_CAST is never a small type.
4979                             // Use genActualType to widen dstt when it is a small types.
4980                             tree->gtType = genActualType(dstt);
4981                             tree->SetVNs(vnpNarrow);
// GT_COMMA (presumably): only the value-producing op2 needs to narrow.
4991             if (!gtIsActiveCSE_Candidate(op2)  &&
4992                 optNarrowTree(op2, srct, dstt, vnpNarrow, doit))
4994                 /* Simply change the type of the tree */
4998                     tree->gtType = genActualType(dstt);
4999                     tree->SetVNs(vnpNarrow);
5006             noway_assert(doit == false);
5016 /*****************************************************************************
5018  *  The following logic figures out whether the given variable is assigned
5019  *  somewhere in a list of basic blocks (or in an entire loop).
// fgWalkTreePre callback: accumulates, into the isVarAssgDsc passed via
// pCallbackData, the set of locals assigned, the kinds of indirections written,
// and the call-interference kind seen so far.
5023 Compiler::fgWalkResult      Compiler::optIsVarAssgCB(GenTreePtr *pTree, fgWalkData *data)
5025     GenTreePtr tree = *pTree;
5027     if  (tree->OperKind() & GTK_ASGOP)
5029         GenTreePtr      dest     = tree->gtOp.gtOp1;
5030         genTreeOps      destOper = dest->OperGet();
5032         isVarAssgDsc *  desc = (isVarAssgDsc*)data->pCallbackData;
5033         assert(desc && desc->ivaSelf == desc);
5035         if  (destOper == GT_LCL_VAR)
5037             unsigned        tvar = dest->gtLclVarCommon.gtLclNum;
// Track the assigned local in the mask; locals beyond the trackable limit
// make the mask incomplete instead.
5038             if  (tvar < lclMAX_ALLSET_TRACKED)
5039                 AllVarSetOps::AddElemD(data->compiler, desc->ivaMaskVal, tvar);
5041                 desc->ivaMaskIncomplete  = true;
// If this is the specific variable the caller asked about (and not the tree
// being skipped), the walk reports it (abort handling elided).
5043             if  (tvar == desc->ivaVar)
5045                 if  (tree != desc->ivaSkip)
5049         else if (destOper == GT_LCL_FLD)
5051             /* We can't track every field of every var. Moreover, indirections
5052                may access different parts of the var as different (but
5053                overlapping) fields. So just treat them as indirect accesses */
5055             // unsigned    lclNum = dest->gtLclFld.gtLclNum;
5056             // noway_assert(lvaTable[lclNum].lvAddrTaken);
5058             varRefKinds     refs = varTypeIsGC(tree->TypeGet()) ? VR_IND_REF
5060             desc->ivaMaskInd        = varRefKinds(desc->ivaMaskInd | refs);
5062         else if (destOper == GT_CLS_VAR)
5064             desc->ivaMaskInd        = varRefKinds(desc->ivaMaskInd | VR_GLB_VAR);
5066         else if (destOper == GT_IND)
5068             /* Set the proper indirection bits */
5070             varRefKinds     refs = varTypeIsGC(tree->TypeGet()) ? VR_IND_REF
5072             desc->ivaMaskInd        = varRefKinds(desc->ivaMaskInd | refs);
5075     else if (tree->gtOper == GT_CALL)
5077         isVarAssgDsc *  desc = (isVarAssgDsc*)data->pCallbackData;
5078         assert(desc && desc->ivaSelf == desc);
// Record the strongest call interference kind encountered.
5080         desc->ivaMaskCall = optCallInterf(tree);
5083     return  WALK_CONTINUE;
5086 /*****************************************************************************/
// optIsVarAssigned: Walk the statements of the block range starting at 'beg',
// using optIsVarAssgCB, to determine whether the variable of interest is
// assigned anywhere in the range (the 'skip' tree is excluded).
// (Parameter list beyond 'beg' and the tail of the function are elided here.)
5088 bool                Compiler::optIsVarAssigned(BasicBlock *   beg,
5096     desc.ivaSkip     = skip;
5098     desc.ivaSelf     = &desc;
5101     desc.ivaMaskCall = CALLINT_NONE;
5102     AllVarSetOps::AssignNoCopy(this, desc.ivaMaskVal, AllVarSetOps::MakeEmpty(this));
5108         for (GenTreeStmt* stmt = beg->firstStmt(); stmt; stmt = stmt->gtNextStmt)
5110             noway_assert(stmt->gtOper == GT_STMT);
// The walk returns non-zero when aborted, i.e. when the variable was found assigned.
5111             if  (fgWalkTreePre(&stmt->gtStmtExpr, optIsVarAssgCB, &desc))
5131 /*****************************************************************************/
// optIsSetAssgLoop: Answer whether loop 'lnum' assigns any variable in 'vars'
// or writes any indirection kind in 'inds'. Results of the per-loop walk are
// cached on the loop descriptor via LPFLG_ASGVARS_YES/INC.
5132 int                 Compiler::optIsSetAssgLoop(unsigned           lnum,
5133                                                ALLVARSET_VALARG_TP vars,
5138     /* Get hold of the loop descriptor */
5140     noway_assert(lnum < optLoopCount);
5141     loop = optLoopTable + lnum;
5143     /* Do we already know what variables are assigned within this loop? */
5145     if  (!(loop->lpFlags & LPFLG_ASGVARS_YES))
5152         /* Prepare the descriptor used by the tree walker call-back */
// ivaVar = -1 and ivaSkip = NULL: no single variable of interest, collect everything.
5154         desc.ivaVar     = (unsigned)-1;
5155         desc.ivaSkip    = NULL;
5157         desc.ivaSelf    = &desc;
5159         AllVarSetOps::AssignNoCopy(this, desc.ivaMaskVal, AllVarSetOps::MakeEmpty(this));
5160         desc.ivaMaskInd        = VR_NONE;
5161         desc.ivaMaskCall       = CALLINT_NONE;
5162         desc.ivaMaskIncomplete = false;
5164         /* Now walk all the statements of the loop */
5166         beg = loop->lpHead->bbNext;
5167         end = loop->lpBottom;
5169         for (/**/; /**/; beg = beg->bbNext)
// Skip PHI definitions at the start of each block; walk the rest.
5173             for (GenTreeStmt* stmt = beg->FirstNonPhiDef(); stmt; stmt = stmt->gtNextStmt)
5175                 noway_assert(stmt->gtOper == GT_STMT);
5176                 fgWalkTreePre(&stmt->gtStmtExpr, optIsVarAssgCB, &desc);
5178                 if  (desc.ivaMaskIncomplete)
5180                     loop->lpFlags |= LPFLG_ASGVARS_INC;
// Cache the computed sets on the loop descriptor.
5188         AllVarSetOps::Assign(this, loop->lpAsgVars, desc.ivaMaskVal);
5189         loop->lpAsgInds = desc.ivaMaskInd;
5190         loop->lpAsgCall = desc.ivaMaskCall;
5192         /* Now we know what variables are assigned in the loop */
5194         loop->lpFlags |= LPFLG_ASGVARS_YES;
5197     /* Now we can finally test the caller's mask against the loop's */
5198     if  (!AllVarSetOps::IsEmptyIntersection(this, loop->lpAsgVars, vars) ||
5199          (loop->lpAsgInds & inds))
// Conflict found (non-zero return elided). Otherwise consult call interference:
5204     switch (loop->lpAsgCall)
5208         /* Can't hoist if the call might have side effect on an indirection. */
5210         if  (loop->lpAsgInds != VR_NONE)
5215     case CALLINT_REF_INDIRS:
5217         /* Can't hoist if the call might have side effect on an ref indirection. */
5219         if  (loop->lpAsgInds & VR_IND_REF)
5224     case CALLINT_SCL_INDIRS:
5226         /* Can't hoist if the call might have side effect on an non-ref indirection. */
5228         if  (loop->lpAsgInds & VR_IND_SCL)
5233     case CALLINT_ALL_INDIRS:
5235         /* Can't hoist if the call might have side effect on any indirection. */
5237         if  (loop->lpAsgInds & (VR_IND_REF|VR_IND_SCL))
5244         /* Other helpers kill nothing */
5249         noway_assert(!"Unexpected lpAsgCall value");
5255 void Compiler::optPerformHoistExpr(GenTreePtr origExpr, unsigned lnum)
5260 printf("\nHoisting a copy of ");
5261 printTreeID(origExpr);
5262 printf(" into PreHeader for loop L%02u <BB%02u..BB%02u>:\n",
5263 lnum, optLoopTable[lnum].lpFirst->bbNum, optLoopTable[lnum].lpBottom->bbNum);
5264 gtDispTree(origExpr);
5269 // This loop has to be in a form that is approved for hoisting.
5270 assert (optLoopTable[lnum].lpFlags & LPFLG_HOISTABLE);
5272 // Create a copy of the expression and mark it for CSE's.
5273 GenTreePtr hoistExpr = gtCloneExpr(origExpr, GTF_MAKE_CSE);
5275 // At this point we should have a cloned expression, marked with the GTF_MAKE_CSE flag
5276 assert(hoistExpr != origExpr);
5277 assert(hoistExpr->gtFlags & GTF_MAKE_CSE);
5279 GenTreePtr hoist = hoistExpr;
5280 // The value of the expression isn't used (unless it's an assignment).
5281 if (hoistExpr->OperGet() != GT_ASG)
5283 hoist = gtUnusedValNode(hoistExpr);
5286 /* Put the statement in the preheader */
5288 fgCreateLoopPreHeader(lnum);
5290 BasicBlock * preHead = optLoopTable[lnum].lpHead;
5291 assert (preHead->bbJumpKind == BBJ_NONE);
5293 // fgMorphTree and lvaRecursiveIncRefCounts requires that compCurBB be the block that contains
5294 // (or in this case, will contain) the expression.
5295 compCurBB = preHead;
5297 // Increment the ref counts of any local vars appearing in "hoist".
5298 // Note that we need to do this before fgMorphTree() as fgMorph() could constant
5299 // fold away some of the lcl vars referenced by "hoist".
5300 lvaRecursiveIncRefCounts(hoist);
5302 hoist = fgMorphTree(hoist);
5304 GenTreePtr hoistStmt = gtNewStmt(hoist);
5305 hoistStmt->gtFlags |= GTF_STMT_CMPADD;
5307 /* simply append the statement at the end of the preHead's list */
5309 GenTreePtr treeList = preHead->bbTreeList;
5313 /* append after last statement */
5315 GenTreePtr last = treeList->gtPrev;
5316 assert (last->gtNext == 0);
5318 last->gtNext = hoistStmt;
5319 hoistStmt->gtPrev = last;
5320 treeList->gtPrev = hoistStmt;
5324 /* Empty pre-header - store the single statement in the block */
5326 preHead->bbTreeList = hoistStmt;
5327 hoistStmt->gtPrev = hoistStmt;
5330 hoistStmt->gtNext = nullptr;
5335 printf("This hoisted copy placed in PreHeader (BB%02u):\n", preHead->bbNum);
5340 if (fgStmtListThreaded)
5342 gtSetStmtInfo(hoistStmt);
5343 fgSetStmtSeq(hoistStmt);
5347 if (m_nodeTestData != NULL)
5350 // What is the depth of the loop "lnum"?
5352 unsigned lnumIter = lnum;
5353 while (optLoopTable[lnumIter].lpParent != BasicBlock::NOT_IN_LOOP)
5356 lnumIter = optLoopTable[lnumIter].lpParent;
5359 NodeToTestDataMap* testData = GetNodeTestData();
5361 TestLabelAndNum tlAndN;
5362 if (testData->Lookup(origExpr, &tlAndN) && tlAndN.m_tl == TL_LoopHoist)
5364 if (tlAndN.m_num == -1)
5367 printTreeID(origExpr);
5368 printf(" was declared 'do not hoist', but is being hoisted.\n");
5371 else if (tlAndN.m_num != depth)
5374 printTreeID(origExpr);
5375 printf(" was declared as hoistable from loop at nesting depth %d; actually hoisted from loop at depth %d.\n",
5376 tlAndN.m_num, depth);
5381 // We've correctly hoisted this, so remove the annotation. Later, we'll check for any remaining "must hoist" annotations.
5382 testData->Remove(origExpr);
5383 // Now we insert an annotation to make sure that "hoistExpr" is actually CSE'd.
5384 tlAndN.m_tl = TL_CSE_Def;
5385 tlAndN.m_num = m_loopHoistCSEClass++;
5386 testData->Set(hoistExpr, tlAndN);
5392 #if LOOP_HOIST_STATS
5393 if (!m_curLoopHasHoistedExpression)
5395 m_loopsWithHoistedExpressions++;
5396 m_curLoopHasHoistedExpression = true;
5398 m_totalHoistedExpressions++;
5399 #endif // LOOP_HOIST_STATS
// optHoistLoopCode: driver for the loop-hoisting phase. Walks all top-level
// loop nests outer-to-inner (via optHoistLoopNest) so each invariant
// expression is hoisted out of the outermost loop in which it is invariant.
// NOTE(review): lines are elided in this view; the method-hash filtering
// below sits inside a disabled (#if 0) debugging region per the #endif tag.
5402 void Compiler::optHoistLoopCode()
5404 // If we don't have any loops in the method then take an early out now.
5405 if (optLoopCount == 0)
// JitNoHoist config knob — presumably disables the phase when set; the
// consuming 'if' is elided here, so confirm against the full source.
5409 unsigned jitNoHoist = JitConfig.JitNoHoist();
5417 // The code in this #if has been useful in debugging loop cloning issues, by
5418 // enabling selective enablement of the loop cloning optimization according to
// Restrict the optimization to methods whose hash falls inside the
// [loophoisthashlo, loophoisthashhi] range read from environment variables.
5421 unsigned methHash = info.compMethodHash();
5422 char* lostr = getenv("loophoisthashlo");
5423 unsigned methHashLo = 0;
5426 sscanf_s(lostr, "%x", &methHashLo);
5427 // methHashLo = (unsigned(atoi(lostr)) << 2); // So we don't have to use negative numbers.
5429 char* histr = getenv("loophoisthashhi");
5430 unsigned methHashHi = UINT32_MAX;
5433 sscanf_s(histr, "%x", &methHashHi);
5434 // methHashHi = (unsigned(atoi(histr)) << 2); // So we don't have to use negative numbers.
5436 if (methHash < methHashLo || methHash > methHashHi)
5438 printf("Doing loop hoisting in %s (0x%x).\n", info.compFullName, methHash);
5440 #endif // 0 -- debugging loop cloning issues
// Debug dump of the flowgraph before the phase runs.
5445 printf("\n*************** In optHoistLoopCode()\n");
5446 printf("Blocks/Trees before phase\n");
5447 fgDispBasicBlocks(true);
5452 // Consider all the loop nests, in outer-to-inner order (thus hoisting expressions outside the largest loop in which
5453 // they are invariant.)
5454 LoopHoistContext hoistCtxt(this);
5455 for (unsigned lnum = 0; lnum < optLoopCount; lnum++)
// Skip loops deleted by earlier phases.
5457 if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED)
// Only start recursion at outermost loops; nested ones are reached via
// optHoistLoopNest's child walk.
5460 if (optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP)
5462 optHoistLoopNest(lnum, &hoistCtxt);
5471 printf("Blocks/Trees after optHoistLoopCode() modified flowgraph\n");
5472 fgDispBasicBlocks(true);
5476 // Make sure that the predecessor lists are accurate
5477 fgDebugCheckBBlist();
5482 // Test Data stuff..
5483 // If we have no test data, early out.
5484 if (m_nodeTestData == NULL) return;
// Post-phase check: any surviving TL_LoopHoist annotation with m_num >= 0
// means a tree marked "must hoist" was never hoisted — report it.
5485 NodeToTestDataMap* testData = GetNodeTestData();
5486 for (NodeToTestDataMap::KeyIterator ki = testData->Begin(); !ki.Equal(testData->End()); ++ki)
5488 TestLabelAndNum tlAndN;
5489 GenTreePtr node = ki.Get();
5490 bool b = testData->Lookup(node, &tlAndN);
5492 if (tlAndN.m_tl != TL_LoopHoist) continue;
5493 // Otherwise, it is a loop hoist annotation.
5494 assert(tlAndN.m_num < 100); // >= 100 indicates nested static field address, should already have been moved.
5495 if (tlAndN.m_num >= 0)
5499 printf(" was declared 'must hoist', but has not been hoisted.\n");
// optHoistLoopNest: hoist from loop 'lnum', then recurse into each of its
// children. The VNs hoisted in 'lnum' are temporarily pushed into
// hoistCtxt->m_hoistedInParentLoops while the children are processed (so a
// child does not re-hoist an expression its parent already hoisted), and
// popped back out afterwards.
5506 void Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt)
5508 // Do this loop, then recursively do all nested loops.
5509 CLANG_FORMAT_COMMENT_ANCHOR;
5511 #if LOOP_HOIST_STATS
// Per-loop statistics: reset the "hoisted something" flag for this loop.
5513 m_curLoopHasHoistedExpression = false;
5514 m_loopsConsidered++;
5515 #endif // LOOP_HOIST_STATS
5517 optHoistThisLoop(lnum, hoistCtxt);
// Take ownership of the set of VNs hoisted in this loop (may be nullptr if
// nothing was hoisted).
5519 VNSet* hoistedInCurLoop = hoistCtxt->ExtractHoistedInCurLoop();
5521 if (optLoopTable[lnum].lpChild != BasicBlock::NOT_IN_LOOP)
5523 // Add the ones hoisted in "lnum" to "hoistedInParents" for any nested loops.
5524 // TODO-Cleanup: we should have a set abstraction for loops.
5525 if (hoistedInCurLoop != nullptr)
5527 for (VNSet::KeyIterator keys = hoistedInCurLoop->Begin(); !keys.Equal(hoistedInCurLoop->End()); ++keys)
// Each VN must not already be present — guarantees the Remove below is safe.
5531 assert(!hoistCtxt->m_hoistedInParentLoops.Lookup(keys.Get(), &b));
5533 hoistCtxt->m_hoistedInParentLoops.Set(keys.Get(), true);
// Recurse over the sibling-linked child list.
5537 for (unsigned child = optLoopTable[lnum].lpChild; child != BasicBlock::NOT_IN_LOOP; child = optLoopTable[child].lpSibling)
5539 optHoistLoopNest(child, hoistCtxt);
5543 // TODO-Cleanup: we should have a set abstraction for loops.
5544 if (hoistedInCurLoop != nullptr)
5546 for (VNSet::KeyIterator keys = hoistedInCurLoop->Begin(); !keys.Equal(hoistedInCurLoop->End()); ++keys)
5548 // Note that we asserted when we added these that they hadn't been members, so removing is appropriate.
5549 hoistCtxt->m_hoistedInParentLoops.Remove(keys.Get());
// optHoistThisLoop: attempt hoisting for the single loop 'lnum'.
// Establishes the loop's eligibility (do-while form, head dominates entry,
// same try region, not inside a catch), computes register-pressure counts
// (integer/FP, with long-pair adjustment on 32-bit targets), then walks the
// definitely-executed blocks of the loop looking for hoistable expressions.
// NOTE(review): lines are elided in this view; early 'return's after the
// guard conditions are presumably in the gaps — confirm against full source.
5555 void Compiler::optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt)
5557 LoopDsc* pLoopDsc = &optLoopTable[lnum];
5559 /* If loop was removed continue */
5561 if (pLoopDsc->lpFlags & LPFLG_REMOVED)
5564 /* Get the head and tail of the loop */
5566 BasicBlock* head = pLoopDsc->lpHead;
5567 BasicBlock* tail = pLoopDsc->lpBottom;
5568 BasicBlock* lbeg = pLoopDsc->lpEntry;
5571 // We must have a do-while loop
5572 if ((pLoopDsc->lpFlags & LPFLG_DO_WHILE) == 0)
5575 // The loop-head must dominate the loop-entry.
5576 // TODO-CQ: Couldn't we make this true if it's not?
5577 if (!fgDominate(head, lbeg))
5580 // if lbeg is the start of a new try block then we won't be able to hoist
5581 if (!BasicBlock::sameTryRegion(head, lbeg))
5584 // We don't bother hoisting when inside of a catch block
5585 if ((lbeg->bbCatchTyp != BBCT_NONE) && (lbeg->bbCatchTyp != BBCT_FINALLY))
// All guards passed: mark the loop as a legal hoist target. This flag is
// later checked by optHoistCandidate/optPerformHoistExpr.
5588 pLoopDsc->lpFlags |= LPFLG_HOISTABLE;
5590 unsigned begn = lbeg->bbNum;
5591 unsigned endn = tail->bbNum;
5593 // Ensure the per-loop sets/tables are empty.
5594 hoistCtxt->m_curLoopVnInvariantCache.RemoveAll();
5599 printf("optHoistLoopCode for loop L%02u <BB%02u..BB%02u>:\n", lnum, begn, endn);
5600 printf(" Loop body %s a call\n", pLoopDsc->lpContainsCall ? "contains" : "does not contain");
// loopVars = variables both live across the loop boundary (in/out) AND
// used or defined inside the loop — the set that competes for registers.
5604 VARSET_TP VARSET_INIT_NOCOPY(loopVars, VarSetOps::Intersection(this, pLoopDsc->lpVarInOut,
5605 pLoopDsc->lpVarUseDef));
5607 pLoopDsc->lpVarInOutCount = VarSetOps::Count(this, pLoopDsc->lpVarInOut);
5608 pLoopDsc->lpLoopVarCount = VarSetOps::Count(this, loopVars);
5609 pLoopDsc->lpHoistedExprCount = 0;
5611 #ifndef _TARGET_64BIT_
5612 unsigned longVarsCount = VarSetOps::Count(this, lvaLongVars);
5614 if (longVarsCount > 0)
5616 // Since 64-bit variables take up two registers on 32-bit targets, we increase
5617 // the Counts such that each TYP_LONG variable counts twice.
5619 VARSET_TP VARSET_INIT_NOCOPY(loopLongVars, VarSetOps::Intersection(this, loopVars, lvaLongVars));
5620 VARSET_TP VARSET_INIT_NOCOPY(inOutLongVars, VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, lvaLongVars));
5625 printf("\n LONGVARS(%d)=", VarSetOps::Count(this, lvaLongVars));
5626 lvaDispVarSet(lvaLongVars);
// Count each long twice by adding the long subset counts once more.
5629 pLoopDsc->lpLoopVarCount += VarSetOps::Count(this, loopLongVars);
5630 pLoopDsc->lpVarInOutCount += VarSetOps::Count(this, inOutLongVars);
5632 #endif // !_TARGET_64BIT_
5637 printf("\n USEDEF (%d)=", VarSetOps::Count(this, pLoopDsc->lpVarUseDef));
5638 lvaDispVarSet(pLoopDsc->lpVarUseDef);
5640 printf("\n INOUT (%d)=", pLoopDsc->lpVarInOutCount);
5641 lvaDispVarSet(pLoopDsc->lpVarInOut);
5643 printf("\n LOOPVARS(%d)=", pLoopDsc->lpLoopVarCount);
5644 lvaDispVarSet(loopVars);
// Split the counts into FP vs integer; FP vars are counted separately and
// subtracted from the integer totals below.
5649 unsigned floatVarsCount = VarSetOps::Count(this, lvaFloatVars);
5651 if (floatVarsCount > 0)
5653 VARSET_TP VARSET_INIT_NOCOPY(loopFPVars, VarSetOps::Intersection(this, loopVars, lvaFloatVars));
5654 VARSET_TP VARSET_INIT_NOCOPY(inOutFPVars, VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, lvaFloatVars));
5656 pLoopDsc->lpLoopVarFPCount = VarSetOps::Count(this, loopFPVars);
5657 pLoopDsc->lpVarInOutFPCount = VarSetOps::Count(this, inOutFPVars);
5658 pLoopDsc->lpHoistedFPExprCount = 0;
5660 pLoopDsc->lpLoopVarCount -= pLoopDsc->lpLoopVarFPCount;
5661 pLoopDsc->lpVarInOutCount -= pLoopDsc->lpVarInOutFPCount;
5666 printf( " INOUT-FP(%d)=", pLoopDsc->lpVarInOutFPCount);
5667 lvaDispVarSet(inOutFPVars);
5669 printf("\n LOOPV-FP(%d)=", pLoopDsc->lpLoopVarFPCount);
5670 lvaDispVarSet(loopFPVars);
5674 else // (floatVarsCount == 0)
5676 pLoopDsc->lpLoopVarFPCount = 0;
5677 pLoopDsc->lpVarInOutFPCount = 0;
5678 pLoopDsc->lpHoistedFPExprCount = 0;
5681 // Find the set of definitely-executed blocks.
5682 // Ideally, the definitely-executed blocks are the ones that post-dominate the entry block.
5683 // Until we have post-dominators, we'll special-case for single-exit blocks.
5684 ExpandArrayStack<BasicBlock*> defExec(getAllocatorLoopHoist());
5685 if (pLoopDsc->lpFlags & LPFLG_ONE_EXIT)
5687 assert(pLoopDsc->lpExit != nullptr);
5688 BasicBlock* cur = pLoopDsc->lpExit;
5689 // Push dominators, until we reach "entry" or exit the loop.
// Walk up the dominator chain from the single exit; each dominator of the
// exit that is inside the loop is definitely executed on every iteration.
5690 while ( cur != nullptr
5691 && pLoopDsc->lpContains(cur)
5692 && cur != pLoopDsc->lpEntry)
5697 // If we didn't reach the entry block, give up and *just* push the entry block.
5698 if (cur != pLoopDsc->lpEntry)
5702 defExec.Push(pLoopDsc->lpEntry);
5704 else // More than one exit
5706 // We'll assume that only the entry block is definitely executed.
5707 // We could in the future do better.
5708 defExec.Push(pLoopDsc->lpEntry);
// Pop in reverse push order: dominators are processed before dominatees.
5711 while (defExec.Size() > 0)
5713 // Consider in reverse order: dominator before dominatee.
5714 BasicBlock* blk = defExec.Pop();
5715 optHoistLoopExprsForBlock(blk, lnum, hoistCtxt);
5719 // Hoist any expressions in "blk" that are invariant in loop "lnum" outside of "blk" and into a PreHead for loop "lnum".
5720 void Compiler::optHoistLoopExprsForBlock(BasicBlock* blk,
5722 LoopHoistContext* hoistCtxt)
5724 LoopDsc* pLoopDsc = &optLoopTable[lnum];
// Tracks "still before any possible global side effect": starts true only
// for the loop entry block, and is cleared by optHoistLoopExprsForTree when
// a call or global store is seen.
5725 bool firstBlockAndBeforeSideEffect = (blk == pLoopDsc->lpEntry);
5726 unsigned blkWeight = blk->getBBWeight(this);
5731 printf(" optHoistLoopExprsForBlock BB%02u (weight=%6s) of loop L%02u <BB%02u..BB%02u>, firstBlock is %s\n",
5733 refCntWtd2str(blkWeight),
5735 pLoopDsc->lpFirst->bbNum,
5736 pLoopDsc->lpBottom->bbNum,
5737 firstBlockAndBeforeSideEffect ? "true" : "false");
5738 if (blkWeight < (BB_UNITY_WEIGHT / 10))
5740 printf(" block weight is too small to perform hoisting.\n");
// Rarely-executed block (< 1/10 of unity weight): hoisting would not pay.
5745 if (blkWeight < (BB_UNITY_WEIGHT / 10))
5747 // Block weight is too small to perform hoisting.
// Walk the non-phi statements; the recursive tree walk hoists any hoistable
// subtrees, and a fully-hoistable top-level tree is offered as a candidate.
5751 for (GenTreeStmt* stmt = blk->FirstNonPhiDef(); stmt; stmt = stmt->gtNextStmt)
5753 GenTreePtr stmtTree = stmt->gtStmtExpr;
5755 (void)optHoistLoopExprsForTree(stmtTree, lnum, hoistCtxt, &firstBlockAndBeforeSideEffect, &hoistable);
5758 // we will try to hoist the top-level stmtTree
5759 optHoistCandidate(stmtTree, lnum, hoistCtxt);
// optIsProfitableToHoistableTree: register-pressure heuristic deciding
// whether hoisting 'tree' out of loop 'lnum' is worthwhile.
// Estimates the callee-saved (plus callee-trash, when the loop has no calls)
// registers available, charges already-hoisted expressions against that
// budget, and requires a higher gtCostEx the scarcer registers are.
// Returns false when the tree is too cheap for the available pressure.
// NOTE(review): lines are elided; the 'return false' bodies and final
// 'return true' are presumably in the gaps — confirm against full source.
5764 bool Compiler::optIsProfitableToHoistableTree(GenTreePtr tree, unsigned lnum)
5766 LoopDsc* pLoopDsc = &optLoopTable[lnum];
5768 bool loopContainsCall = pLoopDsc->lpContainsCall;
5771 int hoistedExprCount;
// FP trees are budgeted against FP registers, everything else against
// integer registers; the counts were computed by optHoistThisLoop.
5775 if (varTypeIsFloating(tree->TypeGet()))
5777 hoistedExprCount = pLoopDsc->lpHoistedFPExprCount;
5778 loopVarCount = pLoopDsc->lpLoopVarFPCount;
5779 varInOutCount = pLoopDsc->lpVarInOutFPCount;
5781 availRegCount = CNT_CALLEE_SAVED_FLOAT;
5782 if (!loopContainsCall)
// No calls in the loop: caller-trash FP registers survive too (minus one,
// kept in reserve).
5784 availRegCount += CNT_CALLEE_TRASH_FLOAT-1;
5787 // For ARM each double takes two FP registers
5788 // For now on ARM we won't track singles/doubles
5789 // and instead just assume that we always have doubles.
5796 hoistedExprCount = pLoopDsc->lpHoistedExprCount;
5797 loopVarCount = pLoopDsc->lpLoopVarCount;
5798 varInOutCount = pLoopDsc->lpVarInOutCount;
5800 availRegCount = CNT_CALLEE_SAVED-1;
5801 if (!loopContainsCall)
5803 availRegCount += CNT_CALLEE_TRASH-1;
5805 #ifndef _TARGET_64BIT_
5806 // For our 32-bit targets Long types take two registers.
5807 if (varTypeIsLong(tree->TypeGet()))
// Halve the budget (rounding up) since each long occupies a register pair.
5809 availRegCount = (availRegCount+1) / 2;
5814 // decrement the availRegCount by the count of expression that we have already hoisted.
5815 availRegCount -= hoistedExprCount;
5817 // the variables that are read/written inside the loop should
5818 // always be a subset of the InOut variables for the loop
5819 assert(loopVarCount <= varInOutCount);
5821 // When loopVarCount >= availRegCount we believe that all of the
5822 // available registers will get used to hold LclVars inside the loop.
5823 // This pessimistically assumes that each loopVar has a conflicting
5824 // lifetime with every other loopVar.
5825 // For this case we will hoist the expression only if is profitable
5826 // to place it in a stack home location (gtCostEx >= 2*IND_COST_EX)
5827 // as we believe it will be placed in the stack or one of the other
5828 // loopVars will be spilled into the stack
5830 if (loopVarCount >= availRegCount)
5832 // Don't hoist expressions that are not heavy: tree->gtCostEx < (2*IND_COST_EX)
5833 if (tree->gtCostEx < (2*IND_COST_EX))
5837 // When varInOutCount < availRegCount we are know that there are
5838 // some available register(s) when we enter the loop body.
5839 // When varInOutCount == availRegCount there often will be a register
5840 // available when we enter the loop body, since a loop often defines a
5841 // LclVar on exit or there is often at least one LclVar that is worth
5842 // spilling to the stack to make way for this hoisted expression.
5843 // So we are willing hoist an expression with gtCostEx == MIN_CSE_COST
5845 if (varInOutCount > availRegCount)
5847 // Don't hoist expressions that barely meet CSE cost requirements: tree->gtCostEx == MIN_CSE_COST
5848 if (tree->gtCostEx <= MIN_CSE_COST+1)
5856 // This function returns true if 'tree' is a loop invariant expression.
5857 // It also sets '*pHoistable' to true if 'tree' can be hoisted into a loop PreHeader block
// Post-order recursive walk: children first, then decide invariance and
// hoistability for 'tree' itself. When 'tree' is NOT hoistable but some
// children are, those children are hoisted individually at the bottom.
// Also maintains *pFirstBlockAndBeforeSideEffect (cleared on calls and
// global stores) which gates hoisting of exception-throwing trees.
5859 bool Compiler::optHoistLoopExprsForTree(GenTreePtr tree,
5861 LoopHoistContext* hoistCtxt,
5862 bool* pFirstBlockAndBeforeSideEffect,
5865 // First do the children.
5866 // We must keep track of whether each child node was hoistable or not
5868 unsigned nChildren = tree->NumChildren();
5869 bool childrenHoistable[GenTree::MAX_CHILDREN];
5871 // Initialize the array elements for childrenHoistable[] to false
5872 for (unsigned i = 0; i < nChildren; i++)
5874 childrenHoistable[i] = false;
// A tree is invariant only if every child is invariant.
5877 bool treeIsInvariant = true;
5878 for (unsigned childNum = 0; childNum < nChildren; childNum++)
5880 if (!optHoistLoopExprsForTree(tree->GetChild(childNum), lnum, hoistCtxt, pFirstBlockAndBeforeSideEffect, &childrenHoistable[childNum]))
5882 treeIsInvariant = false;
5886 // If all the children of "tree" are hoistable, then "tree" itself can be hoisted
5888 bool treeIsHoistable = treeIsInvariant;
5890 // But we must see if anything else prevents "tree" from being hoisted.
5892 if (treeIsInvariant)
5894 // Tree must be a suitable CSE candidate for us to be able to hoist it.
5895 treeIsHoistable = optIsCSEcandidate(tree);
5897 // If it's a call, it must be a helper call, and be pure.
5898 // Further, if it may run a cctor, it must be labeled as "Hoistable"
5899 // (meaning it won't run a cctor because the class is not precise-init).
5900 if (treeIsHoistable && tree->OperGet() == GT_CALL)
5902 GenTreeCall* call = tree->AsCall();
5903 if (call->gtCallType != CT_HELPER)
5905 treeIsHoistable = false;
5909 CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
5910 if (!s_helperCallProperties.IsPure(helpFunc))
5912 treeIsHoistable = false;
5914 else if ( s_helperCallProperties.MayRunCctor(helpFunc)
5915 && (call->gtFlags & GTF_CALL_HOISTABLE) == 0)
5917 treeIsHoistable = false;
5922 if (treeIsHoistable)
5924 if (!(*pFirstBlockAndBeforeSideEffect))
5926 // For now, we give up on an expression that might raise an exception if it is after the
5927 // first possible global side effect (and we assume we're after that if we're not in the first block).
5928 //TODO-CQ: this is when we might do loop cloning.
5930 if ((tree->gtFlags & GTF_EXCEPT) != 0)
5932 treeIsHoistable = false;
5935 // Currently we must give up on reads from static variables (even if we are in the first block).
5937 if (tree->OperGet() == GT_CLS_VAR)
5939 // TODO-CQ: test that fails if we hoist GT_CLS_VAR: JIT\Directed\Languages\ComponentPascal\pi_r.exe method Main
5940 treeIsHoistable = false;
5944 // Is the value of the whole tree loop invariant?
// Structural invariance established above; now also require the liberal
// value number of the whole tree to be invariant in this loop.
5945 treeIsInvariant = optVNIsLoopInvariant(tree->gtVNPair.GetLiberal(), lnum, &hoistCtxt->m_curLoopVnInvariantCache);
5947 // Is the value of the whole tree loop invariant?
5948 if (!treeIsInvariant)
5950 treeIsHoistable = false;
5954 // Check if we need to set '*pFirstBlockAndBeforeSideEffect' to false.
5955 // If we encounter a tree with a call in it
5956 // or if we see an assignment to global we set it to false.
5958 // If we are already set to false then we can skip these checks
5960 if (*pFirstBlockAndBeforeSideEffect)
5962 // For this purpose, we only care about memory side effects. We assume that expressions will
5963 // be hoisted so that they are evaluated in the same order as they would have been in the loop,
5964 // and therefore throw exceptions in the same order. (So we don't use GTF_GLOBALLY_VISIBLE_SIDE_EFFECTS
5965 // here, since that includes exceptions.)
5966 if (tree->gtFlags & GTF_CALL)
5968 *pFirstBlockAndBeforeSideEffect = false;
5970 else if (tree->OperIsAssignment())
5972 // If the LHS of the assignment has a global reference, then assume it's a global side effect.
5973 GenTreePtr lhs = tree->gtOp.gtOp1;
5974 if (lhs->gtFlags & GTF_GLOB_REF)
5976 *pFirstBlockAndBeforeSideEffect = false;
5978 } else if (tree->OperIsCopyBlkOp())
5980 GenTreePtr args = tree->gtOp.gtOp1;
5981 assert(args->OperGet() == GT_LIST);
// For a copy-block op the destination is the first operand of the GT_LIST.
5982 if (args->gtOp.gtOp1->gtFlags & GTF_GLOB_REF)
5984 *pFirstBlockAndBeforeSideEffect = false;
5989 // If this 'tree' is hoistable then we return and the caller will
5990 // decide to hoist it as part of larger hoistable expression.
5992 if (!treeIsHoistable)
5994 // We are not hoistable so we will now hoist any hoistable children.
5996 for (unsigned childNum = 0; childNum < nChildren; childNum++)
5998 if (childrenHoistable[childNum])
6000 // We can't hoist the LHS of an assignment, isn't a real use.
6001 if (childNum == 0 && (tree->OperIsAssignment()))
6004 GenTreePtr child = tree->GetChild(childNum);
6006 // We try to hoist this 'child' tree
6007 optHoistCandidate(child, lnum, hoistCtxt);
6012 *pHoistable = treeIsHoistable;
6013 return treeIsInvariant;
// optHoistCandidate: final gatekeeper for hoisting 'tree' out of loop
// 'lnum'. Rejects the candidate when the loop is not hoistable, the tree is
// not valid at the loop head, the profitability heuristic fails, or the
// tree's VN was already hoisted in this loop or a parent loop. Otherwise
// performs the hoist and updates the per-loop hoist counters and VN set.
6016 void Compiler::optHoistCandidate(GenTreePtr tree, unsigned lnum, LoopHoistContext* hoistCtxt)
6018 if (lnum == BasicBlock::NOT_IN_LOOP)
6020 // The hoisted expression isn't valid at any loop head so don't hoist this expression.
6024 // The outer loop also must be suitable for hoisting...
6025 if ((optLoopTable[lnum].lpFlags & LPFLG_HOISTABLE) == 0)
6028 // If the hoisted expression isn't valid at this loop head then break
6029 if (!optTreeIsValidAtLoopHead(tree, lnum))
6032 // It must pass the hoistable profitablity tests for this loop level
6033 if (!optIsProfitableToHoistableTree(tree, lnum))
// Dedup by liberal value number: skip if a parent loop already hoisted it...
6038 if (hoistCtxt->m_hoistedInParentLoops.Lookup(tree->gtVNPair.GetLiberal(), &b))
6040 // already hoisted in a parent loop, so don't hoist this expression.
// ...or if this loop already hoisted an expression with the same VN.
6044 if (hoistCtxt->GetHoistedInCurLoop(this)->Lookup(tree->gtVNPair.GetLiberal(), &b))
6046 // already hoisted this expression in the current loop, so don't hoist this expression.
6050 // Expression can be hoisted
6051 optPerformHoistExpr(tree, lnum);
6053 // Increment lpHoistedExprCount or lpHoistedFPExprCount
6054 if (!varTypeIsFloating(tree->TypeGet()))
6056 optLoopTable[lnum].lpHoistedExprCount++;
6057 #ifndef _TARGET_64BIT_
6058 // For our 32-bit targets Long types take two registers.
// Count a hoisted long twice, matching the register-pair accounting used by
// optIsProfitableToHoistableTree.
6059 if (varTypeIsLong(tree->TypeGet()))
6061 optLoopTable[lnum].lpHoistedExprCount++;
6065 else // Floating point expr hoisted
6067 optLoopTable[lnum].lpHoistedFPExprCount++;
6070 // Record the hoisted expression in hoistCtxt
6071 hoistCtxt->GetHoistedInCurLoop(this)->Set(tree->gtVNPair.GetLiberal(), true);
// optVNIsLoopInvariant: returns true iff value number 'vn' is invariant in
// loop 'lnum'. Constants are trivially invariant; PhiDef/PhiHeapDef VNs are
// invariant only when the defining block lies outside the loop; other
// function VNs are invariant iff all their arguments are. Results are
// memoized in 'loopVnInvariantCache' (reset per loop).
6075 bool Compiler::optVNIsLoopInvariant(ValueNum vn, unsigned lnum, VNToBoolMap* loopVnInvariantCache)
6077 // If it is not a VN, is not loop-invariant.
6078 if (vn == ValueNumStore::NoVN) return false;
6080 // We'll always short-circuit constants.
6081 if (vnStore->IsVNConstant(vn) || vn == vnStore->VNForVoid())
6084 // If we've done this query previously, don't repeat.
6085 bool previousRes = false;
6086 if (loopVnInvariantCache->Lookup(vn, &previousRes))
6091 if (vnStore->GetVNFunc(vn, &funcApp))
6093 if (funcApp.m_func == VNF_PhiDef)
6095 // First, make sure it's a "proper" phi -- the definition is a Phi application.
6096 VNFuncApp phiDefValFuncApp;
6097 if ( !vnStore->GetVNFunc(funcApp.m_args[2], &phiDefValFuncApp)
6098 || phiDefValFuncApp.m_func != VNF_Phi)
6100 // It's not *really* a definition, rather a pass-through of some other VN.
6101 // (This could occur, say if both sides of an if-then-else diamond made the
6102 // same assignment to a variable.)
// Degenerate phi: recurse on the VN it passes through.
6103 res = optVNIsLoopInvariant(funcApp.m_args[2], lnum, loopVnInvariantCache);
6107 // Is the definition within the loop? If so, is not loop-invariant.
// PhiDef args encode (lclNum, ssaNum, valueVN); look up the SSA def's block.
6108 unsigned lclNum = funcApp.m_args[0];
6109 unsigned ssaNum = funcApp.m_args[1];
6110 LclSsaVarDsc* ssaDef = lvaTable[lclNum].GetPerSsaData(ssaNum);
6111 res = !optLoopContains(lnum, ssaDef->m_defLoc.m_blk->bbNatLoopNum);
6114 else if (funcApp.m_func == VNF_PhiHeapDef)
// Heap phi: arg 0 is the defining block pointer, stored as a constant.
6116 BasicBlock* defnBlk = reinterpret_cast<BasicBlock*>(vnStore->ConstantValue<ssize_t>(funcApp.m_args[0]));
6117 res = !optLoopContains(lnum, defnBlk->bbNatLoopNum);
// General VN function: invariant only if every argument is invariant.
6121 for (unsigned i = 0; i < funcApp.m_arity; i++)
6123 // TODO-CQ: We need to either make sure that *all* VN functions
6124 // always take VN args, or else have a list of arg positions to exempt, as implicitly
6126 if (!optVNIsLoopInvariant(funcApp.m_args[i], lnum, loopVnInvariantCache))
6136 // Otherwise, assume non-function "new, unique" VN's are not loop invariant.
// Cache the answer before returning.
6140 loopVnInvariantCache->Set(vn, res);
// optTreeIsValidAtLoopHead: returns whether 'tree' could legally be
// evaluated at the head of loop 'lnum' — i.e. every local it reads has its
// SSA definition outside the loop, constants are always valid, and interior
// nodes are valid iff all their children are.
6144 bool Compiler::optTreeIsValidAtLoopHead(GenTreePtr tree, unsigned lnum)
6146 if (tree->OperIsLocal())
6148 GenTreeLclVarCommon* lclVar = tree->AsLclVarCommon();
6149 unsigned lclNum = lclVar->gtLclNum;
6151 // The lvlVar must be have an Ssa tracked lifetime
// Locals excluded from SSA have no def-location info, so we can't prove
// their value at the loop head.
6152 if (fgExcludeFromSsa(lclNum))
6155 // If the loop does not contains the SSA def we can hoist it.
6156 if (!optLoopTable[lnum].lpContains(lvaTable[lclNum].GetPerSsaData(lclVar->GetSsaNum())->m_defLoc.m_blk))
6159 else if (tree->OperIsConst())
6163 else // If every one of the children nodes are valid at this Loop's Head.
6165 unsigned nChildren = tree->NumChildren();
6166 for (unsigned childNum = 0; childNum < nChildren; childNum++)
6168 if (!optTreeIsValidAtLoopHead(tree->GetChild(childNum), lnum))
6177 /*****************************************************************************
6179  * Creates a pre-header block for the given loop - a preheader is a BBJ_NONE
6180  * header. The pre-header will replace the current lpHead in the loop table.
6181  * The loop has to be a do-while loop. Thus, all blocks dominated by lpHead
6182  * will also be dominated by the loop-top, lpHead->bbNext.
// NOTE(review): lines are elided in this view; early 'return's after the
// guard checks are presumably in the gaps — confirm against full source.
6186 void Compiler::fgCreateLoopPreHeader(unsigned lnum)
6188 LoopDsc* pLoopDsc = &optLoopTable[lnum];
6190 /* This loop has to be a "do-while" loop */
6192 assert (pLoopDsc->lpFlags & LPFLG_DO_WHILE);
6194 /* Have we already created a loop-preheader block? */
// Idempotent: a loop already flagged LPFLG_HAS_PREHEAD needs no new block.
6196 if (pLoopDsc->lpFlags & LPFLG_HAS_PREHEAD)
6199 BasicBlock* head = pLoopDsc->lpHead;
6200 BasicBlock* top = pLoopDsc->lpTop;
6201 BasicBlock* entry = pLoopDsc->lpEntry;
6203 // if 'entry' and 'head' are in different try regions then we won't be able to hoist
6204 if (!BasicBlock::sameTryRegion(head, entry))
6207 // Ensure that lpHead always dominates lpEntry
6209 noway_assert(fgDominate(head, entry));
6211 /* Get hold of the first block of the loop body */
6213 assert (top == entry);
6215 /* Allocate a new basic block */
// The preheader is a fall-through (BBJ_NONE) internal block placed just
// before 'top'.
6217 BasicBlock * preHead = bbNewBasicBlock(BBJ_NONE);
6218 preHead->bbFlags |= BBF_INTERNAL | BBF_LOOP_PREHEADER;
6220 // Must set IL code offset
6221 preHead->bbCodeOffs = top->bbCodeOffs;
6223 // Set the default value of the preHead weight in case we don't have
6224 // valid profile data and since this blocks weight is just an estimate
6225 // we clear any BBF_PROF_WEIGHT flag that we may have picked up from head.
6227 preHead->inheritWeight(head);
6228 preHead->bbFlags &= ~BBF_PROF_WEIGHT;
6232 printf("\nCreated PreHeader (BB%02u) for loop L%02u (BB%02u - BB%02u), with weight = %s\n",
6233 preHead->bbNum, lnum, top->bbNum, pLoopDsc->lpBottom->bbNum,
6234 refCntWtd2str(preHead->getBBWeight(this)));
6237 // The preheader block is part of the containing loop (if any).
6238 preHead->bbNatLoopNum = pLoopDsc->lpParent;
// With profile data and a conditional 'head', refine the preheader weight
// from the head's taken/not-taken split instead of just inheriting it.
6240 if (fgIsUsingProfileWeights() && (head->bbJumpKind == BBJ_COND))
6242 if ((head->bbWeight == 0) || (head->bbNext->bbWeight == 0))
6244 preHead->bbWeight = 0;
6245 preHead->bbFlags |= BBF_RUN_RARELY;
6249 bool allValidProfileWeights = ((head->bbFlags & BBF_PROF_WEIGHT) != 0)
6250 && ((head->bbJumpDest->bbFlags & BBF_PROF_WEIGHT) != 0)
6251 && ((head->bbNext->bbFlags & BBF_PROF_WEIGHT) != 0);
6253 if (allValidProfileWeights)
6255 double loopEnteredCount;
6256 double loopSkippedCount;
6258 if (fgHaveValidEdgeWeights)
// Prefer edge weights when available: use the midpoint of each edge's
// [min,max] weight range.
6260 flowList * edgeToNext = fgGetPredForBlock(head->bbNext, head);
6261 flowList * edgeToJump = fgGetPredForBlock(head->bbJumpDest, head);
6262 noway_assert(edgeToNext != NULL);
6263 noway_assert(edgeToJump != NULL);
6265 loopEnteredCount = ((double) edgeToNext->flEdgeWeightMin + (double) edgeToNext->flEdgeWeightMax) / 2.0;
6266 loopSkippedCount = ((double) edgeToJump->flEdgeWeightMin + (double) edgeToJump->flEdgeWeightMax) / 2.0;
// Fallback: approximate from successor block weights.
6270 loopEnteredCount = (double) head->bbNext->bbWeight;
6271 loopSkippedCount = (double) head->bbJumpDest->bbWeight;
6274 double loopTakenRatio = loopEnteredCount / (loopEnteredCount + loopSkippedCount);
6276 // Calculate a good approximation of the preHead's block weight
6277 unsigned preHeadWeight = (unsigned) (((double) head->bbWeight * loopTakenRatio) + 0.5);
6278 preHead->setBBWeight(max(preHeadWeight, 1));
6279 noway_assert(!preHead->isRunRarely());
6284 // Link in the preHead block.
6285 fgInsertBBbefore(top, preHead);
6287 // Ideally we would re-run SSA and VN if we optimized by doing loop hoisting.
6288 // However, that is too expensive at this point. Instead, we update the phi
6289 // node block references, if we created pre-header block due to hoisting.
6290 // This is sufficient because any definition participating in SSA that flowed
6291 // into the phi via the loop header block will now flow through the preheader
6292 // block from the header block.
6294 for (GenTreePtr stmt = top->bbTreeList; stmt; stmt = stmt->gtNext)
6296 GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
6297 if (tree->OperGet() != GT_ASG)
6301 GenTreePtr op2 = tree->gtGetOp2();
6302 if (op2->OperGet() != GT_PHI)
// Retarget every phi argument whose predecessor was 'head' to 'preHead'.
6306 GenTreeArgList* args = op2->gtGetOp1()->AsArgList();
6307 while (args != nullptr)
6309 GenTreePhiArg* phiArg = args->Current()->AsPhiArg();
6310 if (phiArg->gtPredBB == head)
6312 phiArg->gtPredBB = preHead;
6314 args = args->Rest();
6318 // The handler can't begin at the top of the loop. If it did, it would be incorrect
6319 // to set the handler index on the pre header without updating the exception table.
6320 noway_assert(!top->hasHndIndex() || fgFirstBlockOfHandler(top) != top);
6322 // Update the EH table to make the hoisted block part of the loop's EH block.
6323 fgExtendEHRegionBefore(top);
6325 // TODO-CQ: set dominators for this block, to allow loop optimizations requiring them
6326 // (e.g: hoisting expression in a loop with the same 'head' as this one)
6328 /* Update the loop entry */
6330 pLoopDsc->lpHead = preHead;
6331 pLoopDsc->lpFlags |= LPFLG_HAS_PREHEAD;
6333 /* The new block becomes the 'head' of the loop - update bbRefs and bbPreds
6334    All predecessors of 'beg', (which is the entry in the loop)
6335    now have to jump to 'preHead', unless they are dominated by 'head' */
6337 preHead->bbRefs = 0;
6338 fgAddRefPred(preHead, head);
6339 bool checkNestedLoops = false;
// Re-point each predecessor of 'top' at the new preheader, except the
// loop's own back-edges (predecessors dominated by 'top').
6341 for (flowList * pred = top->bbPreds; pred; pred = pred->flNext)
6343 BasicBlock * predBlock = pred->flBlock;
6345 if (fgDominate(top, predBlock))
6347 // note: if 'top' dominates predBlock, 'head' dominates predBlock too
6348 // (we know that 'head' dominates 'top'), but using 'top' instead of
6349 // 'head' in the test allows us to not enter here if 'predBlock == head'
// A dominated predecessor other than this loop's bottom is a back-edge of
// some OTHER loop sharing this entry — remember to fix those loops below.
6351 if (predBlock != pLoopDsc->lpBottom)
6353 noway_assert(predBlock != head);
6354 checkNestedLoops = true;
6359 switch (predBlock->bbJumpKind)
6362 noway_assert(predBlock == head);
6366 if (predBlock == head)
6368 noway_assert(predBlock->bbJumpDest != top);
6374 case BBJ_EHCATCHRET:
6375 noway_assert(predBlock->bbJumpDest == top);
6376 predBlock->bbJumpDest = preHead;
6377 preHead->bbFlags |= BBF_JMP_TARGET|BBF_HAS_LABEL;
6379 if (predBlock == head)
6381 // This is essentially the same case of predBlock being a BBJ_NONE. We may not be
6382 // able to make this a BBJ_NONE if it's an internal block (for example, a leave).
6383 // Just break, pred will be removed after switch.
6387 fgRemoveRefPred(top, predBlock);
6388 fgAddRefPred(preHead, predBlock);
// Switch predecessor: rewrite every jump-table slot that targeted 'top'.
6393 unsigned jumpCnt; jumpCnt = predBlock->bbJumpSwt->bbsCount;
6394 BasicBlock * * jumpTab; jumpTab = predBlock->bbJumpSwt->bbsDstTab;
6399 if ((*jumpTab) == top)
6401 (*jumpTab) = preHead;
6403 fgRemoveRefPred(top, predBlock);
6404 fgAddRefPred(preHead, predBlock);
6405 preHead->bbFlags |= BBF_JMP_TARGET|BBF_HAS_LABEL;
6408 while (++jumpTab, --jumpCnt);
6411 noway_assert(!"Unexpected bbJumpKind");
// 'top' now has exactly the preheader (fall-through) as its outside pred.
6416 noway_assert(!fgGetPredForBlock(top, preHead));
6417 fgRemoveRefPred(top, head);
6418 fgAddRefPred(top, preHead);
6421    If we found at least one back-edge in the flowgraph pointing to the top/entry of the loop
6422    (other than the back-edge of the loop we are considering) then we likely have nested
6423    do-while loops with the same entry block and inserting the preheader block changes the head
6424    of all the nested loops. Now we will update this piece of information in the loop table, and
6425    mark all nested loops as having a preheader (the preheader block can be shared among all nested
6426    do-while loops with the same entry block).
6428 if (checkNestedLoops)
6430 for (unsigned l = 0; l < optLoopCount; l++)
6432 if (optLoopTable[l].lpHead == head)
6434 noway_assert(l != lnum);  // pLoopDsc->lpHead was already changed from 'head' to 'preHead'
6435 noway_assert(optLoopTable[l].lpEntry == top);
6436 optUpdateLoopHead(l, optLoopTable[l].lpHead, preHead);
6437 optLoopTable[l].lpFlags |= LPFLG_HAS_PREHEAD;
6440 printf("Same PreHeader (BB%02u) can be used for loop L%02u (BB%02u - BB%02u)\n\n",
6441 preHead->bbNum, l, top->bbNum, optLoopTable[l].lpBottom->bbNum);
// optBlockIsLoopEntry: Check whether 'blk' is the lpEntry block of some loop in the
// loop table, starting from the block's innermost natural loop and walking outward
// through parent loops. Presumably *pLnum receives the matching loop number on
// success — the return path is not visible here; TODO confirm.
6448 bool Compiler::optBlockIsLoopEntry(BasicBlock* blk, unsigned* pLnum)
6450 unsigned lnum = blk->bbNatLoopNum;
// Walk the loop nest from the innermost loop containing 'blk' outward.
6451 while (lnum != BasicBlock::NOT_IN_LOOP)
6453 if (optLoopTable[lnum].lpEntry == blk)
6458 lnum = optLoopTable[lnum].lpParent;
// optComputeLoopSideEffects: Compute side-effect summaries for every loop in the
// loop table: resets lpVarInOut/lpVarUseDef/lpContainsCall, then walks each
// outermost (non-removed) loop nest to accumulate effects, and finally builds the
// tracked floating-point local set (and, on 32-bit targets, the long local set).
6463 void Compiler::optComputeLoopSideEffects()
// First pass: reset the per-loop summary state for all loops.
6466 for (lnum = 0; lnum < optLoopCount; lnum++)
6468 VarSetOps::AssignNoCopy(this, optLoopTable[lnum].lpVarInOut, VarSetOps::MakeEmpty(this));
6469 VarSetOps::AssignNoCopy(this, optLoopTable[lnum].lpVarUseDef, VarSetOps::MakeEmpty(this));
6470 optLoopTable[lnum].lpContainsCall = false;
// Second pass: accumulate side effects, visiting only outermost loops — the nest
// walker propagates effects to inner loops' parents as needed.
6473 for (lnum = 0; lnum < optLoopCount; lnum++)
6475 if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED)
6478 if (optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP) // Is outermost...
6479 optComputeLoopNestSideEffects(lnum);
// Build lvaFloatVars (and lvaLongVars on 32-bit) from the tracked locals.
6482 VarSetOps::AssignNoCopy(this, lvaFloatVars, VarSetOps::MakeEmpty(this));
6483 #ifndef _TARGET_64BIT_
6484 VarSetOps::AssignNoCopy(this, lvaLongVars, VarSetOps::MakeEmpty(this));
6487 for (unsigned i = 0; i < lvaCount; i++)
6489 LclVarDsc* varDsc = &lvaTable[i];
6490 if (varDsc->lvTracked)
6492 if (varTypeIsFloating(varDsc->lvType))
6494 VarSetOps::AddElemD(this, lvaFloatVars, varDsc->lvVarIndex);
6496 #ifndef _TARGET_64BIT_
6497 else if (varTypeIsLong(varDsc->lvType))
6499 VarSetOps::AddElemD(this, lvaLongVars, varDsc->lvVarIndex);
// optComputeLoopNestSideEffects: Compute side effects for an entire outermost loop
// nest by visiting every block in the lexical range [lpFirst, lpBottom], inclusive.
6506 void Compiler::optComputeLoopNestSideEffects(unsigned lnum)
6508 assert(optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP); // Requires: lnum is outermost.
// 'botNext' is the block after the loop bottom; the loop below stops before it.
6509 BasicBlock* botNext = optLoopTable[lnum].lpBottom->bbNext;
6510 for (BasicBlock* bbInLoop = optLoopTable[lnum].lpFirst; bbInLoop != botNext; bbInLoop = bbInLoop->bbNext)
6512 optComputeLoopSideEffectsOfBlock(bbInLoop);
// optComputeLoopSideEffectsOfBlock: Walk all trees of 'blk' and record, on the
// block's innermost loop and all of its ancestors: variable liveness, whether the
// loop contains a call, which fields / array element types are modified, and
// whether the loop has arbitrary ("havoc") heap effects.
6516 void Compiler::optComputeLoopSideEffectsOfBlock(BasicBlock* blk)
6518 unsigned mostNestedLoop = blk->bbNatLoopNum;
6519 assert(mostNestedLoop != BasicBlock::NOT_IN_LOOP);
// Fold this block's liveness sets into every containing loop's summary.
6521 AddVariableLivenessAllContainingLoops(mostNestedLoop, blk);
6523 bool heapHavoc = false; // True ==> there's a call or a memory store that has arbitrary heap effects.
6525 // Now iterate over the remaining statements, and their trees.
6526 for (GenTreePtr stmts = blk->FirstNonPhiDef(); (stmts != nullptr); stmts = stmts->gtNext)
6528 for (GenTreePtr tree = stmts->gtStmt.gtStmtList; (tree != nullptr); tree = tree->gtNext)
6530 genTreeOps oper = tree->OperGet();
6532 // Even after we set heapHavoc we still may want to know if a loop contains calls
6535 if (oper == GT_CALL)
6537 // Record that this loop contains a call
6538 AddContainsCallAllContainingLoops(mostNestedLoop);
6541 // If we just set lpContainsCall or it was previously set
6542 if (optLoopTable[mostNestedLoop].lpContainsCall)
6544 // We can early exit after both heapHavoc and lpContainsCall are both set to true.
6548 // We are just looking for GT_CALL nodes after heapHavoc was set.
6552 // otherwise heapHavoc is not set
6555 // This body is a distillation of the heap-side effect code of value numbering.
6556 // We also do a very limited analysis of byref PtrTo values, to cover some cases
6557 // that the compiler creates.
6559 if (GenTree::OperIsAssignment(oper))
6561 GenTreePtr lhs = tree->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/true);
// Indirect store: classify it as an array-element store, a field store, or havoc.
6563 if (lhs->OperGet() == GT_IND)
6565 GenTreePtr arg = lhs->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/true);
6566 FieldSeqNode* fldSeqArrElem = nullptr;
// Volatile stores are treated conservatively (path elided in this view).
6568 if ((tree->gtFlags & GTF_IND_VOLATILE) != 0)
6576 if (arg->TypeGet() == TYP_BYREF && arg->OperGet() == GT_LCL_VAR)
6578 // If it's a local byref for which we recorded a value number, use that...
6579 GenTreeLclVar* argLcl = arg->AsLclVar();
6580 if (!fgExcludeFromSsa(argLcl->GetLclNum()))
6582 ValueNum argVN = lvaTable[argLcl->GetLclNum()].GetPerSsaData(argLcl->GetSsaNum())->m_vnPair.GetLiberal();
6584 if (argVN != ValueNumStore::NoVN && vnStore->GetVNFunc(argVN, &funcApp) && funcApp.m_func == VNF_PtrToArrElem)
6586 assert(vnStore->IsVNHandle(funcApp.m_args[0]));
6587 CORINFO_CLASS_HANDLE elemType = CORINFO_CLASS_HANDLE(vnStore->ConstantValue<size_t>(funcApp.m_args[0]));
6588 AddModifiedElemTypeAllContainingLoops(mostNestedLoop, elemType);
6589 // Don't set heapHavoc below.
6596 // Is the LHS an array index expression?
6597 else if (lhs->ParseArrayElemForm(this, &arrInfo, &fldSeqArrElem))
6599 // We actually ignore "fldSeq" -- any modification to an S[], at any
6600 // field of "S", will lose all information about the array type.
6601 CORINFO_CLASS_HANDLE elemTypeEq = EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType);
6602 AddModifiedElemTypeAllContainingLoops(mostNestedLoop, elemTypeEq);
6606 // We are only interested in IsFieldAddr()'s fldSeq out parameter.
6608 GenTreePtr obj = nullptr; // unused
6609 GenTreePtr staticOffset = nullptr; // unused
6610 FieldSeqNode* fldSeq = nullptr;
6612 if (arg->IsFieldAddr(this, &obj, &staticOffset, &fldSeq) &&
6613 (fldSeq != FieldSeqStore::NotAField()))
6615 // Get the first (object) field from field seq. Heap[field] will yield the "field map".
6616 assert(fldSeq != nullptr);
6617 if (fldSeq->IsFirstElemFieldSeq())
6619 fldSeq = fldSeq->m_next;
6620 assert(fldSeq != nullptr);
6623 AddModifiedFieldAllContainingLoops(mostNestedLoop, fldSeq->m_fieldHnd);
// Static-field (class var) store: record the field as modified.
6631 else if (lhs->OperGet() == GT_CLS_VAR)
6633 AddModifiedFieldAllContainingLoops(mostNestedLoop, lhs->gtClsVar.gtClsVarHnd);
6635 // Otherwise, must be local lhs form. I should assert that.
6636 else if (lhs->OperGet() == GT_LCL_VAR)
6638 GenTreeLclVar* lhsLcl = lhs->AsLclVar();
6639 GenTreePtr rhs = tree->gtOp.gtOp2;
6640 ValueNum rhsVN = rhs->gtVNPair.GetLiberal();
6641 // If we gave the RHS a value number, propagate it.
6642 if (rhsVN != ValueNumStore::NoVN)
6644 rhsVN = vnStore->VNNormVal(rhsVN);
6645 if (!fgExcludeFromSsa(lhsLcl->GetLclNum()))
6647 lvaTable[lhsLcl->GetLclNum()].GetPerSsaData(lhsLcl->GetSsaNum())->m_vnPair.SetLiberal(rhsVN);
6652 else // not GenTree::OperIsAssignment(oper)
6657 tree->gtVNPair = tree->gtOp.gtOp2->gtVNPair;
6661 // Is it an addr of an array index expression?
6663 GenTreePtr addrArg = tree->gtOp.gtOp1;
6664 if (addrArg->OperGet() == GT_IND)
6666 // Is the LHS an array index expression?
6667 if (addrArg->gtFlags & GTF_IND_ARR_INDEX)
6670 bool b = GetArrayInfoMap()->Lookup(addrArg, &arrInfo);
6672 CORINFO_CLASS_HANDLE elemType = EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType);
// Give the address a PtrToArrElem VN so later stores through it are recognized
// as array-element stores (only the element type argument is meaningful here).
6673 tree->gtVNPair.SetBoth(vnStore->VNForFunc(TYP_BYREF, VNF_PtrToArrElem,
6674 vnStore->VNForHandle(ssize_t(elemType), GTF_ICON_CLASS_HDL),
6675 // The rest are dummy arguments.
6676 vnStore->VNForNull(),
6677 vnStore->VNForNull(),
6678 vnStore->VNForNull()));
6688 GenTreeLclVarCommon* lclVarTree;
6690 if (!tree->DefinesLocal(this, &lclVarTree, &isEntire))
6692 // For now, assume arbitrary side effects on the heap...
6693 // TODO-CQ: Why not be complete, and get this case right?
// Interlocked operations mutate the heap unpredictably.
6699 case GT_LOCKADD: // Binop
6700 case GT_XADD: // Binop
6701 case GT_XCHG: // Binop
6702 case GT_CMPXCHG: // Specialop
6710 GenTreeCall* call = tree->AsCall();
6712 // Record that this loop contains a call
6713 AddContainsCallAllContainingLoops(mostNestedLoop);
// Only some helper calls are known not to mutate the heap; everything else
// (including a cctor-running helper that is not hoistable) is havoc.
6715 if (call->gtCallType == CT_HELPER)
6717 CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
6718 if (s_helperCallProperties.MutatesHeap(helpFunc))
6722 else if (s_helperCallProperties.MayRunCctor(helpFunc))
6724 // If the call is labeled as "Hoistable", then we've checked the
6725 // class that would be constructed, and it is not precise-init, so
6726 // the cctor will not be run by this call. Otherwise, it might be,
6727 // and might have arbitrary side effects.
6728 if ((tree->gtFlags & GTF_CALL_HOISTABLE) == 0)
6742 // All other gtOper node kinds, leave 'heapHavoc' unchanged (i.e. false)
6751 // Record that all loops containing this block have heap havoc effects.
6752 unsigned lnum = mostNestedLoop;
6753 while (lnum != BasicBlock::NOT_IN_LOOP)
6755 optLoopTable[lnum].lpLoopHasHeapHavoc = true;
6756 lnum = optLoopTable[lnum].lpParent;
6761 // Marks the containsCall information to "lnum" and any parent loops.
// Sets lpContainsCall on loop 'lnum' and every ancestor loop in the nest.
6762 void Compiler::AddContainsCallAllContainingLoops(unsigned lnum)
6764 assert(0 <= lnum && lnum < optLoopCount);
6765 while (lnum != BasicBlock::NOT_IN_LOOP)
6767 optLoopTable[lnum].lpContainsCall = true;
6768 lnum = optLoopTable[lnum].lpParent;
6772 // Adds the variable liveness information for 'blk' to 'this' LoopDsc
// Unions the block's live-in/live-out sets into lpVarInOut, and the block's
// use/def sets into lpVarUseDef.
6773 void Compiler::LoopDsc::AddVariableLiveness(Compiler* comp, BasicBlock * blk)
6775 VarSetOps::UnionD(comp, this->lpVarInOut, blk->bbLiveIn);
6776 VarSetOps::UnionD(comp, this->lpVarInOut, blk->bbLiveOut);
6778 VarSetOps::UnionD(comp, this->lpVarUseDef, blk->bbVarUse);
6779 VarSetOps::UnionD(comp, this->lpVarUseDef, blk->bbVarDef);
6782 // Adds the variable liveness information for 'blk' to "lnum" and any parent loops.
6783 void Compiler::AddVariableLivenessAllContainingLoops(unsigned lnum, BasicBlock * blk)
6785 assert(0 <= lnum && lnum < optLoopCount);
// Walk outward through the loop nest, folding the block's liveness into each loop.
6786 while (lnum != BasicBlock::NOT_IN_LOOP)
6788 optLoopTable[lnum].AddVariableLiveness(this, blk);
6789 lnum = optLoopTable[lnum].lpParent;
6793 // Adds "fldHnd" to the set of modified fields of "lnum" and any parent loops.
6794 void Compiler::AddModifiedFieldAllContainingLoops(unsigned lnum, CORINFO_FIELD_HANDLE fldHnd)
6796 assert(0 <= lnum && lnum < optLoopCount);
// Record the field as modified on the loop and every ancestor loop.
6797 while (lnum != BasicBlock::NOT_IN_LOOP)
6799 optLoopTable[lnum].AddModifiedField(this, fldHnd);
6800 lnum = optLoopTable[lnum].lpParent;
6804 // Adds "elemType" to the set of modified array element types of "lnum" and any parent loops.
6805 void Compiler::AddModifiedElemTypeAllContainingLoops(unsigned lnum, CORINFO_CLASS_HANDLE elemClsHnd)
6807 assert(0 <= lnum && lnum < optLoopCount);
// Record the element type as modified on the loop and every ancestor loop.
6808 while (lnum != BasicBlock::NOT_IN_LOOP)
6810 optLoopTable[lnum].AddModifiedElemType(this, elemClsHnd);
6811 lnum = optLoopTable[lnum].lpParent;
6815 /*****************************************************************************
6817 * Helper passed to Compiler::fgWalkAllTreesPre() to decrement the LclVar usage counts
6818 * The 'keepList' is either a single tree or a list of trees that are formed by
6819 * one or more GT_COMMA nodes. It is the kept side-effects as returned by the
6820 * gtExtractSideEffList method.
// Returns WALK_SKIP_SUBTREES for trees found in 'keepList' (their ref counts
// stay intact); otherwise decrements the ref count of any GT_LCL_VAR visited
// and returns WALK_CONTINUE.
6824 Compiler::fgWalkResult Compiler::optRemoveTreeVisitor(GenTreePtr *pTree, fgWalkData *data)
6826 GenTreePtr tree = *pTree;
6827 Compiler * comp = data->compiler;
6828 GenTreePtr keepList = (GenTreePtr)(data->pCallbackData);
6830 // We may have a non-NULL side effect list that is being kept
// Scan the COMMA-linked keep list; matching trees are preserved as-is.
6834 GenTreePtr keptTree = keepList;
6835 while (keptTree->OperGet() == GT_COMMA)
6837 assert(keptTree->OperKind() & GTK_SMPOP);
6838 GenTreePtr op1 = keptTree->gtOp.gtOp1;
6839 GenTreePtr op2 = keptTree->gtGetOp2();
6841 // For the GT_COMMA case the op1 is part of the original CSE tree
6842 // that is being kept because it contains some side-effect
6846 // This tree and all of its sub trees are being kept.
6847 return WALK_SKIP_SUBTREES;
6850 // For the GT_COMMA case the op2 are the remaining side-effects of the original CSE tree
6851 // which can again be another GT_COMMA or the final side-effect part
6855 if (tree == keptTree)
6857 // This tree and all of its sub trees are being kept.
6858 return WALK_SKIP_SUBTREES;
6862 // This node is being removed from the graph of GenTreePtr
6864 // Look for any local variable references
6866 if (tree->gtOper == GT_LCL_VAR && comp->lvaLocalVarRefCounted)
6871 /* This variable ref is going away, decrease its ref counts */
6873 lclNum = tree->gtLclVarCommon.gtLclNum;
6874 assert(lclNum < comp->lvaCount);
6875 varDsc = comp->lvaTable + lclNum;
6877 // make sure it's been initialized
6878 assert(comp->compCurBB != nullptr);
6879 assert(comp->compCurBB->bbWeight <= BB_MAX_WEIGHT);
6881 /* Decrement its lvRefCnt and lvRefCntWtd */
6883 // Use getBBWeight to determine the proper block weight.
6884 // This impacts the block weights when we have IBC data.
6885 varDsc->decRefCnts(comp->compCurBB->getBBWeight(comp), comp);
6888 return WALK_CONTINUE;
6891 /*****************************************************************************
6893 * Routine called to decrement the LclVar ref counts when removing a tree
6894 * during the remove RangeCheck phase.
6895 * This method will decrement the refcounts for any LclVars used below 'deadTree',
6896 * unless the node is found in the 'keepList' (which are saved side effects)
6897 * The keepList is communicated using the walkData.pCallbackData field
6898 * Also the compCurBB must be set to the current BasicBlock which contains
6899 * 'deadTree' as we need to fetch the block weight when decrementing the ref counts.
6902 void Compiler::optRemoveTree(GenTreePtr deadTree, GenTreePtr keepList)
6904 // We communicate this value using the walkData.pCallbackData field
// Pre-order walk; optRemoveTreeVisitor skips anything on 'keepList'.
6906 fgWalkTreePre(&deadTree, optRemoveTreeVisitor, (void*)keepList);
6909 /*****************************************************************************
6911 * Given an array index node, mark it as not needing a range check.
// 'tree' must be a GT_COMMA whose op1 is the GT_ARR_BOUNDS_CHECK to remove;
// the bounds check is replaced by its extracted side effects (or a NOP), the
// removed tree's LclVar ref counts are decremented, and the statement's
// costs/links are recomputed.
6914 void Compiler::optRemoveRangeCheck(GenTreePtr tree,
6916 bool updateCSEcounts,
6917 unsigned sideEffFlags,
6934 noway_assert(!"can't remove range checks without REARRANGE_ADDS right now");
6937 noway_assert(stmt->gtOper == GT_STMT);
6938 noway_assert(tree->gtOper == GT_COMMA);
6939 noway_assert(tree->gtOp.gtOp1->gtOper == GT_ARR_BOUNDS_CHECK);
6940 noway_assert(forceRemove || optIsRangeCheckRemovable(tree->gtOp.gtOp1));
6941 GenTreeBoundsChk* bndsChk = tree->gtOp.gtOp1->AsBoundsChk();
6947 printf("Before optRemoveRangeCheck:\n");
// Extract any side effects from the bounds check so they are not lost.
6952 GenTreePtr sideEffList = nullptr;
6955 gtExtractSideEffList(tree->gtOp.gtOp1, &sideEffList, sideEffFlags);
6958 // Decrement the ref counts for any LclVars that are being deleted
6960 optRemoveTree(tree->gtOp.gtOp1, sideEffList);
6962 // Just replace the bndsChk with a NOP as an operand to the GT_COMMA, if there are no side effects.
6963 tree->gtOp.gtOp1 = (sideEffList != NULL) ? sideEffList : gtNewNothingNode();
6965 // TODO-CQ: We should also remove the GT_COMMA, but in any case we can no longer CSE the GT_COMMA.
6966 tree->gtFlags |= GTF_DONT_CSE;
6968 /* Recalculate the gtCostSz, etc... */
6969 gtSetStmtInfo(stmt);
6971 /* Re-thread the nodes if necessary */
6972 if (fgStmtListThreaded)
6980 printf("After optRemoveRangeCheck:\n");
6987 /*****************************************************************************
6988 * Return the scale in an array reference, given a pointer to the
6989 * multiplication node.
// 'mul' must be GT_MUL or GT_LSH with a constant second operand. The effective
// scale is returned (shift amounts are converted to a multiplier), and *pIndex
// receives the index expression with the scaling stripped off.
6992 ssize_t Compiler::optGetArrayRefScaleAndIndex(GenTreePtr mul, GenTreePtr *pIndex DEBUGARG(bool bRngChk))
6995 assert (mul->gtOper == GT_MUL || mul->gtOper == GT_LSH);
6996 assert (mul->gtOp.gtOp2->IsCnsIntOrI());
6998 ssize_t scale = mul->gtOp.gtOp2->gtIntConCommon.IconValue();
// A left-shift by 'scale' is a multiply by (1 << scale).
7000 if (mul->gtOper == GT_LSH)
7001 scale = ((ssize_t)1) << scale;
7003 GenTreePtr index = mul->gtOp.gtOp1;
7005 if (index->gtOper == GT_MUL && index->gtOp.gtOp2->IsCnsIntOrI())
7007 // case of two cascading multiplications for constant int (e.g. * 20 morphed to * 5 * 4):
7008 // When index->gtOper is GT_MUL and index->gtOp.gtOp2->gtOper is GT_CNS_INT (i.e. * 5),
7009 // we can bump up the scale from 4 to 5*4, and then change index to index->gtOp.gtOp1.
7010 // Otherwise, we cannot optimize it. We will simply keep the original scale and index.
7011 scale *= index->gtOp.gtOp2->gtIntConCommon.IconValue();
7012 index = index->gtOp.gtOp1;
7015 assert (!bRngChk || index->gtOper != GT_COMMA);
7023 /*****************************************************************************
7024 * Find the last assignment to the local variable in the block. Return
7025 * RHS or NULL. If any local variable in the RHS has been killed in
7026 * intervening code, return NULL. If the variable being searched for is killed
7027 * in the intervening code, return NULL.
// Walks the block's statements in reverse. *pKilledInOut accumulates tracked
// locals assigned after the found init; *pLhsRhsKilledAfterInit reports whether
// the searched variable or the RHS was killed/indirectly referenced.
7031 GenTreePtr Compiler::optFindLocalInit(BasicBlock *block, GenTreePtr local, VARSET_TP* pKilledInOut, bool* pLhsRhsKilledAfterInit)
7033 assert(pKilledInOut);
7034 assert(pLhsRhsKilledAfterInit);
7036 *pLhsRhsKilledAfterInit = false;
7038 unsigned LclNum = local->gtLclVarCommon.gtLclNum;
7040 GenTreePtr list = block->bbTreeList;
7046 GenTreePtr rhs = NULL;
7047 GenTreePtr stmt = list;
// Iterate statements backwards (gtPrev) until we wrap around to 'list'.
7050 stmt = stmt->gtPrev;
7056 GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
7057 // If we encounter an assignment to a local variable,
7058 if ((tree->OperKind() & GTK_ASGOP) && tree->gtOp.gtOp1->gtOper == GT_LCL_VAR)
7060 // And the assigned variable equals the input local,
7061 if (tree->gtOp.gtOp1->gtLclVarCommon.gtLclNum == LclNum)
7063 // If the assignment is '=' and it is not a conditional, then return rhs.
7064 if (tree->gtOper == GT_ASG && !(tree->gtFlags & GTF_COLON_COND))
7066 rhs = tree->gtOp.gtOp2;
7068 // If the assignment is 'op=' or a conditional equal, then the search ends here,
7069 // as we found a kill to the input local.
7072 *pLhsRhsKilledAfterInit = true;
7073 assert(rhs == NULL);
// Track every other tracked local that is assigned after the init we found.
7079 LclVarDsc* varDsc = optIsTrackedLocal(tree->gtOp.gtOp1);
7084 VarSetOps::AddElemD(this, *pKilledInOut, varDsc->lvVarIndex);
7088 while (stmt != list);
7095 // If any local in the RHS is killed in intervening code, or RHS has an indirection, return NULL.
7096 varRefKinds rhsRefs = VR_NONE;
7097 VARSET_TP VARSET_INIT_NOCOPY(rhsLocals, VarSetOps::UninitVal());
7098 bool b = lvaLclVarRefs(rhs, NULL, &rhsRefs, &rhsLocals);
7100 !VarSetOps::IsEmptyIntersection(this, rhsLocals, *pKilledInOut) ||
7101 (rhsRefs != VR_NONE))
7103 // If RHS has been indirectly referenced, consider it a write and a kill.
7104 *pLhsRhsKilledAfterInit = true;
7111 /*****************************************************************************
7113 * Return true if "op1" is guaranteed to be less than or equal to "op2".
// Handles the constant/constant case directly; otherwise strips '+ constant'
// from each operand and compares the accumulated offsets when both operands
// reduce to the same local variable.
7118 bool Compiler::optIsNoMore(GenTreePtr op1, GenTreePtr op2,
7121 if (op1->gtOper == GT_CNS_INT &&
7122 op2->gtOper == GT_CNS_INT)
7124 add1 += op1->gtIntCon.gtIconVal;
7125 add2 += op2->gtIntCon.gtIconVal;
7129 /* Check for +/- constant on either operand */
7131 if (op1->gtOper == GT_ADD && op1->gtOp.gtOp2->gtOper == GT_CNS_INT)
7133 add1 += op1->gtOp.gtOp2->gtIntCon.gtIconVal;
7134 op1 = op1->gtOp.gtOp1;
7137 if (op2->gtOper == GT_ADD && op2->gtOp.gtOp2->gtOper == GT_CNS_INT)
7139 add2 += op2->gtOp.gtOp2->gtIntCon.gtIconVal;
7140 op2 = op2->gtOp.gtOp1;
7143 /* We only allow local variable references */
7145 if (op1->gtOper != GT_LCL_VAR)
7147 if (op2->gtOper != GT_LCL_VAR)
7149 if (op1->gtLclVarCommon.gtLclNum != op2->gtLclVarCommon.gtLclNum)
7152 /* NOTE: Caller ensures that this variable has only one def */
7154 // printf("limit [%d]:\n", add1); gtDispTree(op1);
7155 // printf("size [%d]:\n", add2); gtDispTree(op2);
// Same base local on both sides: compare the constant offsets.
7160 return (bool)(add1 <= add2);
7165 //------------------------------------------------------------------------------
7166 // optObtainLoopCloningOpts: Identify optimization candidates and update
7167 // the "context" for array optimizations.
7170 // context - data structure where all loop cloning info is kept. The
7171 // optInfo fields of the context are updated with the
7172 // identified optimization candidates.
7174 void Compiler::optObtainLoopCloningOpts(LoopCloneContext* context)
// Consider every loop in the table; only clonable, non-removed loops are analyzed.
7176 for (unsigned i = 0; i < optLoopCount; i++)
7178 JITDUMP("Considering loop %d to clone for optimizations.\n", i);
7179 if (optIsLoopClonable(i))
7181 if (!(optLoopTable[i].lpFlags & LPFLG_REMOVED))
7183 optIdentifyLoopOptInfo(i, context);
7186 JITDUMP("------------------------------------------------------------\n");
7191 //------------------------------------------------------------------------
7192 // optIdentifyLoopOptInfo: Identify loop optimization candidates and also
7193 // check if the loop is suitable for the optimizations performed.
7196 // loopNum - the current loop index for which conditions are derived.
7197 // context - data structure where all loop cloning candidates will be
7201 // If the loop is not suitable for the optimizations, return false - context
7202 // should not contain any optimization candidate for the loop if false.
7203 // Else return true.
7206 // Check if the loop is well formed for this optimization and identify the
7207 // optimization candidates and update the "context" parameter with all the
7208 // contextual information necessary to perform the optimization later.
7210 bool Compiler::optIdentifyLoopOptInfo(unsigned loopNum, LoopCloneContext* context)
7212 noway_assert(loopNum < optLoopCount);
7214 LoopDsc* pLoop = &optLoopTable[loopNum];
// The loop must have a recognized iterator (LPFLG_ITER).
7216 if (!(pLoop->lpFlags & LPFLG_ITER))
7218 JITDUMP("> No iter flag on loop %d.\n", loopNum);
7222 unsigned ivLclNum = pLoop->lpIterVar();
7223 if (lvaVarAddrExposed(ivLclNum))
7225 JITDUMP("> Rejected V%02u as iter var because is address-exposed.\n", ivLclNum);
7229 BasicBlock* head = pLoop->lpHead;
7230 BasicBlock* end = pLoop->lpBottom;
7231 BasicBlock* beg = head->bbNext;
// The bottom block must conditionally branch back to the loop's first block.
7233 if (end->bbJumpKind != BBJ_COND)
7235 JITDUMP("> Couldn't find termination test.\n");
7239 if (end->bbJumpDest != beg)
7241 JITDUMP("> Branch at loop 'end' not looping to 'begin'.\n");
7245 // TODO-CQ: CLONE: Mark increasing or decreasing loops.
7246 if ((pLoop->lpIterOper() != GT_ASG_ADD && pLoop->lpIterOper() != GT_ADD) || (pLoop->lpIterConst() != 1))
7248 JITDUMP("> Loop iteration operator not matching\n");
7252 if ((pLoop->lpFlags & LPFLG_CONST_LIMIT) == 0 &&
7253 (pLoop->lpFlags & LPFLG_VAR_LIMIT) == 0 &&
7254 (pLoop->lpFlags & LPFLG_ARRLEN_LIMIT) == 0)
7256 JITDUMP("> Loop limit is neither constant, variable or array length\n");
// The test direction (LT/LE vs GT/GE) must agree with the iterator direction (add vs sub).
7260 if (!(((pLoop->lpTestOper() == GT_LT || pLoop->lpTestOper() == GT_LE) && (pLoop->lpIterOper() == GT_ADD || pLoop->lpIterOper() == GT_ASG_ADD)) ||
7261 ((pLoop->lpTestOper() == GT_GT || pLoop->lpTestOper() == GT_GE) && (pLoop->lpIterOper() == GT_SUB || pLoop->lpIterOper() == GT_ASG_SUB))))
7263 JITDUMP("> Loop test (%s) doesn't agree with the direction (%s) of the pLoop->\n",
7264 GenTree::NodeName(pLoop->lpTestOper()), GenTree::NodeName(pLoop->lpIterOper()));
7268 if (!(pLoop->lpTestTree->OperKind() & GTK_RELOP) || !(pLoop->lpTestTree->gtFlags & GTF_RELOP_ZTT))
7270 JITDUMP("> Loop inversion NOT present, loop test [%06u] may not protect entry from head.\n", pLoop->lpTestTree->gtTreeID);
7275 GenTreePtr op1 = pLoop->lpIterator();
7276 noway_assert((op1->gtOper == GT_LCL_VAR) && (op1->gtLclVarCommon.gtLclNum == ivLclNum));
7279 JITDUMP("Checking blocks BB%02d..BB%02d for optimization candidates\n", beg->bbNum, end->bbNext ? end->bbNext->bbNum : 0);
// Walk every tree in the loop body looking for cloning candidates.
7281 LoopCloneVisitorInfo info(context, loopNum, nullptr);
7282 for (BasicBlock* block = beg; block != end->bbNext; block = block->bbNext)
7285 for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
7288 fgWalkTreePre(&stmt->gtStmt.gtStmtExpr, optCanOptimizeByLoopCloningVisitor, &info, false, false);
7295 //---------------------------------------------------------------------------------------------------------------
7296 // optExtractArrIndex: Try to extract the array index from "tree".
7299 // tree the tree to be checked if it is the array [] operation.
7300 // result the extracted GT_INDEX information is updated in result.
7301 // lhsNum for the root level (function is recursive) callers should be BAD_VAR_NUM.
7304 // Returns true if array index can be extracted, else, return false. See assumption about
7305 // what will be extracted. The "result" variable's rank parameter is advanced for every
7306 // dimension of [] encountered.
7309 // Given a "tree" extract the GT_INDEX node in "result" as ArrIndex. In FlowGraph morph
7310 // we have converted a GT_INDEX tree into a scaled index base offset expression. We need
7311 // to reconstruct this to be able to know if this is an array access.
7314 // The method extracts only if the array base and indices are GT_LCL_VAR.
7316 // TODO-CQ: CLONE: After morph make sure this method extracts values before morph.
7318 // [000000001AF828D8] ---XG------- indir int
7319 // [000000001AF872C8] ------------ const long 16 Fseq[#FirstElem]
7320 // [000000001AF87340] ------------ + byref
7321 // [000000001AF87160] -------N---- const long 2
7322 // [000000001AF871D8] ------------ << long
7323 // [000000001AF870C0] ------------ cast long <- int
7324 // [000000001AF86F30] i----------- lclVar int V04 loc0
7325 // [000000001AF87250] ------------ + byref
7326 // [000000001AF86EB8] ------------ lclVar ref V01 arg1
7327 // [000000001AF87468] ---XG------- comma int
7328 // [000000001AF87020] ---X-------- arrBndsChk void
7329 // [000000001AF86FA8] ---X-------- arrLen int
7330 // [000000001AF827E8] ------------ lclVar ref V01 arg1
7331 // [000000001AF82860] ------------ lclVar int V04 loc0
7332 // [000000001AF829F0] -A-XG------- = int
7333 // [000000001AF82978] D------N---- lclVar int V06 tmp0
7335 bool Compiler::optExtractArrIndex(GenTreePtr tree, ArrIndex* result, unsigned lhsNum)
// Pattern match: COMMA(ARR_BOUNDS_CHECK(arrLen(lcl), lcl), IND(lcl + (idx << cns) + cns)).
7337 if (tree->gtOper != GT_COMMA)
7341 GenTreePtr before = tree->gtGetOp1();
7342 if (before->gtOper != GT_ARR_BOUNDS_CHECK)
7346 GenTreeBoundsChk* arrBndsChk = before->AsBoundsChk();
7347 if (arrBndsChk->gtArrLen->gtGetOp1()->gtOper != GT_LCL_VAR)
7351 if (arrBndsChk->gtIndex->gtOper != GT_LCL_VAR)
7355 unsigned arrLcl = arrBndsChk->gtArrLen->gtGetOp1()->gtLclVarCommon.gtLclNum;
// If the caller supplied an expected array base local, it must match.
7356 if (lhsNum != BAD_VAR_NUM && arrLcl != lhsNum)
7361 unsigned indLcl = arrBndsChk->gtIndex->gtLclVarCommon.gtLclNum;
7363 GenTreePtr after = tree->gtGetOp2();
7365 if (after->gtOper != GT_IND)
// 'sibo' = scaled-index + base + offset expression produced by morph.
7369 GenTreePtr sibo = after->gtGetOp1();
7370 if (sibo->gtOper != GT_ADD)
7374 GenTreePtr sib = sibo->gtGetOp1();
7375 GenTreePtr ofs = sibo->gtGetOp2();
7376 if (ofs->gtOper != GT_CNS_INT)
7380 if (sib->gtOper != GT_ADD)
7384 GenTreePtr si = sib->gtGetOp2();
7385 GenTreePtr base = sib->gtGetOp1();
7386 if (si->gtOper != GT_LSH)
// The base of the address computation must be the same local the bounds check tested.
7390 if (base->OperGet() != GT_LCL_VAR || base->gtLclVarCommon.gtLclNum != arrLcl)
7394 GenTreePtr scale = si->gtGetOp2();
7395 GenTreePtr index = si->gtGetOp1();
7396 if (scale->gtOper != GT_CNS_INT)
// On AMD64 the 32-bit index is widened with a cast; look through it.
7400 #ifdef _TARGET_AMD64_
7401 if (index->gtOper != GT_CAST)
7405 GenTreePtr indexVar = index->gtGetOp1();
7407 GenTreePtr indexVar = index;
7409 if (indexVar->gtOper != GT_LCL_VAR || indexVar->gtLclVarCommon.gtLclNum != indLcl)
// Record the match: base local (root level only), index local, and the bounds-check tree.
7413 if (lhsNum == BAD_VAR_NUM)
7415 result->arrLcl = arrLcl;
7417 result->indLcls.Push(indLcl);
7418 result->bndsChks.Push(tree);
7419 result->useBlock = compCurBB;
7425 //---------------------------------------------------------------------------------------------------------------
7426 // optReconstructArrIndex: Reconstruct array index.
7429 // tree the tree to be checked if it is an array [][][] operation.
7430 // result the extracted GT_INDEX information.
7431 // lhsNum for the root level (function is recursive) callers should be BAD_VAR_NUM.
7434 // Returns true if array index can be extracted, else, return false. "rank" field in
7435 // "result" contains the array access depth. The "indLcls" fields contain the indices.
7438 // Recursively look for a list of array indices. In the example below, we encounter,
7439 // V03 = ((V05 = V00[V01]), (V05[V02])) which corresponds to access of V00[V01][V02]
7440 // The return value would then be:
7441 // ArrIndex result { arrLcl: V00, indLcls: [V01, V02], rank: 2 }
7443 // V00[V01][V02] would be morphed as:
7445 // [000000001B366848] ---XG------- indir int
7446 // [000000001B36BC50] ------------ V05 + (V02 << 2) + 16
7447 // [000000001B36C200] ---XG------- comma int
7448 // [000000001B36BDB8] ---X-------- arrBndsChk(V05, V02)
7449 // [000000001B36C278] -A-XG------- comma int
7450 // [000000001B366730] R--XG------- indir ref
7451 // [000000001B36C2F0] ------------ V00 + (V01 << 3) + 24
7452 // [000000001B36C818] ---XG------- comma ref
7453 // [000000001B36C458] ---X-------- arrBndsChk(V00, V01)
7454 // [000000001B36BB60] -A-XG------- = ref
7455 // [000000001B36BAE8] D------N---- lclVar ref V05 tmp2
7456 // [000000001B36A668] -A-XG------- = int
7457 // [000000001B36A5F0] D------N---- lclVar int V03 tmp0
7460 // The method extracts only if the array base and indices are GT_LCL_VAR.
7462 bool Compiler::optReconstructArrIndex(GenTreePtr tree, ArrIndex* result, unsigned lhsNum)
7464 // If we can extract "tree" (which is a top level comma) return.
7465 if (optExtractArrIndex(tree, result, lhsNum))
7469 // We have a comma (check if array base expr is computed in "before"), descend further.
7470 else if (tree->OperGet() == GT_COMMA)
7472 GenTreePtr before = tree->gtGetOp1();
7473 // "before" should evaluate an array base for the "after" indexing.
7474 if (before->OperGet() != GT_ASG)
7478 GenTreePtr lhs = before->gtGetOp1();
7479 GenTreePtr rhs = before->gtGetOp2();
7481 // "rhs" should contain an GT_INDEX
7482 if (!lhs->IsLocal() || !optReconstructArrIndex(rhs, result, lhsNum))
// The local assigned by "before" becomes the expected array base of "after".
7486 unsigned lhsNum = lhs->gtLclVarCommon.gtLclNum;
7487 GenTreePtr after = tree->gtGetOp2();
7488 // Pass the "lhsNum", so we can verify if indeed it is used as the array base.
7489 return optExtractArrIndex(after, result, lhsNum);
// Tree-walk callback shim: forwards to optCanOptimizeByLoopCloning with the
// LoopCloneVisitorInfo carried in the walk data.
7495 Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloningVisitor(GenTreePtr* pTree, Compiler::fgWalkData* data)
7497 return data->compiler->optCanOptimizeByLoopCloning(*pTree, (LoopCloneVisitorInfo*) data->pCallbackData);
7501 //-------------------------------------------------------------------------
7502 // optIsStackLocalInvariant: Is stack local invariant in loop.
7505 // loopNum The loop in which the variable is tested for invariance.
7506 // lclNum The local that is tested for invariance in the loop.
7509 // Returns true if the variable is loop invariant in loopNum.
7511 bool Compiler::optIsStackLocalInvariant(unsigned loopNum, unsigned lclNum)
// Address-exposed locals may be modified through aliases, so they are not invariant.
7513 if (lvaVarAddrExposed(lclNum))
7517 if (optIsVarAssgLoop(loopNum, lclNum))
7524 //----------------------------------------------------------------------------------------------
7525 // optCanOptimizeByLoopCloning: Check if the tree can be optimized by loop cloning and if so,
7526 // identify as potential candidate and update the loop context.
7529 // tree The tree encountered during the tree walk.
7530 // info Supplies information about the current block or stmt in which the tree is.
7531 // Also supplies the "context" pointer for updating with loop cloning
7532 // candidates. Also supplies loopNum.
7535 // If array index can be reconstructed, check if the iter var of the loop matches the
7536 // array index var in some dim. Also ensure other index vars before the identified
7537 // dim are loop invariant.
7540 // Skip sub trees if the optimization candidate is identified or else continue walking
7542 Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTreePtr tree, LoopCloneVisitorInfo* info)
7544 ArrIndex arrIndex(getAllocator());
7546 // Check if array index can be optimized.
7547 if (optReconstructArrIndex(tree, &arrIndex, BAD_VAR_NUM))
7549 assert(tree->gtOper == GT_COMMA);
7553 JITDUMP("Found ArrIndex at tree ");
7555 printf(" which is equivalent to: ");
// The array base itself must be invariant in the loop being considered.
7560 if (!optIsStackLocalInvariant(info->loopNum, arrIndex.arrLcl))
7562 return WALK_SKIP_SUBTREES;
7565 // Walk the dimensions and see if iterVar of the loop is used as index.
7566 for (unsigned dim = 0; dim < arrIndex.rank; ++dim)
7568 // Is index variable also used as the loop iter var.
7569 if (arrIndex.indLcls[dim] == optLoopTable[info->loopNum].lpIterVar())
7571 // Check the previous indices are all loop invariant.
7572 for (unsigned dim2 = 0; dim2 < dim; ++dim2)
7574 if (optIsVarAssgLoop(info->loopNum, arrIndex.indLcls[dim2]))
7576 JITDUMP("V%02d is assigned in loop\n", arrIndex.indLcls[dim2]);
7577 return WALK_SKIP_SUBTREES;
7583 JITDUMP("Loop %d can be cloned for ArrIndex ", info->loopNum);
7585 JITDUMP(" on dim %d\n", dim);
7588 // Update the loop context.
7589 info->context->EnsureLoopOptInfo(info->loopNum)->Push(new (this, CMK_LoopOpt) LcJaggedArrayOptInfo(arrIndex, dim, info->stmt));
7593 JITDUMP("Induction V%02d is not used as index on dim %d\n", optLoopTable[info->loopNum].lpIterVar(), dim);
7596 return WALK_SKIP_SUBTREES;
7598 else if (tree->gtOper == GT_ARR_ELEM)
7600 // TODO-CQ: CLONE: Implement.
7601 return WALK_SKIP_SUBTREES;
7603 return WALK_CONTINUE;
// Callback payload for optValidRangeCheckIndex: carries the Compiler
// instance plus the walk's verdict flag (bValidIndex, declared on a line
// elided from this view; see its uses in optValidRangeCheckIndex and
// optIsRangeCheckRemovable).
7606 struct optRangeCheckDsc
7608 Compiler* pCompiler;
// fgWalkTreePre callback: verifies that the bounds-check index expression is
// built only from (non-address-exposed) locals and constants. Any other node
// kind clears pData->bValidIndex.
7612 Walk to make sure that only locals and constants are contained in the index
7615 Compiler::fgWalkResult Compiler::optValidRangeCheckIndex(GenTreePtr * pTree, fgWalkData *data)
7617 GenTreePtr tree = *pTree;
7618 optRangeCheckDsc* pData= (optRangeCheckDsc*) data->pCallbackData;
// Indirections, statics, fields and local fields can be changed through
// aliases (possibly by another thread): reject the index.
7620 if (tree->gtOper == GT_IND || tree->gtOper == GT_CLS_VAR ||
7621 tree->gtOper == GT_FIELD || tree->gtOper == GT_LCL_FLD)
7623 pData->bValidIndex = false;
// A local variable is acceptable only if its address has not been taken.
7627 if (tree->gtOper == GT_LCL_VAR)
7629 if (pData->pCompiler->lvaTable[tree->gtLclVarCommon.gtLclNum].lvAddrExposed)
7631 pData->bValidIndex = false;
7636 return WALK_CONTINUE;
7640 returns true if a range check can legally be removed (for the moment it checks
7641 that the array is a local array (non subject to racing conditions) and that the
7642 index is either a constant or a local
7644 bool Compiler::optIsRangeCheckRemovable(GenTreePtr tree)
7646 noway_assert(tree->gtOper == GT_ARR_BOUNDS_CHECK);
7647 GenTreeBoundsChk* bndsChk = tree->AsBoundsChk();
7648 GenTreePtr pArray = bndsChk->GetArray();
// With no array reference, the check is removable only when the length is
// a known constant.
7649 if (pArray == NULL && !bndsChk->gtArrLen->IsCnsIntOrI())
7653 GenTreePtr pIndex = bndsChk->gtIndex;
7655 // The length must be a constant (the pArray == NULL case) or the array reference must be a local.
7656 // Otherwise we can be targeted by malicious race-conditions.
7659 if ( pArray->gtOper != GT_LCL_VAR )
7665 printf("Can't remove range check if the array isn't referenced with a local\n");
7673 noway_assert(pArray->gtType == TYP_REF);
7674 noway_assert(pArray->gtLclVarCommon.gtLclNum < lvaCount);
7676 if (lvaTable[pArray->gtLclVarCommon.gtLclNum].lvAddrExposed)
7678 // If the array address has been taken, don't do the optimization
7679 // (this restriction can be lowered a bit, but i don't think it's worth it)
7680 CLANG_FORMAT_COMMENT_ANCHOR;
7684 printf("Can't remove range check if the array has its address taken\n");
// Walk the index expression and confirm it contains only constants and
// non-address-exposed locals (see optValidRangeCheckIndex).
7694 optRangeCheckDsc Data;
7695 Data.pCompiler =this;
7696 Data.bValidIndex=true;
7698 fgWalkTreePre(&pIndex, optValidRangeCheckIndex, &Data);
7700 if (!Data.bValidIndex)
7705 printf("Can't remove range check with this index");
7717 /******************************************************************************
7719 * Replace x==null with (x|x)==0 if x is a GC-type.
7720 * This will stress code-gen and the emitter to make sure they support such trees.
7725 void Compiler::optOptimizeBoolsGcStress(BasicBlock * condBlock)
// Only act under the STRESS_OPT_BOOLS_GC stress mode (20% weight).
7727 if (!compStressCompile(STRESS_OPT_BOOLS_GC, 20))
7730 noway_assert(condBlock->bbJumpKind == BBJ_COND);
7731 GenTreePtr condStmt = condBlock->bbTreeList->gtPrev->gtStmt.gtStmtExpr;
7733 noway_assert(condStmt->gtOper == GT_JTRUE);
7738 GenTreePtr comparand = optIsBoolCond(condStmt, &relop, &isBool);
// Only GC-typed comparands are interesting for this stress transformation.
7740 if (comparand == NULL || !varTypeIsGC(comparand->TypeGet()))
// The comparand is about to be duplicated, so it must be free of
// assignments, calls and ordering side effects.
7743 if (comparand->gtFlags & (GTF_ASG|GTF_CALL|GTF_ORDER_SIDEEFF))
7746 GenTreePtr comparandClone = gtCloneExpr(comparand);
7748 // Bump up the ref-counts of any variables in 'comparandClone'
7749 compCurBB = condBlock;
7750 fgWalkTreePre(&comparandClone, Compiler::lvaIncRefCntsCB, (void *)this, true);
7752 noway_assert(relop->gtOp.gtOp1 == comparand);
// Randomly (50% under stress) pick OR or AND; (x|x) and (x&x) are both
// value-preserving for the null comparison.
7753 genTreeOps oper = compStressCompile(STRESS_OPT_BOOLS_GC, 50) ? GT_OR : GT_AND;
7754 relop->gtOp.gtOp1 = gtNewOperNode(oper, TYP_I_IMPL, comparand, comparandClone);
7756 // Comparand type is already checked, and we have const int, there is no harm
7757 // morphing it into a TYP_I_IMPL.
7758 noway_assert(relop->gtOp.gtOp2->gtOper == GT_CNS_INT);
7759 relop->gtOp.gtOp2->gtType = TYP_I_IMPL;
7764 /******************************************************************************
7765 * Function used by folding of boolean conditionals
7766 * Given a GT_JTRUE node, checks that it is a boolean comparison of the form
7767 * "if (boolVal ==/!= 0/1)". This is translated into a GT_EQ node with "op1"
7768 * being a boolean lclVar and "op2" the const 0/1.
7769 * On success, the comparand (ie. boolVal) is returned. Else NULL.
7770 * compPtr returns the compare node (i.e. GT_EQ or GT_NE node)
7771 * boolPtr returns whether the comparand is a boolean value (must be 0 or 1).
7772 * When return boolPtr equal to true, if the comparison was against a 1 (i.e true)
7773 * value then we morph the tree by reversing the GT_EQ/GT_NE and change the 1 to 0.
7776 GenTree * Compiler::optIsBoolCond(GenTree * condBranch,
7777 GenTree * * compPtr,
7780 bool isBool = false;
7782 noway_assert(condBranch->gtOper == GT_JTRUE);
7783 GenTree * cond = condBranch->gtOp.gtOp1;
7785 /* The condition must be "!= 0" or "== 0" */
7787 if ((cond->gtOper != GT_EQ) && (cond->gtOper != GT_NE))
7790 /* Return the compare node to the caller */
7794 /* Get hold of the comparands */
7796 GenTree * opr1 = cond->gtOp.gtOp1;
7797 GenTree * opr2 = cond->gtOp.gtOp2;
// The right-hand side must be the integer constant 0 or 1.
7799 if (opr2->gtOper != GT_CNS_INT)
7802 if (!opr2->IsIntegralConst(0) && !opr2->IsIntegralConst(1))
7805 ssize_t ival2 = opr2->gtIntCon.gtIconVal;
7807 /* Is the value a boolean?
7808 * We can either have a boolean expression (marked GTF_BOOLEAN) or
7809 * a local variable that is marked as being boolean (lvIsBoolean) */
7811 if (opr1->gtFlags & GTF_BOOLEAN)
// A constant 0/1 left operand also counts as boolean.
7815 else if ((opr1->gtOper == GT_CNS_INT) &&
7816 (opr1->IsIntegralConst(0) || opr1->IsIntegralConst(1)))
7820 else if (opr1->gtOper == GT_LCL_VAR)
7822 /* is it a boolean local variable */
7824 unsigned lclNum = opr1->gtLclVarCommon.gtLclNum;
7825 noway_assert(lclNum < lvaCount);
7827 if (lvaTable[lclNum].lvIsBoolean)
7831 /* Was our comparison against the constant 1 (i.e. true) */
7834 // If this is a boolean expression tree we can reverse the relop
7835 // and change the true to false.
// Normalize "cmp against 1" into "reversed cmp against 0" so callers only
// ever see comparisons against zero.
7838 gtReverseCond(cond);
7839 opr2->gtIntCon.gtIconVal = 0;
// optOptimizeBools: fold pairs of adjacent BBJ_COND blocks whose conditions
// are boolean comparisons against 0 into a single conditional block using a
// bitwise AND/OR of the two comparands, then remove the second block and
// patch the flow graph (preds, edge weights, EH table, loop table).
7850 void Compiler::optOptimizeBools()
7855 printf("*************** In optOptimizeBools()\n");
7858 printf("Blocks/Trees before phase\n");
7859 fgDispBasicBlocks(true);
7869 for (BasicBlock * b1 = fgFirstBB; b1; b1 = b1->bbNext)
7871 /* We're only interested in conditional jumps here */
7873 if (b1->bbJumpKind != BBJ_COND)
7876 /* If there is no next block, we're done */
7878 BasicBlock * b2 = b1->bbNext;
7882 /* The next block must not be marked as BBF_DONT_REMOVE */
7883 if (b2->bbFlags & BBF_DONT_REMOVE)
7886 /* The next block also needs to be a condition */
7888 if (b2->bbJumpKind != BBJ_COND)
7891 optOptimizeBoolsGcStress(b1);
7896 bool sameTarget; // Do b1 and b2 have the same bbJumpDest?
7898 if (b1->bbJumpDest == b2->bbJumpDest)
7900 /* Given the following sequence of blocks :
7904 we will try to fold it to :
7905 B1: brtrue(t1|t2, BX)
7911 else if (b1->bbJumpDest == b2->bbNext) /*b1->bbJumpDest->bbNum == n1+2*/
7913 /* Given the following sequence of blocks :
7917 we will try to fold it to :
7918 B1: brtrue((!t1)&&t2, B3)
7929 /* The second block must contain a single statement */
7931 GenTreePtr s2 = b2->bbTreeList;
7932 if (s2->gtPrev != s2)
7935 noway_assert(s2->gtOper == GT_STMT);
7936 GenTreePtr t2 = s2->gtStmt.gtStmtExpr;
7937 noway_assert(t2->gtOper == GT_JTRUE);
7939 /* Find the condition for the first block */
7941 GenTreePtr s1 = b1->bbTreeList->gtPrev;
7943 noway_assert(s1->gtOper == GT_STMT);
7944 GenTreePtr t1 = s1->gtStmt.gtStmtExpr;
7945 noway_assert(t1->gtOper == GT_JTRUE);
// b2 must be reachable only through b1, otherwise removing it would
// break other predecessors.
7947 if (b2->countOfInEdges() > 1)
7950 /* Find the branch conditions of b1 and b2 */
7954 GenTreePtr c1 = optIsBoolCond(t1, &t1, &bool1);
7957 GenTreePtr c2 = optIsBoolCond(t2, &t2, &bool2);
// NOTE(review): '&&' binds tighter than '||' here, so the 'gtOp1 == c'
// conjunct is only checked on the GT_NE arm; the likely intent was
// ((gtOper == GT_EQ || gtOper == GT_NE) && gtOp1 == c). Assert-only, so
// no runtime behavior change, but worth parenthesizing — TODO confirm.
7960 noway_assert(t1->gtOper == GT_EQ || t1->gtOper == GT_NE && t1->gtOp.gtOp1 == c1);
7961 noway_assert(t2->gtOper == GT_EQ || t2->gtOper == GT_NE && t2->gtOp.gtOp1 == c2);
7963 // Leave out floats where the bit-representation is more complicated
7964 // - there are two representations for 0.
7966 if (varTypeIsFloating(c1->TypeGet()) || varTypeIsFloating(c2->TypeGet()))
7969 // Make sure the types involved are of the same sizes
7970 if (genTypeSize(c1->TypeGet()) != genTypeSize(c2->TypeGet()))
7972 if (genTypeSize(t1->TypeGet()) != genTypeSize(t2->TypeGet()))
7974 #ifdef _TARGET_ARMARCH_
7975 // Skip the small operand which we cannot encode.
7976 if (varTypeIsSmall(c1->TypeGet()))
7979 /* The second condition must not contain side effects */
7981 if (c2->gtFlags & GTF_GLOB_EFFECT)
7984 /* The second condition must not be too expensive */
// c2 becomes unconditionally evaluated after the fold, so cap its
// execution cost.
7988 if (c2->gtCostEx > 12)
7993 var_types foldType = c1->TypeGet();
// Bitwise OR/AND of GC refs is done as TYP_I_IMPL.
7994 if (varTypeIsGC(foldType))
7996 foldType = TYP_I_IMPL;
8001 /* Both conditions must be the same */
8003 if (t1->gtOper != t2->gtOper)
8006 if (t1->gtOper == GT_EQ)
8008 /* t1:c1==0 t2:c2==0 ==> Branch to BX if either value is 0
8009 So we will branch to BX if (c1&c2)==0 */
8016 /* t1:c1!=0 t2:c2!=0 ==> Branch to BX if either value is non-0
8017 So we will branch to BX if (c1|c2)!=0 */
8025 /* The b1 condition must be the reverse of the b2 condition */
8027 if (t1->gtOper == t2->gtOper)
8030 if (t1->gtOper == GT_EQ)
8032 /* t1:c1==0 t2:c2!=0 ==> Branch to BX if both values are non-0
8033 So we will branch to BX if (c1&c2)!=0 */
8040 /* t1:c1!=0 t2:c2==0 ==> Branch to BX if both values are 0
8041 So we will branch to BX if (c1|c2)==0 */
8048 // Anding requires both values to be 0 or 1
8050 if ((foldOp == GT_AND) && (!bool1 || !bool2))
8054 // Now update the trees
8056 GenTreePtr cmpOp1 = gtNewOperNode(foldOp, foldType, c1, c2);
8059 /* When we 'OR'/'AND' two booleans, the result is boolean as well */
8060 cmpOp1->gtFlags |= GTF_BOOLEAN;
8064 t1->gtOp.gtOp1 = cmpOp1;
8065 t1->gtOp.gtOp2->gtType = foldType; // Could have been varTypeIsGC()
8067 #if FEATURE_SET_FLAGS
8068 // For comparisons against zero we will have the GTF_SET_FLAGS set
8069 // and this can cause an assert to fire in fgMoveOpsLeft(GenTreePtr tree)
8070 // during the CSE phase.
8072 // So make sure to clear any GTF_SET_FLAGS bit on these operations
8073 // as they are no longer feeding directly into a comparisons against zero
8075 // Make sure that the GTF_SET_FLAGS bit is cleared.
8076 // Fix 388436 ARM JitStress WP7
8077 c1->gtFlags &= ~GTF_SET_FLAGS;
8078 c2->gtFlags &= ~GTF_SET_FLAGS;
8080 // The new top level node that we just created does feed directly into
8081 // a comparison against zero, so set the GTF_SET_FLAGS bit so that
8082 // we generate an instruction that sets the flags, which allows us
8083 // to omit the cmp with zero instruction.
8085 // Request that the codegen for cmpOp1 sets the condition flags
8086 // when it generates the code for cmpOp1.
8088 cmpOp1->gtRequestSetFlags();
8091 flowList * edge1 = fgGetPredForBlock(b1->bbJumpDest, b1);
8094 /* Modify the target of the conditional jump and update bbRefs and bbPreds */
8098 edge2 = fgGetPredForBlock(b2->bbJumpDest, b2);
8102 edge2 = fgGetPredForBlock(b2->bbNext, b2);
8104 fgRemoveRefPred(b1->bbJumpDest, b1);
8106 b1->bbJumpDest = b2->bbJumpDest;
8108 fgAddRefPred(b2->bbJumpDest, b1);
8111 noway_assert(edge1 != NULL);
8112 noway_assert(edge2 != NULL);
// Combine the two edges' weight ranges; if the sums overflowed (max sum
// dipped below either addend), fall back to the widest possible range.
8114 BasicBlock::weight_t edgeSumMin = edge1->flEdgeWeightMin + edge2->flEdgeWeightMin;
8115 BasicBlock::weight_t edgeSumMax = edge1->flEdgeWeightMax + edge2->flEdgeWeightMax;
8116 if ((edgeSumMax >= edge1->flEdgeWeightMax) && (edgeSumMax >= edge2->flEdgeWeightMax))
8118 edge1->flEdgeWeightMin = edgeSumMin;
8119 edge1->flEdgeWeightMax = edgeSumMax;
8123 edge1->flEdgeWeightMin = BB_ZERO_WEIGHT;
8124 edge1->flEdgeWeightMax = BB_MAX_WEIGHT;
8127 /* Get rid of the second block (which is a BBJ_COND) */
8129 noway_assert(b1->bbJumpKind == BBJ_COND);
8130 noway_assert(b2->bbJumpKind == BBJ_COND);
8131 noway_assert(b1->bbJumpDest == b2->bbJumpDest);
8132 noway_assert(b1->bbNext == b2); noway_assert(b2->bbNext);
8135 b2->bbFlags |= BBF_REMOVED;
8137 // If b2 was the last block of a try or handler, update the EH table.
8139 ehUpdateForDeletedBlock(b2);
8141 /* Update bbRefs and bbPreds */
8143 /* Replace pred 'b2' for 'b2->bbNext' with 'b1'
8144 * Remove pred 'b2' for 'b2->bbJumpDest' */
8146 fgReplacePred(b2->bbNext, b2, b1);
8148 fgRemoveRefPred(b2->bbJumpDest, b2);
8150 /* Update the block numbers and try again */
8162 // Update loop table
8163 fgUpdateLoopsAfterCompacting(b1, b2);
8168 printf("Folded %sboolean conditions of BB%02u and BB%02u to :\n",
8169 c2->OperIsLeaf() ? "" : "non-leaf ",
8170 b1->bbNum, b2->bbNum);
8171 gtDispTree(s1); printf("\n");
8179 fgDebugCheckBBlist();