1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
10 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
11 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
17 #pragma warning(disable : 4701)
20 /*****************************************************************************/
24 unsigned Compiler::optRangeChkRmv = 0;
26 unsigned Compiler::optRangeChkAll = 0;
29 /*****************************************************************************/
31 void Compiler::optInit()
33 optLoopsMarked = false;
36 /* Initialize the # of tracked loops to 0 */
38 /* Keep track of the number of calls and indirect calls made by this method */
40 optIndirectCallCount = 0;
41 optNativeCallCount = 0;
42 optAssertionCount = 0;
43 optAssertionDep = nullptr;
45 optCSECandidateTotal = 0;
46 optCSEstart = UINT_MAX;
48 #endif // FEATURE_ANYCSE
51 DataFlow::DataFlow(Compiler* pCompiler) : m_pCompiler(pCompiler)
55 /*****************************************************************************
59 void Compiler::optSetBlockWeights()
61 noway_assert(!opts.MinOpts() && !opts.compDbgCode);
62 assert(fgDomsComputed);
68 bool firstBBdomsRets = true;
72 for (block = fgFirstBB; (block != nullptr); block = block->bbNext)
74 /* Blocks that can't be reached via the first block are rarely executed */
75 if (!fgReachable(fgFirstBB, block))
77 block->bbSetRunRarely();
80 if (block->bbWeight != BB_ZERO_WEIGHT)
82 // Calculate our bbWeight:
84 // o BB_UNITY_WEIGHT if we dominate all BBJ_RETURN blocks
85 // o otherwise BB_UNITY_WEIGHT / 2
87 bool domsRets = true; // Assume that we will dominate
89 for (BasicBlockList* retBlocks = fgReturnBlocks; retBlocks != nullptr; retBlocks = retBlocks->next)
91 if (!fgDominate(block, retBlocks->block))
98 if (block == fgFirstBB)
100 firstBBdomsRets = domsRets;
103 // If we are not using profile weight then we lower the weight
104 // of blocks that do not dominate a return block
106 if (firstBBdomsRets && (fgIsUsingProfileWeights() == false) && (domsRets == false))
111 block->modifyBBWeight(block->bbWeight / 2);
112 noway_assert(block->bbWeight);
118 if (changed && verbose)
120 printf("\nAfter optSetBlockWeights:\n");
125 /* Check that the flowgraph data (bbNum, bbRefs, bbPreds) is up-to-date */
126 fgDebugCheckBBlist();
130 /*****************************************************************************
132 * Marks the blocks between 'begBlk' and 'endBlk' as part of a loop.
135 void Compiler::optMarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk, bool excludeEndBlk)
137 /* Calculate the 'loopWeight',
138 this is the amount to increase each block in the loop
139 Our heuristic is that loops are weighted eight times more
140 than straight line code.
141 Thus we increase each block by 7 times the weight of
142 the loop header block,
143 if the loops are all properly formed gives us:
144 (assuming that BB_LOOP_WEIGHT is 8)
146 1 -- non loop basic block
147 8 -- single loop nesting
148 64 -- double loop nesting
149 512 -- triple loop nesting
153 noway_assert(begBlk->bbNum <= endBlk->bbNum);
154 noway_assert(begBlk->isLoopHead());
155 noway_assert(fgReachable(begBlk, endBlk));
160 printf("\nMarking loop L%02u", begBlk->bbLoopNum);
164 noway_assert(!opts.MinOpts());
166 /* Build list of backedges for block begBlk */
167 flowList* backedgeList = nullptr;
169 for (flowList* pred = begBlk->bbPreds; pred != nullptr; pred = pred->flNext)
171 /* Is this a backedge? */
172 if (pred->flBlock->bbNum >= begBlk->bbNum)
174 flowList* flow = new (this, CMK_FlowList) flowList();
176 #if MEASURE_BLOCK_SIZE
178 genFlowNodeSize += sizeof(flowList);
179 #endif // MEASURE_BLOCK_SIZE
181 flow->flNext = backedgeList;
182 flow->flBlock = pred->flBlock;
187 /* At least one backedge must have been found (the one from endBlk) */
188 noway_assert(backedgeList);
190 BasicBlock* curBlk = begBlk;
194 noway_assert(curBlk);
196 // For curBlk to be part of a loop that starts at begBlk
197 // curBlk must be reachable from begBlk and (since this is a loop)
198 // likewise begBlk must be reachable from curBlk.
201 if (fgReachable(curBlk, begBlk) && fgReachable(begBlk, curBlk))
203 /* If this block reaches any of the backedge blocks we set reachable */
204 /* If this block dominates any of the backedge blocks we set dominates */
205 bool reachable = false;
206 bool dominates = false;
208 for (flowList* tmp = backedgeList; tmp != nullptr; tmp = tmp->flNext)
210 BasicBlock* backedge = tmp->flBlock;
212 if (!curBlk->isRunRarely())
214 reachable |= fgReachable(curBlk, backedge);
215 dominates |= fgDominate(curBlk, backedge);
217 if (dominates && reachable)
226 noway_assert(curBlk->bbWeight > BB_ZERO_WEIGHT);
230 if ((curBlk->bbFlags & BBF_PROF_WEIGHT) != 0)
232 // We have real profile weights, so we aren't going to change this blocks weight
233 weight = curBlk->bbWeight;
239 weight = curBlk->bbWeight * BB_LOOP_WEIGHT;
243 weight = curBlk->bbWeight * (BB_LOOP_WEIGHT / 2);
247 // The multiplication may have caused us to overflow
249 if (weight < curBlk->bbWeight)
251 // The multiplication caused us to overflow
252 weight = BB_MAX_WEIGHT;
255 // Set the new weight
257 curBlk->modifyBBWeight(weight);
262 printf("\n BB%02u(wt=%s)", curBlk->bbNum, refCntWtd2str(curBlk->getBBWeight(this)));
268 /* Stop if we've reached the last block in the loop */
270 if (curBlk == endBlk)
275 curBlk = curBlk->bbNext;
277 /* If we are excluding the endBlk then stop if we've reached endBlk */
279 if (excludeEndBlk && (curBlk == endBlk))
286 /*****************************************************************************
288 * Unmark the blocks between 'begBlk' and 'endBlk' as part of a loop.
291 void Compiler::optUnmarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk)
293 /* A set of blocks that were previously marked as a loop are now
294 to be unmarked, since we have decided that for some reason this
295 loop no longer exists.
296 Basically we are just reseting the blocks bbWeight to their
300 noway_assert(begBlk->bbNum <= endBlk->bbNum);
301 noway_assert(begBlk->isLoopHead());
303 noway_assert(!opts.MinOpts());
306 unsigned backEdgeCount = 0;
308 for (flowList* pred = begBlk->bbPreds; pred != nullptr; pred = pred->flNext)
310 curBlk = pred->flBlock;
312 /* is this a backward edge? (from curBlk to begBlk) */
314 if (begBlk->bbNum > curBlk->bbNum)
319 /* We only consider back-edges that are BBJ_COND or BBJ_ALWAYS for loops */
321 if ((curBlk->bbJumpKind != BBJ_COND) && (curBlk->bbJumpKind != BBJ_ALWAYS))
329 /* Only unmark the loop blocks if we have exactly one loop back edge */
330 if (backEdgeCount != 1)
335 if (backEdgeCount > 0)
337 printf("\nNot removing loop L%02u, due to an additional back edge", begBlk->bbLoopNum);
339 else if (backEdgeCount == 0)
341 printf("\nNot removing loop L%02u, due to no back edge", begBlk->bbLoopNum);
347 noway_assert(backEdgeCount == 1);
348 noway_assert(fgReachable(begBlk, endBlk));
353 printf("\nUnmarking loop L%02u", begBlk->bbLoopNum);
360 noway_assert(curBlk);
362 // For curBlk to be part of a loop that starts at begBlk
363 // curBlk must be reachable from begBlk and (since this is a loop)
364 // likewise begBlk must be reachable from curBlk.
366 if (!curBlk->isRunRarely() && fgReachable(curBlk, begBlk) && fgReachable(begBlk, curBlk))
368 unsigned weight = curBlk->bbWeight;
370 // Don't unmark blocks that are set to BB_MAX_WEIGHT
371 // Don't unmark blocks when we are using profile weights
373 if (!curBlk->isMaxBBWeight() && ((curBlk->bbFlags & BBF_PROF_WEIGHT) == 0))
375 if (!fgDominate(curBlk, endBlk))
381 /* Merging of blocks can disturb the Dominates
382 information (see RAID #46649) */
383 if (weight < BB_LOOP_WEIGHT)
389 // We can overflow here so check for it
390 if (weight < curBlk->bbWeight)
392 weight = BB_MAX_WEIGHT;
395 assert(weight >= BB_LOOP_WEIGHT);
397 curBlk->modifyBBWeight(weight / BB_LOOP_WEIGHT);
403 printf("\n BB%02u(wt=%s)", curBlk->bbNum, refCntWtd2str(curBlk->getBBWeight(this)));
407 /* Stop if we've reached the last block in the loop */
409 if (curBlk == endBlk)
414 curBlk = curBlk->bbNext;
416 /* Stop if we go past the last block in the loop, as it may have been deleted */
417 if (curBlk->bbNum > endBlk->bbNum)
424 /*****************************************************************************************************
426 * Function called to update the loop table and bbWeight before removing a block
429 void Compiler::optUpdateLoopsBeforeRemoveBlock(BasicBlock* block, bool skipUnmarkLoop)
436 noway_assert(!opts.MinOpts());
438 bool removeLoop = false;
440 /* If an unreachable block was part of a loop entry or bottom then the loop is unreachable */
441 /* Special case: the block was the head of a loop - or pointing to a loop entry */
443 for (unsigned loopNum = 0; loopNum < optLoopCount; loopNum++)
445 /* Some loops may have been already removed by
446 * loop unrolling or conditional folding */
448 if (optLoopTable[loopNum].lpFlags & LPFLG_REMOVED)
453 if (block == optLoopTable[loopNum].lpEntry || block == optLoopTable[loopNum].lpBottom)
455 optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED;
462 printf("\nUpdateLoopsBeforeRemoveBlock Before: ");
463 optPrintLoopInfo(loopNum);
467 /* If the loop is still in the table
468 * any block in the loop must be reachable !!! */
470 noway_assert(optLoopTable[loopNum].lpEntry != block);
471 noway_assert(optLoopTable[loopNum].lpBottom != block);
473 if (optLoopTable[loopNum].lpExit == block)
475 optLoopTable[loopNum].lpExit = nullptr;
476 optLoopTable[loopNum].lpFlags &= ~LPFLG_ONE_EXIT;
480 /* If this points to the actual entry in the loop
481 * then the whole loop may become unreachable */
483 switch (block->bbJumpKind)
486 BasicBlock** jumpTab;
490 if (block->bbNext == optLoopTable[loopNum].lpEntry)
495 if (block->bbJumpKind == BBJ_NONE)
503 noway_assert(block->bbJumpDest);
504 if (block->bbJumpDest == optLoopTable[loopNum].lpEntry)
511 jumpCnt = block->bbJumpSwt->bbsCount;
512 jumpTab = block->bbJumpSwt->bbsDstTab;
516 noway_assert(*jumpTab);
517 if ((*jumpTab) == optLoopTable[loopNum].lpEntry)
521 } while (++jumpTab, --jumpCnt);
530 /* Check if the entry has other predecessors outside the loop
531 * TODO: Replace this when predecessors are available */
533 BasicBlock* auxBlock;
534 for (auxBlock = fgFirstBB; auxBlock; auxBlock = auxBlock->bbNext)
536 /* Ignore blocks in the loop */
538 if (auxBlock->bbNum > optLoopTable[loopNum].lpHead->bbNum &&
539 auxBlock->bbNum <= optLoopTable[loopNum].lpBottom->bbNum)
544 switch (auxBlock->bbJumpKind)
547 BasicBlock** jumpTab;
551 if (auxBlock->bbNext == optLoopTable[loopNum].lpEntry)
556 if (auxBlock->bbJumpKind == BBJ_NONE)
564 noway_assert(auxBlock->bbJumpDest);
565 if (auxBlock->bbJumpDest == optLoopTable[loopNum].lpEntry)
572 jumpCnt = auxBlock->bbJumpSwt->bbsCount;
573 jumpTab = auxBlock->bbJumpSwt->bbsDstTab;
577 noway_assert(*jumpTab);
578 if ((*jumpTab) == optLoopTable[loopNum].lpEntry)
582 } while (++jumpTab, --jumpCnt);
592 optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED;
595 else if (optLoopTable[loopNum].lpHead == block)
597 /* The loop has a new head - Just update the loop table */
598 optLoopTable[loopNum].lpHead = block->bbPrev;
604 printf("\nUpdateLoopsBeforeRemoveBlock After: ");
605 optPrintLoopInfo(loopNum);
610 if ((skipUnmarkLoop == false) && ((block->bbJumpKind == BBJ_ALWAYS) || (block->bbJumpKind == BBJ_COND)) &&
611 (block->bbJumpDest->isLoopHead()) && (block->bbJumpDest->bbNum <= block->bbNum) && fgDomsComputed &&
612 (fgCurBBEpochSize == fgDomBBcount + 1) && fgReachable(block->bbJumpDest, block))
614 optUnmarkLoopBlocks(block->bbJumpDest, block);
620 /*****************************************************************************
622 * Given the beginBlock of the loop, return the index of this loop
626 unsigned Compiler::optFindLoopNumberFromBeginBlock(BasicBlock* begBlk)
630 for (lnum = 0; lnum < optLoopCount; lnum++)
632 if (optLoopTable[lnum].lpHead->bbNext == begBlk)
639 noway_assert(!"Loop number not found.");
644 /*****************************************************************************
646 * Print loop info in an uniform way.
649 void Compiler::optPrintLoopInfo(unsigned loopInd,
654 BasicBlock* lpBottom,
655 unsigned char lpExitCnt,
659 noway_assert(lpHead);
662 // NOTE: we take "loopInd" as an argument instead of using the one
663 // stored in begBlk->bbLoopNum because sometimes begBlk->bbLoopNum
664 // has not be set correctly. For example, in optRecordLoop().
665 // However, in most of the cases, loops should have been recorded.
666 // Therefore the correct way is to call the Compiler::optPrintLoopInfo(unsigned lnum)
667 // version of this method.
669 printf("L%02u, from BB%02u", loopInd, lpFirst->bbNum);
670 if (lpTop != lpFirst)
672 printf(" (loop top is BB%02u)", lpTop->bbNum);
675 printf(" to BB%02u (Head=BB%02u, Entry=BB%02u, ExitCnt=%d", lpBottom->bbNum, lpHead->bbNum, lpEntry->bbNum,
680 printf(" at BB%02u", lpExit->bbNum);
683 if (parentLoop != BasicBlock::NOT_IN_LOOP)
685 printf(", parent loop = L%02u", parentLoop);
690 /*****************************************************************************
692 * Print loop information given the index of the loop in the loop table.
695 void Compiler::optPrintLoopInfo(unsigned lnum)
697 noway_assert(lnum < optLoopCount);
699 LoopDsc* ldsc = &optLoopTable[lnum]; // lnum is the INDEX to the loop table.
701 optPrintLoopInfo(lnum, ldsc->lpHead, ldsc->lpFirst, ldsc->lpTop, ldsc->lpEntry, ldsc->lpBottom, ldsc->lpExitCnt,
702 ldsc->lpExit, ldsc->lpParent);
707 //------------------------------------------------------------------------
708 // optPopulateInitInfo: Populate loop init info in the loop table.
711 // init - the tree that is supposed to initialize the loop iterator.
712 // iterVar - loop iteration variable.
715 // "false" if the loop table could not be populated with the loop iterVar init info.
718 // The 'init' tree is checked if its lhs is a local and rhs is either
719 // a const or a local.
721 bool Compiler::optPopulateInitInfo(unsigned loopInd, GenTreePtr init, unsigned iterVar)
723 // Operator should be =
724 if (init->gtOper != GT_ASG)
729 GenTreePtr lhs = init->gtOp.gtOp1;
730 GenTreePtr rhs = init->gtOp.gtOp2;
731 // LHS has to be local and should equal iterVar.
732 if (lhs->gtOper != GT_LCL_VAR || lhs->gtLclVarCommon.gtLclNum != iterVar)
737 // RHS can be constant or local var.
738 // TODO-CQ: CLONE: Add arr length for descending loops.
739 if (rhs->gtOper == GT_CNS_INT && rhs->TypeGet() == TYP_INT)
741 optLoopTable[loopInd].lpFlags |= LPFLG_CONST_INIT;
742 optLoopTable[loopInd].lpConstInit = (int)rhs->gtIntCon.gtIconVal;
744 else if (rhs->gtOper == GT_LCL_VAR)
746 optLoopTable[loopInd].lpFlags |= LPFLG_VAR_INIT;
747 optLoopTable[loopInd].lpVarInit = rhs->gtLclVarCommon.gtLclNum;
756 //----------------------------------------------------------------------------------
757 // optCheckIterInLoopTest: Check if iter var is used in loop test.
760 // test "jtrue" tree or an asg of the loop iter termination condition
761 // from/to blocks (beg, end) which are part of the loop.
762 // iterVar loop iteration variable.
763 // loopInd loop index.
766 // The test tree is parsed to check if "iterVar" matches the lhs of the condition
767 // and the rhs limit is extracted from the "test" tree. The limit information is
768 // added to the loop table.
771 // "false" if the loop table could not be populated with the loop test info or
772 // if the test condition doesn't involve iterVar.
774 bool Compiler::optCheckIterInLoopTest(
775 unsigned loopInd, GenTreePtr test, BasicBlock* from, BasicBlock* to, unsigned iterVar)
777 // Obtain the relop from the "test" tree.
779 if (test->gtOper == GT_JTRUE)
781 relop = test->gtGetOp1();
785 assert(test->gtOper == GT_ASG);
786 relop = test->gtGetOp2();
789 noway_assert(relop->OperKind() & GTK_RELOP);
791 GenTreePtr opr1 = relop->gtOp.gtOp1;
792 GenTreePtr opr2 = relop->gtOp.gtOp2;
797 // Make sure op1 or op2 is the iterVar.
798 if (opr1->gtOper == GT_LCL_VAR && opr1->gtLclVarCommon.gtLclNum == iterVar)
803 else if (opr2->gtOper == GT_LCL_VAR && opr2->gtLclVarCommon.gtLclNum == iterVar)
813 if (iterOp->gtType != TYP_INT)
818 // Mark the iterator node.
819 iterOp->gtFlags |= GTF_VAR_ITERATOR;
821 // Check what type of limit we have - constant, variable or arr-len.
822 if (limitOp->gtOper == GT_CNS_INT)
824 optLoopTable[loopInd].lpFlags |= LPFLG_CONST_LIMIT;
825 if ((limitOp->gtFlags & GTF_ICON_SIMD_COUNT) != 0)
827 optLoopTable[loopInd].lpFlags |= LPFLG_SIMD_LIMIT;
830 else if (limitOp->gtOper == GT_LCL_VAR && !optIsVarAssigned(from, to, nullptr, limitOp->gtLclVarCommon.gtLclNum))
832 optLoopTable[loopInd].lpFlags |= LPFLG_VAR_LIMIT;
834 else if (limitOp->gtOper == GT_ARR_LENGTH)
836 optLoopTable[loopInd].lpFlags |= LPFLG_ARRLEN_LIMIT;
842 // Save the type of the comparison between the iterator and the limit.
843 optLoopTable[loopInd].lpTestTree = relop;
847 //----------------------------------------------------------------------------------
848 // optIsLoopIncrTree: Check if loop is a tree of form v += 1 or v = v + 1
851 // incr The incr tree to be checked. Whether incr tree is
852 // oper-equal(+=, -=...) type nodes or v=v+1 type ASG nodes.
855 // The test tree is parsed to check if "iterVar" matches the lhs of the condition
856 // and the rhs limit is extracted from the "test" tree. The limit information is
857 // added to the loop table.
860 // iterVar local num if the iterVar is found, otherwise BAD_VAR_NUM.
862 unsigned Compiler::optIsLoopIncrTree(GenTreePtr incr)
865 genTreeOps updateOper;
866 unsigned iterVar = incr->IsLclVarUpdateTree(&incrVal, &updateOper);
867 if (iterVar != BAD_VAR_NUM)
869 // We have v = v op y type asg node.
882 // Increment should be by a const int.
883 // TODO-CQ: CLONE: allow variable increments.
884 if ((incrVal->gtOper != GT_CNS_INT) || (incrVal->TypeGet() != TYP_INT))
893 //----------------------------------------------------------------------------------
894 // optComputeIterInfo: Check tree is loop increment of a lcl that is loop-invariant.
897 // from, to - are blocks (beg, end) which are part of the loop.
898 // incr - tree that increments the loop iterator. v+=1 or v=v+1.
899 // pIterVar - see return value.
902 // Returns true if iterVar "v" can be returned in "pIterVar", otherwise returns
906 // Check if the "incr" tree is a "v=v+1 or v+=1" type tree and make sure it is not
907 // assigned in the loop.
909 bool Compiler::optComputeIterInfo(GenTreePtr incr, BasicBlock* from, BasicBlock* to, unsigned* pIterVar)
912 unsigned iterVar = optIsLoopIncrTree(incr);
913 if (iterVar == BAD_VAR_NUM)
917 if (optIsVarAssigned(from, to, incr, iterVar))
919 JITDUMP("iterVar is assigned in loop\n");
927 //----------------------------------------------------------------------------------
928 // optIsLoopTestEvalIntoTemp:
929 // Pattern match if the test tree is computed into a tmp
930 // and the "tmp" is used as jump condition for loop termination.
933 // testStmt - is the JTRUE statement that is of the form: jmpTrue (Vtmp != 0)
934 // where Vtmp contains the actual loop test result.
935 // newStmt - contains the statement that is the actual test stmt involving
936 // the loop iterator.
939 // Returns true if a new test tree can be obtained.
942 // Scan if the current stmt is a jtrue with (Vtmp != 0) as condition
943 // Then returns the rhs for def of Vtmp as the "test" node.
946 // This method just retrieves what it thinks is the "test" node,
947 // the callers are expected to verify that "iterVar" is used in the test.
949 bool Compiler::optIsLoopTestEvalIntoTemp(GenTreePtr testStmt, GenTreePtr* newTest)
951 GenTreePtr test = testStmt->gtStmt.gtStmtExpr;
953 if (test->gtOper != GT_JTRUE)
958 GenTreePtr relop = test->gtGetOp1();
959 noway_assert(relop->OperIsCompare());
961 GenTreePtr opr1 = relop->gtOp.gtOp1;
962 GenTreePtr opr2 = relop->gtOp.gtOp2;
964 // Make sure we have jtrue (vtmp != 0)
965 if ((relop->OperGet() == GT_NE) && (opr1->OperGet() == GT_LCL_VAR) && (opr2->OperGet() == GT_CNS_INT) &&
966 opr2->IsIntegralConst(0))
968 // Get the previous statement to get the def (rhs) of Vtmp to see
969 // if the "test" is evaluated into Vtmp.
970 GenTreePtr prevStmt = testStmt->gtPrev;
971 if (prevStmt == nullptr)
976 GenTreePtr tree = prevStmt->gtStmt.gtStmtExpr;
977 if (tree->OperGet() == GT_ASG)
979 GenTreePtr lhs = tree->gtOp.gtOp1;
980 GenTreePtr rhs = tree->gtOp.gtOp2;
982 // Return as the new test node.
983 if (lhs->gtOper == GT_LCL_VAR && lhs->AsLclVarCommon()->GetLclNum() == opr1->AsLclVarCommon()->GetLclNum())
985 if (rhs->OperIsCompare())
996 //----------------------------------------------------------------------------------
997 // optExtractInitTestIncr:
998 // Extract the "init", "test" and "incr" nodes of the loop.
1001 // head - Loop head block
1002 // bottom - Loop bottom block
1003 // top - Loop top block
1004 // ppInit - The init stmt of the loop if found.
1005 // ppTest - The test stmt of the loop if found.
1006 // ppIncr - The incr stmt of the loop if found.
1009 // The results are put in "ppInit", "ppTest" and "ppIncr" if the method
1010 // returns true. Returns false if the information can't be extracted.
1013 // Check if the "test" stmt is last stmt in the loop "bottom". If found good,
1014 // "test" stmt is found. Try to find the "incr" stmt. Check previous stmt of
1015 // "test" to get the "incr" stmt. If it is not found it could be a loop of the
1018 // +-------<-----------------<-----------+
1021 // BBinit(head) -> BBcond(top) -> BBLoopBody(bottom) ---^
1023 // Check if the "incr" tree is present in the loop "top" node as the last stmt.
1024 // Also check if the "test" tree is assigned to a tmp node and the tmp is used
1025 // in the jtrue condition.
1028 // This method just retrieves what it thinks is the "test" node,
1029 // the callers are expected to verify that "iterVar" is used in the test.
1031 bool Compiler::optExtractInitTestIncr(
1032 BasicBlock* head, BasicBlock* bottom, BasicBlock* top, GenTreePtr* ppInit, GenTreePtr* ppTest, GenTreePtr* ppIncr)
1034 assert(ppInit != nullptr);
1035 assert(ppTest != nullptr);
1036 assert(ppIncr != nullptr);
1038 // Check if last two statements in the loop body are the increment of the iterator
1039 // and the loop termination test.
1040 noway_assert(bottom->bbTreeList != nullptr);
1041 GenTreePtr test = bottom->bbTreeList->gtPrev;
1042 noway_assert(test != nullptr && test->gtNext == nullptr);
1045 if (optIsLoopTestEvalIntoTemp(test, &newTest))
1050 // Check if we have the incr tree before the test tree, if we don't,
1051 // check if incr is part of the loop "top".
1052 GenTreePtr incr = test->gtPrev;
1053 if (incr == nullptr || optIsLoopIncrTree(incr->gtStmt.gtStmtExpr) == BAD_VAR_NUM)
1055 if (top == nullptr || top->bbTreeList == nullptr || top->bbTreeList->gtPrev == nullptr)
1060 // If the prev stmt to loop test is not incr, then check if we have loop test evaluated into a tmp.
1061 GenTreePtr topLast = top->bbTreeList->gtPrev;
1062 if (optIsLoopIncrTree(topLast->gtStmt.gtStmtExpr) != BAD_VAR_NUM)
1072 assert(test != incr);
1074 // Find the last statement in the loop pre-header which we expect to be the initialization of
1075 // the loop iterator.
1076 GenTreePtr phdr = head->bbTreeList;
1077 if (phdr == nullptr)
1082 GenTreePtr init = phdr->gtPrev;
1083 noway_assert(init != nullptr && (init->gtNext == nullptr));
1085 // If it is a duplicated loop condition, skip it.
1086 if (init->gtFlags & GTF_STMT_CMPADD)
1088 bool doGetPrev = true;
1092 // Previous optimization passes may have inserted compiler-generated
1093 // statements other than duplicated loop conditions.
1094 doGetPrev = (init->gtPrev != nullptr);
1098 // Must be a duplicated loop condition.
1099 noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
1104 init = init->gtPrev;
1106 noway_assert(init != nullptr);
1109 noway_assert(init->gtOper == GT_STMT);
1110 noway_assert(test->gtOper == GT_STMT);
1111 noway_assert(incr->gtOper == GT_STMT);
1113 *ppInit = init->gtStmt.gtStmtExpr;
1114 *ppTest = test->gtStmt.gtStmtExpr;
1115 *ppIncr = incr->gtStmt.gtStmtExpr;
1120 /*****************************************************************************
1122 * Record the loop in the loop table.
1125 void Compiler::optRecordLoop(BasicBlock* head,
1131 unsigned char exitCnt)
1133 // Record this loop in the table, if there's room.
1135 assert(optLoopCount <= MAX_LOOP_NUM);
1136 if (optLoopCount == MAX_LOOP_NUM)
1139 loopOverflowThisMethod = true;
1144 // Assumed preconditions on the loop we're adding.
1145 assert(first->bbNum <= top->bbNum);
1146 assert(top->bbNum <= entry->bbNum);
1147 assert(entry->bbNum <= bottom->bbNum);
1148 assert(head->bbNum < top->bbNum || head->bbNum > bottom->bbNum);
1150 // If the new loop contains any existing ones, add it in the right place.
1151 unsigned char loopInd = optLoopCount;
1152 for (unsigned char prevPlus1 = optLoopCount; prevPlus1 > 0; prevPlus1--)
1154 unsigned char prev = prevPlus1 - 1;
1155 if (optLoopTable[prev].lpContainedBy(first, bottom))
1160 // Move up any loops if necessary.
1161 for (unsigned j = optLoopCount; j > loopInd; j--)
1163 optLoopTable[j] = optLoopTable[j - 1];
1167 for (unsigned i = loopInd + 1; i < optLoopCount; i++)
1169 // The loop is well-formed.
1170 assert(optLoopTable[i].lpWellFormed());
1171 // Check for disjoint.
1172 if (optLoopTable[i].lpDisjoint(first, bottom))
1176 // Otherwise, assert complete containment (of optLoopTable[i] in new loop).
1177 assert(optLoopTable[i].lpContainedBy(first, bottom));
1181 optLoopTable[loopInd].lpHead = head;
1182 optLoopTable[loopInd].lpFirst = first;
1183 optLoopTable[loopInd].lpTop = top;
1184 optLoopTable[loopInd].lpBottom = bottom;
1185 optLoopTable[loopInd].lpEntry = entry;
1186 optLoopTable[loopInd].lpExit = exit;
1187 optLoopTable[loopInd].lpExitCnt = exitCnt;
1189 optLoopTable[loopInd].lpParent = BasicBlock::NOT_IN_LOOP;
1190 optLoopTable[loopInd].lpChild = BasicBlock::NOT_IN_LOOP;
1191 optLoopTable[loopInd].lpSibling = BasicBlock::NOT_IN_LOOP;
1193 optLoopTable[loopInd].lpFlags = 0;
1195 // We haven't yet recorded any side effects.
1196 optLoopTable[loopInd].lpLoopHasHeapHavoc = false;
1197 optLoopTable[loopInd].lpFieldsModified = nullptr;
1198 optLoopTable[loopInd].lpArrayElemTypesModified = nullptr;
1200 // If DO-WHILE loop mark it as such.
1201 if (head->bbNext == entry)
1203 optLoopTable[loopInd].lpFlags |= LPFLG_DO_WHILE;
1206 // If single exit loop mark it as such.
1210 optLoopTable[loopInd].lpFlags |= LPFLG_ONE_EXIT;
1214 // Try to find loops that have an iterator (i.e. for-like loops) "for (init; test; incr){ ... }"
1215 // We have the following restrictions:
1216 // 1. The loop condition must be a simple one i.e. only one JTRUE node
1217 // 2. There must be a loop iterator (a local var) that is
1218 // incremented (decremented or lsh, rsh, mul) with a constant value
1219 // 3. The iterator is incremented exactly once
1220 // 4. The loop condition must use the iterator.
1222 if (bottom->bbJumpKind == BBJ_COND)
1227 if (!optExtractInitTestIncr(head, bottom, top, &init, &test, &incr))
1232 unsigned iterVar = BAD_VAR_NUM;
1233 if (!optComputeIterInfo(incr, head->bbNext, bottom, &iterVar))
1238 // Make sure the "iterVar" initialization is never skipped,
1239 // i.e. every pred of ENTRY other than HEAD is in the loop.
1240 for (flowList* predEdge = entry->bbPreds; predEdge; predEdge = predEdge->flNext)
1242 BasicBlock* predBlock = predEdge->flBlock;
1243 if ((predBlock != head) && !optLoopTable[loopInd].lpContains(predBlock))
1249 if (!optPopulateInitInfo(loopInd, init, iterVar))
1254 // Check that the iterator is used in the loop condition.
1255 if (!optCheckIterInLoopTest(loopInd, test, head->bbNext, bottom, iterVar))
1260 // We know the loop has an iterator at this point ->flag it as LPFLG_ITER
1261 // Record the iterator, the pointer to the test node
1262 // and the initial value of the iterator (constant or local var)
1263 optLoopTable[loopInd].lpFlags |= LPFLG_ITER;
1266 optLoopTable[loopInd].lpIterTree = incr;
1269 // Save the initial value of the iterator - can be lclVar or constant
1270 // Flag the loop accordingly.
1276 simpleTestLoopCount++;
1279 // Check if a constant iteration loop.
1280 if ((optLoopTable[loopInd].lpFlags & LPFLG_CONST_INIT) && (optLoopTable[loopInd].lpFlags & LPFLG_CONST_LIMIT))
1282 // This is a constant loop.
1283 optLoopTable[loopInd].lpFlags |= LPFLG_CONST;
1285 constIterLoopCount++;
1292 printf("\nConstant loop initializer:\n");
1295 printf("\nConstant loop body:\n");
1297 BasicBlock* block = head;
1300 block = block->bbNext;
1301 for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
1303 if (stmt->gtStmt.gtStmtExpr == incr)
1308 gtDispTree(stmt->gtStmt.gtStmtExpr);
1310 } while (block != bottom);
1316 DBEXEC(verbose, optPrintLoopRecording(loopInd));
1321 //------------------------------------------------------------------------
1322 // optPrintLoopRecording: Print a recording of the loop.
1325 // loopInd - loop index.
1327 void Compiler::optPrintLoopRecording(unsigned loopInd)
1329 printf("Recorded loop %s", (loopInd != optLoopCount ? "(extended) " : ""));
1330 optPrintLoopInfo(optLoopCount, // Not necessarily the loop index, but the number of loops that have been added.
1331 optLoopTable[loopInd].lpHead, optLoopTable[loopInd].lpFirst, optLoopTable[loopInd].lpTop,
1332 optLoopTable[loopInd].lpEntry, optLoopTable[loopInd].lpBottom, optLoopTable[loopInd].lpExitCnt,
1333 optLoopTable[loopInd].lpExit);
1335 // If an iterator loop print the iterator and the initialization.
1336 if (optLoopTable[loopInd].lpFlags & LPFLG_ITER)
1338 printf(" [over V%02u", optLoopTable[loopInd].lpIterVar());
1340 printf(GenTree::NodeName(optLoopTable[loopInd].lpIterOper()));
1342 printf("%d )", optLoopTable[loopInd].lpIterConst());
1344 if (optLoopTable[loopInd].lpFlags & LPFLG_CONST_INIT)
1346 printf(" from %d", optLoopTable[loopInd].lpConstInit);
1348 if (optLoopTable[loopInd].lpFlags & LPFLG_VAR_INIT)
1350 printf(" from V%02u", optLoopTable[loopInd].lpVarInit);
1353 // If a simple test condition print operator and the limits */
1354 printf(GenTree::NodeName(optLoopTable[loopInd].lpTestOper()));
1356 if (optLoopTable[loopInd].lpFlags & LPFLG_CONST_LIMIT)
1358 printf("%d ", optLoopTable[loopInd].lpConstLimit());
1361 if (optLoopTable[loopInd].lpFlags & LPFLG_VAR_LIMIT)
1363 printf("V%02u ", optLoopTable[loopInd].lpVarLimit());
1372 void Compiler::optCheckPreds()
1375 BasicBlock* blockPred;
1378 for (block = fgFirstBB; block; block = block->bbNext)
1380 for (pred = block->bbPreds; pred; pred = pred->flNext)
1382 // make sure this pred is part of the BB list
1383 for (blockPred = fgFirstBB; blockPred; blockPred = blockPred->bbNext)
1385 if (blockPred == pred->flBlock)
1390 noway_assert(blockPred);
1391 switch (blockPred->bbJumpKind)
1394 if (blockPred->bbJumpDest == block)
1400 noway_assert(blockPred->bbNext == block);
1402 case BBJ_EHFILTERRET:
1404 case BBJ_EHCATCHRET:
1405 noway_assert(blockPred->bbJumpDest == block);
1416 /*****************************************************************************
1417 * Find the natural loops, using dominators. Note that the test for
1418 * a loop is slightly different from the standard one, because we have
1419 * not done a depth first reordering of the basic blocks.
1422 void Compiler::optFindNaturalLoops()
// Finds natural loops using dominator information and records them in
// optLoopTable, then computes the loop parent/child/sibling nesting,
// labels each block with its innermost loop, and canonicalizes loops so
// each has a unique "top" block.
// NOTE(review): this extract elides many lines (braces, case labels,
// some statements), so the control structure is only partially visible.
1427         printf("*************** In optFindNaturalLoops()\n");
1433     flowList* predEntry;
1435     noway_assert(fgDomsComputed);
1439     hasMethodLoops         = false;
1440     loopsThisMethod        = 0;
1441     loopOverflowThisMethod = false;
1444     /* We will use the following terminology:
1445      * HEAD    - the basic block that flows into the loop ENTRY block (Currently MUST be lexically before entry).
1446                  Not part of the looping of the loop.
1447      * FIRST   - the lexically first basic block (in bbNext order) within this loop.  (May be part of a nested loop,
1448      *           but not the outer loop. ???)
1449      * TOP     - the target of the backward edge from BOTTOM. In most cases FIRST and TOP are the same.
1450      * BOTTOM  - the lexically last block in the loop (i.e. the block from which we jump to the top)
1451      * EXIT    - the loop exit or the block right after the bottom
1452      * ENTRY   - the entry in the loop (not necessarily the TOP), but there must be only one entry
1454      * We (currently) require the body of a loop to be a contiguous (in bbNext order) sequence of basic blocks.
1488     unsigned char exitCount;
// Walk every block as a candidate loop HEAD; the block after it is the
// candidate TOP (the head->bbNext relationship is elided in this extract).
1490     for (head = fgFirstBB; head->bbNext; head = head->bbNext)
1496         // Blocks that are rarely run have a zero bbWeight and should
1497         // never be optimized here
1499         if (top->bbWeight == BB_ZERO_WEIGHT)
// Look at every predecessor of TOP for a backward edge (candidate BOTTOM).
1504         for (pred = top->bbPreds; pred; pred = pred->flNext)
1506             /* Is this a loop candidate? - We look for "back edges", i.e. an edge from BOTTOM
1507              * to TOP (note that this is an abuse of notation since this is not necessarily a back edge
1508              * as the definition says, but merely an indication that we have a loop there).
1509              * Thus, we have to be very careful and after entry discovery check that it is indeed
1510              * the only place we enter the loop (especially for non-reducible flow graphs).
1513             bottom = pred->flBlock;
1516             if (top->bbNum <= bottom->bbNum) // is this a backward edge? (from BOTTOM to TOP)
1518                 if ((bottom->bbJumpKind == BBJ_EHFINALLYRET) || (bottom->bbJumpKind == BBJ_EHFILTERRET) ||
1519                     (bottom->bbJumpKind == BBJ_EHCATCHRET) || (bottom->bbJumpKind == BBJ_CALLFINALLY) ||
1520                     (bottom->bbJumpKind == BBJ_SWITCH))
1522                     /* BBJ_EHFINALLYRET, BBJ_EHFILTERRET, BBJ_EHCATCHRET, and BBJ_CALLFINALLY can never form a loop.
1523                      * BBJ_SWITCH that has a backward jump appears only for labeled break. */
1527                 BasicBlock* loopBlock;
1529                 /* The presence of a "back edge" is an indication that a loop might be present here
1532                  * 1. A collection of STRONGLY CONNECTED nodes i.e. there is a path from any
1533                  *    node in the loop to any other node in the loop (wholly within the loop)
1534                  * 2. The loop has a unique ENTRY, i.e. there is only one way to reach a node
1535                  *    in the loop from outside the loop, and that is through the ENTRY
1538                 /* Let's find the loop ENTRY */
1540                 if (head->bbJumpKind == BBJ_ALWAYS)
1542                     if (head->bbJumpDest->bbNum <= bottom->bbNum && head->bbJumpDest->bbNum >= top->bbNum)
1544                         /* OK - we enter somewhere within the loop */
1545                         entry = head->bbJumpDest;
1547                         /* some useful asserts
1548                          * Cannot enter at the top - should have been caught by redundant jumps */
1550                         assert((entry != top) || (head->bbFlags & BBF_KEEP_BBJ_ALWAYS));
1554                         /* special case - don't consider now */
1555                         // assert (!"Loop entered in weird way!");
1559                 // Can we fall through into the loop?
1560                 else if (head->bbJumpKind == BBJ_NONE || head->bbJumpKind == BBJ_COND)
1562                     /* The ENTRY is at the TOP (a do-while loop) */
1567                     goto NO_LOOP; // head does not flow into the loop bail for now
1570                 // Now we find the "first" block -- the earliest block reachable within the loop.
1571                 // This is usually the same as "top", but can differ in rare cases where "top" is
1572                 // the entry block of a nested loop, and that nested loop branches backwards to a
1573                 // a block before "top". We find this by searching for such backwards branches
1574                 // in the loop known so far.
1575                 BasicBlock* first          = top;
1576                 BasicBlock* newFirst;
1577                 bool        blocksToSearch = true;
1578                 BasicBlock* validatedAfter = bottom->bbNext;
// Iterate to a fixed point: each pass may discover an even earlier "first".
1579                 while (blocksToSearch)
1581                     blocksToSearch = false;
1583                     blocksToSearch = false;
1584                     for (loopBlock = first; loopBlock != validatedAfter; loopBlock = loopBlock->bbNext)
1586                         unsigned nSucc = loopBlock->NumSucc();
1587                         for (unsigned j = 0; j < nSucc; j++)
1589                             BasicBlock* succ = loopBlock->GetSucc(j);
// Track the lexically earliest successor that precedes the current "first".
1590                             if ((newFirst == nullptr && succ->bbNum < first->bbNum) ||
1591                                 (newFirst != nullptr && succ->bbNum < newFirst->bbNum))
1597                     if (newFirst != nullptr)
1599                         validatedAfter = first;
1601                         blocksToSearch = true;
1605                 // Is "head" still before "first"? If not, we don't have a valid loop...
1606                 if (head->bbNum >= first->bbNum)
1609                             "Extending loop [BB%02u..BB%02u] 'first' to BB%02u captures head BB%02u.  Rejecting loop.\n",
1610                             top->bbNum, bottom->bbNum, first->bbNum, head->bbNum);
1614                 /* Make sure ENTRY dominates all blocks in the loop
1615                  * This is necessary to ensure condition 2. above
1616                  * At the same time check if the loop has a single exit
1617                  * point - those loops are easier to optimize */
1619                 for (loopBlock = top; loopBlock != bottom->bbNext; loopBlock = loopBlock->bbNext)
1621                     if (!fgDominate(entry, loopBlock))
1626                     if (loopBlock == bottom)
1628                         if (bottom->bbJumpKind != BBJ_ALWAYS)
1630                             /* there is an exit at the bottom */
// A BBJ_ALWAYS bottom must be the back edge itself.
1632                             noway_assert(bottom->bbJumpDest == top);
1639                     BasicBlock* exitPoint;
// Classify each block's jump to count/locate loop exits.
1641                     switch (loopBlock->bbJumpKind)
1644                         case BBJ_CALLFINALLY:
1646                         case BBJ_EHCATCHRET:
1647                             assert(loopBlock->bbJumpDest);
1648                             exitPoint = loopBlock->bbJumpDest;
1650                             if (exitPoint->bbNum < top->bbNum || exitPoint->bbNum > bottom->bbNum)
1652                                 /* exit from a block other than BOTTOM */
1661                         case BBJ_EHFINALLYRET:
1662                         case BBJ_EHFILTERRET:
1663                             /* The "try" associated with this "finally" must be in the
1664                              * same loop, so the finally block will return control inside the loop */
1669                             /* those are exits from the loop */
1677                             jumpCnt = loopBlock->bbJumpSwt->bbsCount;
1678                             BasicBlock** jumpTab;
1679                             jumpTab = loopBlock->bbJumpSwt->bbsDstTab;
1683                                 noway_assert(*jumpTab);
1684                                 exitPoint = *jumpTab;
1686                                 if (exitPoint->bbNum < top->bbNum || exitPoint->bbNum > bottom->bbNum)
1691                             } while (++jumpTab, --jumpCnt);
1695                             noway_assert(!"Unexpected bbJumpKind");
1700                 /* Make sure we can iterate the loop (i.e. there is a way back to ENTRY)
1701                  * This is to ensure condition 1. above which prevents marking fake loops
1703                  * Below is an example:
1711                  * The example above is not a loop since we bail after the first iteration
1713                  * The condition we have to check for is
1714                  * 1. ENTRY must have at least one predecessor inside the loop. Since we know that that block is
1715                  *    reachable, it can only be reached through ENTRY, therefore we have a way back to ENTRY
1717                  * 2. If we have a GOTO (BBJ_ALWAYS) outside of the loop and that block dominates the
1718                  *    loop bottom then we cannot iterate
1720                  * NOTE that this doesn't entirely satisfy condition 1. since "break" statements are not
1721                  *      part of the loop nodes (as per definition they are loop exits executed only once),
1722                  *      but we have no choice but to include them because we consider all blocks within TOP-BOTTOM */
1724                 for (loopBlock = top; loopBlock != bottom; loopBlock = loopBlock->bbNext)
1726                     switch (loopBlock->bbJumpKind)
1731                             if (fgDominate(loopBlock, bottom))
1740                 bool canIterateLoop = false;
1742                 for (predEntry = entry->bbPreds; predEntry; predEntry = predEntry->flNext)
1744                     if (predEntry->flBlock->bbNum >= top->bbNum && predEntry->flBlock->bbNum <= bottom->bbNum)
1746                         canIterateLoop = true;
1749                     else if (predEntry->flBlock != head)
1751                         // The entry block has multiple predecessors outside the loop; the 'head'
1752                         // block isn't the only one. We only support a single 'head', so bail.
1757                 if (!canIterateLoop)
1762                 /* Double check - make sure that all loop blocks except ENTRY
1763                  * have no predecessors outside the loop - this ensures only one loop entry and prevents
1764                  * us from considering non-loops due to incorrectly assuming that we had a back edge
1767                  * Loops of the form "while (a || b)" will be treated as 2 nested loops (with the same header)
1770                 for (loopBlock = top; loopBlock != bottom->bbNext; loopBlock = loopBlock->bbNext)
1772                     if (loopBlock == entry)
1777                     for (predTop = loopBlock->bbPreds; predTop != nullptr; predTop = predTop->flNext)
1779                         if (predTop->flBlock->bbNum < top->bbNum || predTop->flBlock->bbNum > bottom->bbNum)
1781                             // noway_assert(!"Found loop with multiple entries");
1787                 // Disqualify loops where the first block of the loop is less nested in EH than
1788                 // the bottom block. That is, we don't want to handle loops where the back edge
1789                 // goes from within an EH region to a first block that is outside that same EH
1790                 // region. Note that we *do* handle loops where the first block is the *first*
1791                 // block of a more nested EH region (since it is legal to branch to the first
1792                 // block of an immediately more nested EH region). So, for example, disqualify
1799                 // BB10 BBJ_COND => BB02
1803                 // Here, BB10 is more nested than BB02.
1805                 if (bottom->hasTryIndex() && !bbInTryRegions(bottom->getTryIndex(), first))
1807                     JITDUMP("Loop 'first' BB%02u is in an outer EH region compared to loop 'bottom' BB%02u. Rejecting "
1809                             first->bbNum, bottom->bbNum);
1813 #if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
1814                 // Disqualify loops where the first block of the loop is a finally target.
1815                 // The main problem is when multiple loops share a 'first' block that is a finally
1816                 // target and we canonicalize the loops by adding a new loop head. In that case, we
1817                 // need to update the blocks so the finally target bit is moved to the newly created
1818                 // block, and removed from the old 'first' block. This is 'hard', so at this point
1819                 // in the RyuJIT codebase (when we don't expect to keep the "old" ARM32 code generator
1820                 // long-term), it's easier to disallow the loop than to update the flow graph to
1821                 // support this case.
1823                 if ((first->bbFlags & BBF_FINALLY_TARGET) != 0)
1825                     JITDUMP("Loop 'first' BB%02u is a finally target. Rejecting loop.\n", first->bbNum);
1828 #endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
1830                 /* At this point we have a loop - record it in the loop table
1831                  * If we found only one exit, record it in the table too
1832                  * (otherwise an exit = 0 in the loop table means multiple exits) */
1839                 optRecordLoop(head, first, top, entry, bottom, exit, exitCount);
1842                 if (!hasMethodLoops)
1844                     /* mark the method as containing natural loops */
1846                     hasMethodLoops = true;
1849                 /* increment total number of loops found */
1853                 /* keep track of the number of exits */
1854                 loopExitCountTable.record(static_cast<unsigned>(exitCount));
1855 #endif // COUNT_LOOPS
1858             /* current predecessor not good for a loop - continue with another one, if any */
// COUNT_LOOPS statistics (the #if guard is elided in this extract).
1864     loopCountTable.record(loopsThisMethod);
1865     if (maxLoopsPerMethod < loopsThisMethod)
1867         maxLoopsPerMethod = loopsThisMethod;
1869     if (loopOverflowThisMethod)
1871         totalLoopOverflows++;
1873 #endif // COUNT_LOOPS
1875     // Now the loop indices are stable.  We can figure out parent/child relationships
1876     // (using table indices to name loops), and label blocks.
1877     for (unsigned char loopInd = 1; loopInd < optLoopCount; loopInd++)
// Search backwards through earlier (outer-first) table entries for the
// nearest enclosing loop; that entry becomes this loop's parent.
1879         for (unsigned char possibleParent = loopInd; possibleParent > 0;)
1882             if (optLoopTable[possibleParent].lpContains(optLoopTable[loopInd]))
1884                 optLoopTable[loopInd].lpParent  = possibleParent;
1885                 optLoopTable[loopInd].lpSibling = optLoopTable[possibleParent].lpChild;
1886                 optLoopTable[possibleParent].lpChild = loopInd;
1892     // Now label the blocks with the innermost loop to which they belong. Since parents
1893     // precede children in the table, doing the labeling for each loop in order will achieve
1894     // this -- the innermost loop labeling will be done last.
1895     for (unsigned char loopInd = 0; loopInd < optLoopCount; loopInd++)
1897         BasicBlock* first  = optLoopTable[loopInd].lpFirst;
1898         BasicBlock* bottom = optLoopTable[loopInd].lpBottom;
1899         for (BasicBlock* blk = first; blk != nullptr; blk = blk->bbNext)
1901             blk->bbNatLoopNum = loopInd;
1906             assert(blk->bbNext != nullptr); // We should never reach nullptr.
1910     // Make sure that loops are canonical: that every loop has a unique "top", by creating an empty "nop"
1911     // one, if necessary, for loops containing others that share a "top."
1913     for (unsigned char loopInd = 0; loopInd < optLoopCount; loopInd++)
1915         // Traverse the outermost loops as entries into the loop nest; so skip non-outermost.
1916         if (optLoopTable[loopInd].lpParent != BasicBlock::NOT_IN_LOOP)
1922         if (optCanonicalizeLoopNest(loopInd))
// Canonicalization may have added blocks; recompute flow-graph annotations.
1929         fgUpdateChangedFlowGraph();
1933     if (verbose && optLoopCount > 0)
1935         printf("\nFinal natural loop table:\n");
1936         for (unsigned loopInd = 0; loopInd < optLoopCount; loopInd++)
1938             optPrintLoopInfo(loopInd);
1945 void Compiler::optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap)
// Rewrites blk's jump target(s): any destination found as a key in
// redirectMap is replaced by the mapped block. Note (per comments in
// optCanonicalizeLoop) this does NOT update predecessor lists.
1947     BasicBlock* newJumpDest = nullptr;
1948     switch (blk->bbJumpKind)
1953         case BBJ_EHFILTERRET:
1954         case BBJ_EHFINALLYRET:
1955         case BBJ_EHCATCHRET:
1956             // These have no jump destination to update.
1961         case BBJ_CALLFINALLY:
1963             // All of these have a single jump destination to update.
1964             if (redirectMap->Lookup(blk->bbJumpDest, &newJumpDest))
1966                 blk->bbJumpDest = newJumpDest;
// BBJ_SWITCH: redirect each entry of the jump table individually.
1972             bool redirected = false;
1973             for (unsigned i = 0; i < blk->bbJumpSwt->bbsCount; i++)
1975                 if (redirectMap->Lookup(blk->bbJumpSwt->bbsDstTab[i], &newJumpDest))
1977                     blk->bbJumpSwt->bbsDstTab[i] = newJumpDest;
1981             // If any redirections happened, invalidate the switch table map for the switch.
1984                 GetSwitchDescMap()->Remove(blk);
1994 // TODO-Cleanup: This should be a static member of the BasicBlock class.
1995 void Compiler::optCopyBlkDest(BasicBlock* from, BasicBlock* to)
// Copies the jump destination(s) of "from" into "to". Both blocks must
// already have the same jump kind. For switches, a fresh BBswtDesc and
// destination table are allocated (deep copy) rather than sharing them.
1997     assert(from->bbJumpKind == to->bbJumpKind); // Precondition.
1999     // copy the jump destination(s) from "from" to "to".
2000     switch (to->bbJumpKind)
2004         case BBJ_CALLFINALLY:
2006             // All of these have a single jump destination to update.
2007             to->bbJumpDest = from->bbJumpDest;
// BBJ_SWITCH case: deep-copy the descriptor and jump table.
2012             to->bbJumpSwt            = new (this, CMK_BasicBlock) BBswtDesc();
2013             to->bbJumpSwt->bbsCount  = from->bbJumpSwt->bbsCount;
2014             to->bbJumpSwt->bbsDstTab = new (this, CMK_BasicBlock) BasicBlock*[from->bbJumpSwt->bbsCount];
2016             for (unsigned i = 0; i < from->bbJumpSwt->bbsCount; i++)
2018                 to->bbJumpSwt->bbsDstTab[i] = from->bbJumpSwt->bbsDstTab[i];
2028 // Canonicalize the loop nest rooted at parent loop 'loopInd'.
2029 // Returns 'true' if the flow graph is modified.
2030 bool Compiler::optCanonicalizeLoopNest(unsigned char loopInd)
2032     bool modified = false;
2034     // Is the top of the current loop not in any nested loop?
2035     if (optLoopTable[loopInd].lpTop->bbNatLoopNum != loopInd)
// The current loop shares its top with a nested loop: give it its own.
2037         if (optCanonicalizeLoop(loopInd))
// Recurse into each child loop (siblings chained via lpSibling).
2043     for (unsigned char child = optLoopTable[loopInd].lpChild; child != BasicBlock::NOT_IN_LOOP;
2044          child = optLoopTable[child].lpSibling)
2046         if (optCanonicalizeLoopNest(child))
2055 bool Compiler::optCanonicalizeLoop(unsigned char loopInd)
// Gives loop 'loopInd' a unique "top" block when its current top is shared
// with a nested loop: inserts a new empty block (in the correct EH region)
// before the old top, redirects the loop's bottom and the non-loop
// predecessors of the old top to it, and updates the loop table.
// Returns true if the flow graph was modified (the early-out path when the
// top is already unique is elided in this extract).
2057     // Is the top uniquely part of the current loop?
2058     BasicBlock* t = optLoopTable[loopInd].lpTop;
2060     if (t->bbNatLoopNum == loopInd)
2065     JITDUMP("in optCanonicalizeLoop: L%02u has top BB%02u (bottom BB%02u) with natural loop number L%02u: need to "
2067             loopInd, t->bbNum, optLoopTable[loopInd].lpBottom->bbNum, t->bbNatLoopNum);
2069     // Otherwise, the top of this loop is also part of a nested loop.
2071     // Insert a new unique top for this loop. We must be careful to put this new
2072     // block in the correct EH region. Note that f->bbPrev might be in a different
2073     // EH region. For example:
2081     // In this case, first->bbPrev is BB07, which is in a different 'try' region.
2082     // On the other hand, the first block of multiple loops might be the first
2083     // block of a 'try' region that is completely contained in the multiple loops.
2088     // BB10 BBJ_ALWAYS => BB08
2090     // BB12 BBJ_ALWAYS => BB08
2092     // Here, we have two loops, both with BB08 as the "first" block. Block BB08
2093     // is a single-block "try" region. Neither loop "bottom" block is in the same
2094     // "try" region as BB08. This is legal because you can jump to the first block
2095     // of a try region. With EH normalization, no two "try" regions will share
2096     // this block. In this case, we need to insert a new block for the outer loop
2097     // in the same EH region as the branch from the "bottom":
2102     // BB10 BBJ_ALWAYS => BB08
2104     // BB12 BBJ_ALWAYS => BB30
2106     // Another possibility is that the "first" block of the loop nest can be the first block
2107     // of a "try" region that also has other predecessors than those in the loop, or even in
2108     // the "try" region (since blocks can target the first block of a "try" region). For example:
2112     // BB10 BBJ_ALWAYS => BB08
2114     // BB12 BBJ_ALWAYS => BB08
2117     // BB20 BBJ_ALWAYS => BB08
2119     // BB25 BBJ_ALWAYS => BB08
2121     // Here, BB08 has 4 flow graph predecessors: BB10, BB12, BB20, BB25. These are all potential loop
2122     // bottoms, for four possible nested loops. However, we require all the loop bottoms to be in the
2123     // same EH region. For loops BB08..BB10 and BB08..BB12, we need to add a new "top" block within
2124     // the try region, immediately before BB08. The bottom of the loop BB08..BB10 loop will target the
2125     // old BB08, and the bottom of the BB08..BB12 loop will target the new loop header. The other branches
2126     // (BB20, BB25) must target the new loop header, both for correctness, and to avoid the illegal
2127     // situation of branching to a non-first block of a 'try' region.
2129     // We can also have a loop nest where the "first" block is outside of a "try" region
2130     // and the back edges are inside a "try" region, for example:
2134     // BB09 try { BBJ_COND => BB02
2136     // BB15 BBJ_COND => BB02
2138     // BB21 } // end of "try"
2140     // In this case, both loop back edges were formed by "leave" instructions that were
2141     // imported into branches that were later made conditional. In this case, we don't
2142     // want to copy the EH region of the back edge, since that would create a block
2143     // outside of and disjoint with the "try" region of the back edge. However, to
2144     // simplify things, we disqualify this type of loop, so we should never see this here.
2146     BasicBlock* h = optLoopTable[loopInd].lpHead;
2147     BasicBlock* f = optLoopTable[loopInd].lpFirst;
2148     BasicBlock* b = optLoopTable[loopInd].lpBottom;
2150     // The loop must be entirely contained within a single handler region.
2151     assert(BasicBlock::sameHndRegion(f, b));
2153     // If the bottom block is in the same "try" region, then we extend the EH
2154     // region. Otherwise, we add the new block outside the "try" region.
2155     bool        extendRegion = BasicBlock::sameTryRegion(f, b);
2156     BasicBlock* newT         = fgNewBBbefore(BBJ_NONE, f, extendRegion);
2159         // We need to set the EH region manually. Set it to be the same
2160         // as the bottom block.
2161         newT->copyEHRegion(b);
// The new top is reachable from exactly the same blocks as the old top.
2164     BlockSetOps::Assign(this, newT->bbReach, t->bbReach);
2166     // Redirect the "bottom" of the current loop to "newT".
2167     BlockToBlockMap* blockMap = new (getAllocatorLoopHoist()) BlockToBlockMap(getAllocatorLoopHoist());
2168     blockMap->Set(t, newT);
2169     optRedirectBlock(b, blockMap);
2171     // Redirect non-loop preds of "t" to also go to "newT". Inner loops that also branch to "t" should continue
2172     // to do so. However, there may be other predecessors from outside the loop nest that need to be updated
2173     // to point to "newT". This normally wouldn't happen, since they too would be part of the loop nest. However,
2174     // they might have been prevented from participating in the loop nest due to different EH nesting, or some
2177     // Note that optRedirectBlock doesn't update the predecessors list. So, if the same 't' block is processed
2178     // multiple times while canonicalizing multiple loop nests, we'll attempt to redirect a predecessor multiple times.
2179     // This is ok, because after the first redirection, the topPredBlock branch target will no longer match the source
2180     // edge of the blockMap, so nothing will happen.
2181     for (flowList* topPred = t->bbPreds; topPred != nullptr; topPred = topPred->flNext)
2183         BasicBlock* topPredBlock = topPred->flBlock;
2185         // Skip if topPredBlock is in the loop.
2186         // Note that this uses block number to detect membership in the loop. We are adding blocks during
2187         // canonicalization, and those block numbers will be new, and larger than previous blocks. However, we work
2188         // outside-in, so we shouldn't encounter the new blocks at the loop boundaries, or in the predecessor lists.
2189         if (t->bbNum <= topPredBlock->bbNum && topPredBlock->bbNum <= b->bbNum)
2191             JITDUMP("in optCanonicalizeLoop: 'top' predecessor BB%02u is in the range of L%02u (BB%02u..BB%02u); not "
2192                     "redirecting its bottom edge\n",
2193                     topPredBlock->bbNum, loopInd, t->bbNum, b->bbNum);
2197         JITDUMP("in optCanonicalizeLoop: redirect top predecessor BB%02u to BB%02u\n", topPredBlock->bbNum,
2199         optRedirectBlock(topPredBlock, blockMap);
2202     assert(newT->bbNext == f);
// If the new top isn't a direct fall-through to the old top, it becomes
// an explicit jump holding a NOP statement (block may not be empty).
2205         newT->bbJumpKind = BBJ_ALWAYS;
2206         newT->bbJumpDest = t;
2207         newT->bbTreeList = nullptr;
2208         fgInsertStmtAtEnd(newT, fgNewStmtFromTree(gtNewOperNode(GT_NOP, TYP_VOID, nullptr)));
2211     // If it had been a do-while loop (top == entry), update entry, as well.
2212     BasicBlock* origE = optLoopTable[loopInd].lpEntry;
2213     if (optLoopTable[loopInd].lpTop == origE)
2215         optLoopTable[loopInd].lpEntry = newT;
2217     optLoopTable[loopInd].lpTop   = newT;
2218     optLoopTable[loopInd].lpFirst = newT;
2220     newT->bbNatLoopNum = loopInd;
2222     JITDUMP("in optCanonicalizeLoop: made new block BB%02u [%p] the new unique top of loop %d.\n", newT->bbNum,
2223             dspPtr(newT), loopInd);
2225     // Make sure the head block still goes to the entry...
2226     if (h->bbJumpKind == BBJ_NONE && h->bbNext != optLoopTable[loopInd].lpEntry)
2228         h->bbJumpKind = BBJ_ALWAYS;
2229         h->bbJumpDest = optLoopTable[loopInd].lpEntry;
2231     else if (h->bbJumpKind == BBJ_COND && h->bbNext == newT && newT != optLoopTable[loopInd].lpEntry)
// The head conditionally falls into the new top but the entry moved:
// insert an intermediate always-jump block as the new head.
2233         BasicBlock* h2               = fgNewBBafter(BBJ_ALWAYS, h, /*extendRegion*/ true);
2234         optLoopTable[loopInd].lpHead = h2;
2235         h2->bbJumpDest               = optLoopTable[loopInd].lpEntry;
2236         h2->bbTreeList               = nullptr;
2237         fgInsertStmtAtEnd(h2, fgNewStmtFromTree(gtNewOperNode(GT_NOP, TYP_VOID, nullptr)));
2240     // If any loops nested in "loopInd" have the same head and entry as "loopInd",
2241     // it must be the case that they were do-while's (since "h" fell through to the entry).
2242     // The new node "newT" becomes the head of such loops.
2243     for (unsigned char childLoop = optLoopTable[loopInd].lpChild; childLoop != BasicBlock::NOT_IN_LOOP;
2244          childLoop = optLoopTable[childLoop].lpSibling)
2246         if (optLoopTable[childLoop].lpEntry == origE && optLoopTable[childLoop].lpHead == h &&
2247             newT->bbJumpKind == BBJ_NONE && newT->bbNext == origE)
2249             optUpdateLoopHead(childLoop, h, newT);
2255 bool Compiler::optLoopContains(unsigned l1, unsigned l2)
// Returns true if loop l2 is (transitively) nested within loop l1, by
// walking l2's parent chain until it reaches l1 or runs out of parents.
2257     assert(l1 != BasicBlock::NOT_IN_LOOP);
// l2 == l1 case handled above (elided); no parent left means not contained.
2262     else if (l2 == BasicBlock::NOT_IN_LOOP)
2268         return optLoopContains(l1, optLoopTable[l2].lpParent);
2272 void Compiler::optUpdateLoopHead(unsigned loopInd, BasicBlock* from, BasicBlock* to)
// Replaces the head block of loop 'loopInd' ('from' -> 'to'), and
// recursively does the same for any child loops that shared that head.
2274     assert(optLoopTable[loopInd].lpHead == from);
2275     optLoopTable[loopInd].lpHead = to;
2276     for (unsigned char childLoop = optLoopTable[loopInd].lpChild; childLoop != BasicBlock::NOT_IN_LOOP;
2277          childLoop = optLoopTable[childLoop].lpSibling)
2279         if (optLoopTable[childLoop].lpHead == from)
2281             optUpdateLoopHead(childLoop, from, to);
2286 /*****************************************************************************
2287  *  Returns true if the "i += const" iteration will cause an overflow
2288  *  exception for the small types (i.e. iterAtExit exceeds the type's max).
2290 bool jitIterSmallOverflow(int iterAtExit, var_types incrType)
// switch (incrType): pick the maximum value of the iterator's type.
// (The switch header and case labels are elided in this extract.)
2297             type_MAX = SCHAR_MAX;
2300             type_MAX = UCHAR_MAX;
2303             type_MAX = SHRT_MAX;
2306             type_MAX = USHRT_MAX;
2309         case TYP_UINT: // Detected by checking for 32bit ....
2311             return false; // ... overflow same as done for TYP_INT
// Overflow iff the exit value exceeds the small type's maximum.
2317     if (iterAtExit > type_MAX)
2327 /*****************************************************************************
2328  *  If the "i -= const" will cause an underflow exception for the small types
2331 bool jitIterSmallUnderflow(int iterAtExit, var_types decrType)
// switch (decrType): pick the minimum value of the iterator's type.
// (The switch header and case labels are elided in this extract.)
2338             type_MIN = SCHAR_MIN;
2341             type_MIN = SHRT_MIN;
2350         case TYP_UINT: // Detected by checking for 32bit ....
2352             return false; // ... underflow same as done for TYP_INT
// Underflow iff the exit value falls below the small type's minimum.
2358     if (iterAtExit < type_MIN)
2368 /*****************************************************************************
2370  *  Helper for unroll loops - Computes the number of repetitions
2371  *  in a constant loop. If it cannot prove the number is constant returns false
2374 bool Compiler::optComputeLoopRep(int constInit,
2377                                  genTreeOps iterOper,
2378                                  var_types  iterOperType,
2379                                  genTreeOps testOper,
2382                                  unsigned*  iterCount)
// Computes the trip count of "for (i = constInit; i <testOper> constLimit;
// i <iterOper>= iterInc)" into *iterCount. All arithmetic is done in 64-bit
// (__int64) so that 32-bit wrap-around can be detected instead of silently
// miscounted. NOTE(review): several parameters (constLimit, iterInc, unsTest,
// dupCond) and many case labels are elided in this extract.
2384     noway_assert(genActualType(iterOperType) == TYP_INT);
2387     __int64 constLimitX;
// Widen the limit to 64 bits, zero- or sign-extending per signedness of the
// test (the unsTest branch structure is elided here).
2392     // Using this, we can just do a signed comparison with other 32 bit values.
2395         constLimitX = (unsigned int)constLimit;
2399         constLimitX = (signed int)constLimit;
2402     switch (iterOperType)
2404 // For small types, the iteration operator will narrow these values if big
2406 #define INIT_ITER_BY_TYPE(type)                                                                                        \
2407     constInitX = (type)constInit;                                                                                      \
2408     iterInc    = (type)iterInc;
2411             INIT_ITER_BY_TYPE(signed char);
2414             INIT_ITER_BY_TYPE(unsigned char);
2417             INIT_ITER_BY_TYPE(signed short);
2420             INIT_ITER_BY_TYPE(unsigned short);
2423         // For the big types, 32 bit arithmetic is performed
2429             constInitX = (unsigned int)constInit;
2433             constInitX = (signed int)constInit;
2438             noway_assert(!"Bad type");
2442     /* If iterInc is zero we have an infinite loop */
2448     /* Set iterSign to +1 for positive iterInc and -1 for negative iterInc */
2449     iterSign = (iterInc > 0) ? +1 : -1;
2451     /* Initialize loopCount to zero */
2454     // If dupCond is true then the loop head contains a test which skips
2455     // this loop, if the constInit does not pass the loop test
2456     // Such a loop can execute zero times.
2457     // If dupCond is false then we have a true do-while loop which we
2458     // always execute the loop once before performing the loop test
2462         constInitX += iterInc;
2465     // bail if count is based on wrap-around math
2468         if (constLimitX < constInitX)
2473         else if (constLimitX > constInitX)
2478     /* Compute the number of repetitions */
// switch (testOper): each comparison operator below gets its own trip-count
// formula. The case labels (GT_EQ/GT_NE/GT_LT/GT_LE/GT_GT/GT_GE) are elided
// in this extract; only the bodies remain.
2482             __int64 iterAtExitX;
2485             /* something like "for (i=init; i == lim; i++)" doesn't make any sense */
2489             /* "for (i=init; i != lim; i+=const)" - this is tricky since it may
2490              *  have a constant number of iterations or loop forever -
2491              *  we have to compute (lim-init) mod iterInc to see if it is zero.
2492              *  If mod iterInc is not zero then the limit test will miss and a wrap will occur
2493              *  which is probably not what the end user wanted, but it is legal.
2498                 /* Stepping by one, i.e. Mod with 1 is always zero */
2501                 if (((constLimitX - constInitX) % iterInc) != 0)
2509                 noway_assert(iterInc < 0);
2510                 /* Stepping by -1, i.e. Mod with 1 is always zero */
2513                 if (((constInitX - constLimitX) % (-iterInc)) != 0)
// GT_NE trip count: exact division, since the mod check above guaranteed it.
2529                 if (constInitX != constLimitX)
2531                     loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1;
2534                 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2538                     iterAtExitX = (unsigned)iterAtExitX;
2541                 // Check if iteration incr will cause overflow for small types
2542                 if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
2547                 // iterator with 32bit overflow. Bad for TYP_(U)INT
2548                 if (iterAtExitX < constLimitX)
2553                 *iterCount = loopCount;
2569                 noway_assert(!"Unknown operator for loop iterator");
// "<" limit test.
2583                 if (constInitX < constLimitX)
2585                     loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1;
2588                 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2592                     iterAtExitX = (unsigned)iterAtExitX;
2595                 // Check if iteration incr will cause overflow for small types
2596                 if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
2601                 // iterator with 32bit overflow. Bad for TYP_(U)INT
2602                 if (iterAtExitX < constLimitX)
2607                 *iterCount = loopCount;
2623                 noway_assert(!"Unknown operator for loop iterator");
// "<=" limit test.
2637                 if (constInitX <= constLimitX)
2639                     loopCount += (unsigned)((constLimitX - constInitX) / iterInc) + 1;
2642                 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2646                     iterAtExitX = (unsigned)iterAtExitX;
2649                 // Check if iteration incr will cause overflow for small types
2650                 if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
2655                 // iterator with 32bit overflow. Bad for TYP_(U)INT
2656                 if (iterAtExitX <= constLimitX)
2661                 *iterCount = loopCount;
2677                 noway_assert(!"Unknown operator for loop iterator");
// ">" limit test (decrementing loop).
2691                 if (constInitX > constLimitX)
2693                     loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1;
2696                 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2700                     iterAtExitX = (unsigned)iterAtExitX;
2703                 // Check if small types will underflow
2704                 if (jitIterSmallUnderflow((int)iterAtExitX, iterOperType))
2709                 // iterator with 32bit underflow. Bad for TYP_INT and unsigneds
2710                 if (iterAtExitX > constLimitX)
2715                 *iterCount = loopCount;
2731                 noway_assert(!"Unknown operator for loop iterator");
// ">=" limit test (decrementing loop).
2745                 if (constInitX >= constLimitX)
2747                     loopCount += (unsigned)((constLimitX - constInitX) / iterInc) + 1;
2750                 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2754                     iterAtExitX = (unsigned)iterAtExitX;
2757                 // Check if small types will underflow
2758                 if (jitIterSmallUnderflow((int)iterAtExitX, iterOperType))
2763                 // iterator with 32bit underflow. Bad for TYP_INT and unsigneds
2764                 if (iterAtExitX >= constLimitX)
2769                 *iterCount = loopCount;
2785                 noway_assert(!"Unknown operator for loop iterator");
2790             noway_assert(!"Unknown operator for loop condition");
2796 /*****************************************************************************
2798 * Look for loop unrolling candidates and unroll them
2802 #pragma warning(push)
2803 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
2805 void Compiler::optUnrollLoops()
2807 if (compCodeOpt() == SMALL_CODE)
2812 if (optLoopCount == 0)
2818 if (JitConfig.JitNoUnroll())
2824 if (optCanCloneLoops())
2832 printf("*************** In optUnrollLoops()\n");
2835 /* Look for loop unrolling candidates */
2837 /* Double loop so that after unrolling an inner loop we set change to true
2838 * and we then go back over all of the loop candidates and try to unroll
2839 * the next outer loop, until we don't unroll any loops,
2840 * then change will be false and we are done.
2844 bool change = false;
2846 for (unsigned lnum = 0; lnum < optLoopCount; lnum++)
2860 int lbeg; // initial value for iterator
2861 int llim; // limit value for iterator
2862 unsigned lvar; // iterator lclVar #
2863 int iterInc; // value to increment the iterator
2864 genTreeOps iterOper; // type of iterator increment (i.e. ADD, SUB, etc.)
2865 var_types iterOperType; // type result of the oper (for overflow instrs)
2866 genTreeOps testOper; // type of loop test (i.e. GT_LE, GT_GE, etc.)
2867 bool unsTest; // Is the comparison u/int
2869 unsigned totalIter; // total number of iterations in the constant loop
2870 unsigned loopCostSz; // Cost is size of one iteration
2871 unsigned loopFlags; // actual lpFlags
2872 unsigned requiredFlags; // required lpFlags
2874 GenTree* loopList; // new stmt list of the unrolled loop
2877 static const int ITER_LIMIT[COUNT_OPT_CODE + 1] = {
2884 noway_assert(ITER_LIMIT[SMALL_CODE] == 0);
2885 noway_assert(ITER_LIMIT[COUNT_OPT_CODE] == 0);
2887 unsigned iterLimit = (unsigned)ITER_LIMIT[compCodeOpt()];
2890 if (compStressCompile(STRESS_UNROLL_LOOPS, 50))
2896 static const int UNROLL_LIMIT_SZ[COUNT_OPT_CODE + 1] = {
2903 noway_assert(UNROLL_LIMIT_SZ[SMALL_CODE] == 0);
2904 noway_assert(UNROLL_LIMIT_SZ[COUNT_OPT_CODE] == 0);
2906 int unrollLimitSz = (unsigned)UNROLL_LIMIT_SZ[compCodeOpt()];
2909 if (compStressCompile(STRESS_UNROLL_LOOPS, 50))
2911 unrollLimitSz *= 10;
2915 loopFlags = optLoopTable[lnum].lpFlags;
2916 requiredFlags = LPFLG_DO_WHILE | LPFLG_ONE_EXIT | LPFLG_CONST;
2918 /* Ignore the loop if we don't have a do-while with a single exit
2919 that has a constant number of iterations */
2921 if ((loopFlags & requiredFlags) != requiredFlags)
2926 /* ignore if removed or marked as not unrollable */
2928 if (optLoopTable[lnum].lpFlags & (LPFLG_DONT_UNROLL | LPFLG_REMOVED))
2933 head = optLoopTable[lnum].lpHead;
2935 bottom = optLoopTable[lnum].lpBottom;
2936 noway_assert(bottom);
2938 /* The single exit must be at the bottom of the loop */
2939 noway_assert(optLoopTable[lnum].lpExit);
2940 if (optLoopTable[lnum].lpExit != bottom)
2945 /* Unrolling loops with jumps in them is not worth the headache
2946 * Later we might consider unrolling loops after un-switching */
2951 block = block->bbNext;
2952 noway_assert(block);
2954 if (block->bbJumpKind != BBJ_NONE)
2956 if (block != bottom)
2961 } while (block != bottom);
2963 /* Get the loop data:
2967 - iterator increment
2968 - increment operation type (i.e. ADD, SUB, etc...)
2969 - loop test type (i.e. GT_GE, GT_LT, etc...)
2972 lbeg = optLoopTable[lnum].lpConstInit;
2973 llim = optLoopTable[lnum].lpConstLimit();
2974 testOper = optLoopTable[lnum].lpTestOper();
2976 lvar = optLoopTable[lnum].lpIterVar();
2977 iterInc = optLoopTable[lnum].lpIterConst();
2978 iterOper = optLoopTable[lnum].lpIterOper();
2980 iterOperType = optLoopTable[lnum].lpIterOperType();
2981 unsTest = (optLoopTable[lnum].lpTestTree->gtFlags & GTF_UNSIGNED) != 0;
2983 if (lvaTable[lvar].lvAddrExposed)
2984 { // If the loop iteration variable is address-exposed then bail
2987 if (lvaTable[lvar].lvIsStructField)
2988 { // If the loop iteration variable is a promoted field from a struct then
2993 /* Locate the pre-header and initialization and increment/test statements */
2995 phdr = head->bbTreeList;
2997 loop = bottom->bbTreeList;
3000 init = head->lastStmt();
3001 noway_assert(init && (init->gtNext == nullptr));
3002 test = bottom->lastStmt();
3003 noway_assert(test && (test->gtNext == nullptr));
3004 incr = test->gtPrev;
3007 if (init->gtFlags & GTF_STMT_CMPADD)
3009 /* Must be a duplicated loop condition */
3010 noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
3013 init = init->gtPrev;
3021 /* Find the number of iterations - the function returns false if not a constant number */
3023 if (!optComputeLoopRep(lbeg, llim, iterInc, iterOper, iterOperType, testOper, unsTest, dupCond, &totalIter))
3028 /* Forget it if there are too many repetitions or not a constant loop */
3030 if (totalIter > iterLimit)
3035 noway_assert(init->gtOper == GT_STMT);
3036 init = init->gtStmt.gtStmtExpr;
3037 noway_assert(test->gtOper == GT_STMT);
3038 test = test->gtStmt.gtStmtExpr;
3039 noway_assert(incr->gtOper == GT_STMT);
3040 incr = incr->gtStmt.gtStmtExpr;
3042 // Don't unroll loops we don't understand.
3043 if (incr->gtOper != GT_ASG)
3047 incr = incr->gtOp.gtOp2;
3049 /* Make sure everything looks ok */
3050 if ((init->gtOper != GT_ASG) || (init->gtOp.gtOp1->gtOper != GT_LCL_VAR) ||
3051 (init->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) || (init->gtOp.gtOp2->gtOper != GT_CNS_INT) ||
3052 (init->gtOp.gtOp2->gtIntCon.gtIconVal != lbeg) ||
3054 !((incr->gtOper == GT_ADD) || (incr->gtOper == GT_SUB)) || (incr->gtOp.gtOp1->gtOper != GT_LCL_VAR) ||
3055 (incr->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) || (incr->gtOp.gtOp2->gtOper != GT_CNS_INT) ||
3056 (incr->gtOp.gtOp2->gtIntCon.gtIconVal != iterInc) ||
3058 (test->gtOper != GT_JTRUE))
3060 noway_assert(!"Bad precondition in Compiler::optUnrollLoops()");
3064 /* heuristic - Estimated cost in code size of the unrolled loop */
3072 block = block->bbNext;
3074 /* Visit all the statements in the block */
3076 for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
3078 /* Get the expression and stop if end reached */
3080 GenTreePtr expr = stmt->gtStmtExpr;
3086 /* Calculate gtCostSz */
3087 gtSetStmtInfo(stmt);
3089 /* Update loopCostSz */
3090 loopCostSz += stmt->gtCostSz;
3092 } while (block != bottom);
3094 /* Compute the estimated increase in code size for the unrolled loop */
3096 unsigned int fixedLoopCostSz;
3097 fixedLoopCostSz = 8;
3100 unrollCostSz = (loopCostSz * totalIter) - (loopCostSz + fixedLoopCostSz);
3102 /* Don't unroll if too much code duplication would result. */
3104 if (unrollCostSz > unrollLimitSz)
3106 /* prevent this loop from being revisited */
3107 optLoopTable[lnum].lpFlags |= LPFLG_DONT_UNROLL;
3111 /* Looks like a good idea to unroll this loop, let's do it! */
3112 CLANG_FORMAT_COMMENT_ANCHOR;
3117 printf("\nUnrolling loop BB%02u", head->bbNext->bbNum);
3118 if (head->bbNext->bbNum != bottom->bbNum)
3120 printf("..BB%02u", bottom->bbNum);
3122 printf(" over V%02u from %u to %u", lvar, lbeg, llim);
3123 printf(" unrollCostSz = %d\n", unrollCostSz);
3128 /* Create the unrolled loop statement list */
3130 loopList = loopLast = nullptr;
3132 for (lval = lbeg; totalIter; totalIter--)
3141 block = block->bbNext;
3142 noway_assert(block);
3144 /* Visit all the statements in the block */
3146 for (stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
3148 /* Stop if we've reached the end of the loop */
3150 if (stmt->gtStmtExpr == incr)
3155 /* Clone/substitute the expression */
3157 expr = gtCloneExpr(stmt, 0, lvar, lval);
3159 // cloneExpr doesn't handle everything
3163 optLoopTable[lnum].lpFlags |= LPFLG_DONT_UNROLL;
3167 /* Append the expression to our list */
3171 loopLast->gtNext = expr;
3178 expr->gtPrev = loopLast;
3181 } while (block != bottom);
3183 /* update the new value for the unrolled iterator */
3197 noway_assert(!"Unrolling not implemented for this loop iterator");
3201 noway_assert(!"Unknown operator for constant loop iterator");
3206 /* Finish the linked list */
3210 loopList->gtPrev = loopLast;
3211 loopLast->gtNext = nullptr;
3214 /* Replace the body with the unrolled one */
3220 block = block->bbNext;
3221 noway_assert(block);
3222 block->bbTreeList = nullptr;
3223 block->bbJumpKind = BBJ_NONE;
3224 block->bbFlags &= ~BBF_NEEDS_GCPOLL;
3225 } while (block != bottom);
3227 bottom->bbJumpKind = BBJ_NONE;
3228 bottom->bbTreeList = loopList;
3229 bottom->bbFlags &= ~BBF_NEEDS_GCPOLL;
3230 bottom->modifyBBWeight(bottom->bbWeight / BB_LOOP_WEIGHT);
3234 fgMorphStmts(bottom, &dummy, &dummy, &dummy);
3236 /* Update bbRefs and bbPreds */
3237 /* Here head->bbNext is bottom !!! - Replace it */
3239 fgRemoveRefPred(head->bbNext, bottom);
3241 /* Now change the initialization statement in the HEAD to "lvar = lval;"
3242 * (the last value of the iterator in the loop)
3243 * and drop the jump condition since the unrolled loop will always execute */
3245 init->gtOp.gtOp2->gtIntCon.gtIconVal = lval;
3247 /* if the HEAD is a BBJ_COND drop the condition (and make HEAD a BBJ_NONE block) */
3249 if (head->bbJumpKind == BBJ_COND)
3251 phdr = head->bbTreeList;
3253 test = phdr->gtPrev;
3255 noway_assert(test && (test->gtNext == nullptr));
3256 noway_assert(test->gtOper == GT_STMT);
3257 noway_assert(test->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
3259 init = test->gtPrev;
3260 noway_assert(init && (init->gtNext == test));
3261 noway_assert(init->gtOper == GT_STMT);
3263 init->gtNext = nullptr;
3264 phdr->gtPrev = init;
3265 head->bbJumpKind = BBJ_NONE;
3266 head->bbFlags &= ~BBF_NEEDS_GCPOLL;
3268 /* Update bbRefs and bbPreds */
3270 fgRemoveRefPred(head->bbJumpDest, head);
3274 /* the loop must execute */
3275 noway_assert(head->bbJumpKind == BBJ_NONE);
3281 printf("Whole unrolled loop:\n");
3283 GenTreePtr s = loopList;
3287 noway_assert(s->gtOper == GT_STMT);
3298 /* Remember that something has changed */
3302 /* Make sure to update loop table */
3304 /* Use the LPFLG_REMOVED flag and update the bbLoopMask accordingly
3305 * (also make head and bottom NULL - to hit an assert or GPF) */
3307 optLoopTable[lnum].lpFlags |= LPFLG_REMOVED;
3308 optLoopTable[lnum].lpHead = optLoopTable[lnum].lpBottom = nullptr;
3320 fgDebugCheckBBlist();
3324 #pragma warning(pop)
3327 /*****************************************************************************
3329 * Return true if there is a code path from 'topBB' to 'botBB' that will
3330 * not execute a method call.
3333 bool Compiler::optReachWithoutCall(BasicBlock* topBB, BasicBlock* botBB)
3335 // TODO-Cleanup: Currently BBF_GC_SAFE_POINT is not set for helper calls,
3336 // as some helper calls are neither interruptible nor hijackable.
3337 // When we can determine this, then we can set BBF_GC_SAFE_POINT for
3338 // those helpers too.
3340 noway_assert(topBB->bbNum <= botBB->bbNum);
3342 // We can always check topBB and botBB for any gc safe points and early out
3344 if ((topBB->bbFlags | botBB->bbFlags) & BBF_GC_SAFE_POINT)
3349 // Otherwise we will need to rely upon the dominator sets
3351 if (!fgDomsComputed)
3353 // return a conservative answer of true when we don't have the dominator sets
3357 BasicBlock* curBB = topBB;
3360 noway_assert(curBB);
3362 // If we added a loop pre-header block then we will
3363 // have a bbNum greater than fgLastBB, and we won't have
3364 // any dominator information about this block, so skip it.
3366 if (curBB->bbNum <= fgLastBB->bbNum)
3368 noway_assert(curBB->bbNum <= botBB->bbNum);
3370 // Does this block contain a gc safe point?
3372 if (curBB->bbFlags & BBF_GC_SAFE_POINT)
3374 // Will this block always execute on the way to botBB ?
3376 // Since we are checking every block in [topBB .. botBB] and we are using
3377 // a lexical definition of a loop.
3378 // (all that we know is that is that botBB is a back-edge to topBB)
3379 // Thus while walking blocks in this range we may encounter some blocks
3380 // that are not really part of the loop, and so we need to perform
3381 // some additional checks:
3383 // We will check that the current 'curBB' is reachable from 'topBB'
3384 // and that it dominates the block containing the back-edge 'botBB'
3385 // When both of these are true then we know that the gcsafe point in 'curBB'
3386 // will be encountered in the loop and we can return false
3388 if (fgDominate(curBB, botBB) && fgReachable(topBB, curBB))
3395 // If we've reached the destination block, then we're done
3404 curBB = curBB->bbNext;
3407 // If we didn't find any blocks that contained a gc safe point and
3408 // also met the fgDominate and fgReachable criteria then we must return true
3413 /*****************************************************************************
3415 * Find the loop termination test at the bottom of the loop
3418 static GenTreePtr optFindLoopTermTest(BasicBlock* bottom)
3420 GenTreePtr testt = bottom->bbTreeList;
3422 assert(testt && testt->gtOper == GT_STMT);
3424 GenTreePtr result = testt->gtPrev;
3427 while (testt->gtNext)
3429 testt = testt->gtNext;
3432 assert(testt == result);
3438 /*****************************************************************************
3439 * Optimize "jmp C; do{} C:while(cond);" loops to "if (cond){ do{}while(cond}; }"
3442 void Compiler::fgOptWhileLoop(BasicBlock* block)
3444 noway_assert(!opts.MinOpts() && !opts.compDbgCode);
3445 noway_assert(compCodeOpt() != SMALL_CODE);
3448 Optimize while loops into do { } while loop
3449 Our loop hoisting logic requires do { } while loops.
3450 Specifically, we're looking for the following case:
3461 If we find this, and the condition is simple enough, we change
3462 the loop to the following:
3467 // else fall-through
3478 /* Does the BB end with an unconditional jump? */
3480 if (block->bbJumpKind != BBJ_ALWAYS || (block->bbFlags & BBF_KEEP_BBJ_ALWAYS))
3481 { // It can't be one of the ones we use for our exception magic
3485 // It has to be a forward jump
3486 // TODO-CQ: Check if we can also optimize the backwards jump as well.
3488 if (fgIsForwardBranch(block) == false)
3493 // Get hold of the jump target
3494 BasicBlock* bTest = block->bbJumpDest;
3496 // Does the block consist of 'jtrue(cond) block' ?
3497 if (bTest->bbJumpKind != BBJ_COND)
3502 // bTest must be a backwards jump to block->bbNext
3503 if (bTest->bbJumpDest != block->bbNext)
3508 // Since test is a BBJ_COND it will have a bbNext
3509 noway_assert(bTest->bbNext);
3511 // 'block' must be in the same try region as the condition, since we're going to insert
3512 // a duplicated condition in 'block', and the condition might include exception throwing code.
3513 if (!BasicBlock::sameTryRegion(block, bTest))
3518 // We're going to change 'block' to branch to bTest->bbNext, so that also better be in the
3519 // same try region (or no try region) to avoid generating illegal flow.
3520 BasicBlock* bTestNext = bTest->bbNext;
3521 if (bTestNext->hasTryIndex() && !BasicBlock::sameTryRegion(block, bTestNext))
3526 GenTreePtr condStmt = optFindLoopTermTest(bTest);
3528 // bTest must only contain only a jtrue with no other stmts, we will only clone
3529 // the conditional, so any other statements will not get cloned
3530 // TODO-CQ: consider cloning the whole bTest block as inserting it after block.
3532 if (bTest->bbTreeList != condStmt)
3537 /* Get to the condition node from the statement tree */
3539 noway_assert(condStmt->gtOper == GT_STMT);
3541 GenTreePtr condTree = condStmt->gtStmt.gtStmtExpr;
3542 noway_assert(condTree->gtOper == GT_JTRUE);
3544 condTree = condTree->gtOp.gtOp1;
3546 // The condTree has to be a RelOp comparison
3547 // TODO-CQ: Check if we can also optimize the backwards jump as well.
3549 if (condTree->OperIsCompare() == false)
3554 /* We call gtPrepareCost to measure the cost of duplicating this tree */
3556 gtPrepareCost(condTree);
3557 unsigned estDupCostSz = condTree->gtCostSz;
3559 double loopIterations = (double)BB_LOOP_WEIGHT;
3561 bool allProfileWeightsAreValid = false;
3562 BasicBlock::weight_t weightBlock = block->bbWeight;
3563 BasicBlock::weight_t weightTest = bTest->bbWeight;
3564 BasicBlock::weight_t weightNext = block->bbNext->bbWeight;
3566 // If we have profile data then we calculate the number of time
3567 // the loop will iterate into loopIterations
3568 if (fgIsUsingProfileWeights())
3570 // Only rely upon the profile weight when all three of these blocks
3571 // have good profile weights
3572 if ((block->bbFlags & BBF_PROF_WEIGHT) && (bTest->bbFlags & BBF_PROF_WEIGHT) &&
3573 (block->bbNext->bbFlags & BBF_PROF_WEIGHT))
3575 allProfileWeightsAreValid = true;
3577 // If this while loop never iterates then don't bother transforming
3578 if (weightNext == 0)
3583 // with (weighNext > 0) we should also have (weightTest >= weightBlock)
3584 // if the profile weights are all valid.
3586 // weightNext is the number of time this loop iterates
3587 // weightBlock is the number of times that we enter the while loop
3588 // loopIterations is the average number of times that this loop iterates
3590 if (weightTest >= weightBlock)
3592 loopIterations = (double)block->bbNext->bbWeight / (double)block->bbWeight;
3597 unsigned maxDupCostSz = 32;
3599 // optFastCodeOrBlendedLoop(bTest->bbWeight) does not work here as we have not
3600 // set loop weights yet
3601 if ((compCodeOpt() == FAST_CODE) || compStressCompile(STRESS_DO_WHILE_LOOPS, 30))
3606 // If this loop iterates a lot then raise the maxDupCost
3607 if (loopIterations >= 12.0)
3611 if (loopIterations >= 96.0)
3616 // If the loop condition has a shared static helper, we really want this loop converted
3617 // as not converting the loop will disable loop hoisting, meaning the shared helper will
3618 // be executed on every loop iteration.
3619 int countOfHelpers = 0;
3620 fgWalkTreePre(&condTree, CountSharedStaticHelper, &countOfHelpers);
3622 if (countOfHelpers > 0 && compCodeOpt() != SMALL_CODE)
3624 maxDupCostSz += 24 * min(countOfHelpers, (int)(loopIterations + 1.5));
3627 // If the compare has too high cost then we don't want to dup
3629 bool costIsTooHigh = (estDupCostSz > maxDupCostSz);
3634 printf("\nDuplication of loop condition [%06u] is %s, because the cost of duplication (%i) is %s than %i,"
3635 "\n loopIterations = %7.3f, countOfHelpers = %d, validProfileWeights = %s\n",
3636 condTree->gtTreeID, costIsTooHigh ? "not done" : "performed", estDupCostSz,
3637 costIsTooHigh ? "greater" : "less or equal", maxDupCostSz, loopIterations, countOfHelpers,
3638 allProfileWeightsAreValid ? "true" : "false");
3647 /* Looks good - duplicate the condition test */
3649 condTree->gtFlags |= GTF_RELOP_ZTT;
3651 condTree = gtCloneExpr(condTree);
3652 gtReverseCond(condTree);
3654 // Make sure clone expr copied the flag
3655 assert(condTree->gtFlags & GTF_RELOP_ZTT);
3657 condTree = gtNewOperNode(GT_JTRUE, TYP_VOID, condTree);
3659 /* Create a statement entry out of the condition and
3660 append the condition test at the end of 'block' */
3662 GenTreePtr copyOfCondStmt = fgInsertStmtAtEnd(block, condTree);
3664 copyOfCondStmt->gtFlags |= GTF_STMT_CMPADD;
3666 if (opts.compDbgInfo)
3668 copyOfCondStmt->gtStmt.gtStmtILoffsx = condStmt->gtStmt.gtStmtILoffsx;
3671 // Flag the block that received the copy as potentially having an array/vtable
3672 // reference if the block copied from did; this is a conservative guess.
3673 if (auto copyFlags = bTest->bbFlags & (BBF_HAS_VTABREF | BBF_HAS_IDX_LEN))
3675 block->bbFlags |= copyFlags;
3678 // If we have profile data for all blocks and we know that we are cloning the
3679 // bTest block into block and thus changing the control flow from block so
3680 // that it no longer goes directly to bTest anymore, we have to adjust the
3681 // weight of bTest by subtracting out the weight of block.
3683 if (allProfileWeightsAreValid)
3686 // Some additional sanity checks before adjusting the weight of bTest
3688 if ((weightNext > 0) && (weightTest >= weightBlock) && (weightTest != BB_MAX_WEIGHT))
3690 // Get the two edge that flow out of bTest
3691 flowList* edgeToNext = fgGetPredForBlock(bTest->bbNext, bTest);
3692 flowList* edgeToJump = fgGetPredForBlock(bTest->bbJumpDest, bTest);
3694 // Calculate the new weight for block bTest
3696 BasicBlock::weight_t newWeightTest =
3697 (weightTest > weightBlock) ? (weightTest - weightBlock) : BB_ZERO_WEIGHT;
3698 bTest->bbWeight = newWeightTest;
3700 if (newWeightTest == BB_ZERO_WEIGHT)
3702 bTest->bbFlags |= BBF_RUN_RARELY;
3703 // All out edge weights are set to zero
3704 edgeToNext->flEdgeWeightMin = BB_ZERO_WEIGHT;
3705 edgeToNext->flEdgeWeightMax = BB_ZERO_WEIGHT;
3706 edgeToJump->flEdgeWeightMin = BB_ZERO_WEIGHT;
3707 edgeToJump->flEdgeWeightMax = BB_ZERO_WEIGHT;
3711 // Update the our edge weights
3712 edgeToNext->flEdgeWeightMin = BB_ZERO_WEIGHT;
3713 edgeToNext->flEdgeWeightMax = min(edgeToNext->flEdgeWeightMax, newWeightTest);
3714 edgeToJump->flEdgeWeightMin = BB_ZERO_WEIGHT;
3715 edgeToJump->flEdgeWeightMax = min(edgeToJump->flEdgeWeightMax, newWeightTest);
3720 /* Change the block to end with a conditional jump */
3722 block->bbJumpKind = BBJ_COND;
3723 block->bbJumpDest = bTest->bbNext;
3725 /* Mark the jump dest block as being a jump target */
3726 block->bbJumpDest->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
3728 /* Update bbRefs and bbPreds for 'block->bbNext' 'bTest' and 'bTest->bbNext' */
3730 fgAddRefPred(block->bbNext, block);
3732 fgRemoveRefPred(bTest, block);
3733 fgAddRefPred(bTest->bbNext, block);
3738 printf("\nDuplicating loop condition in BB%02u for loop (BB%02u - BB%02u)", block->bbNum, block->bbNext->bbNum,
3740 printf("\nEstimated code size expansion is %d\n ", estDupCostSz);
3742 gtDispTree(copyOfCondStmt);
3748 /*****************************************************************************
3750 * Optimize the BasicBlock layout of the method
3753 void Compiler::optOptimizeLayout()
3755 noway_assert(!opts.MinOpts() && !opts.compDbgCode);
3760 printf("*************** In optOptimizeLayout()\n");
3764 /* Check that the flowgraph data (bbNum, bbRefs, bbPreds) is up-to-date */
3765 fgDebugCheckBBlist();
3768 noway_assert(fgModified == false);
3770 for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
3772 /* Make sure the appropriate fields are initialized */
3774 if (block->bbWeight == BB_ZERO_WEIGHT)
3776 /* Zero weighted block can't have a LOOP_HEAD flag */
3777 noway_assert(block->isLoopHead() == false);
3781 assert(block->bbLoopNum == 0);
3783 if (compCodeOpt() != SMALL_CODE)
3785 /* Optimize "while(cond){}" loops to "cond; do{}while(cond);" */
3787 fgOptWhileLoop(block);
3793 // Recompute the edge weight if we have modified the flow graph in fgOptWhileLoop
3794 fgComputeEdgeWeights();
3797 fgUpdateFlowGraph(true);
3799 fgUpdateFlowGraph();
3802 /*****************************************************************************
3804 * Perform loop inversion, find and classify natural loops
3807 void Compiler::optOptimizeLoops()
3809 noway_assert(!opts.MinOpts() && !opts.compDbgCode);
3814 printf("*************** In optOptimizeLoops()\n");
3818 optSetBlockWeights();
3820 /* Were there any loops in the flow graph? */
3824 /* now that we have dominator information we can find loops */
3826 optFindNaturalLoops();
3828 unsigned loopNum = 0;
3830 /* Iterate over the flow graph, marking all loops */
3832 /* We will use the following terminology:
3833 * top - the first basic block in the loop (i.e. the head of the backward edge)
3834 * bottom - the last block in the loop (i.e. the block from which we jump to the top)
3835 * lastBottom - used when we have multiple back-edges to the same top
3842 for (top = fgFirstBB; top; top = top->bbNext)
3844 BasicBlock* foundBottom = nullptr;
3846 for (pred = top->bbPreds; pred; pred = pred->flNext)
3848 /* Is this a loop candidate? - We look for "back edges" */
3850 BasicBlock* bottom = pred->flBlock;
3852 /* is this a backward edge? (from BOTTOM to TOP) */
3854 if (top->bbNum > bottom->bbNum)
3859 /* 'top' also must have the BBF_LOOP_HEAD flag set */
3861 if (top->isLoopHead() == false)
3866 /* We only consider back-edges that are BBJ_COND or BBJ_ALWAYS for loops */
3868 if ((bottom->bbJumpKind != BBJ_COND) && (bottom->bbJumpKind != BBJ_ALWAYS))
3873 /* the top block must be able to reach the bottom block */
3874 if (!fgReachable(top, bottom))
3879 /* Found a new loop, record the longest backedge in foundBottom */
3881 if ((foundBottom == nullptr) || (bottom->bbNum > foundBottom->bbNum))
3883 foundBottom = bottom;
3891 /* Mark the loop header as such */
3892 assert(FitsIn<unsigned char>(loopNum));
3893 top->bbLoopNum = (unsigned char)loopNum;
3896 /* Mark all blocks between 'top' and 'bottom' */
3898 optMarkLoopBlocks(top, foundBottom, false);
3901 // We track at most 255 loops
3905 totalUnnatLoopOverflows++;
3912 totalUnnatLoopCount += loopNum;
3920 printf("\nFound a total of %d loops.", loopNum);
3921 printf("\nAfter loop weight marking:\n");
3922 fgDispBasicBlocks();
3927 optLoopsMarked = true;
3931 //------------------------------------------------------------------------
3932 // optDeriveLoopCloningConditions: Derive loop cloning conditions.
3935 // loopNum - the current loop index for which conditions are derived.
3936 // context - data structure where all loop cloning info is kept.
3939 // "false" if conditions cannot be obtained. "true" otherwise.
3940 // The cloning conditions are updated in the "conditions"[loopNum] field
3941 // of the "context" parameter.
3944 // Inspect the loop cloning optimization candidates and populate the conditions necessary
3945 // for each optimization candidate. Checks if the loop stride is "> 0" if the loop
3946 // condition is "less than". If the initializer is "var" init then adds condition
3947 // "var >= 0", and if the loop is var limit then, "var >= 0" and "var <= a.len"
3948 // are added to "context". These conditions are checked in the pre-header block
3949 // and the cloning choice is made.
3952 // Callers should assume AND operation is used i.e., if all conditions are
3953 // true, then take the fast path.
3955 bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext* context)
3957 JITDUMP("------------------------------------------------------------\n");
3958 JITDUMP("Deriving cloning conditions for L%02u\n", loopNum);
3960 LoopDsc* loop = &optLoopTable[loopNum];
3961 ExpandArrayStack<LcOptInfo*>* optInfos = context->GetLoopOptInfo(loopNum);
3963 if (loop->lpTestOper() == GT_LT)
3965 // Stride conditions
3966 if (loop->lpIterConst() <= 0)
3968 JITDUMP("> Stride %d is invalid\n", loop->lpIterConst());
3973 if (loop->lpFlags & LPFLG_CONST_INIT)
3975 // Only allowing const init at this time.
3976 if (loop->lpConstInit < 0)
3978 JITDUMP("> Init %d is invalid\n", loop->lpConstInit);
3982 else if (loop->lpFlags & LPFLG_VAR_INIT)
3985 LC_Condition geZero(GT_GE, LC_Expr(LC_Ident(loop->lpVarInit, LC_Ident::Var)),
3986 LC_Expr(LC_Ident(0, LC_Ident::Const)));
3987 context->EnsureConditions(loopNum)->Push(geZero);
3991 JITDUMP("> Not variable init\n");
3997 if (loop->lpFlags & LPFLG_CONST_LIMIT)
3999 int limit = loop->lpConstLimit();
4002 JITDUMP("> limit %d is invalid\n", limit);
4005 ident = LC_Ident(limit, LC_Ident::Const);
4007 else if (loop->lpFlags & LPFLG_VAR_LIMIT)
4009 unsigned limitLcl = loop->lpVarLimit();
4010 ident = LC_Ident(limitLcl, LC_Ident::Var);
4012 LC_Condition geZero(GT_GE, LC_Expr(ident), LC_Expr(LC_Ident(0, LC_Ident::Const)));
4014 context->EnsureConditions(loopNum)->Push(geZero);
4016 else if (loop->lpFlags & LPFLG_ARRLEN_LIMIT)
4018 ArrIndex* index = new (getAllocator()) ArrIndex(getAllocator());
4019 if (!loop->lpArrLenLimit(this, index))
4021 JITDUMP("> ArrLen not matching");
4024 ident = LC_Ident(LC_Array(LC_Array::Jagged, index, LC_Array::ArrLen));
4026 // Ensure that this array must be dereference-able, before executing the actual condition.
4027 LC_Array array(LC_Array::Jagged, index, LC_Array::None);
4028 context->EnsureDerefs(loopNum)->Push(array);
4032 JITDUMP("> Undetected limit\n");
4036 for (unsigned i = 0; i < optInfos->Size(); ++i)
4038 LcOptInfo* optInfo = optInfos->GetRef(i);
4039 switch (optInfo->GetOptType())
4041 case LcOptInfo::LcJaggedArray:
4044 LcJaggedArrayOptInfo* arrIndexInfo = optInfo->AsLcJaggedArrayOptInfo();
4045 LC_Array arrLen(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::ArrLen);
4046 LC_Ident arrLenIdent = LC_Ident(arrLen);
4048 LC_Condition cond(GT_LE, LC_Expr(ident), LC_Expr(arrLenIdent));
4049 context->EnsureConditions(loopNum)->Push(cond);
4051 // Ensure that this array must be dereference-able, before executing the actual condition.
4052 LC_Array array(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::None);
4053 context->EnsureDerefs(loopNum)->Push(array);
4056 case LcOptInfo::LcMdArray:
4058 // limit <= mdArrLen
4059 LcMdArrayOptInfo* mdArrInfo = optInfo->AsLcMdArrayOptInfo();
4060 LC_Condition cond(GT_LE, LC_Expr(ident),
4061 LC_Expr(LC_Ident(LC_Array(LC_Array::MdArray,
4062 mdArrInfo->GetArrIndexForDim(getAllocator()),
4063 mdArrInfo->dim, LC_Array::None))));
4064 context->EnsureConditions(loopNum)->Push(cond);
4069 JITDUMP("Unknown opt\n");
4073 JITDUMP("Conditions: (");
4074 DBEXEC(verbose, context->PrintConditions(loopNum));
4081 //------------------------------------------------------------------------------------
4082 // optComputeDerefConditions: Derive loop cloning conditions for dereferencing arrays.
4085 // loopNum - the current loop index for which conditions are derived.
4086 // context - data structure where all loop cloning info is kept.
4089 // "false" if conditions cannot be obtained. "true" otherwise.
4090 // The deref conditions are updated in the "derefConditions"[loopNum] field
4091 // of the "context" parameter.
4093 // Definition of Deref Conditions:
4094 // To be able to check for the loop cloning condition that (limitVar <= a.len)
4095 // we should first be able to dereference "a". i.e., "a" is non-null.
4101 // for (k in 0..n) // Inner most loop is being cloned. Cloning needs to check if
4102 // // (n <= a[i][j].len) and other safer conditions to take the fast path
4105 // Now, we want to deref a[i][j] to invoke length operator on it to perform the cloning fast path check.
4106 // This involves deref of (a), (a[i]), (a[i][j]), therefore, the following should first
4107 // be true to do the deref.
4109 // (a != null) && (i < a.len) && (a[i] != null) && (j < a[i].len) && (a[i][j] != null) --> (1)
4111 // Note the short circuiting AND. Implication: these conditions should be performed in separate
4112 // blocks each of which will branch to slow path if the condition evaluates to false.
4114 // Now, imagine a situation where we have
4115 // a[x][y][k] = 20 and a[i][j][k] = 0
4116 // also in the inner most loop where x, y are parameters, then our conditions will have
4120 // in addition to the above conditions (1) to get rid of bounds check on index 'k'
4122 // But these conditions can be checked together with conditions
4123 // (i < a.len) without a need for a separate block. In summary, the conditions will be:
4126 // ((i < a.len) & (x < a.len)) && <-- Note the bitwise AND here.
4127 // (a[i] != null & a[x] != null) && <-- Note the bitwise AND here.
4128 // (j < a[i].len & y < a[x].len) && <-- Note the bitwise AND here.
4129 // (a[i][j] != null & a[x][y] != null) <-- Note the bitwise AND here.
4131 // This naturally yields a tree style pattern, where the nodes of the tree are
4132 // the array and indices respectively.
4148 // Notice that the variables in the same levels can have their conditions combined in the
4149 // same block with a bitwise AND. Whereas, the conditions in consecutive levels will be
4150 // combined with a short-circuiting AND (i.e., different basic blocks).
4153 // Construct a tree of array indices and the array which will generate the optimal
4154 // conditions for loop cloning.
4156 // a[i][j][k], b[i] and a[i][y][k] are the occurrences in the loop. Then, the tree should be:
4171 // In this method, we will construct such a tree by descending depth first into the array
4172 // index operation and forming a tree structure as we encounter the array or the index variables.
4174 // This tree structure will then be used to generate conditions like below:
4175 // (a != null) & (b != null) && // from the first level of the tree.
4177 // (i < a.len) & (i < b.len) && // from the second level of the tree. Levels can be combined.
4178 // (a[i] != null) & (b[i] != null) && // from the second level of the tree.
4180 // (j < a[i].len) & (y < a[i].len) && // from the third level.
4181 // (a[i][j] != null) & (a[i][y] != null) && // from the third level.
//------------------------------------------------------------------------
// optComputeDerefConditions: Derive the null-check and bounds conditions that
// make every array dereference collected for loop "loopNum" provably safe.
// The conditions are grouped per dereference level (array base, then each
// dimension), so each level can later become its own condition block.
// NOTE: this excerpt elides some lines (e.g. the declaration of 'maxRank'
// and the failure/early-return paths); see the full file for those.
4186 bool Compiler::optComputeDerefConditions(unsigned loopNum, LoopCloneContext* context)
4188 ExpandArrayStack<LC_Deref*> nodes(getAllocator());
4191 // Get the dereference-able arrays.
4192 ExpandArrayStack<LC_Array>* deref = context->EnsureDerefs(loopNum);
4194 // For each array in the dereference list, construct a tree,
4195 // where the nodes are array and index variables and an edge 'u-v'
4196 // exists if a node 'v' indexes node 'u' directly as in u[v] or an edge
4197 // 'u-v-w' transitively if u[v][w] occurs.
4198 for (unsigned i = 0; i < deref->Size(); ++i)
4200 LC_Array& array = (*deref)[i];
4202 // First populate the array base variable.
4203 LC_Deref* node = LC_Deref::Find(&nodes, array.arrIndex->arrLcl);
4204 if (node == nullptr)
4206 node = new (getAllocator()) LC_Deref(array, 0 /*level*/);
4210 // For each dimension (level) for the array, populate the tree with the variable
4211 // from that dimension.
4212 unsigned rank = (unsigned)array.GetDimRank();
// NOTE(review): this inner loop's 'i' shadows the outer loop's 'i' above.
// Harmless as written (the outer index is not used inside), but easy to misread.
4213 for (unsigned i = 0; i < rank; ++i)
4215 node->EnsureChildren(getAllocator());
4216 LC_Deref* tmp = node->Find(array.arrIndex->indLcls[i]);
// Index variable not seen at this level yet: add it as a child node.
4219 tmp = new (getAllocator()) LC_Deref(array, node->level + 1);
4220 node->children->Push(tmp);
4223 // Descend one level down.
4227 // Keep the maxRank of all array dereferences.
4228 maxRank = max((int)rank, maxRank);
4234 for (unsigned i = 0; i < nodes.Size(); ++i)
4251 // First level will always yield the null-check, since it is made of the array base variables.
4252 // All other levels (dimensions) will yield two conditions ex: (i < a.length && a[i] != null)
4253 // So add 1 after rank * 2.
4254 unsigned condBlocks = (unsigned)maxRank * 2 + 1;
4256 // Heuristic to not create too many blocks;
4262 // Derive conditions into an 'array of level x array of conditions' i.e., levelCond[levels][conds]
4263 ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* levelCond = context->EnsureBlockConditions(loopNum, condBlocks);
4264 for (unsigned i = 0; i < nodes.Size(); ++i)
4266 nodes[i]->DeriveLevelConditions(levelCond);
4269 DBEXEC(verbose, context->PrintBlockConditions(loopNum));
4274 //----------------------------------------------------------------------------
4275 // optDebugLogLoopCloning: Insert a call to jithelper that prints a message.
//
// Arguments:
4278 // block - the block in which the helper call needs to be inserted.
4279 // insertBefore - the tree before which the helper call will be inserted.
//
// The helper call is only emitted when the JitDebugLogLoopCloning config knob
// is non-zero; the newly inserted statement is morphed in place.
4281 void Compiler::optDebugLogLoopCloning(BasicBlock* block, GenTreePtr insertBefore)
4283 if (JitConfig.JitDebugLogLoopCloning() == 0)
4287 GenTreePtr logCall = gtNewHelperCallNode(CORINFO_HELP_DEBUG_LOG_LOOP_CLONING, TYP_VOID);
4288 GenTreePtr stmt = fgNewStmtFromTree(logCall);
4289 fgInsertStmtBefore(block, insertBefore, stmt);
4290 fgMorphBlockStmt(block, stmt DEBUGARG("Debug log loop cloning"));
4294 //------------------------------------------------------------------------
4295 // optPerformStaticOptimizations: Perform the optimizations for the optimization
4296 // candidates gathered during the cloning phase.
//
// Arguments:
4299 // loopNum - the current loop index for which the optimizations are performed.
4300 // context - data structure where all loop cloning info is kept.
4301 // dynamicPath - If true, the optimization is performed in the fast path among the
4302 // cloned loops. If false, it means this is the only path (i.e.,
4303 // there is no slow path.)
//
// Operation:
4306 // Perform the optimizations on the fast path i.e., the path in which the
4307 // optimization candidates were collected at the time of identifying them.
4308 // The candidates store all the information necessary (the tree/stmt/block
4309 // they are from) to perform the optimization.
//
// Assumption:
4312 // The unoptimized path is either already cloned when this method is called or
4313 // there is no unoptimized path (got eliminated statically.) So this method
4314 // performs the optimizations assuming that the path in which the candidates
4315 // were collected is the fast path in which the optimizations will be performed.
4317 void Compiler::optPerformStaticOptimizations(unsigned loopNum, LoopCloneContext* context DEBUGARG(bool dynamicPath))
4319 ExpandArrayStack<LcOptInfo*>* optInfos = context->GetLoopOptInfo(loopNum);
4320 for (unsigned i = 0; i < optInfos->Size(); ++i)
4322 LcOptInfo* optInfo = optInfos->GetRef(i);
4323 switch (optInfo->GetOptType())
// Jagged array candidate: on the fast path, the bounds check for the
// optimized dimension is guaranteed by the cloning conditions, so remove it.
4325 case LcOptInfo::LcJaggedArray:
4327 LcJaggedArrayOptInfo* arrIndexInfo = optInfo->AsLcJaggedArrayOptInfo();
4328 compCurBB = arrIndexInfo->arrIndex.useBlock;
4329 optRemoveRangeCheck(arrIndexInfo->arrIndex.bndsChks[arrIndexInfo->dim], arrIndexInfo->stmt, true,
4331 DBEXEC(dynamicPath, optDebugLogLoopCloning(arrIndexInfo->arrIndex.useBlock, arrIndexInfo->stmt));
// Multi-dimensional array candidates are not yet optimized here.
4334 case LcOptInfo::LcMdArray:
4335 // TODO-CQ: CLONE: Implement.
4343 //----------------------------------------------------------------------------
4344 // optCanCloneLoops: Use the environment flag to determine whether loop
4345 // cloning is allowed to be performed.
//
4348 // Returns true in debug builds if COMPlus_JitCloneLoops flag is set.
4349 // Disabled for retail for now.
// NOTE(review): the header above ("Disabled for retail") contradicts the code,
// which defaults cloneLoopsFlag to 1 (enabled). The JitConfig read is
// presumably guarded by #ifdef DEBUG in the elided lines -- confirm in full file.
4351 bool Compiler::optCanCloneLoops()
4353 // Enabled for retail builds now.
4354 unsigned cloneLoopsFlag = 1;
4356 cloneLoopsFlag = JitConfig.JitCloneLoops();
4358 return (cloneLoopsFlag != 0);
4361 //----------------------------------------------------------------------------
4362 // optIsLoopClonable: Determine whether this loop can be cloned.
//
// Arguments:
4365 // loopInd loop index which needs to be checked if it can be cloned.
//
// Return Value:
4368 // Returns true if the loop can be cloned. If it returns false
4369 // prints a message in debug as why the loop can't be cloned.
4371 bool Compiler::optIsLoopClonable(unsigned loopInd)
4373 // First, for now, make sure the loop doesn't have any embedded exception handling -- I don't want to tackle
4374 // inserting new EH regions in the exception table yet.
4375 BasicBlock* stopAt = optLoopTable[loopInd].lpBottom->bbNext;
4376 unsigned loopRetCount = 0;
4377 for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != stopAt; blk = blk->bbNext)
// Count returns inside the loop (presumably loopRetCount++ in the elided
// lines); used below to enforce the epilog limit.
4379 if (blk->bbJumpKind == BBJ_RETURN)
4383 if (bbIsTryBeg(blk))
4385 JITDUMP("Loop cloning: rejecting loop %d in %s, because it has a try begin.\n", loopInd, info.compFullName);
4390 // Is the entry block a handler or filter start? If so, then if we cloned, we could create a jump
4391 // into the middle of a handler (to go to the cloned copy.) Reject.
4392 if (bbIsHandlerBeg(optLoopTable[loopInd].lpEntry))
4394 JITDUMP("Loop cloning: rejecting loop because entry block is a handler start.\n");
4398 // If the head and entry are in different EH regions, reject.
4399 if (!BasicBlock::sameEHRegion(optLoopTable[loopInd].lpHead, optLoopTable[loopInd].lpEntry))
4401 JITDUMP("Loop cloning: rejecting loop because head and entry blocks are in different EH regions.\n");
4405 // Is the first block after the last block of the loop a handler or filter start?
4406 // Usually, we create a dummy block after the orginal loop, to skip over the loop clone
4407 // and go to where the original loop did. That raises problems when we don't actually go to
4408 // that block; this is one of those cases. This could be fixed fairly easily; for example,
4409 // we could add a dummy nop block after the (cloned) loop bottom, in the same handler scope as the
4410 // loop. This is just a corner to cut to get this working faster.
4411 BasicBlock* bbAfterLoop = optLoopTable[loopInd].lpBottom->bbNext;
4412 if (bbAfterLoop != nullptr && bbIsHandlerBeg(bbAfterLoop))
4414 JITDUMP("Loop cloning: rejecting loop because next block after bottom is a handler start.\n");
4418 // We've previously made a decision whether to have separate return epilogs, or branch to one.
4419 // There's a GCInfo limitation in the x86 case, so that there can be no more than 4 separate epilogs.
4420 // (I thought this was x86-specific, but it's not if-d. On other architectures, the decision should be made as a
4421 // heuristic tradeoff; perhaps we're just choosing to live with 4 as the limit.)
4422 if (fgReturnCount + loopRetCount > 4)
4424 JITDUMP("Loop cloning: rejecting loop because it has %d returns; if added to previously-existing %d returns, "
4425 "would exceed the limit of 4.\n",
4426 loopRetCount, fgReturnCount);
4430 // Otherwise, we're going to add those return blocks.
// Side effect: commits the cloned loop's returns to the method's return count.
4431 fgReturnCount += loopRetCount;
4436 /*****************************************************************************
 *
4438  * Identify loop cloning opportunities, derive loop cloning conditions,
4439  * perform loop cloning, use the derived conditions to choose which
 *  path (fast or slow) to take at runtime.
 */
4442 void Compiler::optCloneLoops()
4444 JITDUMP("\n*************** In optCloneLoops()\n");
// Nothing to do if there are no loops, or cloning is disabled by config.
4445 if (optLoopCount == 0 || !optCanCloneLoops())
4453 printf("Blocks/Trees at start of phase\n");
4454 fgDispBasicBlocks(true);
4458 LoopCloneContext context(optLoopCount, getAllocator());
4460 // Obtain array optimization candidates in the context.
4461 optObtainLoopCloningOpts(&context);
4463 // For each loop, derive cloning conditions for the optimization candidates.
4464 for (unsigned i = 0; i < optLoopCount; ++i)
4466 ExpandArrayStack<LcOptInfo*>* optInfos = context.GetLoopOptInfo(i);
4467 if (optInfos == nullptr)
// If either the cloning conditions or the dereference conditions cannot be
// computed, give up on this loop.
4472 if (!optDeriveLoopCloningConditions(i, &context) || !optComputeDerefConditions(i, &context))
4474 JITDUMP("> Conditions could not be obtained\n");
4475 context.CancelLoopOptInfo(i);
// Try to statically evaluate the derived conditions.
4479 bool allTrue = false;
4480 bool anyFalse = false;
4481 context.EvaluateConditions(i, &allTrue, &anyFalse DEBUGARG(verbose));
// Some condition is statically false: cloning can never help; cancel.
4484 context.CancelLoopOptInfo(i);
4488 // Perform static optimizations on the fast path since we always
4489 // have to take the cloned path.
4490 optPerformStaticOptimizations(i, &context DEBUGARG(false));
4492 // No need to clone.
4493 context.CancelLoopOptInfo(i);
4499 // The code in this #if has been useful in debugging loop cloning issues, by
4500 // enabling selective enablement of the loop cloning optimization according to
// method hash: only clone when the hash falls in [loopclonehashlo, loopclonehashhi].
4503 unsigned methHash = info.compMethodHash();
4504 char* lostr = getenv("loopclonehashlo");
4505 unsigned methHashLo = 0;
4508 sscanf_s(lostr, "%x", &methHashLo);
4509 // methHashLo = (unsigned(atoi(lostr)) << 2); // So we don't have to use negative numbers.
4511 char* histr = getenv("loopclonehashhi");
4512 unsigned methHashHi = UINT32_MAX;
4515 sscanf_s(histr, "%x", &methHashHi);
4516 // methHashHi = (unsigned(atoi(histr)) << 2); // So we don't have to use negative numbers.
4518 if (methHash < methHashLo || methHash > methHashHi)
// Finally, clone each loop that still has (optimized) candidate info.
4523 for (unsigned i = 0; i < optLoopCount; ++i)
4525 if (context.GetLoopOptInfo(i) != nullptr)
4528 context.OptimizeConditions(i DEBUGARG(verbose));
4529 context.OptimizeBlockConditions(i DEBUGARG(verbose));
4530 optCloneLoop(i, &context);
4537 printf("\nAfter loop cloning:\n");
4538 fgDispBasicBlocks(/*dumpTrees*/ true);
// optCloneLoop: Clone the body of loop "loopInd", insert the runtime choice
// conditions between the fast (original) and slow (cloned) paths, and fix up
// weights, loop-table head, and jump targets of the cloned blocks.
4543 void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context)
4545 assert(loopInd < optLoopCount);
4547 JITDUMP("\nCloning loop %d: [h: %d, f: %d, t: %d, e: %d, b: %d].\n", loopInd, optLoopTable[loopInd].lpHead->bbNum,
4548 optLoopTable[loopInd].lpFirst->bbNum, optLoopTable[loopInd].lpTop->bbNum,
4549 optLoopTable[loopInd].lpEntry->bbNum, optLoopTable[loopInd].lpBottom->bbNum);
4551 // Determine the depth of the loop, so we can properly weight blocks added (outside the cloned loop blocks).
4552 unsigned depth = optLoopDepth(loopInd);
4553 unsigned ambientWeight = 1;
4554 for (unsigned j = 0; j < depth; j++)
4556 unsigned lastWeight = ambientWeight;
4557 ambientWeight *= BB_LOOP_WEIGHT;
4558 // If the multiplication overflowed, stick at max.
4559 // (Strictly speaking, a multiplication could overflow and still have a result
4560 // that is >= lastWeight...but if so, the original weight must be pretty large,
4561 // and it got bigger, so that's OK.)
4562 if (ambientWeight < lastWeight)
4564 ambientWeight = BB_MAX_WEIGHT;
4569 // If we're in a non-natural loop, the ambient weight might be higher than we computed above.
4570 // Be safe by taking the max with the head block's weight.
4571 ambientWeight = max(ambientWeight, optLoopTable[loopInd].lpHead->bbWeight);
4573 // This is the containing loop, if any -- to label any blocks we create that are outside
4574 // the loop being cloned.
4575 unsigned char ambientLoop = optLoopTable[loopInd].lpParent;
4577 // First, make sure that the loop has a unique header block, creating an empty one if necessary.
4578 optEnsureUniqueHead(loopInd, ambientWeight);
4580 // We're going to make
4592 // H2--> E (Optional; if E == T == F, let H fall through to F/T/E)
4604 BasicBlock* h = optLoopTable[loopInd].lpHead;
4605 if (h->bbJumpKind != BBJ_NONE && h->bbJumpKind != BBJ_ALWAYS)
4607 // Make a new block to be the unique entry to the loop.
4608 assert(h->bbJumpKind == BBJ_COND && h->bbNext == optLoopTable[loopInd].lpEntry);
4609 BasicBlock* newH = fgNewBBafter(BBJ_NONE, h,
4610 /*extendRegion*/ true);
4611 newH->bbWeight = (newH->isRunRarely() ? 0 : ambientWeight);
4612 BlockSetOps::Assign(this, newH->bbReach, h->bbReach);
4613 // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
4614 newH->bbNatLoopNum = ambientLoop;
// NOTE(review): this updates the loop head to 'h' (re-assigned in elided
// lines, presumably h = newH) -- confirm against the full file.
4616 optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h);
4619 // First, make X2 after B, if necessary. (Not necessary if b is a BBJ_ALWAYS.)
4620 // "newPred" will be the predecessor of the blocks of the cloned loop.
4621 BasicBlock* b = optLoopTable[loopInd].lpBottom;
4622 BasicBlock* newPred = b;
4623 if (b->bbJumpKind != BBJ_ALWAYS)
4625 BasicBlock* x = b->bbNext;
4628 BasicBlock* x2 = fgNewBBafter(BBJ_ALWAYS, b, /*extendRegion*/ true);
4629 x2->bbWeight = (x2->isRunRarely() ? 0 : ambientWeight);
4631 // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
4632 x2->bbNatLoopNum = ambientLoop;
4635 BlockSetOps::Assign(this, x2->bbReach, h->bbReach);
4640 // Now we'll make "h2", after "h" to go to "e" -- unless the loop is a do-while,
4641 // so that "h" already falls through to "e" (e == t == f).
4642 BasicBlock* h2 = nullptr;
4643 if (optLoopTable[loopInd].lpHead->bbNext != optLoopTable[loopInd].lpEntry)
// NOTE(review): this declaration SHADOWS the outer 'h2' above -- the outer
// 'h2' remains nullptr, which affects the assert over 'h2' further below.
// Looks like a latent bug; fixing it requires the elided context, so only
// flagging it here.
4645 BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, optLoopTable[loopInd].lpHead,
4646 /*extendRegion*/ true);
4647 h2->bbWeight = (h2->isRunRarely() ? 0 : ambientWeight);
4649 // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
4650 h2->bbNatLoopNum = ambientLoop;
4652 h2->bbJumpDest = optLoopTable[loopInd].lpEntry;
4653 optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h2);
4656 // Now we'll clone the blocks of the loop body.
4657 BasicBlock* newFirst = nullptr;
4658 BasicBlock* newBot = nullptr;
4660 BlockToBlockMap* blockMap = new (getAllocator()) BlockToBlockMap(getAllocator());
4661 for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != optLoopTable[loopInd].lpBottom->bbNext;
4664 BasicBlock* newBlk = fgNewBBafter(blk->bbJumpKind, newPred,
4665 /*extendRegion*/ true);
4667 // Call CloneBlockState to make a copy of the block's statements (and attributes), and assert that it
4668 // has a return value indicating success, because optCanOptimizeByLoopCloningVisitor has already
4669 // checked them to guarantee they are clonable.
4670 bool cloneOk = BasicBlock::CloneBlockState(this, newBlk, blk);
4671 noway_assert(cloneOk);
4672 // TODO-Cleanup: The above clones the bbNatLoopNum, which is incorrect. Eventually, we should probably insert
4673 // the cloned loop in the loop table. For now, however, we'll just make these blocks be part of the surrounding
4674 // loop, if one exists -- the parent of the loop we're cloning.
4675 newBlk->bbNatLoopNum = optLoopTable[loopInd].lpParent;
4677 if (newFirst == nullptr)
4681 newBot = newBlk; // Continually overwrite to make sure we get the last one.
4683 blockMap->Set(blk, newBlk);
4686 // Perform the static optimizations on the fast path.
4687 optPerformStaticOptimizations(loopInd, context DEBUGARG(true));
4689 // Now go through the new blocks, remapping their jump targets within the loop.
4690 for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != optLoopTable[loopInd].lpBottom->bbNext;
4694 BasicBlock* newblk = nullptr;
4695 bool b = blockMap->Lookup(blk, &newblk);
4696 assert(b && newblk != nullptr);
4698 assert(blk->bbJumpKind == newblk->bbJumpKind);
4700 // First copy the jump destination(s) from "blk".
4701 optCopyBlkDest(blk, newblk);
4703 // Now redirect the new block according to "blockMap".
4704 optRedirectBlock(newblk, blockMap);
4707 assert((h->bbJumpKind == BBJ_NONE && (h->bbNext == h2 || h->bbNext == optLoopTable[loopInd].lpEntry)) ||
4708 (h->bbJumpKind == BBJ_ALWAYS));
4710 // If all the conditions are true, go to E2.
4711 BasicBlock* e2 = nullptr;
4712 bool foundIt = blockMap->Lookup(optLoopTable[loopInd].lpEntry, &e2);
4714 h->bbJumpKind = BBJ_COND;
4716 // We will create the following structure
4718 // cond0 (in h) -?> cond1
4719 // slow --> e2 (slow) always
4726 // We should always have block conditions, at the minimum, the array should be deref-able
4727 assert(context->HasBlockConditions(loopInd));
4729 // Create a unique header for the slow path.
4730 BasicBlock* slowHead = fgNewBBafter(BBJ_ALWAYS, h, true);
4731 slowHead->bbWeight = (h->isRunRarely() ? 0 : ambientWeight);
4732 slowHead->bbNatLoopNum = ambientLoop;
4733 slowHead->bbJumpDest = e2;
4735 BasicBlock* condLast = optInsertLoopChoiceConditions(context, loopInd, h, slowHead);
4736 condLast->bbJumpDest = slowHead;
4738 // If h2 is present it is already the head or replace 'h' by 'condLast'.
4741 optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, condLast);
4743 assert(foundIt && e2 != nullptr);
4745 fgUpdateChangedFlowGraph();
4748 //--------------------------------------------------------------------------------------------------
4749 // optInsertLoopChoiceConditions - Insert the loop conditions for a loop between loop head and entry
//
// Arguments:
4752 // context loop cloning context variable
4753 // loopNum the loop index
4754 // head loop head for "loopNum"
4755 // slowHead the slow path loop head
//
// Return Value:
//      The last condition block created (its bbJumpDest is set by the caller).
//
// Operation:
4761 // Create the following structure.
//
4763 // Note below that the cond0 is inverted in head i.e., if true jump to cond1. This is because
4764 // condn cannot jtrue to loop head h2. It has to be from a direct pred block.
//
4766 // cond0 (in h) -?> cond1
4767 // slowHead --> e2 (slowHead) always
4768 // !cond1 -?> slowHead
4769 // !cond2 -?> slowHead
 // ...
4771 // !condn -?> slowHead
//
4774 // Insert condition 0 in 'h' and create other condition blocks and insert conditions in them.
4776 BasicBlock* Compiler::optInsertLoopChoiceConditions(LoopCloneContext* context,
 unsigned loopNum,
 BasicBlock* head,
4779 BasicBlock* slowHead)
4781 JITDUMP("Inserting loop cloning conditions\n");
4782 assert(context->HasBlockConditions(loopNum));
4784 BasicBlock* curCond = head;
4785 ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* levelCond = context->GetBlockConditions(loopNum);
4786 for (unsigned i = 0; i < levelCond->Size(); ++i)
4788 bool isHeaderBlock = (curCond == head);
4790 // Flip the condition if header block.
4791 context->CondToStmtInBlock(this, *((*levelCond)[i]), curCond, isHeaderBlock);
4793 // Create each condition block ensuring wiring between them.
4794 BasicBlock* tmp = fgNewBBafter(BBJ_COND, isHeaderBlock ? slowHead : curCond, true);
4795 curCond->bbJumpDest = isHeaderBlock ? tmp : slowHead;
// Each new condition block runs as often as the head and belongs to its loop.
4798 curCond->inheritWeight(head);
4799 curCond->bbNatLoopNum = head->bbNatLoopNum;
4800 JITDUMP("Created new block %02d for new level\n", curCond->bbNum);
4803 // Finally insert cloning conditions after all deref conditions have been inserted.
4804 context->CondToStmtInBlock(this, *(context->GetConditions(loopNum)), curCond, false);
// optEnsureUniqueHead: Ensure loop "loopInd" has a unique header block that
// dominates the loop entry; if not, create an empty header block with weight
// "ambientWeight", splice it before the top block, and redirect all non-loop
// predecessors of the entry to it.
4808 void Compiler::optEnsureUniqueHead(unsigned loopInd, unsigned ambientWeight)
4810 BasicBlock* h = optLoopTable[loopInd].lpHead;
4811 BasicBlock* t = optLoopTable[loopInd].lpTop;
4812 BasicBlock* e = optLoopTable[loopInd].lpEntry;
4813 BasicBlock* b = optLoopTable[loopInd].lpBottom;
4815 // If "h" dominates the entry block, then it is the unique header.
4816 if (fgDominate(h, e))
4821 // Otherwise, create a new empty header block, make it the pred of the entry block,
4822 // and redirect the preds of the entry block to go to this.
4824 BasicBlock* beforeTop = t->bbPrev;
4825 // Make sure that the new block is in the same region as the loop.
4826 // (We will only create loops that are entirely within a region.)
4827 BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, beforeTop, true);
4828 // This is in the containing loop.
4829 h2->bbNatLoopNum = optLoopTable[loopInd].lpParent;
4830 h2->bbWeight = (h2->isRunRarely() ? 0 : ambientWeight);
4832 // We don't care where it was put; splice it between beforeTop and top.
4833 if (beforeTop->bbNext != h2)
4835 h2->bbPrev->setNext(h2->bbNext); // Splice h2 out.
4836 beforeTop->setNext(h2); // Splice h2 in, between beforeTop and t.
// If h2 does not fall through to the entry, it must jump there explicitly.
4840 if (h2->bbNext != e)
4842 h2->bbJumpKind = BBJ_ALWAYS;
4845 BlockSetOps::Assign(this, h2->bbReach, e->bbReach);
4847 // Redirect paths from preds of "e" to go to "h2" instead of "e".
4848 BlockToBlockMap* blockMap = new (getAllocator()) BlockToBlockMap(getAllocator());
4849 blockMap->Set(e, h2);
4851 for (flowList* predEntry = e->bbPreds; predEntry; predEntry = predEntry->flNext)
4853 BasicBlock* predBlock = predEntry->flBlock;
4855 // Skip if predBlock is in the loop.
// NOTE: uses bbNum ordering [t..b] to test loop membership -- assumes block
// numbers are sequential over the loop's lexical range.
4856 if (t->bbNum <= predBlock->bbNum && predBlock->bbNum <= b->bbNum)
4860 optRedirectBlock(predBlock, blockMap);
4863 optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h2);
4866 /*****************************************************************************
 *
4868  * Determine the kind of interference for the call.
 *  Classifies which indirections (ref / scalar / all / none) a call may kill,
 *  so callers can decide what may be hoisted or CSE'd across it.
 */
4871 /* static */ inline Compiler::callInterf Compiler::optCallInterf(GenTreePtr call)
4873 assert(call->gtOper == GT_CALL);
4875 // if not a helper, kills everything
4876 if (call->gtCall.gtCallType != CT_HELPER)
4881 // setfield and array address store kill all indirections
4882 switch (eeGetHelperNum(call->gtCall.gtCallMethHnd))
// Object-reference stores: kill only GC-ref indirections.
4884 case CORINFO_HELP_ASSIGN_REF: // Not strictly needed as we don't make a GT_CALL with this
4885 case CORINFO_HELP_CHECKED_ASSIGN_REF: // Not strictly needed as we don't make a GT_CALL with this
4886 case CORINFO_HELP_ASSIGN_BYREF: // Not strictly needed as we don't make a GT_CALL with this
4887 case CORINFO_HELP_SETFIELDOBJ:
4888 case CORINFO_HELP_ARRADDR_ST:
4890 return CALLINT_REF_INDIRS;
// Scalar field stores: kill only non-ref (scalar) indirections.
4892 case CORINFO_HELP_SETFIELDFLOAT:
4893 case CORINFO_HELP_SETFIELDDOUBLE:
4894 case CORINFO_HELP_SETFIELD8:
4895 case CORINFO_HELP_SETFIELD16:
4896 case CORINFO_HELP_SETFIELD32:
4897 case CORINFO_HELP_SETFIELD64:
4899 return CALLINT_SCL_INDIRS;
// Struct/memory block operations may touch both ref and scalar slots.
4901 case CORINFO_HELP_ASSIGN_STRUCT: // Not strictly needed as we don't use this in Jit32
4902 case CORINFO_HELP_MEMSET: // Not strictly needed as we don't make a GT_CALL with this
4903 case CORINFO_HELP_MEMCPY: // Not strictly needed as we don't make a GT_CALL with this
4904 case CORINFO_HELP_SETFIELDSTRUCT:
4906 return CALLINT_ALL_INDIRS;
4912 // other helpers kill nothing
4913 return CALLINT_NONE;
4916 /*****************************************************************************
 *
4918  * See if the given tree can be computed in the given precision (which must
4919  * be smaller than the type of the tree for this to make sense). If 'doit'
4920  * is false, we merely check to see whether narrowing is possible; if we
4921  * get called with 'doit' being true, we actually perform the narrowing.
 *  (This excerpt elides many lines, including some case labels and returns;
 *  comments below are scoped to what is visible.)
 */
4924 bool Compiler::optNarrowTree(GenTreePtr tree, var_types srct, var_types dstt, ValueNumPair vnpNarrow, bool doit)
4930 noway_assert(genActualType(tree->gtType) == genActualType(srct));
4932 /* Assume we're only handling integer types */
4933 noway_assert(varTypeIsIntegral(srct));
4934 noway_assert(varTypeIsIntegral(dstt));
4936 unsigned srcSize = genTypeSize(srct);
4937 unsigned dstSize = genTypeSize(dstt);
4939 /* dstt must be smaller than srct to narrow */
4940 if (dstSize >= srcSize)
4945 /* Figure out what kind of a node we have */
4946 oper = tree->OperGet();
4947 kind = tree->OperKind();
// Assignment operators are never narrowed (and must not be, under 'doit').
4949 if (kind & GTK_ASGOP)
4951 noway_assert(doit == false);
4955 ValueNumPair NoVNPair = ValueNumPair();
4957 if (kind & GTK_LEAF)
4961 /* Constants can usually be narrowed by changing their value */
4962 CLANG_FORMAT_COMMENT_ANCHOR;
// 32-bit targets: long constants are a distinct node; narrow by masking.
4964 #ifndef _TARGET_64BIT_
4969 lval = tree->gtIntConCommon.LngValue();
// If the value doesn't fit in the mask, it cannot be narrowed losslessly.
4998 if ((lval & lmask) != lval)
5003 tree->ChangeOperConst(GT_CNS_INT);
5004 tree->gtType = TYP_INT;
5005 tree->gtIntCon.gtIconVal = (int)lval;
// Keep value numbers consistent after rewriting the constant.
5006 if (vnStore != nullptr)
5008 fgValueNumberTreeConst(tree);
5018 ival = tree->gtIntCon.gtIconVal;
5037 #ifdef _TARGET_64BIT_
5044 #endif // _TARGET_64BIT_
5049 if ((ival & imask) != ival)
5054 #ifdef _TARGET_64BIT_
5057 tree->gtType = TYP_INT;
5058 tree->gtIntCon.gtIconVal = (int)ival;
5059 if (vnStore != nullptr)
5061 fgValueNumberTreeConst(tree);
5064 #endif // _TARGET_64BIT_
5068 /* Operands that are in memory can usually be narrowed
5069 simply by changing their gtType */
5072 /* We only allow narrowing long -> int for a GT_LCL_VAR */
5073 if (dstSize == sizeof(int))
5086 noway_assert(doit == false);
5090 if (kind & (GTK_BINOP | GTK_UNOP))
5093 op1 = tree->gtOp.gtOp1;
5095 op2 = tree->gtOp.gtOp2;
5097 switch (tree->gtOper)
// GT_AND (presumably, from the comment below): a narrowable constant operand
// bounds the result, so the whole AND fits in the narrower type.
5100 noway_assert(genActualType(tree->gtType) == genActualType(op2->gtType));
5102 // Is op2 a small constant than can be narrowed into dstt?
5103 // if so the result of the GT_AND will also fit into 'dstt' and can be narrowed
5104 if ((op2->gtOper == GT_CNS_INT) && optNarrowTree(op2, srct, dstt, NoVNPair, false))
5106 // We will change the type of the tree and narrow op2
5110 tree->gtType = genActualType(dstt);
5111 tree->SetVNs(vnpNarrow);
5113 optNarrowTree(op2, srct, dstt, NoVNPair, true);
5114 // We may also need to cast away the upper bits of op1
5117 assert(tree->gtType == TYP_INT);
5118 op1 = gtNewCastNode(TYP_INT, op1, TYP_INT);
5120 op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
5122 tree->gtOp.gtOp1 = op1;
// Overflow-checked or small-typed arithmetic cannot be narrowed safely.
5133 if (tree->gtOverflow() || varTypeIsSmall(dstt))
5135 noway_assert(doit == false);
5143 noway_assert(genActualType(tree->gtType) == genActualType(op1->gtType));
5144 noway_assert(genActualType(tree->gtType) == genActualType(op2->gtType));
// Both operands must narrow, and neither may be a live CSE candidate.
5146 if (gtIsActiveCSE_Candidate(op1) || gtIsActiveCSE_Candidate(op2) ||
5147 !optNarrowTree(op1, srct, dstt, NoVNPair, doit) || !optNarrowTree(op2, srct, dstt, NoVNPair, doit))
5149 noway_assert(doit == false);
5153 /* Simply change the type of the tree */
5157 if (tree->gtOper == GT_MUL && (tree->gtFlags & GTF_MUL_64RSLT))
5159 tree->gtFlags &= ~GTF_MUL_64RSLT;
5162 tree->gtType = genActualType(dstt);
5163 tree->SetVNs(vnpNarrow);
5171 /* Simply change the type of the tree */
5173 if (doit && (dstSize <= genTypeSize(tree->gtType)))
5175 tree->gtType = genSignedType(dstt);
5176 tree->SetVNs(vnpNarrow);
5178 /* Make sure we don't mess up the variable type */
5179 if ((oper == GT_LCL_VAR) || (oper == GT_LCL_FLD))
5181 tree->gtFlags |= GTF_VAR_CAST;
5194 /* These can always be narrowed since they only represent 0 or 1 */
// GT_CAST handling: decide based on the cast's source size vs. target size.
5199 var_types cast = tree->CastToType();
5200 var_types oprt = op1->TypeGet();
5201 unsigned oprSize = genTypeSize(oprt);
// Cannot narrow across the integral / non-integral boundary.
5208 if (varTypeIsIntegralOrI(dstt) != varTypeIsIntegralOrI(oprt))
5213 if (tree->gtOverflow())
5218 /* Is this a cast from the type we're narrowing to or a smaller one? */
5220 if (oprSize <= dstSize)
5222 /* Bash the target type of the cast */
5226 dstt = genSignedType(dstt);
5228 if (oprSize == dstSize)
5230 // Same size: change the CAST into a NOP
5231 tree->ChangeOper(GT_NOP);
5232 tree->gtType = dstt;
5233 tree->gtOp.gtOp2 = nullptr;
5234 tree->gtVNPair = op1->gtVNPair; // Set to op1's ValueNumber
5238 // oprSize is smaller
5239 assert(oprSize < dstSize);
5241 // Change the CastToType in the GT_CAST node
5242 tree->CastToType() = dstt;
5244 // The result type of a GT_CAST is never a small type.
5245 // Use genActualType to widen dstt when it is a small types.
5246 tree->gtType = genActualType(dstt);
5247 tree->SetVNs(vnpNarrow);
// GT_COMMA-like case (presumably): only the value-producing operand matters.
5257 if (!gtIsActiveCSE_Candidate(op2) && optNarrowTree(op2, srct, dstt, vnpNarrow, doit))
5259 /* Simply change the type of the tree */
5263 tree->gtType = genActualType(dstt);
5264 tree->SetVNs(vnpNarrow);
5271 noway_assert(doit == false);
5279 /*****************************************************************************
 *
5281  * The following logic figures out whether the given variable is assigned
5282  * somewhere in a list of basic blocks (or in an entire loop).
 *  Tree-walk callback: records into the isVarAssgDsc (passed via
 *  pCallbackData) which locals are assigned, which indirection kinds are
 *  written, and what call interference occurs.
 */
5285 Compiler::fgWalkResult Compiler::optIsVarAssgCB(GenTreePtr* pTree, fgWalkData* data)
5287 GenTreePtr tree = *pTree;
5289 if (tree->OperKind() & GTK_ASGOP)
5291 GenTreePtr dest = tree->gtOp.gtOp1;
5292 genTreeOps destOper = dest->OperGet();
5294 isVarAssgDsc* desc = (isVarAssgDsc*)data->pCallbackData;
5295 assert(desc && desc->ivaSelf == desc);
5297 if (destOper == GT_LCL_VAR)
5299 unsigned tvar = dest->gtLclVarCommon.gtLclNum;
// Track the assigned local in the mask if it fits; otherwise mark the
// mask incomplete so callers know the set is an under-approximation.
5300 if (tvar < lclMAX_ALLSET_TRACKED)
5302 AllVarSetOps::AddElemD(data->compiler, desc->ivaMaskVal, tvar);
5306 desc->ivaMaskIncomplete = true;
// If this is the variable the caller asked about (and not the tree to
// skip), report it (presumably WALK_ABORT in the elided lines).
5309 if (tvar == desc->ivaVar)
5311 if (tree != desc->ivaSkip)
5317 else if (destOper == GT_LCL_FLD)
5319 /* We can't track every field of every var. Moreover, indirections
5320 may access different parts of the var as different (but
5321 overlapping) fields. So just treat them as indirect accesses */
5323 // unsigned lclNum = dest->gtLclFld.gtLclNum;
5324 // noway_assert(lvaTable[lclNum].lvAddrTaken);
5326 varRefKinds refs = varTypeIsGC(tree->TypeGet()) ? VR_IND_REF : VR_IND_SCL;
5327 desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | refs);
5329 else if (destOper == GT_CLS_VAR)
5331 desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | VR_GLB_VAR);
5333 else if (destOper == GT_IND)
5335 /* Set the proper indirection bits */
5337 varRefKinds refs = varTypeIsGC(tree->TypeGet()) ? VR_IND_REF : VR_IND_SCL;
5338 desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | refs);
// Calls: record the strongest interference kind seen so far.
5341 else if (tree->gtOper == GT_CALL)
5343 isVarAssgDsc* desc = (isVarAssgDsc*)data->pCallbackData;
5344 assert(desc && desc->ivaSelf == desc);
5346 desc->ivaMaskCall = optCallInterf(tree);
5349 return WALK_CONTINUE;
5352 /*****************************************************************************/
// optIsVarAssigned: Return whether local "var" is assigned anywhere in the
// blocks [beg..end], ignoring the single tree "skip". Drives optIsVarAssgCB
// over every statement of each block.
5354 bool Compiler::optIsVarAssigned(BasicBlock* beg, BasicBlock* end, GenTreePtr skip, unsigned var)
5359 desc.ivaSkip = skip;
5361 desc.ivaSelf = &desc;
5364 desc.ivaMaskCall = CALLINT_NONE;
5365 AllVarSetOps::AssignNoCopy(this, desc.ivaMaskVal, AllVarSetOps::MakeEmpty(this));
5371 for (GenTreeStmt* stmt = beg->firstStmt(); stmt; stmt = stmt->gtNextStmt)
5373 noway_assert(stmt->gtOper == GT_STMT);
// Walk aborts (returns non-zero) when the callback finds an assignment to 'var'.
5374 if (fgWalkTreePre(&stmt->gtStmtExpr, optIsVarAssgCB, &desc))
5396 /*****************************************************************************/
// optIsSetAssgLoop: Determine whether loop "lnum" assigns any variable in
// "vars" or performs any indirection of kind "inds", caching the per-loop
// assigned-variable summary in the loop table (LPFLG_ASGVARS_YES/INC).
// Non-zero return means there is (possible) interference.
5397 int Compiler::optIsSetAssgLoop(unsigned lnum, ALLVARSET_VALARG_TP vars, varRefKinds inds)
5401 /* Get hold of the loop descriptor */
5403 noway_assert(lnum < optLoopCount);
5404 loop = optLoopTable + lnum;
5406 /* Do we already know what variables are assigned within this loop? */
5408 if (!(loop->lpFlags & LPFLG_ASGVARS_YES))
5415 /* Prepare the descriptor used by the tree walker call-back */
5417 desc.ivaVar = (unsigned)-1;
5418 desc.ivaSkip = nullptr;
5420 desc.ivaSelf = &desc;
5422 AllVarSetOps::AssignNoCopy(this, desc.ivaMaskVal, AllVarSetOps::MakeEmpty(this));
5423 desc.ivaMaskInd = VR_NONE;
5424 desc.ivaMaskCall = CALLINT_NONE;
5425 desc.ivaMaskIncomplete = false;
5427 /* Now walk all the statements of the loop */
5429 beg = loop->lpHead->bbNext;
5430 end = loop->lpBottom;
5432 for (/**/; /**/; beg = beg->bbNext)
5436 for (GenTreeStmt* stmt = beg->FirstNonPhiDef(); stmt; stmt = stmt->gtNextStmt)
5438 noway_assert(stmt->gtOper == GT_STMT);
5439 fgWalkTreePre(&stmt->gtStmtExpr, optIsVarAssgCB, &desc)
5441 if (desc.ivaMaskIncomplete)
// Too many locals to track exactly: remember the summary is incomplete.
5443 loop->lpFlags |= LPFLG_ASGVARS_INC;
// Cache the computed summary on the loop descriptor.
5453 AllVarSetOps::Assign(this, loop->lpAsgVars, desc.ivaMaskVal);
5454 loop->lpAsgInds = desc.ivaMaskInd;
5455 loop->lpAsgCall = desc.ivaMaskCall;
5457 /* Now we know what variables are assigned in the loop */
5459 loop->lpFlags |= LPFLG_ASGVARS_YES;
5462 /* Now we can finally test the caller's mask against the loop's */
5463 if (!AllVarSetOps::IsEmptyIntersection(this, loop->lpAsgVars, vars) || (loop->lpAsgInds & inds))
// Also account for interference from calls made inside the loop.
5468 switch (loop->lpAsgCall)
5472 /* Can't hoist if the call might have side effect on an indirection. */
5474 if (loop->lpAsgInds != VR_NONE)
5481 case CALLINT_REF_INDIRS:
5483 /* Can't hoist if the call might have side effect on an ref indirection. */
5485 if (loop->lpAsgInds & VR_IND_REF)
5492 case CALLINT_SCL_INDIRS:
5494 /* Can't hoist if the call might have side effect on an non-ref indirection. */
5496 if (loop->lpAsgInds & VR_IND_SCL)
5503 case CALLINT_ALL_INDIRS:
5505 /* Can't hoist if the call might have side effect on any indirection. */
5507 if (loop->lpAsgInds & (VR_IND_REF | VR_IND_SCL))
5516 /* Other helpers kill nothing */
5521 noway_assert(!"Unexpected lpAsgCall value");
// optPerformHoistExpr: hoist a copy of 'origExpr' out of loop 'lnum'.
//
// Clones the expression (marking the clone with GTF_MAKE_CSE so the later CSE
// phase will commonize it with the original left in place), wraps the clone as
// an unused value if it is not an assignment, morphs it, and appends it as the
// last statement of the loop's pre-header block (created on demand by
// fgCreateLoopPreHeader). Also updates node test-data annotations and the
// LOOP_HOIST_STATS counters.
//
// NOTE(review): this extraction has elided lines (braces, #ifdef DEBUG guards,
// else keywords); comments below describe only what is visible.
5527 void Compiler::optPerformHoistExpr(GenTreePtr origExpr, unsigned lnum)
// Debug dump of the tree being hoisted -- presumably inside an #ifdef DEBUG /
// verbose guard whose lines are elided here; confirm against full source.
5532 printf("\nHoisting a copy of ");
5533 printTreeID(origExpr);
5534 printf(" into PreHeader for loop L%02u <BB%02u..BB%02u>:\n", lnum, optLoopTable[lnum].lpFirst->bbNum,
5535 optLoopTable[lnum].lpBottom->bbNum);
5536 gtDispTree(origExpr);
5541 // This loop has to be in a form that is approved for hoisting.
5542 assert(optLoopTable[lnum].lpFlags & LPFLG_HOISTABLE);
5544 // Create a copy of the expression and mark it for CSE's.
5545 GenTreePtr hoistExpr = gtCloneExpr(origExpr, GTF_MAKE_CSE);
5547 // At this point we should have a cloned expression, marked with the GTF_MAKE_CSE flag
5548 assert(hoistExpr != origExpr);
5549 assert(hoistExpr->gtFlags & GTF_MAKE_CSE);
5551 GenTreePtr hoist = hoistExpr;
5552 // The value of the expression isn't used (unless it's an assignment).
5553 if (hoistExpr->OperGet() != GT_ASG)
5555 hoist = gtUnusedValNode(hoistExpr);
5558 /* Put the statement in the preheader */
5560 fgCreateLoopPreHeader(lnum);
// After fgCreateLoopPreHeader, lpHead is the pre-header, which is a
// fall-through (BBJ_NONE) block -- asserted below.
5562 BasicBlock* preHead = optLoopTable[lnum].lpHead;
5563 assert(preHead->bbJumpKind == BBJ_NONE);
5565 // fgMorphTree and lvaRecursiveIncRefCounts requires that compCurBB be the block that contains
5566 // (or in this case, will contain) the expression.
5567 compCurBB = preHead;
5569 // Increment the ref counts of any local vars appearing in "hoist".
5570 // Note that we need to do this before fgMorphTree() as fgMorph() could constant
5571 // fold away some of the lcl vars referenced by "hoist".
5572 lvaRecursiveIncRefCounts(hoist);
5574 hoist = fgMorphTree(hoist);
5576 GenTreePtr hoistStmt = gtNewStmt(hoist);
5577 hoistStmt->gtFlags |= GTF_STMT_CMPADD;
5579 /* simply append the statement at the end of the preHead's list */
5581 GenTreePtr treeList = preHead->bbTreeList;
// Non-empty list case: bbTreeList->gtPrev points at the last statement of the
// circular-prev / null-terminated-next statement list.
5585 /* append after last statement */
5587 GenTreePtr last = treeList->gtPrev;
5588 assert(last->gtNext == nullptr);
5590 last->gtNext = hoistStmt;
5591 hoistStmt->gtPrev = last;
5592 treeList->gtPrev = hoistStmt;
5596 /* Empty pre-header - store the single statement in the block */
5598 preHead->bbTreeList = hoistStmt;
5599 hoistStmt->gtPrev = hoistStmt;
5602 hoistStmt->gtNext = nullptr;
5607 printf("This hoisted copy placed in PreHeader (BB%02u):\n", preHead->bbNum);
// If the statement list is already threaded into execution order, cost and
// sequence info for the new statement must be (re)computed.
5612 if (fgStmtListThreaded)
5614 gtSetStmtInfo(hoistStmt);
5615 fgSetStmtSeq(hoistStmt);
// Test-data bookkeeping: validate TL_LoopHoist annotations against the actual
// hoist depth, then re-label the clone as a required CSE definition.
5619 if (m_nodeTestData != nullptr)
5622 // What is the depth of the loop "lnum"?
5624 unsigned lnumIter = lnum;
5625 while (optLoopTable[lnumIter].lpParent != BasicBlock::NOT_IN_LOOP)
5628 lnumIter = optLoopTable[lnumIter].lpParent;
5631 NodeToTestDataMap* testData = GetNodeTestData();
5633 TestLabelAndNum tlAndN;
5634 if (testData->Lookup(origExpr, &tlAndN) && tlAndN.m_tl == TL_LoopHoist)
// m_num == -1 marks "do not hoist"; reaching here means the annotation was
// violated (the elided lines presumably report/assert the failure).
5636 if (tlAndN.m_num == -1)
5639 printTreeID(origExpr);
5640 printf(" was declared 'do not hoist', but is being hoisted.\n");
5643 else if (tlAndN.m_num != depth)
5646 printTreeID(origExpr);
5647 printf(" was declared as hoistable from loop at nesting depth %d; actually hoisted from loop at depth "
5649 tlAndN.m_num, depth);
5654 // We've correctly hoisted this, so remove the annotation. Later, we'll check for any remaining "must
5655 // hoist" annotations.
5656 testData->Remove(origExpr);
5657 // Now we insert an annotation to make sure that "hoistExpr" is actually CSE'd.
5658 tlAndN.m_tl = TL_CSE_Def;
5659 tlAndN.m_num = m_loopHoistCSEClass++;
5660 testData->Set(hoistExpr, tlAndN);
5666 #if LOOP_HOIST_STATS
// Count each loop at most once, plus a global total of hoisted expressions.
5667 if (!m_curLoopHasHoistedExpression)
5669 m_loopsWithHoistedExpressions++;
5670 m_curLoopHasHoistedExpression = true;
5672 m_totalHoistedExpressions++;
5673 #endif // LOOP_HOIST_STATS
// optHoistLoopCode: entry point for the loop-hoisting phase.
//
// Walks all top-level loop nests (outer-to-inner via optHoistLoopNest) so that
// invariant expressions are hoisted out of the outermost loop in which they
// are invariant. In DEBUG-style builds it also verifies the flowgraph and
// checks that every "must hoist" test-data annotation was satisfied.
//
// NOTE(review): interior lines (braces, #ifdef guards, early returns) are
// elided in this extraction.
5676 void Compiler::optHoistLoopCode()
5678 // If we don't have any loops in the method then take an early out now.
5679 if (optLoopCount == 0)
// JitNoHoist config knob -- the elided lines presumably use it to disable the
// phase; confirm against full source.
5685 unsigned jitNoHoist = JitConfig.JitNoHoist();
5693 // The code in this #if has been useful in debugging loop cloning issues, by
5694 // enabling selective enablement of the loop cloning optimization according to
// Method-hash range filter driven by environment variables, for bisecting
// which method a hoisting change affects (debug-only scaffolding).
5697 unsigned methHash = info.compMethodHash();
5698 char* lostr = getenv("loophoisthashlo");
5699 unsigned methHashLo = 0;
5702 sscanf_s(lostr, "%x", &methHashLo);
5703 // methHashLo = (unsigned(atoi(lostr)) << 2); // So we don't have to use negative numbers.
5705 char* histr = getenv("loophoisthashhi");
5706 unsigned methHashHi = UINT32_MAX;
5709 sscanf_s(histr, "%x", &methHashHi);
5710 // methHashHi = (unsigned(atoi(histr)) << 2); // So we don't have to use negative numbers.
5712 if (methHash < methHashLo || methHash > methHashHi)
5714 printf("Doing loop hoisting in %s (0x%x).\n", info.compFullName, methHash);
5716 #endif // 0 -- debugging loop cloning issues
5721 printf("\n*************** In optHoistLoopCode()\n");
5722 printf("Blocks/Trees before phase\n");
5723 fgDispBasicBlocks(true);
5728 // Consider all the loop nests, in outer-to-inner order (thus hoisting expressions outside the largest loop in which
5729 // they are invariant.)
5730 LoopHoistContext hoistCtxt(this);
5731 for (unsigned lnum = 0; lnum < optLoopCount; lnum++)
5733 if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED)
// Only start recursion at loops with no parent; nested loops are reached
// through optHoistLoopNest's own child walk.
5738 if (optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP)
5740 optHoistLoopNest(lnum, &hoistCtxt);
5749 printf("Blocks/Trees after optHoistLoopCode() modified flowgraph\n");
5750 fgDispBasicBlocks(true);
5754 // Make sure that the predecessor lists are accurate
5755 fgDebugCheckBBlist();
5760 // Test Data stuff..
5761 // If we have no test data, early out.
5762 if (m_nodeTestData == nullptr)
// Scan remaining annotations: any TL_LoopHoist entry with m_num >= 0 still
// present means a "must hoist" expression was never hoisted -- report it.
5766 NodeToTestDataMap* testData = GetNodeTestData();
5767 for (NodeToTestDataMap::KeyIterator ki = testData->Begin(); !ki.Equal(testData->End()); ++ki)
5769 TestLabelAndNum tlAndN;
5770 GenTreePtr node = ki.Get();
5771 bool b = testData->Lookup(node, &tlAndN);
5773 if (tlAndN.m_tl != TL_LoopHoist)
5777 // Otherwise, it is a loop hoist annotation.
5778 assert(tlAndN.m_num < 100); // >= 100 indicates nested static field address, should already have been moved.
5779 if (tlAndN.m_num >= 0)
5783 printf(" was declared 'must hoist', but has not been hoisted.\n");
// optHoistLoopNest: hoist from loop 'lnum' and then, recursively, from every
// loop nested inside it.
//
// Expressions hoisted out of 'lnum' are temporarily added to
// hoistCtxt->m_hoistedInParentLoops while the children are processed (so a
// child does not re-hoist the same value number), and removed again afterward.
5790 void Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt)
5792 // Do this loop, then recursively do all nested loops.
5793 CLANG_FORMAT_COMMENT_ANCHOR;
5795 #if LOOP_HOIST_STATS
// Reset the per-loop stats flag consumed by optPerformHoistExpr.
5797 m_curLoopHasHoistedExpression = false;
5798 m_loopsConsidered++;
5799 #endif // LOOP_HOIST_STATS
5801 optHoistThisLoop(lnum, hoistCtxt);
// Take ownership of the set of VNs hoisted in this loop; the context's
// "current loop" slot is cleared for the children.
5803 VNSet* hoistedInCurLoop = hoistCtxt->ExtractHoistedInCurLoop();
5805 if (optLoopTable[lnum].lpChild != BasicBlock::NOT_IN_LOOP)
5807 // Add the ones hoisted in "lnum" to "hoistedInParents" for any nested loops.
5808 // TODO-Cleanup: we should have a set abstraction for loops.
5809 if (hoistedInCurLoop != nullptr)
5811 for (VNSet::KeyIterator keys = hoistedInCurLoop->Begin(); !keys.Equal(hoistedInCurLoop->End()); ++keys)
5815 assert(!hoistCtxt->m_hoistedInParentLoops.Lookup(keys.Get(), &b));
5817 hoistCtxt->m_hoistedInParentLoops.Set(keys.Get(), true);
// Recurse over the sibling-linked list of child loops.
5821 for (unsigned child = optLoopTable[lnum].lpChild; child != BasicBlock::NOT_IN_LOOP;
5822 child = optLoopTable[child].lpSibling)
5824 optHoistLoopNest(child, hoistCtxt);
5828 // TODO-Cleanup: we should have a set abstraction for loops.
5829 if (hoistedInCurLoop != nullptr)
5831 for (VNSet::KeyIterator keys = hoistedInCurLoop->Begin(); !keys.Equal(hoistedInCurLoop->End()); ++keys)
5833 // Note that we asserted when we added these that they hadn't been members, so removing is appropriate.
5834 hoistCtxt->m_hoistedInParentLoops.Remove(keys.Get());
// optHoistThisLoop: attempt hoisting for a single loop 'lnum'.
//
// Screens the loop for legality (do-while shape, head dominates entry, same
// try region, not inside a catch handler), marks it LPFLG_HOISTABLE, computes
// the register-pressure inputs used by the profitability heuristic
// (lpLoopVarCount / lpVarInOutCount, with separate FP counts and doubled
// counts for TYP_LONG on 32-bit targets), then walks the definitely-executed
// blocks of the loop looking for hoistable expressions.
//
// NOTE(review): braces / early-return bodies are elided in this extraction.
5840 void Compiler::optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt)
5842 LoopDsc* pLoopDsc = &optLoopTable[lnum];
5844 /* If loop was removed continue */
5846 if (pLoopDsc->lpFlags & LPFLG_REMOVED)
5851 /* Get the head and tail of the loop */
5853 BasicBlock* head = pLoopDsc->lpHead;
5854 BasicBlock* tail = pLoopDsc->lpBottom;
5855 BasicBlock* lbeg = pLoopDsc->lpEntry;
5858 // We must have a do-while loop
5859 if ((pLoopDsc->lpFlags & LPFLG_DO_WHILE) == 0)
5864 // The loop-head must dominate the loop-entry.
5865 // TODO-CQ: Couldn't we make this true if it's not?
5866 if (!fgDominate(head, lbeg))
5871 // if lbeg is the start of a new try block then we won't be able to hoist
5872 if (!BasicBlock::sameTryRegion(head, lbeg))
5877 // We don't bother hoisting when inside of a catch block
5878 if ((lbeg->bbCatchTyp != BBCT_NONE) && (lbeg->bbCatchTyp != BBCT_FINALLY))
// All legality checks passed: the loop is a valid hoisting target.
5883 pLoopDsc->lpFlags |= LPFLG_HOISTABLE;
5885 unsigned begn = lbeg->bbNum;
5886 unsigned endn = tail->bbNum;
5888 // Ensure the per-loop sets/tables are empty.
5889 hoistCtxt->m_curLoopVnInvariantCache.RemoveAll();
5894 printf("optHoistLoopCode for loop L%02u <BB%02u..BB%02u>:\n", lnum, begn, endn);
5895 printf(" Loop body %s a call\n", pLoopDsc->lpContainsCall ? "contains" : "does not contain");
// loopVars = variables both live across the loop boundary (in/out) and
// actually used or defined inside the loop.
5899 VARSET_TP VARSET_INIT_NOCOPY(loopVars, VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, pLoopDsc->lpVarUseDef));
5901 pLoopDsc->lpVarInOutCount = VarSetOps::Count(this, pLoopDsc->lpVarInOut);
5902 pLoopDsc->lpLoopVarCount = VarSetOps::Count(this, loopVars);
5903 pLoopDsc->lpHoistedExprCount = 0;
5905 #ifndef _TARGET_64BIT_
5906 unsigned longVarsCount = VarSetOps::Count(this, lvaLongVars);
5908 if (longVarsCount > 0)
5910 // Since 64-bit variables take up two registers on 32-bit targets, we increase
5911 // the Counts such that each TYP_LONG variable counts twice.
5913 VARSET_TP VARSET_INIT_NOCOPY(loopLongVars, VarSetOps::Intersection(this, loopVars, lvaLongVars));
5914 VARSET_TP VARSET_INIT_NOCOPY(inOutLongVars, VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, lvaLongVars));
5919 printf("\n LONGVARS(%d)=", VarSetOps::Count(this, lvaLongVars));
5920 lvaDispVarSet(lvaLongVars);
5923 pLoopDsc->lpLoopVarCount += VarSetOps::Count(this, loopLongVars);
5924 pLoopDsc->lpVarInOutCount += VarSetOps::Count(this, inOutLongVars);
5926 #endif // !_TARGET_64BIT_
5931 printf("\n USEDEF (%d)=", VarSetOps::Count(this, pLoopDsc->lpVarUseDef));
5932 lvaDispVarSet(pLoopDsc->lpVarUseDef);
5934 printf("\n INOUT (%d)=", pLoopDsc->lpVarInOutCount);
5935 lvaDispVarSet(pLoopDsc->lpVarInOut);
5937 printf("\n LOOPVARS(%d)=", pLoopDsc->lpLoopVarCount);
5938 lvaDispVarSet(loopVars);
// Split floating-point variables out of the integer counts: FP and integer
// expressions compete for different register files.
5943 unsigned floatVarsCount = VarSetOps::Count(this, lvaFloatVars);
5945 if (floatVarsCount > 0)
5947 VARSET_TP VARSET_INIT_NOCOPY(loopFPVars, VarSetOps::Intersection(this, loopVars, lvaFloatVars));
5948 VARSET_TP VARSET_INIT_NOCOPY(inOutFPVars, VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, lvaFloatVars));
5950 pLoopDsc->lpLoopVarFPCount = VarSetOps::Count(this, loopFPVars);
5951 pLoopDsc->lpVarInOutFPCount = VarSetOps::Count(this, inOutFPVars);
5952 pLoopDsc->lpHoistedFPExprCount = 0;
5954 pLoopDsc->lpLoopVarCount -= pLoopDsc->lpLoopVarFPCount;
5955 pLoopDsc->lpVarInOutCount -= pLoopDsc->lpVarInOutFPCount;
5960 printf(" INOUT-FP(%d)=", pLoopDsc->lpVarInOutFPCount);
5961 lvaDispVarSet(inOutFPVars);
5963 printf("\n LOOPV-FP(%d)=", pLoopDsc->lpLoopVarFPCount);
5964 lvaDispVarSet(loopFPVars);
5968 else // (floatVarsCount == 0)
5970 pLoopDsc->lpLoopVarFPCount = 0;
5971 pLoopDsc->lpVarInOutFPCount = 0;
5972 pLoopDsc->lpHoistedFPExprCount = 0;
5975 // Find the set of definitely-executed blocks.
5976 // Ideally, the definitely-executed blocks are the ones that post-dominate the entry block.
5977 // Until we have post-dominators, we'll special-case for single-exit blocks.
5978 ExpandArrayStack<BasicBlock*> defExec(getAllocatorLoopHoist());
5979 if (pLoopDsc->lpFlags & LPFLG_ONE_EXIT)
// Single-exit loop: blocks dominating the sole exit (within the loop) are
// definitely executed on every complete iteration.
5981 assert(pLoopDsc->lpExit != nullptr);
5982 BasicBlock* cur = pLoopDsc->lpExit;
5983 // Push dominators, until we reach "entry" or exit the loop.
5984 while (cur != nullptr && pLoopDsc->lpContains(cur) && cur != pLoopDsc->lpEntry)
5989 // If we didn't reach the entry block, give up and *just* push the entry block.
5990 if (cur != pLoopDsc->lpEntry)
5994 defExec.Push(pLoopDsc->lpEntry);
5996 else // More than one exit
5998 // We'll assume that only the entry block is definitely executed.
5999 // We could in the future do better.
6000 defExec.Push(pLoopDsc->lpEntry);
// Pop in reverse push order so each dominator is processed before the blocks
// it dominates.
6003 while (defExec.Size() > 0)
6005 // Consider in reverse order: dominator before dominatee.
6006 BasicBlock* blk = defExec.Pop();
6007 optHoistLoopExprsForBlock(blk, lnum, hoistCtxt);
6011 // Hoist any expressions in "blk" that are invariant in loop "lnum" outside of "blk" and into a PreHead for loop "lnum".
6012 void Compiler::optHoistLoopExprsForBlock(BasicBlock* blk, unsigned lnum, LoopHoistContext* hoistCtxt)
6014 LoopDsc* pLoopDsc = &optLoopTable[lnum];
// True only while processing the loop-entry block and before any statement
// with a possible global side effect has been seen (tracked by
// optHoistLoopExprsForTree via the pointer passed below).
6015 bool firstBlockAndBeforeSideEffect = (blk == pLoopDsc->lpEntry);
6016 unsigned blkWeight = blk->getBBWeight(this);
6021 printf(" optHoistLoopExprsForBlock BB%02u (weight=%6s) of loop L%02u <BB%02u..BB%02u>, firstBlock is %s\n",
6022 blk->bbNum, refCntWtd2str(blkWeight), lnum, pLoopDsc->lpFirst->bbNum, pLoopDsc->lpBottom->bbNum,
6023 firstBlockAndBeforeSideEffect ? "true" : "false");
6024 if (blkWeight < (BB_UNITY_WEIGHT / 10))
6026 printf(" block weight is too small to perform hoisting.\n");
// Rarely-executed block (< 1/10 of unity weight): hoisting would not pay for
// itself, so bail out. (The check above is the debug-print twin of this one.)
6031 if (blkWeight < (BB_UNITY_WEIGHT / 10))
6033 // Block weight is too small to perform hoisting.
6037 for (GenTreeStmt* stmt = blk->FirstNonPhiDef(); stmt; stmt = stmt->gtNextStmt)
6039 GenTreePtr stmtTree = stmt->gtStmtExpr;
6041 (void)optHoistLoopExprsForTree(stmtTree, lnum, hoistCtxt, &firstBlockAndBeforeSideEffect, &hoistable);
6044 // we will try to hoist the top-level stmtTree
6045 optHoistCandidate(stmtTree, lnum, hoistCtxt);
// optIsProfitableToHoistableTree: register-pressure heuristic deciding whether
// hoisting 'tree' out of loop 'lnum' is worthwhile.
//
// Estimates the registers available for a hoisted value (callee-saved, plus
// callee-trash when the loop makes no calls, less those consumed by earlier
// hoists), using the FP counts for floating-point trees and halving the count
// for TYP_LONG on 32-bit targets. When the loop's live variables would soak up
// all available registers, only hoist expensive trees (gtCostEx thresholds).
//
// Returns:
//    true if the tree passes the profitability thresholds (the elided tail
//    presumably returns true/false accordingly -- confirm against full source).
6050 bool Compiler::optIsProfitableToHoistableTree(GenTreePtr tree, unsigned lnum)
6052 LoopDsc* pLoopDsc = &optLoopTable[lnum];
6054 bool loopContainsCall = pLoopDsc->lpContainsCall;
6057 int hoistedExprCount;
6061 if (varTypeIsFloating(tree->TypeGet()))
6063 hoistedExprCount = pLoopDsc->lpHoistedFPExprCount;
6064 loopVarCount = pLoopDsc->lpLoopVarFPCount;
6065 varInOutCount = pLoopDsc->lpVarInOutFPCount;
6067 availRegCount = CNT_CALLEE_SAVED_FLOAT;
6068 if (!loopContainsCall)
6070 availRegCount += CNT_CALLEE_TRASH_FLOAT - 1;
6073 // For ARM each double takes two FP registers
6074 // For now on ARM we won't track singles/doubles
6075 // and instead just assume that we always have doubles.
// Integer/GC-typed tree: use the integer counts. One callee-saved register is
// reserved (the "- 1") -- presumably for the frame pointer or similar.
6082 hoistedExprCount = pLoopDsc->lpHoistedExprCount;
6083 loopVarCount = pLoopDsc->lpLoopVarCount;
6084 varInOutCount = pLoopDsc->lpVarInOutCount;
6086 availRegCount = CNT_CALLEE_SAVED - 1;
6087 if (!loopContainsCall)
6089 availRegCount += CNT_CALLEE_TRASH - 1;
6091 #ifndef _TARGET_64BIT_
6092 // For our 32-bit targets Long types take two registers.
6093 if (varTypeIsLong(tree->TypeGet()))
6095 availRegCount = (availRegCount + 1) / 2;
6100 // decrement the availRegCount by the count of expression that we have already hoisted.
6101 availRegCount -= hoistedExprCount;
6103 // the variables that are read/written inside the loop should
6104 // always be a subset of the InOut variables for the loop
6105 assert(loopVarCount <= varInOutCount);
6107 // When loopVarCount >= availRegCount we believe that all of the
6108 // available registers will get used to hold LclVars inside the loop.
6109 // This pessimistically assumes that each loopVar has a conflicting
6110 // lifetime with every other loopVar.
6111 // For this case we will hoist the expression only if is profitable
6112 // to place it in a stack home location (gtCostEx >= 2*IND_COST_EX)
6113 // as we believe it will be placed in the stack or one of the other
6114 // loopVars will be spilled into the stack
6116 if (loopVarCount >= availRegCount)
6118 // Don't hoist expressions that are not heavy: tree->gtCostEx < (2*IND_COST_EX)
6119 if (tree->gtCostEx < (2 * IND_COST_EX))
6125 // When varInOutCount < availRegCount we are know that there are
6126 // some available register(s) when we enter the loop body.
6127 // When varInOutCount == availRegCount there often will be a register
6128 // available when we enter the loop body, since a loop often defines a
6129 // LclVar on exit or there is often at least one LclVar that is worth
6130 // spilling to the stack to make way for this hoisted expression.
6131 // So we are willing hoist an expression with gtCostEx == MIN_CSE_COST
6133 if (varInOutCount > availRegCount)
6135 // Don't hoist expressions that barely meet CSE cost requirements: tree->gtCostEx == MIN_CSE_COST
6136 if (tree->gtCostEx <= MIN_CSE_COST + 1)
6146 // This function returns true if 'tree' is a loop invariant expression.
6147 // It also sets '*pHoistable' to true if 'tree' can be hoisted into a loop PreHeader block
// Recursive post-order walk: a tree is invariant only if all its children are;
// it is additionally "hoistable" only if it is a legal CSE candidate, any call
// is a pure (and cctor-safe) helper, it cannot throw after the first possible
// side effect, and its liberal VN is invariant in loop 'lnum'. Children that
// are hoistable while the parent is not are hoisted individually at the end.
// The walk also maintains *pFirstBlockAndBeforeSideEffect, clearing it at the
// first heap-mutating call, global-ref assignment, or global copy-block.
6149 bool Compiler::optHoistLoopExprsForTree(
6150 GenTreePtr tree, unsigned lnum, LoopHoistContext* hoistCtxt, bool* pFirstBlockAndBeforeSideEffect, bool* pHoistable)
6152 // First do the children.
6153 // We must keep track of whether each child node was hoistable or not
6155 unsigned nChildren = tree->NumChildren();
6156 bool childrenHoistable[GenTree::MAX_CHILDREN];
6158 // Initialize the array elements for childrenHoistable[] to false
6159 for (unsigned i = 0; i < nChildren; i++)
6161 childrenHoistable[i] = false;
6164 bool treeIsInvariant = true;
6165 for (unsigned childNum = 0; childNum < nChildren; childNum++)
6167 if (!optHoistLoopExprsForTree(tree->GetChild(childNum), lnum, hoistCtxt, pFirstBlockAndBeforeSideEffect,
6168 &childrenHoistable[childNum]))
6170 treeIsInvariant = false;
6174 // If all the children of "tree" are hoistable, then "tree" itself can be hoisted
6176 bool treeIsHoistable = treeIsInvariant;
6178 // But we must see if anything else prevents "tree" from being hoisted.
6180 if (treeIsInvariant)
6182 // Tree must be a suitable CSE candidate for us to be able to hoist it.
6183 treeIsHoistable = optIsCSEcandidate(tree);
6185 // If it's a call, it must be a helper call, and be pure.
6186 // Further, if it may run a cctor, it must be labeled as "Hoistable"
6187 // (meaning it won't run a cctor because the class is not precise-init).
6188 if (treeIsHoistable && tree->OperGet() == GT_CALL)
6190 GenTreeCall* call = tree->AsCall();
6191 if (call->gtCallType != CT_HELPER)
6193 treeIsHoistable = false;
6197 CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
6198 if (!s_helperCallProperties.IsPure(helpFunc))
6200 treeIsHoistable = false;
6202 else if (s_helperCallProperties.MayRunCctor(helpFunc) && (call->gtFlags & GTF_CALL_HOISTABLE) == 0)
6204 treeIsHoistable = false;
6209 if (treeIsHoistable)
6211 if (!(*pFirstBlockAndBeforeSideEffect))
6213 // For now, we give up on an expression that might raise an exception if it is after the
6214 // first possible global side effect (and we assume we're after that if we're not in the first block).
6215 // TODO-CQ: this is when we might do loop cloning.
6217 if ((tree->gtFlags & GTF_EXCEPT) != 0)
6219 treeIsHoistable = false;
6222 // Currently we must give up on reads from static variables (even if we are in the first block).
6224 if (tree->OperGet() == GT_CLS_VAR)
6226 // TODO-CQ: test that fails if we hoist GT_CLS_VAR: JIT\Directed\Languages\ComponentPascal\pi_r.exe
6228 treeIsHoistable = false;
6232 // Is the value of the whole tree loop invariant?
// The result of this VN query presumably feeds treeIsInvariant via elided
// lines; only invariant value numbers survive the check below.
6234 optVNIsLoopInvariant(tree->gtVNPair.GetLiberal(), lnum, &hoistCtxt->m_curLoopVnInvariantCache);
6236 // Is the value of the whole tree loop invariant?
6237 if (!treeIsInvariant)
6239 treeIsHoistable = false;
6243 // Check if we need to set '*pFirstBlockAndBeforeSideEffect' to false.
6244 // If we encounter a tree with a call in it
6245 // or if we see an assignment to global we set it to false.
6247 // If we are already set to false then we can skip these checks
6249 if (*pFirstBlockAndBeforeSideEffect)
6251 // For this purpose, we only care about memory side effects. We assume that expressions will
6252 // be hoisted so that they are evaluated in the same order as they would have been in the loop,
6253 // and therefore throw exceptions in the same order. (So we don't use GTF_GLOBALLY_VISIBLE_SIDE_EFFECTS
6254 // here, since that includes exceptions.)
6257 // If it's a call, it must be a helper call that does not mutate the heap.
6258 // Further, if it may run a cctor, it must be labeled as "Hoistable"
6259 // (meaning it won't run a cctor because the class is not precise-init).
6260 GenTreeCall* call = tree->AsCall();
6261 if (call->gtCallType != CT_HELPER)
6263 *pFirstBlockAndBeforeSideEffect = false;
6267 CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
6268 if (s_helperCallProperties.MutatesHeap(helpFunc))
6270 *pFirstBlockAndBeforeSideEffect = false;
6272 else if (s_helperCallProperties.MayRunCctor(helpFunc) && (call->gtFlags & GTF_CALL_HOISTABLE) == 0)
6274 *pFirstBlockAndBeforeSideEffect = false;
6278 else if (tree->OperIsAssignment())
6280 // If the LHS of the assignment has a global reference, then assume it's a global side effect.
6281 GenTreePtr lhs = tree->gtOp.gtOp1;
6282 if (lhs->gtFlags & GTF_GLOB_REF)
6284 *pFirstBlockAndBeforeSideEffect = false;
6287 else if (tree->OperIsCopyBlkOp())
6289 GenTreePtr args = tree->gtOp.gtOp1;
6290 assert(args->OperGet() == GT_LIST);
6291 if (args->gtOp.gtOp1->gtFlags & GTF_GLOB_REF)
6293 *pFirstBlockAndBeforeSideEffect = false;
6298 // If this 'tree' is hoistable then we return and the caller will
6299 // decide to hoist it as part of larger hoistable expression.
6301 if (!treeIsHoistable)
6303 // We are not hoistable so we will now hoist any hoistable children.
6305 for (unsigned childNum = 0; childNum < nChildren; childNum++)
6307 if (childrenHoistable[childNum])
6309 // We can't hoist the LHS of an assignment, isn't a real use.
6310 if (childNum == 0 && (tree->OperIsAssignment()))
6315 GenTreePtr child = tree->GetChild(childNum);
6317 // We try to hoist this 'child' tree
6318 optHoistCandidate(child, lnum, hoistCtxt);
6323 *pHoistable = treeIsHoistable;
6324 return treeIsInvariant;
// optHoistCandidate: final gatekeeper for hoisting 'tree' out of loop 'lnum'.
//
// Rejects the candidate if there is no enclosing loop, the loop is not marked
// LPFLG_HOISTABLE, the tree is not valid at the loop head, it fails the
// profitability heuristic, or its liberal VN was already hoisted in this loop
// or an enclosing one. Otherwise performs the hoist, bumps the per-loop
// hoisted-expression counters, and records the VN in the current-loop set.
6327 void Compiler::optHoistCandidate(GenTreePtr tree, unsigned lnum, LoopHoistContext* hoistCtxt)
6329 if (lnum == BasicBlock::NOT_IN_LOOP)
6331 // The hoisted expression isn't valid at any loop head so don't hoist this expression.
6335 // The outer loop also must be suitable for hoisting...
6336 if ((optLoopTable[lnum].lpFlags & LPFLG_HOISTABLE) == 0)
6341 // If the hoisted expression isn't valid at this loop head then break
6342 if (!optTreeIsValidAtLoopHead(tree, lnum))
6347 // It must pass the hoistable profitablity tests for this loop level
6348 if (!optIsProfitableToHoistableTree(tree, lnum))
// De-dupe by value number: skip if an ancestor loop or this loop already
// hoisted an expression with the same liberal VN.
6354 if (hoistCtxt->m_hoistedInParentLoops.Lookup(tree->gtVNPair.GetLiberal(), &b))
6356 // already hoisted in a parent loop, so don't hoist this expression.
6360 if (hoistCtxt->GetHoistedInCurLoop(this)->Lookup(tree->gtVNPair.GetLiberal(), &b))
6362 // already hoisted this expression in the current loop, so don't hoist this expression.
6366 // Expression can be hoisted
6367 optPerformHoistExpr(tree, lnum);
6369 // Increment lpHoistedExprCount or lpHoistedFPExprCount
6370 if (!varTypeIsFloating(tree->TypeGet()))
6372 optLoopTable[lnum].lpHoistedExprCount++;
6373 #ifndef _TARGET_64BIT_
6374 // For our 32-bit targets Long types take two registers.
6375 if (varTypeIsLong(tree->TypeGet()))
6377 optLoopTable[lnum].lpHoistedExprCount++;
6381 else // Floating point expr hoisted
6383 optLoopTable[lnum].lpHoistedFPExprCount++;
6386 // Record the hoisted expression in hoistCtxt
6387 hoistCtxt->GetHoistedInCurLoop(this)->Set(tree->gtVNPair.GetLiberal(), true);
// optVNIsLoopInvariant: is value number 'vn' invariant with respect to loop
// 'lnum'?
//
// Constants are trivially invariant. Phi definitions (and heap phi defs) are
// invariant iff the defining block is outside the loop. Other VN functions are
// invariant iff all their arguments are. Results are memoized per loop in
// 'loopVnInvariantCache'.
6390 bool Compiler::optVNIsLoopInvariant(ValueNum vn, unsigned lnum, VNToBoolMap* loopVnInvariantCache)
6392 // If it is not a VN, is not loop-invariant.
6393 if (vn == ValueNumStore::NoVN)
6398 // We'll always short-circuit constants.
6399 if (vnStore->IsVNConstant(vn) || vn == vnStore->VNForVoid())
6404 // If we've done this query previously, don't repeat.
6405 bool previousRes = false;
6406 if (loopVnInvariantCache->Lookup(vn, &previousRes))
6413 if (vnStore->GetVNFunc(vn, &funcApp))
6415 if (funcApp.m_func == VNF_PhiDef)
6417 // First, make sure it's a "proper" phi -- the definition is a Phi application.
6418 VNFuncApp phiDefValFuncApp;
6419 if (!vnStore->GetVNFunc(funcApp.m_args[2], &phiDefValFuncApp) || phiDefValFuncApp.m_func != VNF_Phi)
6421 // It's not *really* a definition, rather a pass-through of some other VN.
6422 // (This could occur, say if both sides of an if-then-else diamond made the
6423 // same assignment to a variable.)
6424 res = optVNIsLoopInvariant(funcApp.m_args[2], lnum, loopVnInvariantCache)
6428 // Is the definition within the loop? If so, is not loop-invariant.
6429 unsigned lclNum = funcApp.m_args[0];
6430 unsigned ssaNum = funcApp.m_args[1];
6431 LclSsaVarDsc* ssaDef = lvaTable[lclNum].GetPerSsaData(ssaNum);
6432 res = !optLoopContains(lnum, ssaDef->m_defLoc.m_blk->bbNatLoopNum);
6435 else if (funcApp.m_func == VNF_PhiHeapDef)
// Heap phi def: arg 0 encodes the defining block as a constant pointer.
6437 BasicBlock* defnBlk = reinterpret_cast<BasicBlock*>(vnStore->ConstantValue<ssize_t>(funcApp.m_args[0]));
6438 res = !optLoopContains(lnum, defnBlk->bbNatLoopNum);
// Ordinary VN function: invariant only if every argument VN is invariant.
6442 for (unsigned i = 0; i < funcApp.m_arity; i++)
6444 // TODO-CQ: We need to either make sure that *all* VN functions
6445 // always take VN args, or else have a list of arg positions to exempt, as implicitly
6447 if (!optVNIsLoopInvariant(funcApp.m_args[i], lnum, loopVnInvariantCache))
6457 // Non-function "new, unique" VN's may be annotated with the loop nest where
6458 // their definition occurs.
6459 BasicBlock::loopNumber vnLoopNum = vnStore->LoopOfVN(vn);
6461 if (vnLoopNum == MAX_LOOP_NUM)
6467 res = !optLoopContains(lnum, vnLoopNum);
// Memoize before returning (the return itself is elided in this extraction).
6471 loopVnInvariantCache->Set(vn, res);
// optTreeIsValidAtLoopHead: can 'tree' be legally evaluated at the head of
// loop 'lnum'?
//
// A local is valid if it is SSA-tracked and its SSA definition lies outside
// the loop; constants are always valid; any other node is valid iff all of
// its children are (checked recursively).
6475 bool Compiler::optTreeIsValidAtLoopHead(GenTreePtr tree, unsigned lnum)
6477 if (tree->OperIsLocal())
6479 GenTreeLclVarCommon* lclVar = tree->AsLclVarCommon();
6480 unsigned lclNum = lclVar->gtLclNum;
6482 // The lvlVar must be have an Ssa tracked lifetime
6483 if (fgExcludeFromSsa(lclNum))
6488 // If the loop does not contains the SSA def we can hoist it.
6489 if (!optLoopTable[lnum].lpContains(lvaTable[lclNum].GetPerSsaData(lclVar->GetSsaNum())->m_defLoc.m_blk))
6494 else if (tree->OperIsConst())
6498 else // If every one of the children nodes are valid at this Loop's Head.
6500 unsigned nChildren = tree->NumChildren();
6501 for (unsigned childNum = 0; childNum < nChildren; childNum++)
6503 if (!optTreeIsValidAtLoopHead(tree->GetChild(childNum), lnum))
6513 /*****************************************************************************
6515 * Creates a pre-header block for the given loop - a preheader is a BBJ_NONE
6516 * header. The pre-header will replace the current lpHead in the loop table.
6517 * The loop has to be a do-while loop. Thus, all blocks dominated by lpHead
6518 * will also be dominated by the loop-top, lpHead->bbNext.
// fgCreateLoopPreHeader: insert a fall-through (BBJ_NONE) pre-header block
// immediately before the loop top, reroute all non-loop predecessors of the
// loop entry to it, fix up SSA phi predecessor references and the EH table,
// estimate the pre-header's weight from profile/edge data when available, and
// mark this loop (and any nested loops sharing the same entry) as
// LPFLG_HAS_PREHEAD. Idempotent: returns early if a pre-header already exists.
//
// NOTE(review): many interior lines (braces, else branches, early returns,
// DEBUG guards) are elided in this extraction.
6522 void Compiler::fgCreateLoopPreHeader(unsigned lnum)
6524 LoopDsc* pLoopDsc = &optLoopTable[lnum];
6526 /* This loop has to be a "do-while" loop */
6528 assert(pLoopDsc->lpFlags & LPFLG_DO_WHILE);
6530 /* Have we already created a loop-preheader block? */
6532 if (pLoopDsc->lpFlags & LPFLG_HAS_PREHEAD)
6537 BasicBlock* head = pLoopDsc->lpHead;
6538 BasicBlock* top = pLoopDsc->lpTop;
6539 BasicBlock* entry = pLoopDsc->lpEntry;
6541 // if 'entry' and 'head' are in different try regions then we won't be able to hoist
6542 if (!BasicBlock::sameTryRegion(head, entry))
6547 // Ensure that lpHead always dominates lpEntry
6549 noway_assert(fgDominate(head, entry));
6551 /* Get hold of the first block of the loop body */
6553 assert(top == entry);
6555 /* Allocate a new basic block */
6557 BasicBlock* preHead = bbNewBasicBlock(BBJ_NONE);
6558 preHead->bbFlags |= BBF_INTERNAL | BBF_LOOP_PREHEADER;
6560 // Must set IL code offset
6561 preHead->bbCodeOffs = top->bbCodeOffs;
6563 // Set the default value of the preHead weight in case we don't have
6564 // valid profile data and since this blocks weight is just an estimate
6565 // we clear any BBF_PROF_WEIGHT flag that we may have picked up from head.
6567 preHead->inheritWeight(head);
6568 preHead->bbFlags &= ~BBF_PROF_WEIGHT;
6573 printf("\nCreated PreHeader (BB%02u) for loop L%02u (BB%02u - BB%02u), with weight = %s\n", preHead->bbNum,
6574 lnum, top->bbNum, pLoopDsc->lpBottom->bbNum, refCntWtd2str(preHead->getBBWeight(this)));
6578 // The preheader block is part of the containing loop (if any).
6579 preHead->bbNatLoopNum = pLoopDsc->lpParent;
// With profile data and a conditional head, refine the pre-header weight
// using the head's taken/not-taken split (edge weights when valid, block
// weights otherwise).
6581 if (fgIsUsingProfileWeights() && (head->bbJumpKind == BBJ_COND))
6583 if ((head->bbWeight == 0) || (head->bbNext->bbWeight == 0))
6585 preHead->bbWeight = 0;
6586 preHead->bbFlags |= BBF_RUN_RARELY;
6590 bool allValidProfileWeights = ((head->bbFlags & BBF_PROF_WEIGHT) != 0) &&
6591 ((head->bbJumpDest->bbFlags & BBF_PROF_WEIGHT) != 0) &&
6592 ((head->bbNext->bbFlags & BBF_PROF_WEIGHT) != 0);
6594 if (allValidProfileWeights)
6596 double loopEnteredCount;
6597 double loopSkippedCount;
6599 if (fgHaveValidEdgeWeights)
6601 flowList* edgeToNext = fgGetPredForBlock(head->bbNext, head);
6602 flowList* edgeToJump = fgGetPredForBlock(head->bbJumpDest, head);
6603 noway_assert(edgeToNext != nullptr);
6604 noway_assert(edgeToJump != nullptr);
// Midpoint of the [min, max] edge-weight interval approximates the true
// edge count.
6607 ((double)edgeToNext->flEdgeWeightMin + (double)edgeToNext->flEdgeWeightMax) / 2.0;
6609 ((double)edgeToJump->flEdgeWeightMin + (double)edgeToJump->flEdgeWeightMax) / 2.0;
6613 loopEnteredCount = (double)head->bbNext->bbWeight;
6614 loopSkippedCount = (double)head->bbJumpDest->bbWeight;
6617 double loopTakenRatio = loopEnteredCount / (loopEnteredCount + loopSkippedCount);
6619 // Calculate a good approximation of the preHead's block weight
6620 unsigned preHeadWeight = (unsigned)(((double)head->bbWeight * loopTakenRatio) + 0.5);
6621 preHead->setBBWeight(max(preHeadWeight, 1));
6622 noway_assert(!preHead->isRunRarely());
6627 // Link in the preHead block.
6628 fgInsertBBbefore(top, preHead);
6630 // Ideally we would re-run SSA and VN if we optimized by doing loop hoisting.
6631 // However, that is too expensive at this point. Instead, we update the phi
6632 // node block references, if we created pre-header block due to hoisting.
6633 // This is sufficient because any definition participating in SSA that flowed
6634 // into the phi via the loop header block will now flow through the preheader
6635 // block from the header block.
6637 for (GenTreePtr stmt = top->bbTreeList; stmt; stmt = stmt->gtNext)
6639 GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
// Phi definitions appear as "lcl = GT_PHI(args)" assignments at the start of
// the block; stop rewriting once a non-phi statement form is reached.
6640 if (tree->OperGet() != GT_ASG)
6644 GenTreePtr op2 = tree->gtGetOp2();
6645 if (op2->OperGet() != GT_PHI)
6649 GenTreeArgList* args = op2->gtGetOp1()->AsArgList();
6650 while (args != nullptr)
6652 GenTreePhiArg* phiArg = args->Current()->AsPhiArg();
6653 if (phiArg->gtPredBB == head)
6655 phiArg->gtPredBB = preHead;
6657 args = args->Rest();
6661 // The handler can't begin at the top of the loop. If it did, it would be incorrect
6662 // to set the handler index on the pre header without updating the exception table.
6663 noway_assert(!top->hasHndIndex() || fgFirstBlockOfHandler(top) != top);
6665 // Update the EH table to make the hoisted block part of the loop's EH block.
6666 fgExtendEHRegionBefore(top);
6668 // TODO-CQ: set dominators for this block, to allow loop optimizations requiring them
6669 // (e.g: hoisting expression in a loop with the same 'head' as this one)
6671 /* Update the loop entry */
6673 pLoopDsc->lpHead = preHead;
6674 pLoopDsc->lpFlags |= LPFLG_HAS_PREHEAD;
6676 /* The new block becomes the 'head' of the loop - update bbRefs and bbPreds
6677 All predecessors of 'beg', (which is the entry in the loop)
6678 now have to jump to 'preHead', unless they are dominated by 'head' */
6680 preHead->bbRefs = 0;
6681 fgAddRefPred(preHead, head);
6682 bool checkNestedLoops = false;
// Reroute each predecessor of 'top': loop back-edges (preds dominated by
// 'top') keep targeting 'top'; all others are redirected to 'preHead'.
6684 for (flowList* pred = top->bbPreds; pred; pred = pred->flNext)
6686 BasicBlock* predBlock = pred->flBlock;
6688 if (fgDominate(top, predBlock))
6690 // note: if 'top' dominates predBlock, 'head' dominates predBlock too
6691 // (we know that 'head' dominates 'top'), but using 'top' instead of
6692 // 'head' in the test allows us to not enter here if 'predBlock == head'
// A back-edge from a block other than this loop's bottom suggests nested
// loops sharing this entry -- handled after the pred walk.
6694 if (predBlock != pLoopDsc->lpBottom)
6696 noway_assert(predBlock != head);
6697 checkNestedLoops = true;
6702 switch (predBlock->bbJumpKind)
6705 noway_assert(predBlock == head);
6709 if (predBlock == head)
6711 noway_assert(predBlock->bbJumpDest != top);
6717 case BBJ_EHCATCHRET:
6718 noway_assert(predBlock->bbJumpDest == top);
6719 predBlock->bbJumpDest = preHead;
6720 preHead->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
6722 if (predBlock == head)
6724 // This is essentially the same case of predBlock being a BBJ_NONE. We may not be
6725 // able to make this a BBJ_NONE if it's an internal block (for example, a leave).
6726 // Just break, pred will be removed after switch.
6730 fgRemoveRefPred(top, predBlock);
6731 fgAddRefPred(preHead, predBlock);
// Switch predecessor: patch every jump-table slot that targeted 'top'.
6737 jumpCnt = predBlock->bbJumpSwt->bbsCount;
6738 BasicBlock** jumpTab;
6739 jumpTab = predBlock->bbJumpSwt->bbsDstTab;
6744 if ((*jumpTab) == top)
6746 (*jumpTab) = preHead;
6748 fgRemoveRefPred(top, predBlock);
6749 fgAddRefPred(preHead, predBlock);
6750 preHead->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
6752 } while (++jumpTab, --jumpCnt);
6755 noway_assert(!"Unexpected bbJumpKind");
6760 noway_assert(!fgGetPredForBlock(top, preHead));
6761 fgRemoveRefPred(top, head);
6762 fgAddRefPred(top, preHead);
6765 If we found at least one back-edge in the flowgraph pointing to the top/entry of the loop
6766 (other than the back-edge of the loop we are considering) then we likely have nested
6767 do-while loops with the same entry block and inserting the preheader block changes the head
6768 of all the nested loops. Now we will update this piece of information in the loop table, and
6769 mark all nested loops as having a preheader (the preheader block can be shared among all nested
6770 do-while loops with the same entry block).
6772 if (checkNestedLoops)
6774 for (unsigned l = 0; l < optLoopCount; l++)
6776 if (optLoopTable[l].lpHead == head)
6778 noway_assert(l != lnum); // pLoopDsc->lpHead was already changed from 'head' to 'preHead'
6779 noway_assert(optLoopTable[l].lpEntry == top);
6780 optUpdateLoopHead(l, optLoopTable[l].lpHead, preHead);
6781 optLoopTable[l].lpFlags |= LPFLG_HAS_PREHEAD;
6785 printf("Same PreHeader (BB%02u) can be used for loop L%02u (BB%02u - BB%02u)\n\n", preHead->bbNum,
6786 l, top->bbNum, optLoopTable[l].lpBottom->bbNum);
6794 bool Compiler::optBlockIsLoopEntry(BasicBlock* blk, unsigned* pLnum)
6796 for (unsigned lnum = blk->bbNatLoopNum; lnum != BasicBlock::NOT_IN_LOOP; lnum = optLoopTable[lnum].lpParent)
6798 if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED)
6802 if (optLoopTable[lnum].lpEntry == blk)
6811 void Compiler::optComputeLoopSideEffects()
6814 for (lnum = 0; lnum < optLoopCount; lnum++)
6816 VarSetOps::AssignNoCopy(this, optLoopTable[lnum].lpVarInOut, VarSetOps::MakeEmpty(this));
6817 VarSetOps::AssignNoCopy(this, optLoopTable[lnum].lpVarUseDef, VarSetOps::MakeEmpty(this));
6818 optLoopTable[lnum].lpContainsCall = false;
6821 for (lnum = 0; lnum < optLoopCount; lnum++)
6823 if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED)
6828 if (optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP)
6829 { // Is outermost...
6830 optComputeLoopNestSideEffects(lnum);
6834 VarSetOps::AssignNoCopy(this, lvaFloatVars, VarSetOps::MakeEmpty(this));
6835 #ifndef _TARGET_64BIT_
6836 VarSetOps::AssignNoCopy(this, lvaLongVars, VarSetOps::MakeEmpty(this));
6839 for (unsigned i = 0; i < lvaCount; i++)
6841 LclVarDsc* varDsc = &lvaTable[i];
6842 if (varDsc->lvTracked)
6844 if (varTypeIsFloating(varDsc->lvType))
6846 VarSetOps::AddElemD(this, lvaFloatVars, varDsc->lvVarIndex);
6848 #ifndef _TARGET_64BIT_
6849 else if (varTypeIsLong(varDsc->lvType))
6851 VarSetOps::AddElemD(this, lvaLongVars, varDsc->lvVarIndex);
6858 void Compiler::optComputeLoopNestSideEffects(unsigned lnum)
6860 assert(optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP); // Requires: lnum is outermost.
6861 BasicBlock* botNext = optLoopTable[lnum].lpBottom->bbNext;
6862 for (BasicBlock* bbInLoop = optLoopTable[lnum].lpFirst; bbInLoop != botNext; bbInLoop = bbInLoop->bbNext)
6864 optComputeLoopSideEffectsOfBlock(bbInLoop);
6868 void Compiler::optComputeLoopSideEffectsOfBlock(BasicBlock* blk)
// Computes the side effects 'blk' contributes to every loop containing it:
// variable liveness, contains-call, modified fields / array element types,
// and (worst case) arbitrary heap havoc.
// NOTE(review): lines appear elided in this excerpt (missing braces/case
// labels); code below is kept byte-identical, comments only added.
6870 unsigned mostNestedLoop = blk->bbNatLoopNum;
6871 assert(mostNestedLoop != BasicBlock::NOT_IN_LOOP);
6873 AddVariableLivenessAllContainingLoops(mostNestedLoop, blk);
6875 bool heapHavoc = false; // True ==> there's a call or a memory store that has arbitrary heap effects.
6877 // Now iterate over the remaining statements, and their trees.
6878 for (GenTreePtr stmts = blk->FirstNonPhiDef(); (stmts != nullptr); stmts = stmts->gtNext)
6880 for (GenTreePtr tree = stmts->gtStmt.gtStmtList; (tree != nullptr); tree = tree->gtNext)
6882 genTreeOps oper = tree->OperGet();
6884 // Even after we set heapHavoc we still may want to know if a loop contains calls
6887 if (oper == GT_CALL)
6889 // Record that this loop contains a call
6890 AddContainsCallAllContainingLoops(mostNestedLoop);
6893 // If we just set lpContainsCall or it was previously set
6894 if (optLoopTable[mostNestedLoop].lpContainsCall)
6896 // We can early exit after both heapHavoc and lpContainsCall are both set to true.
6900 // We are just looking for GT_CALL nodes after heapHavoc was set.
6904 // otherwise heapHavoc is not set
6907 // This body is a distillation of the heap-side effect code of value numbering.
6908 // We also do a very limited analysis if byref PtrTo values, to cover some cases
6909 // that the compiler creates.
6911 if (GenTree::OperIsAssignment(oper))
6913 GenTreePtr lhs = tree->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true);
// Indirect store: classify the address to attribute the write precisely
// (array element, object/static field) instead of full heap havoc.
6915 if (lhs->OperGet() == GT_IND)
6917 GenTreePtr arg = lhs->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true);
6918 FieldSeqNode* fldSeqArrElem = nullptr;
// Volatile stores are treated conservatively (elided branch presumably sets
// heapHavoc here — confirm against full source).
6920 if ((tree->gtFlags & GTF_IND_VOLATILE) != 0)
6928 if (arg->TypeGet() == TYP_BYREF && arg->OperGet() == GT_LCL_VAR)
6930 // If it's a local byref for which we recorded a value number, use that...
6931 GenTreeLclVar* argLcl = arg->AsLclVar();
6932 if (!fgExcludeFromSsa(argLcl->GetLclNum()))
6935 lvaTable[argLcl->GetLclNum()].GetPerSsaData(argLcl->GetSsaNum())->m_vnPair.GetLiberal();
6937 if (argVN != ValueNumStore::NoVN && vnStore->GetVNFunc(argVN, &funcApp) &&
6938 funcApp.m_func == VNF_PtrToArrElem)
6940 assert(vnStore->IsVNHandle(funcApp.m_args[0]));
6941 CORINFO_CLASS_HANDLE elemType =
6942 CORINFO_CLASS_HANDLE(vnStore->ConstantValue<size_t>(funcApp.m_args[0]));
6943 AddModifiedElemTypeAllContainingLoops(mostNestedLoop, elemType);
6944 // Don't set heapHavoc below.
6951 // Is the LHS an array index expression?
6952 else if (lhs->ParseArrayElemForm(this, &arrInfo, &fldSeqArrElem))
6954 // We actually ignore "fldSeq" -- any modification to an S[], at any
6955 // field of "S", will lose all information about the array type.
6956 CORINFO_CLASS_HANDLE elemTypeEq = EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType);
6957 AddModifiedElemTypeAllContainingLoops(mostNestedLoop, elemTypeEq);
6961 // We are only interested in IsFieldAddr()'s fldSeq out parameter.
6963 GenTreePtr obj = nullptr; // unused
6964 GenTreePtr staticOffset = nullptr; // unused
6965 FieldSeqNode* fldSeq = nullptr;
6967 if (arg->IsFieldAddr(this, &obj, &staticOffset, &fldSeq) &&
6968 (fldSeq != FieldSeqStore::NotAField()))
6970 // Get the first (object) field from field seq. Heap[field] will yield the "field map".
6971 assert(fldSeq != nullptr);
6972 if (fldSeq->IsFirstElemFieldSeq())
6974 fldSeq = fldSeq->m_next;
6975 assert(fldSeq != nullptr);
6978 AddModifiedFieldAllContainingLoops(mostNestedLoop, fldSeq->m_fieldHnd);
6986 else if (lhs->OperIsBlk())
6988 GenTreeLclVarCommon* lclVarTree;
6990 if (!tree->DefinesLocal(this, &lclVarTree, &isEntire))
6992 // For now, assume arbitrary side effects on the heap...
6996 else if (lhs->OperGet() == GT_CLS_VAR)
6998 AddModifiedFieldAllContainingLoops(mostNestedLoop, lhs->gtClsVar.gtClsVarHnd);
7000 // Otherwise, must be local lhs form. I should assert that.
7001 else if (lhs->OperGet() == GT_LCL_VAR)
7003 GenTreeLclVar* lhsLcl = lhs->AsLclVar();
7004 GenTreePtr rhs = tree->gtOp.gtOp2;
7005 ValueNum rhsVN = rhs->gtVNPair.GetLiberal();
7006 // If we gave the RHS a value number, propagate it.
7007 if (rhsVN != ValueNumStore::NoVN)
7009 rhsVN = vnStore->VNNormVal(rhsVN);
7010 if (!fgExcludeFromSsa(lhsLcl->GetLclNum()))
7012 lvaTable[lhsLcl->GetLclNum()]
7013 .GetPerSsaData(lhsLcl->GetSsaNum())
7014 ->m_vnPair.SetLiberal(rhsVN);
7019 else // not GenTree::OperIsAssignment(oper)
// Non-assignment nodes: elided switch on 'oper' (case labels not visible in
// this excerpt); visible arms handle COMMA VN propagation, ADDR of array
// index expressions, interlocked ops, and calls.
7024 tree->gtVNPair = tree->gtOp.gtOp2->gtVNPair;
7028 // Is it an addr of a array index expression?
7030 GenTreePtr addrArg = tree->gtOp.gtOp1;
7031 if (addrArg->OperGet() == GT_IND)
7033 // Is the LHS an array index expression?
7034 if (addrArg->gtFlags & GTF_IND_ARR_INDEX)
7037 bool b = GetArrayInfoMap()->Lookup(addrArg, &arrInfo);
// Give the address a PtrToArrElem VN so later stores through it can be
// attributed to the element type rather than full heap havoc.
7039 CORINFO_CLASS_HANDLE elemType =
7040 EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType);
7041 tree->gtVNPair.SetBoth(
7042 vnStore->VNForFunc(TYP_BYREF, VNF_PtrToArrElem,
7043 vnStore->VNForHandle(ssize_t(elemType), GTF_ICON_CLASS_HDL),
7044 // The rest are dummy arguments.
7045 vnStore->VNForNull(), vnStore->VNForNull(),
7046 vnStore->VNForNull()));
// Interlocked operations mutate the heap arbitrarily.
7052 case GT_LOCKADD: // Binop
7053 case GT_XADD: // Binop
7054 case GT_XCHG: // Binop
7055 case GT_CMPXCHG: // Specialop
7063 GenTreeCall* call = tree->AsCall();
7065 // Record that this loop contains a call
7066 AddContainsCallAllContainingLoops(mostNestedLoop);
7068 if (call->gtCallType == CT_HELPER)
7070 CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
7071 if (s_helperCallProperties.MutatesHeap(helpFunc))
7075 else if (s_helperCallProperties.MayRunCctor(helpFunc))
7077 // If the call is labeled as "Hoistable", then we've checked the
7078 // class that would be constructed, and it is not precise-init, so
7079 // the cctor will not be run by this call. Otherwise, it might be,
7080 // and might have arbitrary side effects.
7081 if ((tree->gtFlags & GTF_CALL_HOISTABLE) == 0)
7095 // All other gtOper node kinds, leave 'heapHavoc' unchanged (i.e. false)
7104 // Record that all loops containing this block have heap havoc effects.
7105 unsigned lnum = mostNestedLoop;
7106 while (lnum != BasicBlock::NOT_IN_LOOP)
7108 optLoopTable[lnum].lpLoopHasHeapHavoc = true;
7109 lnum = optLoopTable[lnum].lpParent;
7114 // Marks the containsCall information to "lnum" and any parent loops.
7115 void Compiler::AddContainsCallAllContainingLoops(unsigned lnum)
7117 assert(0 <= lnum && lnum < optLoopCount);
7118 while (lnum != BasicBlock::NOT_IN_LOOP)
7120 optLoopTable[lnum].lpContainsCall = true;
7121 lnum = optLoopTable[lnum].lpParent;
7125 // Adds the variable liveness information for 'blk' to 'this' LoopDsc
7126 void Compiler::LoopDsc::AddVariableLiveness(Compiler* comp, BasicBlock* blk)
7128 VarSetOps::UnionD(comp, this->lpVarInOut, blk->bbLiveIn);
7129 VarSetOps::UnionD(comp, this->lpVarInOut, blk->bbLiveOut);
7131 VarSetOps::UnionD(comp, this->lpVarUseDef, blk->bbVarUse);
7132 VarSetOps::UnionD(comp, this->lpVarUseDef, blk->bbVarDef);
7135 // Adds the variable liveness information for 'blk' to "lnum" and any parent loops.
7136 void Compiler::AddVariableLivenessAllContainingLoops(unsigned lnum, BasicBlock* blk)
7138 assert(0 <= lnum && lnum < optLoopCount);
7139 while (lnum != BasicBlock::NOT_IN_LOOP)
7141 optLoopTable[lnum].AddVariableLiveness(this, blk);
7142 lnum = optLoopTable[lnum].lpParent;
7146 // Adds "fldHnd" to the set of modified fields of "lnum" and any parent loops.
7147 void Compiler::AddModifiedFieldAllContainingLoops(unsigned lnum, CORINFO_FIELD_HANDLE fldHnd)
7149 assert(0 <= lnum && lnum < optLoopCount);
7150 while (lnum != BasicBlock::NOT_IN_LOOP)
7152 optLoopTable[lnum].AddModifiedField(this, fldHnd);
7153 lnum = optLoopTable[lnum].lpParent;
7157 // Adds "elemType" to the set of modified array element types of "lnum" and any parent loops.
7158 void Compiler::AddModifiedElemTypeAllContainingLoops(unsigned lnum, CORINFO_CLASS_HANDLE elemClsHnd)
7160 assert(0 <= lnum && lnum < optLoopCount);
7161 while (lnum != BasicBlock::NOT_IN_LOOP)
7163 optLoopTable[lnum].AddModifiedElemType(this, elemClsHnd);
7164 lnum = optLoopTable[lnum].lpParent;
7168 /*****************************************************************************
7170 * Helper passed to Compiler::fgWalkAllTreesPre() to decrement the LclVar usage counts
7171 * The 'keepList'is either a single tree or a list of trees that are formed by
7172 * one or more GT_COMMA nodes. It is the kept side-effects as returned by the
7173 * gtExtractSideEffList method.
7177 Compiler::fgWalkResult Compiler::optRemoveTreeVisitor(GenTreePtr* pTree, fgWalkData* data)
7179 GenTreePtr tree = *pTree;
7180 Compiler* comp = data->compiler;
7181 GenTreePtr keepList = (GenTreePtr)(data->pCallbackData);
7183 // We may have a non-NULL side effect list that is being kept
7187 GenTreePtr keptTree = keepList;
7188 while (keptTree->OperGet() == GT_COMMA)
7190 assert(keptTree->OperKind() & GTK_SMPOP);
7191 GenTreePtr op1 = keptTree->gtOp.gtOp1;
7192 GenTreePtr op2 = keptTree->gtGetOp2();
7194 // For the GT_COMMA case the op1 is part of the orginal CSE tree
7195 // that is being kept because it contains some side-effect
7199 // This tree and all of its sub trees are being kept.
7200 return WALK_SKIP_SUBTREES;
7203 // For the GT_COMMA case the op2 are the remaining side-effects of the orginal CSE tree
7204 // which can again be another GT_COMMA or the final side-effect part
7208 if (tree == keptTree)
7210 // This tree and all of its sub trees are being kept.
7211 return WALK_SKIP_SUBTREES;
7215 // This node is being removed from the graph of GenTreePtr
7217 // Look for any local variable references
7219 if (tree->gtOper == GT_LCL_VAR && comp->lvaLocalVarRefCounted)
7224 /* This variable ref is going away, decrease its ref counts */
7226 lclNum = tree->gtLclVarCommon.gtLclNum;
7227 assert(lclNum < comp->lvaCount);
7228 varDsc = comp->lvaTable + lclNum;
7230 // make sure it's been initialized
7231 assert(comp->compCurBB != nullptr);
7232 assert(comp->compCurBB->bbWeight <= BB_MAX_WEIGHT);
7234 /* Decrement its lvRefCnt and lvRefCntWtd */
7236 // Use getBBWeight to determine the proper block weight.
7237 // This impacts the block weights when we have IBC data.
7238 varDsc->decRefCnts(comp->compCurBB->getBBWeight(comp), comp);
7241 return WALK_CONTINUE;
7244 /*****************************************************************************
7246 * Routine called to decrement the LclVar ref counts when removing a tree
7247 * during the remove RangeCheck phase.
7248 * This method will decrement the refcounts for any LclVars used below 'deadTree',
7249 * unless the node is found in the 'keepList' (which are saved side effects)
7250 * The keepList is communicated using the walkData.pCallbackData field
7251 * Also the compCurBB must be set to the current BasicBlock which contains
7252 * 'deadTree' as we need to fetch the block weight when decrementing the ref counts.
7255 void Compiler::optRemoveTree(GenTreePtr deadTree, GenTreePtr keepList)
7257 // We communicate this value using the walkData.pCallbackData field
7259 fgWalkTreePre(&deadTree, optRemoveTreeVisitor, (void*)keepList);
7262 /*****************************************************************************
7264  *  Given an array index node, mark it as not needing a range check.
// 'tree' is a GT_COMMA whose op1 is the bounds check; op1 is replaced with the
// extracted side effects (or a NOP), so the check itself is dropped.
// NOTE(review): lines appear elided in this excerpt (missing #if/#endif and
// debug-dump scaffolding); code below is kept byte-identical, comments only added.
7267 void Compiler::optRemoveRangeCheck(
7268     GenTreePtr tree, GenTreePtr stmt, bool updateCSEcounts, unsigned sideEffFlags, bool forceRemove)
// Guard: this transformation is only valid when REARRANGE_ADDS is enabled.
7284     noway_assert(!"can't remove range checks without REARRANGE_ADDS right now");
// Validate the expected shape: a statement containing COMMA(boundsChk, ...),
// and that removal was proven safe unless the caller forces it.
7287     noway_assert(stmt->gtOper == GT_STMT);
7288     noway_assert(tree->gtOper == GT_COMMA);
7289     noway_assert(tree->gtOp.gtOp1->OperIsBoundsCheck());
7290     noway_assert(forceRemove || optIsRangeCheckRemovable(tree->gtOp.gtOp1));
7292     GenTreeBoundsChk* bndsChk = tree->gtOp.gtOp1->AsBoundsChk();
7297         printf("Before optRemoveRangeCheck:\n");
// Extract side effects (per 'sideEffFlags') from the bounds check so they
// are preserved when the check is removed.
7302     GenTreePtr sideEffList = nullptr;
7305         gtExtractSideEffList(tree->gtOp.gtOp1, &sideEffList, sideEffFlags);
7308     // Decrement the ref counts for any LclVars that are being deleted
7310     optRemoveTree(tree->gtOp.gtOp1, sideEffList);
7312     // Just replace the bndsChk with a NOP as an operand to the GT_COMMA, if there are no side effects.
7313     tree->gtOp.gtOp1 = (sideEffList != nullptr) ? sideEffList : gtNewNothingNode();
7315     // TODO-CQ: We should also remove the GT_COMMA, but in any case we can no longer CSE the GT_COMMA.
7316     tree->gtFlags |= GTF_DONT_CSE;
7318     /* Recalculate the gtCostSz, etc... */
7319     gtSetStmtInfo(stmt);
7321     /* Re-thread the nodes if necessary */
7322     if (fgStmtListThreaded)
7330         printf("After optRemoveRangeCheck:\n");
7336 /*****************************************************************************
7337 * Return the scale in an array reference, given a pointer to the
7338 * multiplication node.
7341 ssize_t Compiler::optGetArrayRefScaleAndIndex(GenTreePtr mul, GenTreePtr* pIndex DEBUGARG(bool bRngChk))
7344 assert(mul->gtOper == GT_MUL || mul->gtOper == GT_LSH);
7345 assert(mul->gtOp.gtOp2->IsCnsIntOrI());
7347 ssize_t scale = mul->gtOp.gtOp2->gtIntConCommon.IconValue();
7349 if (mul->gtOper == GT_LSH)
7351 scale = ((ssize_t)1) << scale;
7354 GenTreePtr index = mul->gtOp.gtOp1;
7356 if (index->gtOper == GT_MUL && index->gtOp.gtOp2->IsCnsIntOrI())
7358 // case of two cascading multiplications for constant int (e.g. * 20 morphed to * 5 * 4):
7359 // When index->gtOper is GT_MUL and index->gtOp.gtOp2->gtOper is GT_CNS_INT (i.e. * 5),
7360 // we can bump up the scale from 4 to 5*4, and then change index to index->gtOp.gtOp1.
7361 // Otherwise, we cannot optimize it. We will simply keep the original scale and index.
7362 scale *= index->gtOp.gtOp2->gtIntConCommon.IconValue();
7363 index = index->gtOp.gtOp1;
7366 assert(!bRngChk || index->gtOper != GT_COMMA);
7376 /*****************************************************************************
7377 * Find the last assignment to of the local variable in the block. Return
7378 * RHS or NULL. If any local variable in the RHS has been killed in
7379 * intervening code, return NULL. If the variable being searched for is killed
7380 * in the intervening code, return NULL.
7384 GenTreePtr Compiler::optFindLocalInit(BasicBlock* block,
7386 VARSET_TP* pKilledInOut,
7387 bool* pLhsRhsKilledAfterInit)
7389 assert(pKilledInOut);
7390 assert(pLhsRhsKilledAfterInit);
7392 *pLhsRhsKilledAfterInit = false;
7394 unsigned LclNum = local->gtLclVarCommon.gtLclNum;
7396 GenTreePtr list = block->bbTreeList;
7397 if (list == nullptr)
7402 GenTreePtr rhs = nullptr;
7403 GenTreePtr stmt = list;
7406 stmt = stmt->gtPrev;
7407 if (stmt == nullptr)
7412 GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
7413 // If we encounter an assignment to a local variable,
7414 if ((tree->OperKind() & GTK_ASGOP) && tree->gtOp.gtOp1->gtOper == GT_LCL_VAR)
7416 // And the assigned variable equals the input local,
7417 if (tree->gtOp.gtOp1->gtLclVarCommon.gtLclNum == LclNum)
7419 // If the assignment is '=' and it is not a conditional, then return rhs.
7420 if (tree->gtOper == GT_ASG && !(tree->gtFlags & GTF_COLON_COND))
7422 rhs = tree->gtOp.gtOp2;
7424 // If the assignment is 'op=' or a conditional equal, then the search ends here,
7425 // as we found a kill to the input local.
7428 *pLhsRhsKilledAfterInit = true;
7429 assert(rhs == nullptr);
7435 LclVarDsc* varDsc = optIsTrackedLocal(tree->gtOp.gtOp1);
7436 if (varDsc == nullptr)
7440 VarSetOps::AddElemD(this, *pKilledInOut, varDsc->lvVarIndex);
7443 } while (stmt != list);
7450 // If any local in the RHS is killed in intervening code, or RHS has an indirection, return NULL.
7451 varRefKinds rhsRefs = VR_NONE;
7452 VARSET_TP VARSET_INIT_NOCOPY(rhsLocals, VarSetOps::UninitVal());
7453 bool b = lvaLclVarRefs(rhs, nullptr, &rhsRefs, &rhsLocals);
7454 if (!b || !VarSetOps::IsEmptyIntersection(this, rhsLocals, *pKilledInOut) || (rhsRefs != VR_NONE))
7456 // If RHS has been indirectly referenced, consider it a write and a kill.
7457 *pLhsRhsKilledAfterInit = true;
7464 /*****************************************************************************
7466 * Return true if "op1" is guaranteed to be less then or equal to "op2".
7471 bool Compiler::optIsNoMore(GenTreePtr op1, GenTreePtr op2, int add1, int add2)
7473 if (op1->gtOper == GT_CNS_INT && op2->gtOper == GT_CNS_INT)
7475 add1 += op1->gtIntCon.gtIconVal;
7476 add2 += op2->gtIntCon.gtIconVal;
7480 /* Check for +/- constant on either operand */
7482 if (op1->gtOper == GT_ADD && op1->gtOp.gtOp2->gtOper == GT_CNS_INT)
7484 add1 += op1->gtOp.gtOp2->gtIntCon.gtIconVal;
7485 op1 = op1->gtOp.gtOp1;
7488 if (op2->gtOper == GT_ADD && op2->gtOp.gtOp2->gtOper == GT_CNS_INT)
7490 add2 += op2->gtOp.gtOp2->gtIntCon.gtIconVal;
7491 op2 = op2->gtOp.gtOp1;
7494 /* We only allow local variable references */
7496 if (op1->gtOper != GT_LCL_VAR)
7498 if (op2->gtOper != GT_LCL_VAR)
7500 if (op1->gtLclVarCommon.gtLclNum != op2->gtLclVarCommon.gtLclNum)
7503 /* NOTE: Caller ensures that this variable has only one def */
7505 // printf("limit [%d]:\n", add1); gtDispTree(op1);
7506 // printf("size [%d]:\n", add2); gtDispTree(op2);
7510 return (bool)(add1 <= add2);
7515 //------------------------------------------------------------------------------
7516 // optObtainLoopCloningOpts: Identify optimization candidates and update
7517 // the "context" for array optimizations.
7520 // context - data structure where all loop cloning info is kept. The
7521 // optInfo fields of the context are updated with the
7522 // identified optimization candidates.
7524 void Compiler::optObtainLoopCloningOpts(LoopCloneContext* context)
7526 for (unsigned i = 0; i < optLoopCount; i++)
7528 JITDUMP("Considering loop %d to clone for optimizations.\n", i);
7529 if (optIsLoopClonable(i))
7531 if (!(optLoopTable[i].lpFlags & LPFLG_REMOVED))
7533 optIdentifyLoopOptInfo(i, context);
7536 JITDUMP("------------------------------------------------------------\n");
7541 //------------------------------------------------------------------------
7542 // optIdentifyLoopOptInfo: Identify loop optimization candidates an also
7543 // check if the loop is suitable for the optimizations performed.
7546 // loopNum - the current loop index for which conditions are derived.
7547 // context - data structure where all loop cloning candidates will be
7551 // If the loop is not suitable for the optimizations, return false - context
7552 // should not contain any optimization candidate for the loop if false.
7553 // Else return true.
7556 // Check if the loop is well formed for this optimization and identify the
7557 // optimization candidates and update the "context" parameter with all the
7558 // contextual information necessary to perform the optimization later.
7560 bool Compiler::optIdentifyLoopOptInfo(unsigned loopNum, LoopCloneContext* context)
7562 noway_assert(loopNum < optLoopCount);
7564 LoopDsc* pLoop = &optLoopTable[loopNum];
7566 if (!(pLoop->lpFlags & LPFLG_ITER))
7568 JITDUMP("> No iter flag on loop %d.\n", loopNum);
7572 unsigned ivLclNum = pLoop->lpIterVar();
7573 if (lvaVarAddrExposed(ivLclNum))
7575 JITDUMP("> Rejected V%02u as iter var because is address-exposed.\n", ivLclNum);
7579 BasicBlock* head = pLoop->lpHead;
7580 BasicBlock* end = pLoop->lpBottom;
7581 BasicBlock* beg = head->bbNext;
7583 if (end->bbJumpKind != BBJ_COND)
7585 JITDUMP("> Couldn't find termination test.\n");
7589 if (end->bbJumpDest != beg)
7591 JITDUMP("> Branch at loop 'end' not looping to 'begin'.\n");
7595 // TODO-CQ: CLONE: Mark increasing or decreasing loops.
7596 if ((pLoop->lpIterOper() != GT_ASG_ADD && pLoop->lpIterOper() != GT_ADD) || (pLoop->lpIterConst() != 1))
7598 JITDUMP("> Loop iteration operator not matching\n");
7602 if ((pLoop->lpFlags & LPFLG_CONST_LIMIT) == 0 && (pLoop->lpFlags & LPFLG_VAR_LIMIT) == 0 &&
7603 (pLoop->lpFlags & LPFLG_ARRLEN_LIMIT) == 0)
7605 JITDUMP("> Loop limit is neither constant, variable or array length\n");
7609 if (!(((pLoop->lpTestOper() == GT_LT || pLoop->lpTestOper() == GT_LE) &&
7610 (pLoop->lpIterOper() == GT_ADD || pLoop->lpIterOper() == GT_ASG_ADD)) ||
7611 ((pLoop->lpTestOper() == GT_GT || pLoop->lpTestOper() == GT_GE) &&
7612 (pLoop->lpIterOper() == GT_SUB || pLoop->lpIterOper() == GT_ASG_SUB))))
7614 JITDUMP("> Loop test (%s) doesn't agree with the direction (%s) of the pLoop->\n",
7615 GenTree::NodeName(pLoop->lpTestOper()), GenTree::NodeName(pLoop->lpIterOper()));
7619 if (!(pLoop->lpTestTree->OperKind() & GTK_RELOP) || !(pLoop->lpTestTree->gtFlags & GTF_RELOP_ZTT))
7621 JITDUMP("> Loop inversion NOT present, loop test [%06u] may not protect entry from head.\n",
7622 pLoop->lpTestTree->gtTreeID);
7627 GenTreePtr op1 = pLoop->lpIterator();
7628 noway_assert((op1->gtOper == GT_LCL_VAR) && (op1->gtLclVarCommon.gtLclNum == ivLclNum));
7631 JITDUMP("Checking blocks BB%02d..BB%02d for optimization candidates\n", beg->bbNum,
7632 end->bbNext ? end->bbNext->bbNum : 0);
7634 LoopCloneVisitorInfo info(context, loopNum, nullptr);
7635 for (BasicBlock* block = beg; block != end->bbNext; block = block->bbNext)
7638 for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
7641 fgWalkTreePre(&stmt->gtStmt.gtStmtExpr, optCanOptimizeByLoopCloningVisitor, &info, false, false);
7648 //---------------------------------------------------------------------------------------------------------------
7649 // optExtractArrIndex: Try to extract the array index from "tree".
7652 // tree the tree to be checked if it is the array [] operation.
7653 // result the extracted GT_INDEX information is updated in result.
7654 // lhsNum for the root level (function is recursive) callers should be BAD_VAR_NUM.
7657 // Returns true if array index can be extracted, else, return false. See assumption about
7658 // what will be extracted. The "result" variable's rank parameter is advanced for every
7659 // dimension of [] encountered.
7662 // Given a "tree" extract the GT_INDEX node in "result" as ArrIndex. In FlowGraph morph
7663 // we have converted a GT_INDEX tree into a scaled index base offset expression. We need
7664 // to reconstruct this to be able to know if this is an array access.
7667 // The method extracts only if the array base and indices are GT_LCL_VAR.
7669 // TODO-CQ: CLONE: After morph make sure this method extracts values before morph.
7671 // [000000001AF828D8] ---XG------- indir int
7672 // [000000001AF872C8] ------------ const long 16 Fseq[#FirstElem]
7673 // [000000001AF87340] ------------ + byref
7674 // [000000001AF87160] -------N---- const long 2
7675 // [000000001AF871D8] ------------ << long
7676 // [000000001AF870C0] ------------ cast long <- int
7677 // [000000001AF86F30] i----------- lclVar int V04 loc0
7678 // [000000001AF87250] ------------ + byref
7679 // [000000001AF86EB8] ------------ lclVar ref V01 arg1
7680 // [000000001AF87468] ---XG------- comma int
7681 // [000000001AF87020] ---X-------- arrBndsChk void
7682 // [000000001AF86FA8] ---X-------- arrLen int
7683 // [000000001AF827E8] ------------ lclVar ref V01 arg1
7684 // [000000001AF82860] ------------ lclVar int V04 loc0
7685 // [000000001AF829F0] -A-XG------- = int
7686 // [000000001AF82978] D------N---- lclVar int V06 tmp0
7688 bool Compiler::optExtractArrIndex(GenTreePtr tree, ArrIndex* result, unsigned lhsNum)
7690 if (tree->gtOper != GT_COMMA)
7694 GenTreePtr before = tree->gtGetOp1();
7695 if (before->gtOper != GT_ARR_BOUNDS_CHECK)
7699 GenTreeBoundsChk* arrBndsChk = before->AsBoundsChk();
7700 if (arrBndsChk->gtArrLen->gtGetOp1()->gtOper != GT_LCL_VAR)
7704 if (arrBndsChk->gtIndex->gtOper != GT_LCL_VAR)
7708 unsigned arrLcl = arrBndsChk->gtArrLen->gtGetOp1()->gtLclVarCommon.gtLclNum;
7709 if (lhsNum != BAD_VAR_NUM && arrLcl != lhsNum)
7714 unsigned indLcl = arrBndsChk->gtIndex->gtLclVarCommon.gtLclNum;
7716 GenTreePtr after = tree->gtGetOp2();
7718 if (after->gtOper != GT_IND)
7722 // It used to be the case that arrBndsChks for struct types would fail the previous check because
7723 // after->gtOper was an address (for a block op). In order to avoid asmDiffs we will for now
7724 // return false if the type of 'after' is a struct type. (This was causing us to clone loops
7725 // that we were not previously cloning.)
7726 // TODO-1stClassStructs: Remove this check to enable optimization of array bounds checks for struct
7728 if (varTypeIsStruct(after))
7733 GenTreePtr sibo = after->gtGetOp1();
7734 if (sibo->gtOper != GT_ADD)
7738 GenTreePtr sib = sibo->gtGetOp1();
7739 GenTreePtr ofs = sibo->gtGetOp2();
7740 if (ofs->gtOper != GT_CNS_INT)
7744 if (sib->gtOper != GT_ADD)
7748 GenTreePtr si = sib->gtGetOp2();
7749 GenTreePtr base = sib->gtGetOp1();
7750 if (si->gtOper != GT_LSH)
7754 if (base->OperGet() != GT_LCL_VAR || base->gtLclVarCommon.gtLclNum != arrLcl)
7758 GenTreePtr scale = si->gtGetOp2();
7759 GenTreePtr index = si->gtGetOp1();
7760 if (scale->gtOper != GT_CNS_INT)
7764 #ifdef _TARGET_AMD64_
7765 if (index->gtOper != GT_CAST)
7769 GenTreePtr indexVar = index->gtGetOp1();
7771 GenTreePtr indexVar = index;
7773 if (indexVar->gtOper != GT_LCL_VAR || indexVar->gtLclVarCommon.gtLclNum != indLcl)
7777 if (lhsNum == BAD_VAR_NUM)
7779 result->arrLcl = arrLcl;
7781 result->indLcls.Push(indLcl);
7782 result->bndsChks.Push(tree);
7783 result->useBlock = compCurBB;
7789 //---------------------------------------------------------------------------------------------------------------
7790 // optReconstructArrIndex: Reconstruct array index.
7793 // tree the tree to be checked if it is an array [][][] operation.
7794 // result the extracted GT_INDEX information.
7795 // lhsNum for the root level (function is recursive) callers should be BAD_VAR_NUM.
7798 // Returns true if array index can be extracted, else, return false. "rank" field in
7799 // "result" contains the array access depth. The "indLcls" fields contain the indices.
7802 // Recursively look for a list of array indices. In the example below, we encounter,
7803 // V03 = ((V05 = V00[V01]), (V05[V02])) which corresponds to access of V00[V01][V02]
7804 // The return value would then be:
7805 // ArrIndex result { arrLcl: V00, indLcls: [V01, V02], rank: 2 }
7807 // V00[V01][V02] would be morphed as:
7809 // [000000001B366848] ---XG------- indir int
7810 // [000000001B36BC50] ------------ V05 + (V02 << 2) + 16
7811 // [000000001B36C200] ---XG------- comma int
7812 // [000000001B36BDB8] ---X-------- arrBndsChk(V05, V02)
7813 // [000000001B36C278] -A-XG------- comma int
7814 // [000000001B366730] R--XG------- indir ref
7815 // [000000001B36C2F0] ------------ V00 + (V01 << 3) + 24
7816 // [000000001B36C818] ---XG------- comma ref
7817 // [000000001B36C458] ---X-------- arrBndsChk(V00, V01)
7818 // [000000001B36BB60] -A-XG------- = ref
7819 // [000000001B36BAE8] D------N---- lclVar ref V05 tmp2
7820 // [000000001B36A668] -A-XG------- = int
7821 // [000000001B36A5F0] D------N---- lclVar int V03 tmp0
7824 // The method extracts only if the array base and indices are GT_LCL_VAR.
7826 bool Compiler::optReconstructArrIndex(GenTreePtr tree, ArrIndex* result, unsigned lhsNum)
7828 // If we can extract "tree" (which is a top level comma) return.
7829 if (optExtractArrIndex(tree, result, lhsNum))
7833 // We have a comma (check if array base expr is computed in "before"), descend further.
7834 else if (tree->OperGet() == GT_COMMA)
7836 GenTreePtr before = tree->gtGetOp1();
7837 // "before" should evaluate an array base for the "after" indexing.
7838 if (before->OperGet() != GT_ASG)
7842 GenTreePtr lhs = before->gtGetOp1();
7843 GenTreePtr rhs = before->gtGetOp2();
7845 // "rhs" should contain an GT_INDEX
7846 if (!lhs->IsLocal() || !optReconstructArrIndex(rhs, result, lhsNum))
7850 unsigned lhsNum = lhs->gtLclVarCommon.gtLclNum;
7851 GenTreePtr after = tree->gtGetOp2();
7852 // Pass the "lhsNum", so we can verify if indeed it is used as the array base.
7853 return optExtractArrIndex(after, result, lhsNum);
7859 Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloningVisitor(GenTreePtr* pTree, Compiler::fgWalkData* data)
7861 return data->compiler->optCanOptimizeByLoopCloning(*pTree, (LoopCloneVisitorInfo*)data->pCallbackData);
7864 //-------------------------------------------------------------------------
7865 // optIsStackLocalInvariant: Is stack local invariant in loop.
7868 // loopNum The loop in which the variable is tested for invariance.
7869 // lclNum The local that is tested for invariance in the loop.
7872 // Returns true if the variable is loop invariant in loopNum.
7874 bool Compiler::optIsStackLocalInvariant(unsigned loopNum, unsigned lclNum)
7876 if (lvaVarAddrExposed(lclNum))
7880 if (optIsVarAssgLoop(loopNum, lclNum))
7887 //----------------------------------------------------------------------------------------------
7888 // optCanOptimizeByLoopCloning: Check if the tree can be optimized by loop cloning and if so,
7889 // identify as potential candidate and update the loop context.
7892 // tree The tree encountered during the tree walk.
7893 // info Supplies information about the current block or stmt in which the tree is.
7894 // Also supplies the "context" pointer for updating with loop cloning
7895 // candidates. Also supplies loopNum.
7898 // If array index can be reconstructed, check if the iter var of the loop matches the
7899 // array index var in some dim. Also ensure other index vars before the identified
7900 // dim are loop invariant.
7903 // Skip sub trees if the optimization candidate is identified or else continue walking
7905 Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTreePtr tree, LoopCloneVisitorInfo* info)
7907 ArrIndex arrIndex(getAllocator());
7909 // Check if array index can be optimized.
7910 if (optReconstructArrIndex(tree, &arrIndex, BAD_VAR_NUM))
7912 assert(tree->gtOper == GT_COMMA);
7916 JITDUMP("Found ArrIndex at tree ");
7918 printf(" which is equivalent to: ");
// The array base local itself must be invariant in the loop; otherwise the
// candidate is rejected and the sub-tree skipped.
7923 if (!optIsStackLocalInvariant(info->loopNum, arrIndex.arrLcl))
7925 return WALK_SKIP_SUBTREES;
7928 // Walk the dimensions and see if iterVar of the loop is used as index.
7929 for (unsigned dim = 0; dim < arrIndex.rank; ++dim)
7931 // Is index variable also used as the loop iter var.
7932 if (arrIndex.indLcls[dim] == optLoopTable[info->loopNum].lpIterVar())
7934 // Check the previous indices are all loop invariant.
7935 for (unsigned dim2 = 0; dim2 < dim; ++dim2)
7937 if (optIsVarAssgLoop(info->loopNum, arrIndex.indLcls[dim2]))
7939 JITDUMP("V%02d is assigned in loop\n", arrIndex.indLcls[dim2]);
7940 return WALK_SKIP_SUBTREES;
7946 JITDUMP("Loop %d can be cloned for ArrIndex ", info->loopNum);
7948 JITDUMP(" on dim %d\n", dim);
7951 // Update the loop context.
// Record this candidate (array index + dimension + statement) on the loop's
// optimization-info list so the cloning phase can act on it later.
7952 info->context->EnsureLoopOptInfo(info->loopNum)
7953 ->Push(new (this, CMK_LoopOpt) LcJaggedArrayOptInfo(arrIndex, dim, info->stmt));
7957 JITDUMP("Induction V%02d is not used as index on dim %d\n", optLoopTable[info->loopNum].lpIterVar(),
7961 return WALK_SKIP_SUBTREES;
7963 else if (tree->gtOper == GT_ARR_ELEM)
7965 // TODO-CQ: CLONE: Implement.
7966 return WALK_SKIP_SUBTREES;
7968 return WALK_CONTINUE;
// Callback data for optValidRangeCheckIndex: carries the compiler instance
// plus a "bValidIndex" flag (used below) that the tree walk clears when it
// encounters a node that disqualifies the index expression.
7971 struct optRangeCheckDsc
7973 Compiler* pCompiler;
7977 Walk to make sure that only locals and constants are contained in the index
// (tree-walk callback; clears pData->bValidIndex on any disqualifying node)
7980 Compiler::fgWalkResult Compiler::optValidRangeCheckIndex(GenTreePtr* pTree, fgWalkData* data)
7982 GenTreePtr tree = *pTree;
7983 optRangeCheckDsc* pData = (optRangeCheckDsc*)data->pCallbackData;
// Indirections, class vars, fields and local fields may be mutated through
// aliases (e.g. by other threads), so they invalidate the index.
7985 if (tree->gtOper == GT_IND || tree->gtOper == GT_CLS_VAR || tree->gtOper == GT_FIELD || tree->gtOper == GT_LCL_FLD)
7987 pData->bValidIndex = false;
// A local var is only acceptable if its address has not been exposed.
7991 if (tree->gtOper == GT_LCL_VAR)
7993 if (pData->pCompiler->lvaTable[tree->gtLclVarCommon.gtLclNum].lvAddrExposed)
7995 pData->bValidIndex = false;
8000 return WALK_CONTINUE;
8004 returns true if a range check can legally be removed (for the moment it checks
8005 that the array is a local array (non subject to racing conditions) and that the
8006 index is either a constant or a local
8008 bool Compiler::optIsRangeCheckRemovable(GenTreePtr tree)
8010 noway_assert(tree->gtOper == GT_ARR_BOUNDS_CHECK);
8011 GenTreeBoundsChk* bndsChk = tree->AsBoundsChk();
8012 GenTreePtr pArray = bndsChk->GetArray();
// No array reference and a non-constant length: nothing we can reason about.
8013 if (pArray == nullptr && !bndsChk->gtArrLen->IsCnsIntOrI())
8017 GenTreePtr pIndex = bndsChk->gtIndex;
8019 // The length must be a constant (the pArray == NULL case) or the array reference must be a local.
8020 // Otherwise we can be targeted by malicious race-conditions.
8021 if (pArray != nullptr)
8023 if (pArray->gtOper != GT_LCL_VAR)
8029 printf("Can't remove range check if the array isn't referenced with a local\n");
8037 noway_assert(pArray->gtType == TYP_REF);
8038 noway_assert(pArray->gtLclVarCommon.gtLclNum < lvaCount);
8040 if (lvaTable[pArray->gtLclVarCommon.gtLclNum].lvAddrExposed)
8042 // If the array address has been taken, don't do the optimization
8043 // (this restriction can be lowered a bit, but i don't think it's worth it)
8044 CLANG_FORMAT_COMMENT_ANCHOR;
8048 printf("Can't remove range check if the array has its address taken\n");
// Finally, walk the index expression and verify it consists only of locals
// and constants (see optValidRangeCheckIndex above).
8057 optRangeCheckDsc Data;
8058 Data.pCompiler = this;
8059 Data.bValidIndex = true;
8061 fgWalkTreePre(&pIndex, optValidRangeCheckIndex, &Data);
8063 if (!Data.bValidIndex)
8068 printf("Can't remove range check with this index");
8079 /******************************************************************************
8081 * Replace x==null with (x|x)==0 if x is a GC-type.
8082 * This will stress code-gen and the emitter to make sure they support such trees.
// * Only active under STRESS_OPT_BOOLS_GC stress mode; condBlock must be a
// * BBJ_COND block ending in a GT_JTRUE.
8087 void Compiler::optOptimizeBoolsGcStress(BasicBlock* condBlock)
8089 if (!compStressCompile(STRESS_OPT_BOOLS_GC, 20))
8094 noway_assert(condBlock->bbJumpKind == BBJ_COND);
8095 GenTreePtr condStmt = condBlock->bbTreeList->gtPrev->gtStmt.gtStmtExpr;
8097 noway_assert(condStmt->gtOper == GT_JTRUE);
8102 GenTreePtr comparand = optIsBoolCond(condStmt, &relop, &isBool);
// Bail if the comparand isn't a GC-typed value...
8104 if (comparand == nullptr || !varTypeIsGC(comparand->TypeGet()))
// ...or cannot be safely duplicated (side effects would execute twice).
8109 if (comparand->gtFlags & (GTF_ASG | GTF_CALL | GTF_ORDER_SIDEEFF))
8114 GenTreePtr comparandClone = gtCloneExpr(comparand);
8116 // Bump up the ref-counts of any variables in 'comparandClone'
8117 compCurBB = condBlock;
8118 fgWalkTreePre(&comparandClone, Compiler::lvaIncRefCntsCB, (void*)this, true);
// Replace the comparand with (x|x) or (x&x), picked randomly under stress.
8120 noway_assert(relop->gtOp.gtOp1 == comparand);
8121 genTreeOps oper = compStressCompile(STRESS_OPT_BOOLS_GC, 50) ? GT_OR : GT_AND;
8122 relop->gtOp.gtOp1 = gtNewOperNode(oper, TYP_I_IMPL, comparand, comparandClone);
8124 // Comparand type is already checked, and we have const int, there is no harm
8125 // morphing it into a TYP_I_IMPL.
8126 noway_assert(relop->gtOp.gtOp2->gtOper == GT_CNS_INT);
8127 relop->gtOp.gtOp2->gtType = TYP_I_IMPL;
8132 /******************************************************************************
8133 * Function used by folding of boolean conditionals
8134 * Given a GT_JTRUE node, checks that it is a boolean comparison of the form
8135 * "if (boolVal ==/!= 0/1)". This is translated into a GT_EQ node with "op1"
8136 * being a boolean lclVar and "op2" the const 0/1.
8137 * On success, the comparand (ie. boolVal) is returned. Else NULL.
8138 * compPtr returns the compare node (i.e. GT_EQ or GT_NE node)
8139 * boolPtr returns whether the comparand is a boolean value (must be 0 or 1).
8140 * When return boolPtr equal to true, if the comparison was against a 1 (i.e true)
8141 * value then we morph the tree by reversing the GT_EQ/GT_NE and change the 1 to 0.
8144 GenTree* Compiler::optIsBoolCond(GenTree* condBranch, GenTree** compPtr, bool* boolPtr)
8146 bool isBool = false;
8148 noway_assert(condBranch->gtOper == GT_JTRUE);
8149 GenTree* cond = condBranch->gtOp.gtOp1;
8151 /* The condition must be "!= 0" or "== 0" */
8153 if ((cond->gtOper != GT_EQ) && (cond->gtOper != GT_NE))
8158 /* Return the compare node to the caller */
8162 /* Get hold of the comparands */
8164 GenTree* opr1 = cond->gtOp.gtOp1;
8165 GenTree* opr2 = cond->gtOp.gtOp2;
// The right-hand operand must be the integral constant 0 or 1.
8167 if (opr2->gtOper != GT_CNS_INT)
8172 if (!opr2->IsIntegralConst(0) && !opr2->IsIntegralConst(1))
8177 ssize_t ival2 = opr2->gtIntCon.gtIconVal;
8179 /* Is the value a boolean?
8180 * We can either have a boolean expression (marked GTF_BOOLEAN) or
8181 * a local variable that is marked as being boolean (lvIsBoolean) */
8183 if (opr1->gtFlags & GTF_BOOLEAN)
8187 else if ((opr1->gtOper == GT_CNS_INT) && (opr1->IsIntegralConst(0) || opr1->IsIntegralConst(1)))
8191 else if (opr1->gtOper == GT_LCL_VAR)
8193 /* is it a boolean local variable */
8195 unsigned lclNum = opr1->gtLclVarCommon.gtLclNum;
8196 noway_assert(lclNum < lvaCount);
8198 if (lvaTable[lclNum].lvIsBoolean)
8204 /* Was our comparison against the constant 1 (i.e. true) */
8207 // If this is a boolean expression tree we can reverse the relop
8208 // and change the true to false.
// Normalize "x ==/!= 1" into the equivalent "x !=/== 0" so callers only ever
// see comparisons against 0.
8211 gtReverseCond(cond);
8212 opr2->gtIntCon.gtIconVal = 0;
// optOptimizeBools: Fold pairs of adjacent BBJ_COND blocks whose conditions
// are boolean tests against 0 (as recognized by optIsBoolCond) into a single
// conditional block using a bitwise AND/OR of the two comparands, then
// compact the flowgraph (remove the second block, fix preds/refs, edge
// weights, EH table and loop table).
8224 void Compiler::optOptimizeBools()
8229 printf("*************** In optOptimizeBools()\n");
8232 printf("Blocks/Trees before phase\n");
8233 fgDispBasicBlocks(true);
8243 for (BasicBlock* b1 = fgFirstBB; b1; b1 = b1->bbNext)
8245 /* We're only interested in conditional jumps here */
8247 if (b1->bbJumpKind != BBJ_COND)
8252 /* If there is no next block, we're done */
8254 BasicBlock* b2 = b1->bbNext;
8260 /* The next block must not be marked as BBF_DONT_REMOVE */
8261 if (b2->bbFlags & BBF_DONT_REMOVE)
8266 /* The next block also needs to be a condition */
8268 if (b2->bbJumpKind != BBJ_COND)
8271 optOptimizeBoolsGcStress(b1);
8276 bool sameTarget; // Do b1 and b2 have the same bbJumpDest?
// Two foldable shapes: both branches to the same target (OR-style), or b1
// jumping over b2 to b2's fall-through successor (AND-style).
8278 if (b1->bbJumpDest == b2->bbJumpDest)
8280 /* Given the following sequence of blocks :
8284 we wil try to fold it to :
8285 B1: brtrue(t1|t2, BX)
8291 else if (b1->bbJumpDest == b2->bbNext) /*b1->bbJumpDest->bbNum == n1+2*/
8293 /* Given the following sequence of blocks :
8297 we will try to fold it to :
8298 B1: brtrue((!t1)&&t2, B3)
8309 /* The second block must contain a single statement */
8311 GenTreePtr s2 = b2->bbTreeList;
8312 if (s2->gtPrev != s2)
8317 noway_assert(s2->gtOper == GT_STMT);
8318 GenTreePtr t2 = s2->gtStmt.gtStmtExpr;
8319 noway_assert(t2->gtOper == GT_JTRUE);
8321 /* Find the condition for the first block */
8323 GenTreePtr s1 = b1->bbTreeList->gtPrev;
8325 noway_assert(s1->gtOper == GT_STMT);
8326 GenTreePtr t1 = s1->gtStmt.gtStmtExpr;
8327 noway_assert(t1->gtOper == GT_JTRUE);
// b2 must have b1 as its only predecessor, otherwise removing it is unsafe.
8329 if (b2->countOfInEdges() > 1)
8334 /* Find the branch conditions of b1 and b2 */
8338 GenTreePtr c1 = optIsBoolCond(t1, &t1, &bool1);
8344 GenTreePtr c2 = optIsBoolCond(t2, &t2, &bool2);
// NOTE(review): '&&' binds tighter than '||', so these asserts parse as
// "EQ || (NE && op1 == cX)" — the op1 check is skipped whenever the oper is
// GT_EQ. Presumably "(EQ || NE) && op1 == cX" was intended; confirm and add
// parentheses in a separate change.
8350 noway_assert(t1->gtOper == GT_EQ || t1->gtOper == GT_NE && t1->gtOp.gtOp1 == c1);
8351 noway_assert(t2->gtOper == GT_EQ || t2->gtOper == GT_NE && t2->gtOp.gtOp1 == c2);
8353 // Leave out floats where the bit-representation is more complicated
8354 // - there are two representations for 0.
8356 if (varTypeIsFloating(c1->TypeGet()) || varTypeIsFloating(c2->TypeGet()))
8361 // Make sure the types involved are of the same sizes
8362 if (genTypeSize(c1->TypeGet()) != genTypeSize(c2->TypeGet()))
8366 if (genTypeSize(t1->TypeGet()) != genTypeSize(t2->TypeGet()))
8370 #ifdef _TARGET_ARMARCH_
8371 // Skip the small operand which we cannot encode.
8372 if (varTypeIsSmall(c1->TypeGet()))
8375 /* The second condition must not contain side effects */
// c2 will be evaluated speculatively (even when the original b1 branch would
// have skipped b2), so it must be side-effect free and cheap.
8377 if (c2->gtFlags & GTF_GLOB_EFFECT)
8382 /* The second condition must not be too expensive */
8386 if (c2->gtCostEx > 12)
8393 var_types foldType = c1->TypeGet();
8394 if (varTypeIsGC(foldType))
8396 foldType = TYP_I_IMPL;
8401 /* Both conditions must be the same */
8403 if (t1->gtOper != t2->gtOper)
8408 if (t1->gtOper == GT_EQ)
8410 /* t1:c1==0 t2:c2==0 ==> Branch to BX if either value is 0
8411 So we will branch to BX if (c1&c2)==0 */
8418 /* t1:c1!=0 t2:c2!=0 ==> Branch to BX if either value is non-0
8419 So we will branch to BX if (c1|c2)!=0 */
8427 /* The b1 condition must be the reverse of the b2 condition */
8429 if (t1->gtOper == t2->gtOper)
8434 if (t1->gtOper == GT_EQ)
8436 /* t1:c1==0 t2:c2!=0 ==> Branch to BX if both values are non-0
8437 So we will branch to BX if (c1&c2)!=0 */
8444 /* t1:c1!=0 t2:c2==0 ==> Branch to BX if both values are 0
8445 So we will branch to BX if (c1|c2)==0 */
8452 // Anding requires both values to be 0 or 1
8454 if ((foldOp == GT_AND) && (!bool1 || !bool2))
8460 // Now update the trees
// Build the folded comparand (c1 AND/OR c2) and splice it into t1's compare.
8462 GenTreePtr cmpOp1 = gtNewOperNode(foldOp, foldType, c1, c2);
8465 /* When we 'OR'/'AND' two booleans, the result is boolean as well */
8466 cmpOp1->gtFlags |= GTF_BOOLEAN;
8470 t1->gtOp.gtOp1 = cmpOp1;
8471 t1->gtOp.gtOp2->gtType = foldType; // Could have been varTypeIsGC()
8473 #if FEATURE_SET_FLAGS
8474 // For comparisons against zero we will have the GTF_SET_FLAGS set
8475 // and this can cause an assert to fire in fgMoveOpsLeft(GenTreePtr tree)
8476 // during the CSE phase.
8478 // So make sure to clear any GTF_SET_FLAGS bit on these operations
8479 // as they are no longer feeding directly into a comparisons against zero
8481 // Make sure that the GTF_SET_FLAGS bit is cleared.
8482 // Fix 388436 ARM JitStress WP7
8483 c1->gtFlags &= ~GTF_SET_FLAGS;
8484 c2->gtFlags &= ~GTF_SET_FLAGS;
8486 // The new top level node that we just created does feed directly into
8487 // a comparison against zero, so set the GTF_SET_FLAGS bit so that
8488 // we generate an instuction that sets the flags, which allows us
8489 // to omit the cmp with zero instruction.
8491 // Request that the codegen for cmpOp1 sets the condition flags
8492 // when it generates the code for cmpOp1.
8494 cmpOp1->gtRequestSetFlags();
// Flowgraph surgery: retarget b1's jump and fix up pred lists / ref counts.
8497 flowList* edge1 = fgGetPredForBlock(b1->bbJumpDest, b1);
8500 /* Modify the target of the conditional jump and update bbRefs and bbPreds */
8504 edge2 = fgGetPredForBlock(b2->bbJumpDest, b2);
8508 edge2 = fgGetPredForBlock(b2->bbNext, b2);
8510 fgRemoveRefPred(b1->bbJumpDest, b1);
8512 b1->bbJumpDest = b2->bbJumpDest;
8514 fgAddRefPred(b2->bbJumpDest, b1);
8517 noway_assert(edge1 != nullptr);
8518 noway_assert(edge2 != nullptr);
// Merge the two edges' weight ranges; fall back to the widest possible range
// if the summed max would not dominate both inputs (overflow guard).
8520 BasicBlock::weight_t edgeSumMin = edge1->flEdgeWeightMin + edge2->flEdgeWeightMin;
8521 BasicBlock::weight_t edgeSumMax = edge1->flEdgeWeightMax + edge2->flEdgeWeightMax;
8522 if ((edgeSumMax >= edge1->flEdgeWeightMax) && (edgeSumMax >= edge2->flEdgeWeightMax))
8524 edge1->flEdgeWeightMin = edgeSumMin;
8525 edge1->flEdgeWeightMax = edgeSumMax;
8529 edge1->flEdgeWeightMin = BB_ZERO_WEIGHT;
8530 edge1->flEdgeWeightMax = BB_MAX_WEIGHT;
8533 /* Get rid of the second block (which is a BBJ_COND) */
8535 noway_assert(b1->bbJumpKind == BBJ_COND);
8536 noway_assert(b2->bbJumpKind == BBJ_COND);
8537 noway_assert(b1->bbJumpDest == b2->bbJumpDest);
8538 noway_assert(b1->bbNext == b2);
8539 noway_assert(b2->bbNext);
8542 b2->bbFlags |= BBF_REMOVED;
8544 // If b2 was the last block of a try or handler, update the EH table.
8546 ehUpdateForDeletedBlock(b2);
8548 /* Update bbRefs and bbPreds */
8550 /* Replace pred 'b2' for 'b2->bbNext' with 'b1'
8551 * Remove pred 'b2' for 'b2->bbJumpDest' */
8553 fgReplacePred(b2->bbNext, b2, b1);
8555 fgRemoveRefPred(b2->bbJumpDest, b2);
8557 /* Update the block numbers and try again */
8569 // Update loop table
8570 fgUpdateLoopsAfterCompacting(b1, b2);
8575 printf("Folded %sboolean conditions of BB%02u and BB%02u to :\n", c2->OperIsLeaf() ? "" : "non-leaf ",
8576 b1->bbNum, b2->bbNum);
8585 fgDebugCheckBBlist();