1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
10 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
11 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
17 #pragma warning(disable : 4701)
20 /*****************************************************************************/
// Static counters shared across Compiler instances — presumably track range checks
// removed vs. total range checks encountered; confirm usage sites before relying on this.
24 unsigned Compiler::optRangeChkRmv = 0;
26 unsigned Compiler::optRangeChkAll = 0;
29 /*****************************************************************************/
// optInit: Reset the optimizer's per-method state (loop marking, call counters,
// assertion-prop state, and CSE bookkeeping) before the optimization phases run.
31 void Compiler::optInit()
33 optLoopsMarked = false;
36 /* Initialize the # of tracked loops to 0 */
38 /* Keep track of the number of calls and indirect calls made by this method */
40 optIndirectCallCount = 0;
41 optNativeCallCount = 0;
42 optAssertionCount = 0;
// No assertion-dependency table yet; allocated later when assertion prop needs it.
43 optAssertionDep = nullptr;
// CSE bookkeeping: UINT_MAX is a sentinel meaning the CSE phase has not started.
45 optCSECandidateTotal = 0;
46 optCSEstart = UINT_MAX;
48 #endif // FEATURE_ANYCSE
// DataFlow constructor: records the owning Compiler instance for use by the dataflow helpers.
51 DataFlow::DataFlow(Compiler* pCompiler) : m_pCompiler(pCompiler)
55 /*****************************************************************************
// optSetBlockWeights: Adjust basic-block weights using dominator/reachability info.
// Blocks unreachable from the first block are marked run-rarely; blocks that do not
// dominate a return block are halved in weight (unless profile data is in use).
// Requires dominators to be computed and is skipped under MinOpts/debug codegen.
59 void Compiler::optSetBlockWeights()
61 noway_assert(!opts.MinOpts() && !opts.compDbgCode);
62 assert(fgDomsComputed);
68 bool firstBBdomsRets = true;
// Walk every block in bbNext order.
72 for (block = fgFirstBB; (block != nullptr); block = block->bbNext)
74 /* Blocks that can't be reached via the first block are rarely executed */
75 if (!fgReachable(fgFirstBB, block))
77 block->bbSetRunRarely();
80 if (block->bbWeight != BB_ZERO_WEIGHT)
82 // Calculate our bbWeight:
84 // o BB_UNITY_WEIGHT if we dominate all BBJ_RETURN blocks
85 // o otherwise BB_UNITY_WEIGHT / 2
87 bool domsRets = true; // Assume that we will dominate
89 for (BasicBlockList* retBlocks = fgReturnBlocks; retBlocks != nullptr; retBlocks = retBlocks->next)
91 if (!fgDominate(block, retBlocks->block))
// Remember whether the method entry itself dominates all returns; used to gate halving below.
98 if (block == fgFirstBB)
100 firstBBdomsRets = domsRets;
103 // If we are not using profile weight then we lower the weight
104 // of blocks that do not dominate a return block
106 if (firstBBdomsRets && (fgIsUsingProfileWeights() == false) && (domsRets == false))
111 block->modifyBBWeight(block->bbWeight / 2);
// Halving must never drive a nonzero weight to zero.
112 noway_assert(block->bbWeight);
118 if (changed && verbose)
120 printf("\nAfter optSetBlockWeights:\n");
125 /* Check that the flowgraph data (bbNum, bbRefs, bbPreds) is up-to-date */
126 fgDebugCheckBBlist();
130 /*****************************************************************************
132 * Marks the blocks between 'begBlk' and 'endBlk' as part of a loop.
// optMarkLoopBlocks: Scale up the bbWeight of every block in the loop [begBlk..endBlk]
// (optionally excluding endBlk). Blocks with real profile weights are left unchanged.
// The scale factor depends on whether the block dominates/reaches a backedge block.
135 void Compiler::optMarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk, bool excludeEndBlk)
137 /* Calculate the 'loopWeight',
138 this is the amount to increase each block in the loop
139 Our heuristic is that loops are weighted eight times more
140 than straight line code.
141 Thus we increase each block by 7 times the weight of
142 the loop header block,
143 if the loops are all properly formed gives us:
144 (assuming that BB_LOOP_WEIGHT is 8)
146 1 -- non loop basic block
147 8 -- single loop nesting
148 64 -- double loop nesting
149 512 -- triple loop nesting
153 noway_assert(begBlk->bbNum <= endBlk->bbNum);
154 noway_assert(begBlk->isLoopHead());
155 noway_assert(fgReachable(begBlk, endBlk));
160 printf("\nMarking loop L%02u", begBlk->bbLoopNum);
164 noway_assert(!opts.MinOpts());
166 /* Build list of backedges for block begBlk */
167 flowList* backedgeList = nullptr;
// A predecessor with a bbNum >= begBlk's is (by lexical ordering) a backedge source.
169 for (flowList* pred = begBlk->bbPreds; pred != nullptr; pred = pred->flNext)
171 /* Is this a backedge? */
172 if (pred->flBlock->bbNum >= begBlk->bbNum)
174 flowList* flow = new (this, CMK_FlowList) flowList();
176 #if MEASURE_BLOCK_SIZE
178 genFlowNodeSize += sizeof(flowList);
179 #endif // MEASURE_BLOCK_SIZE
// Prepend to the backedge list.
181 flow->flNext = backedgeList;
182 flow->flBlock = pred->flBlock;
187 /* At least one backedge must have been found (the one from endBlk) */
188 noway_assert(backedgeList);
190 BasicBlock* curBlk = begBlk;
194 noway_assert(curBlk);
196 // For curBlk to be part of a loop that starts at begBlk
197 // curBlk must be reachable from begBlk and (since this is a loop)
198 // likewise begBlk must be reachable from curBlk.
201 if (fgReachable(curBlk, begBlk) && fgReachable(begBlk, curBlk))
203 /* If this block reaches any of the backedge blocks we set reachable */
204 /* If this block dominates any of the backedge blocks we set dominates */
205 bool reachable = false;
206 bool dominates = false;
208 for (flowList* tmp = backedgeList; tmp != nullptr; tmp = tmp->flNext)
210 BasicBlock* backedge = tmp->flBlock;
// Run-rarely blocks are skipped; once both flags are set we can stop scanning.
212 if (!curBlk->isRunRarely())
214 reachable |= fgReachable(curBlk, backedge);
215 dominates |= fgDominate(curBlk, backedge);
217 if (dominates && reachable)
226 noway_assert(curBlk->bbWeight > BB_ZERO_WEIGHT);
230 if ((curBlk->bbFlags & BBF_PROF_WEIGHT) != 0)
232 // We have real profile weights, so we aren't going to change this blocks weight
233 weight = curBlk->bbWeight;
// Full loop weight when the block dominates a backedge; half when it merely reaches one.
239 weight = curBlk->bbWeight * BB_LOOP_WEIGHT;
243 weight = curBlk->bbWeight * (BB_LOOP_WEIGHT / 2);
247 // The multiplication may have caused us to overflow
249 if (weight < curBlk->bbWeight)
251 // The multiplication caused us to overflow
252 weight = BB_MAX_WEIGHT;
255 // Set the new weight
257 curBlk->modifyBBWeight(weight);
262 printf("\n BB%02u(wt=%s)", curBlk->bbNum, refCntWtd2str(curBlk->getBBWeight(this)));
268 /* Stop if we've reached the last block in the loop */
270 if (curBlk == endBlk)
275 curBlk = curBlk->bbNext;
277 /* If we are excluding the endBlk then stop if we've reached endBlk */
279 if (excludeEndBlk && (curBlk == endBlk))
286 /*****************************************************************************
288 * Unmark the blocks between 'begBlk' and 'endBlk' as part of a loop.
// optUnmarkLoopBlocks: Undo optMarkLoopBlocks for the loop [begBlk..endBlk] — divide the
// loop-scaled bbWeights back out. Only performed when exactly one loop backedge remains;
// blocks at BB_MAX_WEIGHT or carrying profile weights are left untouched.
291 void Compiler::optUnmarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk)
293 /* A set of blocks that were previously marked as a loop are now
294 to be unmarked, since we have decided that for some reason this
295 loop no longer exists.
296 Basically we are just resetting the blocks bbWeight to their
300 noway_assert(begBlk->bbNum <= endBlk->bbNum);
301 noway_assert(begBlk->isLoopHead());
303 noway_assert(!opts.MinOpts());
306 unsigned backEdgeCount = 0;
// Count qualifying backedges into begBlk.
308 for (flowList* pred = begBlk->bbPreds; pred != nullptr; pred = pred->flNext)
310 curBlk = pred->flBlock;
312 /* is this a backward edge? (from curBlk to begBlk) */
314 if (begBlk->bbNum > curBlk->bbNum)
319 /* We only consider back-edges that are BBJ_COND or BBJ_ALWAYS for loops */
321 if ((curBlk->bbJumpKind != BBJ_COND) && (curBlk->bbJumpKind != BBJ_ALWAYS))
329 /* Only unmark the loop blocks if we have exactly one loop back edge */
330 if (backEdgeCount != 1)
335 if (backEdgeCount > 0)
337 printf("\nNot removing loop L%02u, due to an additional back edge", begBlk->bbLoopNum);
339 else if (backEdgeCount == 0)
341 printf("\nNot removing loop L%02u, due to no back edge", begBlk->bbLoopNum);
347 noway_assert(backEdgeCount == 1);
348 noway_assert(fgReachable(begBlk, endBlk));
353 printf("\nUnmarking loop L%02u", begBlk->bbLoopNum);
360 noway_assert(curBlk);
362 // For curBlk to be part of a loop that starts at begBlk
363 // curBlk must be reachable from begBlk and (since this is a loop)
364 // likewise begBlk must be reachable from curBlk.
366 if (!curBlk->isRunRarely() && fgReachable(curBlk, begBlk) && fgReachable(begBlk, curBlk))
368 unsigned weight = curBlk->bbWeight;
370 // Don't unmark blocks that are set to BB_MAX_WEIGHT
371 // Don't unmark blocks when we are using profile weights
373 if (!curBlk->isMaxBBWeight() && ((curBlk->bbFlags & BBF_PROF_WEIGHT) == 0))
375 if (!fgDominate(curBlk, endBlk))
381 /* Merging of blocks can disturb the Dominates
382 information (see RAID #46649) */
383 if (weight < BB_LOOP_WEIGHT)
389 // We can overflow here so check for it
390 if (weight < curBlk->bbWeight)
392 weight = BB_MAX_WEIGHT;
395 assert(weight >= BB_LOOP_WEIGHT);
// Divide the loop scaling back out to restore the pre-marking weight.
397 curBlk->modifyBBWeight(weight / BB_LOOP_WEIGHT);
403 printf("\n BB%02u(wt=%s)", curBlk->bbNum, refCntWtd2str(curBlk->getBBWeight(this)));
407 /* Stop if we've reached the last block in the loop */
409 if (curBlk == endBlk)
414 curBlk = curBlk->bbNext;
416 /* Stop if we go past the last block in the loop, as it may have been deleted */
417 if (curBlk->bbNum > endBlk->bbNum)
424 /*****************************************************************************************************
426 * Function called to update the loop table and bbWeight before removing a block
// optUpdateLoopsBeforeRemoveBlock: Keep the loop table consistent before 'block' is
// removed from the flowgraph. Marks loops LPFLG_REMOVED when their entry/bottom dies or
// when the entry's only outside predecessor dies; patches lpHead/lpExit otherwise; and
// finally unmarks loop block weights if 'block' carried the loop's only backedge.
429 void Compiler::optUpdateLoopsBeforeRemoveBlock(BasicBlock* block, bool skipUnmarkLoop)
436 noway_assert(!opts.MinOpts());
438 bool removeLoop = false;
440 /* If an unreachable block was part of a loop entry or bottom then the loop is unreachable */
441 /* Special case: the block was the head of a loop - or pointing to a loop entry */
443 for (unsigned loopNum = 0; loopNum < optLoopCount; loopNum++)
445 /* Some loops may have been already removed by
446 * loop unrolling or conditional folding */
448 if (optLoopTable[loopNum].lpFlags & LPFLG_REMOVED)
// Losing the entry or bottom block makes the whole loop invalid.
453 if (block == optLoopTable[loopNum].lpEntry || block == optLoopTable[loopNum].lpBottom)
455 optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED;
462 printf("\nUpdateLoopsBeforeRemoveBlock Before: ");
463 optPrintLoopInfo(loopNum);
467 /* If the loop is still in the table
468 * any block in the loop must be reachable !!! */
470 noway_assert(optLoopTable[loopNum].lpEntry != block);
471 noway_assert(optLoopTable[loopNum].lpBottom != block);
// If the dying block was the recorded (single) exit, the loop no longer has a known one.
473 if (optLoopTable[loopNum].lpExit == block)
475 optLoopTable[loopNum].lpExit = nullptr;
476 optLoopTable[loopNum].lpFlags &= ~LPFLG_ONE_EXIT;
480 /* If this points to the actual entry in the loop
481 * then the whole loop may become unreachable */
483 switch (block->bbJumpKind)
486 BasicBlock** jumpTab;
490 if (block->bbNext == optLoopTable[loopNum].lpEntry)
495 if (block->bbJumpKind == BBJ_NONE)
503 noway_assert(block->bbJumpDest);
504 if (block->bbJumpDest == optLoopTable[loopNum].lpEntry)
// Switch: scan every jump-table target for the loop entry.
511 jumpCnt = block->bbJumpSwt->bbsCount;
512 jumpTab = block->bbJumpSwt->bbsDstTab;
516 noway_assert(*jumpTab);
517 if ((*jumpTab) == optLoopTable[loopNum].lpEntry)
521 } while (++jumpTab, --jumpCnt);
530 /* Check if the entry has other predecessors outside the loop
531 * TODO: Replace this when predecessors are available */
533 BasicBlock* auxBlock;
// Linear scan of all blocks to find another entry predecessor outside the loop body.
534 for (auxBlock = fgFirstBB; auxBlock; auxBlock = auxBlock->bbNext)
536 /* Ignore blocks in the loop */
538 if (auxBlock->bbNum > optLoopTable[loopNum].lpHead->bbNum &&
539 auxBlock->bbNum <= optLoopTable[loopNum].lpBottom->bbNum)
544 switch (auxBlock->bbJumpKind)
547 BasicBlock** jumpTab;
551 if (auxBlock->bbNext == optLoopTable[loopNum].lpEntry)
556 if (auxBlock->bbJumpKind == BBJ_NONE)
564 noway_assert(auxBlock->bbJumpDest);
565 if (auxBlock->bbJumpDest == optLoopTable[loopNum].lpEntry)
572 jumpCnt = auxBlock->bbJumpSwt->bbsCount;
573 jumpTab = auxBlock->bbJumpSwt->bbsDstTab;
577 noway_assert(*jumpTab);
578 if ((*jumpTab) == optLoopTable[loopNum].lpEntry)
582 } while (++jumpTab, --jumpCnt);
// No outside predecessor left: the loop becomes unreachable once 'block' goes away.
592 optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED;
595 else if (optLoopTable[loopNum].lpHead == block)
597 /* The loop has a new head - Just update the loop table */
598 optLoopTable[loopNum].lpHead = block->bbPrev;
604 printf("\nUpdateLoopsBeforeRemoveBlock After: ");
605 optPrintLoopInfo(loopNum);
// If 'block' was the single backedge source (a backward BBJ_ALWAYS/BBJ_COND to a loop
// head) and the dominator info is current, undo the loop's block-weight marking.
610 if ((skipUnmarkLoop == false) && ((block->bbJumpKind == BBJ_ALWAYS) || (block->bbJumpKind == BBJ_COND)) &&
611 (block->bbJumpDest->isLoopHead()) && (block->bbJumpDest->bbNum <= block->bbNum) && fgDomsComputed &&
612 (fgCurBBEpochSize == fgDomBBcount + 1) && fgReachable(block->bbJumpDest, block))
614 optUnmarkLoopBlocks(block->bbJumpDest, block);
620 /*****************************************************************************
622 * Given the beginBlock of the loop, return the index of this loop
// optFindLoopNumberFromBeginBlock: Return the loop-table index whose head's bbNext is
// 'begBlk'. Asserts (noway) if no loop in the table matches.
626 unsigned Compiler::optFindLoopNumberFromBeginBlock(BasicBlock* begBlk)
630 for (lnum = 0; lnum < optLoopCount; lnum++)
632 if (optLoopTable[lnum].lpHead->bbNext == begBlk)
639 noway_assert(!"Loop number not found.");
644 /*****************************************************************************
646 * Print loop info in an uniform way.
// optPrintLoopInfo (detailed form): Dump one loop's blocks (first/top/bottom/head/entry),
// exit count/block, and parent loop to jitstdout. Debug-only diagnostic output.
649 void Compiler::optPrintLoopInfo(unsigned loopInd,
654 BasicBlock* lpBottom,
655 unsigned char lpExitCnt,
659 noway_assert(lpHead);
662 // NOTE: we take "loopInd" as an argument instead of using the one
663 // stored in begBlk->bbLoopNum because sometimes begBlk->bbLoopNum
664 // has not been set correctly. For example, in optRecordLoop().
665 // However, in most of the cases, loops should have been recorded.
666 // Therefore the correct way is to call the Compiler::optPrintLoopInfo(unsigned lnum)
667 // version of this method.
669 printf("L%02u, from BB%02u", loopInd, lpFirst->bbNum);
// Only mention TOP separately when it differs from the lexically-first block.
670 if (lpTop != lpFirst)
672 printf(" (loop top is BB%02u)", lpTop->bbNum);
675 printf(" to BB%02u (Head=BB%02u, Entry=BB%02u, ExitCnt=%d", lpBottom->bbNum, lpHead->bbNum, lpEntry->bbNum,
680 printf(" at BB%02u", lpExit->bbNum);
683 if (parentLoop != BasicBlock::NOT_IN_LOOP)
685 printf(", parent loop = L%02u", parentLoop);
690 /*****************************************************************************
692 * Print loop information given the index of the loop in the loop table.
// optPrintLoopInfo (index form): Convenience overload — looks up the loop descriptor by
// table index and forwards all of its fields to the detailed printer above.
695 void Compiler::optPrintLoopInfo(unsigned lnum)
697 noway_assert(lnum < optLoopCount);
699 LoopDsc* ldsc = &optLoopTable[lnum]; // lnum is the INDEX to the loop table.
701 optPrintLoopInfo(lnum, ldsc->lpHead, ldsc->lpFirst, ldsc->lpTop, ldsc->lpEntry, ldsc->lpBottom, ldsc->lpExitCnt,
702 ldsc->lpExit, ldsc->lpParent);
707 //------------------------------------------------------------------------
708 // optPopulateInitInfo: Populate loop init info in the loop table.
711 // init - the tree that is supposed to initialize the loop iterator.
712 // iterVar - loop iteration variable.
715 // "false" if the loop table could not be populated with the loop iterVar init info.
718 // The 'init' tree is checked if its lhs is a local and rhs is either
719 // a const or a local.
// optPopulateInitInfo: Record the loop iterator's initializer in the loop table.
// Accepts only "iterVar = <int const>" (LPFLG_CONST_INIT) or "iterVar = <lclVar>"
// (LPFLG_VAR_INIT); any other shape fails and the loop is not treated as a for-loop.
721 bool Compiler::optPopulateInitInfo(unsigned loopInd, GenTreePtr init, unsigned iterVar)
723 // Operator should be =
724 if (init->gtOper != GT_ASG)
729 GenTreePtr lhs = init->gtOp.gtOp1;
730 GenTreePtr rhs = init->gtOp.gtOp2;
731 // LHS has to be local and should equal iterVar.
732 if (lhs->gtOper != GT_LCL_VAR || lhs->gtLclVarCommon.gtLclNum != iterVar)
737 // RHS can be constant or local var.
738 // TODO-CQ: CLONE: Add arr length for descending loops.
739 if (rhs->gtOper == GT_CNS_INT && rhs->TypeGet() == TYP_INT)
741 optLoopTable[loopInd].lpFlags |= LPFLG_CONST_INIT;
742 optLoopTable[loopInd].lpConstInit = (int)rhs->gtIntCon.gtIconVal;
744 else if (rhs->gtOper == GT_LCL_VAR)
746 optLoopTable[loopInd].lpFlags |= LPFLG_VAR_INIT;
747 optLoopTable[loopInd].lpVarInit = rhs->gtLclVarCommon.gtLclNum;
756 //----------------------------------------------------------------------------------
757 // optCheckIterInLoopTest: Check if iter var is used in loop test.
760 // test "jtrue" tree or an asg of the loop iter termination condition
761 // from/to blocks (beg, end) which are part of the loop.
762 // iterVar loop iteration variable.
763 // loopInd loop index.
766 // The test tree is parsed to check if "iterVar" matches the lhs of the condition
767 // and the rhs limit is extracted from the "test" tree. The limit information is
768 // added to the loop table.
771 // "false" if the loop table could not be populated with the loop test info or
772 // if the test condition doesn't involve iterVar.
// optCheckIterInLoopTest: Verify the loop-termination relop uses 'iterVar' on one side,
// classify the other side (const / loop-invariant local / array length) into loop-table
// limit flags, and record the relop as lpTestTree. See the header comment above for details.
774 bool Compiler::optCheckIterInLoopTest(
775 unsigned loopInd, GenTreePtr test, BasicBlock* from, BasicBlock* to, unsigned iterVar)
777 // Obtain the relop from the "test" tree.
779 if (test->gtOper == GT_JTRUE)
781 relop = test->gtGetOp1();
// Otherwise the test was evaluated into a temp: take the RHS of the assignment.
785 assert(test->gtOper == GT_ASG);
786 relop = test->gtGetOp2();
789 noway_assert(relop->OperKind() & GTK_RELOP);
791 GenTreePtr opr1 = relop->gtOp.gtOp1;
792 GenTreePtr opr2 = relop->gtOp.gtOp2;
797 // Make sure op1 or op2 is the iterVar.
798 if (opr1->gtOper == GT_LCL_VAR && opr1->gtLclVarCommon.gtLclNum == iterVar)
803 else if (opr2->gtOper == GT_LCL_VAR && opr2->gtLclVarCommon.gtLclNum == iterVar)
// Only TYP_INT iterators are supported.
813 if (iterOp->gtType != TYP_INT)
818 // Mark the iterator node.
819 iterOp->gtFlags |= GTF_VAR_ITERATOR;
821 // Check what type of limit we have - constant, variable or arr-len.
822 if (limitOp->gtOper == GT_CNS_INT)
824 optLoopTable[loopInd].lpFlags |= LPFLG_CONST_LIMIT;
// Remember when the constant came from Vector<T>.Count so later phases can special-case it.
825 if ((limitOp->gtFlags & GTF_ICON_SIMD_COUNT) != 0)
827 optLoopTable[loopInd].lpFlags |= LPFLG_SIMD_LIMIT;
// A local-var limit only qualifies if it is not assigned anywhere in [from..to].
830 else if (limitOp->gtOper == GT_LCL_VAR && !optIsVarAssigned(from, to, nullptr, limitOp->gtLclVarCommon.gtLclNum))
832 optLoopTable[loopInd].lpFlags |= LPFLG_VAR_LIMIT;
834 else if (limitOp->gtOper == GT_ARR_LENGTH)
836 optLoopTable[loopInd].lpFlags |= LPFLG_ARRLEN_LIMIT;
842 // Save the type of the comparison between the iterator and the limit.
843 optLoopTable[loopInd].lpTestTree = relop;
847 //----------------------------------------------------------------------------------
848 // optIsLoopIncrTree: Check if loop is a tree of form v += 1 or v = v + 1
851 // incr The incr tree to be checked. Whether incr tree is
852 // oper-equal(+=, -=...) type nodes or v=v+1 type ASG nodes.
855 // The test tree is parsed to check if "iterVar" matches the lhs of the condition
856 // and the rhs limit is extracted from the "test" tree. The limit information is
857 // added to the loop table.
860 // iterVar local num if the iterVar is found, otherwise BAD_VAR_NUM.
// optIsLoopIncrTree: Pattern-match 'incr' as a local-var update ("v = v op y" or "v op= y")
// whose increment is a TYP_INT constant; returns the local's number, else BAD_VAR_NUM.
862 unsigned Compiler::optIsLoopIncrTree(GenTreePtr incr)
865 genTreeOps updateOper;
// IsLclVarUpdateTree extracts both the update operand and operator when it matches.
866 unsigned iterVar = incr->IsLclVarUpdateTree(&incrVal, &updateOper);
867 if (iterVar != BAD_VAR_NUM)
869 // We have v = v op y type asg node.
882 // Increment should be by a const int.
883 // TODO-CQ: CLONE: allow variable increments.
884 if ((incrVal->gtOper != GT_CNS_INT) || (incrVal->TypeGet() != TYP_INT))
893 //----------------------------------------------------------------------------------
894 // optComputeIterInfo: Check tree is loop increment of a lcl that is loop-invariant.
897 // from, to - are blocks (beg, end) which are part of the loop.
898 // incr - tree that increments the loop iterator. v+=1 or v=v+1.
899 // pIterVar - see return value.
902 // Returns true if iterVar "v" can be returned in "pIterVar", otherwise returns
906 // Check if the "incr" tree is a "v=v+1 or v+=1" type tree and make sure it is not
907 // assigned in the loop.
// optComputeIterInfo: Confirm 'incr' is a well-formed iterator update (via
// optIsLoopIncrTree) and that the iterator is not assigned elsewhere in [from..to];
// on success the iterator's local number is returned through *pIterVar.
909 bool Compiler::optComputeIterInfo(GenTreePtr incr, BasicBlock* from, BasicBlock* to, unsigned* pIterVar)
912 unsigned iterVar = optIsLoopIncrTree(incr);
913 if (iterVar == BAD_VAR_NUM)
// Another assignment to the iterator inside the loop disqualifies it.
917 if (optIsVarAssigned(from, to, incr, iterVar))
919 JITDUMP("iterVar is assigned in loop\n");
927 //----------------------------------------------------------------------------------
928 // optIsLoopTestEvalIntoTemp:
929 // Pattern match if the test tree is computed into a tmp
930 // and the "tmp" is used as jump condition for loop termination.
933 // testStmt - is the JTRUE statement that is of the form: jmpTrue (Vtmp != 0)
934 // where Vtmp contains the actual loop test result.
935 // newStmt - contains the statement that is the actual test stmt involving
936 // the loop iterator.
939 // Returns true if a new test tree can be obtained.
942 // Scan if the current stmt is a jtrue with (Vtmp != 0) as condition
943 // Then returns the rhs for def of Vtmp as the "test" node.
946 // This method just retrieves what it thinks is the "test" node,
947 // the callers are expected to verify that "iterVar" is used in the test.
// optIsLoopTestEvalIntoTemp: Match the pattern "Vtmp = <relop>; jmpTrue(Vtmp != 0)" and,
// when found, return the relop statement through *newTest as the real loop test.
949 bool Compiler::optIsLoopTestEvalIntoTemp(GenTreePtr testStmt, GenTreePtr* newTest)
951 GenTreePtr test = testStmt->gtStmt.gtStmtExpr;
953 if (test->gtOper != GT_JTRUE)
958 GenTreePtr relop = test->gtGetOp1();
959 noway_assert(relop->OperIsCompare());
961 GenTreePtr opr1 = relop->gtOp.gtOp1;
962 GenTreePtr opr2 = relop->gtOp.gtOp2;
964 // Make sure we have jtrue (vtmp != 0)
965 if ((relop->OperGet() == GT_NE) && (opr1->OperGet() == GT_LCL_VAR) && (opr2->OperGet() == GT_CNS_INT) &&
966 opr2->IsIntegralConst(0))
968 // Get the previous statement to get the def (rhs) of Vtmp to see
969 // if the "test" is evaluated into Vtmp.
970 GenTreePtr prevStmt = testStmt->gtPrev;
971 if (prevStmt == nullptr)
976 GenTreePtr tree = prevStmt->gtStmt.gtStmtExpr;
977 if (tree->OperGet() == GT_ASG)
979 GenTreePtr lhs = tree->gtOp.gtOp1;
980 GenTreePtr rhs = tree->gtOp.gtOp2;
982 // Return as the new test node.
// The def must target the same local that the JTRUE compares against zero.
983 if (lhs->gtOper == GT_LCL_VAR && lhs->AsLclVarCommon()->GetLclNum() == opr1->AsLclVarCommon()->GetLclNum())
985 if (rhs->OperIsCompare())
996 //----------------------------------------------------------------------------------
997 // optExtractInitTestIncr:
998 // Extract the "init", "test" and "incr" nodes of the loop.
1001 // head - Loop head block
1002 // bottom - Loop bottom block
1003 // top - Loop top block
1004 // ppInit - The init stmt of the loop if found.
1005 // ppTest - The test stmt of the loop if found.
1006 // ppIncr - The incr stmt of the loop if found.
1009 // The results are put in "ppInit", "ppTest" and "ppIncr" if the method
1010 // returns true. Returns false if the information can't be extracted.
1013 // Check if the "test" stmt is last stmt in the loop "bottom". If found good,
1014 // "test" stmt is found. Try to find the "incr" stmt. Check previous stmt of
1015 // "test" to get the "incr" stmt. If it is not found it could be a loop of the
1018 // +-------<-----------------<-----------+
1021 // BBinit(head) -> BBcond(top) -> BBLoopBody(bottom) ---^
1023 // Check if the "incr" tree is present in the loop "top" node as the last stmt.
1024 // Also check if the "test" tree is assigned to a tmp node and the tmp is used
1025 // in the jtrue condition.
1028 // This method just retrieves what it thinks is the "test" node,
1029 // the callers are expected to verify that "iterVar" is used in the test.
// optExtractInitTestIncr: Locate the loop's "init" (last stmt of the pre-header),
// "test" (last stmt of bottom, possibly via a temp), and "incr" (stmt before the test,
// or last stmt of top) statements. See the header comment above for the full contract.
1031 bool Compiler::optExtractInitTestIncr(
1032 BasicBlock* head, BasicBlock* bottom, BasicBlock* top, GenTreePtr* ppInit, GenTreePtr* ppTest, GenTreePtr* ppIncr)
1034 assert(ppInit != nullptr);
1035 assert(ppTest != nullptr);
1036 assert(ppIncr != nullptr);
1038 // Check if last two statements in the loop body are the increment of the iterator
1039 // and the loop termination test.
1040 noway_assert(bottom->bbTreeList != nullptr);
// bbTreeList->gtPrev is the last statement in the (circularly linked) statement list.
1041 GenTreePtr test = bottom->bbTreeList->gtPrev;
1042 noway_assert(test != nullptr && test->gtNext == nullptr);
1045 if (optIsLoopTestEvalIntoTemp(test, &newTest))
1050 // Check if we have the incr tree before the test tree, if we don't,
1051 // check if incr is part of the loop "top".
1052 GenTreePtr incr = test->gtPrev;
1053 if (incr == nullptr || optIsLoopIncrTree(incr->gtStmt.gtStmtExpr) == BAD_VAR_NUM)
1055 if (top == nullptr || top->bbTreeList == nullptr || top->bbTreeList->gtPrev == nullptr)
1060 // If the prev stmt to loop test is not incr, then check if we have loop test evaluated into a tmp.
1061 GenTreePtr topLast = top->bbTreeList->gtPrev;
1062 if (optIsLoopIncrTree(topLast->gtStmt.gtStmtExpr) != BAD_VAR_NUM)
1072 assert(test != incr);
1074 // Find the last statement in the loop pre-header which we expect to be the initialization of
1075 // the loop iterator.
1076 GenTreePtr phdr = head->bbTreeList;
1077 if (phdr == nullptr)
1082 GenTreePtr init = phdr->gtPrev;
1083 noway_assert(init != nullptr && (init->gtNext == nullptr));
1085 // If it is a duplicated loop condition, skip it.
1086 if (init->gtFlags & GTF_STMT_CMPADD)
1088 bool doGetPrev = true;
1092 // Previous optimization passes may have inserted compiler-generated
1093 // statements other than duplicated loop conditions.
1094 doGetPrev = (init->gtPrev != nullptr);
1098 // Must be a duplicated loop condition.
1099 noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
1104 init = init->gtPrev;
1106 noway_assert(init != nullptr);
// All three results must be statement nodes; callers receive the contained expressions.
1109 noway_assert(init->gtOper == GT_STMT);
1110 noway_assert(test->gtOper == GT_STMT);
1111 noway_assert(incr->gtOper == GT_STMT);
1113 *ppInit = init->gtStmt.gtStmtExpr;
1114 *ppTest = test->gtStmt.gtStmtExpr;
1115 *ppIncr = incr->gtStmt.gtStmtExpr;
1120 /*****************************************************************************
1122 * Record the loop in the loop table.
// optRecordLoop: Insert a newly discovered natural loop (head/first/top/entry/bottom/
// exit/exitCnt) into optLoopTable, keeping the table sorted so containing loops precede
// contained ones. Also tries to recognize a for-like "init; test; incr" iterator shape
// and, when found, records the iterator/limit info and LPFLG_ITER/LPFLG_CONST flags.
1125 void Compiler::optRecordLoop(BasicBlock* head,
1131 unsigned char exitCnt)
1133 // Record this loop in the table, if there's room.
1135 assert(optLoopCount <= MAX_LOOP_NUM);
1136 if (optLoopCount == MAX_LOOP_NUM)
// Table full: remember the overflow and give up on recording this loop.
1139 loopOverflowThisMethod = true;
1144 // Assumed preconditions on the loop we're adding.
1145 assert(first->bbNum <= top->bbNum);
1146 assert(top->bbNum <= entry->bbNum);
1147 assert(entry->bbNum <= bottom->bbNum);
1148 assert(head->bbNum < top->bbNum || head->bbNum > bottom->bbNum);
1150 // If the new loop contains any existing ones, add it in the right place.
1151 unsigned char loopInd = optLoopCount;
1152 for (unsigned char prevPlus1 = optLoopCount; prevPlus1 > 0; prevPlus1--)
1154 unsigned char prev = prevPlus1 - 1;
1155 if (optLoopTable[prev].lpContainedBy(first, bottom))
1160 // Move up any loops if necessary.
1161 for (unsigned j = optLoopCount; j > loopInd; j--)
1163 optLoopTable[j] = optLoopTable[j - 1];
// Sanity: every loop after the insertion point is either disjoint from, or contained in, the new loop.
1167 for (unsigned i = loopInd + 1; i < optLoopCount; i++)
1169 // The loop is well-formed.
1170 assert(optLoopTable[i].lpWellFormed());
1171 // Check for disjoint.
1172 if (optLoopTable[i].lpDisjoint(first, bottom))
1176 // Otherwise, assert complete containment (of optLoopTable[i] in new loop).
1177 assert(optLoopTable[i].lpContainedBy(first, bottom));
// Fill in the new descriptor's block pointers.
1181 optLoopTable[loopInd].lpHead = head;
1182 optLoopTable[loopInd].lpFirst = first;
1183 optLoopTable[loopInd].lpTop = top;
1184 optLoopTable[loopInd].lpBottom = bottom;
1185 optLoopTable[loopInd].lpEntry = entry;
1186 optLoopTable[loopInd].lpExit = exit;
1187 optLoopTable[loopInd].lpExitCnt = exitCnt;
// Nesting links are computed later; start unparented.
1189 optLoopTable[loopInd].lpParent = BasicBlock::NOT_IN_LOOP;
1190 optLoopTable[loopInd].lpChild = BasicBlock::NOT_IN_LOOP;
1191 optLoopTable[loopInd].lpSibling = BasicBlock::NOT_IN_LOOP;
1193 optLoopTable[loopInd].lpFlags = 0;
1195 // We haven't yet recorded any side effects.
1196 optLoopTable[loopInd].lpLoopHasHeapHavoc = false;
1197 optLoopTable[loopInd].lpFieldsModified = nullptr;
1198 optLoopTable[loopInd].lpArrayElemTypesModified = nullptr;
1200 // If DO-WHILE loop mark it as such.
1201 if (head->bbNext == entry)
1203 optLoopTable[loopInd].lpFlags |= LPFLG_DO_WHILE;
1206 // If single exit loop mark it as such.
1210 optLoopTable[loopInd].lpFlags |= LPFLG_ONE_EXIT;
1214 // Try to find loops that have an iterator (i.e. for-like loops) "for (init; test; incr){ ... }"
1215 // We have the following restrictions:
1216 // 1. The loop condition must be a simple one i.e. only one JTRUE node
1217 // 2. There must be a loop iterator (a local var) that is
1218 // incremented (decremented or lsh, rsh, mul) with a constant value
1219 // 3. The iterator is incremented exactly once
1220 // 4. The loop condition must use the iterator.
1222 if (bottom->bbJumpKind == BBJ_COND)
1227 if (!optExtractInitTestIncr(head, bottom, top, &init, &test, &incr))
1232 unsigned iterVar = BAD_VAR_NUM;
1233 if (!optComputeIterInfo(incr, head->bbNext, bottom, &iterVar))
1238 // Make sure the "iterVar" initialization is never skipped,
1239 // i.e. every pred of ENTRY other than HEAD is in the loop.
1240 for (flowList* predEdge = entry->bbPreds; predEdge; predEdge = predEdge->flNext)
1242 BasicBlock* predBlock = predEdge->flBlock;
1243 if ((predBlock != head) && !optLoopTable[loopInd].lpContains(predBlock))
1249 if (!optPopulateInitInfo(loopInd, init, iterVar))
1254 // Check that the iterator is used in the loop condition.
1255 if (!optCheckIterInLoopTest(loopInd, test, head->bbNext, bottom, iterVar))
1260 // We know the loop has an iterator at this point ->flag it as LPFLG_ITER
1261 // Record the iterator, the pointer to the test node
1262 // and the initial value of the iterator (constant or local var)
1263 optLoopTable[loopInd].lpFlags |= LPFLG_ITER;
1266 optLoopTable[loopInd].lpIterTree = incr;
1269 // Save the initial value of the iterator - can be lclVar or constant
1270 // Flag the loop accordingly.
1276 simpleTestLoopCount++;
1279 // Check if a constant iteration loop.
1280 if ((optLoopTable[loopInd].lpFlags & LPFLG_CONST_INIT) && (optLoopTable[loopInd].lpFlags & LPFLG_CONST_LIMIT))
1282 // This is a constant loop.
1283 optLoopTable[loopInd].lpFlags |= LPFLG_CONST;
1285 constIterLoopCount++;
// Debug dump: print the initializer and each loop-body statement except the increment.
1292 printf("\nConstant loop initializer:\n");
1295 printf("\nConstant loop body:\n");
1297 BasicBlock* block = head;
1300 block = block->bbNext;
1301 for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
1303 if (stmt->gtStmt.gtStmtExpr == incr)
1308 gtDispTree(stmt->gtStmt.gtStmtExpr);
1310 } while (block != bottom);
1316 DBEXEC(verbose, optPrintLoopRecording(loopInd));
1321 //------------------------------------------------------------------------
1322 // optPrintLoopRecording: Print a recording of the loop.
1325 // loopInd - loop index.
// optPrintLoopRecording: Debug dump of a just-recorded loop — its block layout plus,
// for iterator loops, the iterator var, increment, init value, test operator, and limit.
1327 void Compiler::optPrintLoopRecording(unsigned loopInd)
1329 printf("Recorded loop %s", (loopInd != optLoopCount ? "(extended) " : ""));
1330 optPrintLoopInfo(optLoopCount, // Not necessarily the loop index, but the number of loops that have been added.
1331 optLoopTable[loopInd].lpHead, optLoopTable[loopInd].lpFirst, optLoopTable[loopInd].lpTop,
1332 optLoopTable[loopInd].lpEntry, optLoopTable[loopInd].lpBottom, optLoopTable[loopInd].lpExitCnt,
1333 optLoopTable[loopInd].lpExit);
1335 // If an iterator loop print the iterator and the initialization.
1336 if (optLoopTable[loopInd].lpFlags & LPFLG_ITER)
1338 printf(" [over V%02u", optLoopTable[loopInd].lpIterVar());
1340 printf(GenTree::NodeName(optLoopTable[loopInd].lpIterOper()));
1342 printf("%d )", optLoopTable[loopInd].lpIterConst());
1344 if (optLoopTable[loopInd].lpFlags & LPFLG_CONST_INIT)
1346 printf(" from %d", optLoopTable[loopInd].lpConstInit);
1348 if (optLoopTable[loopInd].lpFlags & LPFLG_VAR_INIT)
1350 printf(" from V%02u", optLoopTable[loopInd].lpVarInit);
1353 // If a simple test condition print operator and the limits */
1354 printf(GenTree::NodeName(optLoopTable[loopInd].lpTestOper()));
1356 if (optLoopTable[loopInd].lpFlags & LPFLG_CONST_LIMIT)
1358 printf("%d ", optLoopTable[loopInd].lpConstLimit());
1361 if (optLoopTable[loopInd].lpFlags & LPFLG_VAR_LIMIT)
1363 printf("V%02u ", optLoopTable[loopInd].lpVarLimit());
// optCheckPreds: Debug-only consistency check of the predecessor lists — every pred
// must be a live block in the bbNext chain, and its jump kind must actually be able
// to transfer control to the successor that lists it.
1372 void Compiler::optCheckPreds()
1375 BasicBlock* blockPred;
1378 for (block = fgFirstBB; block; block = block->bbNext)
1380 for (pred = block->bbPreds; pred; pred = pred->flNext)
1382 // make sure this pred is part of the BB list
1383 for (blockPred = fgFirstBB; blockPred; blockPred = blockPred->bbNext)
1385 if (blockPred == pred->flBlock)
1390 noway_assert(blockPred);
// Validate that the pred's terminator can actually reach 'block'.
1391 switch (blockPred->bbJumpKind)
1394 if (blockPred->bbJumpDest == block)
1400 noway_assert(blockPred->bbNext == block);
1402 case BBJ_EHFILTERRET:
1404 case BBJ_EHCATCHRET:
1405 noway_assert(blockPred->bbJumpDest == block);
1416 /*****************************************************************************
1417  *  Find the natural loops, using dominators. Note that the test for
1418  *  a loop is slightly different from the standard one, because we have
1419  *  not done a depth first reordering of the basic blocks.
// NOTE(review): sampled extraction -- interior lines (loop braces, some labels,
// RECORD_LOOP/NO_LOOP targets, #if COUNT_LOOPS guards) are not visible here.
// Code below is left byte-identical to the visible lines.
1422 void Compiler::optFindNaturalLoops()
1427         printf("*************** In optFindNaturalLoops()\n");
1433     flowList* predEntry;
// Dominator information must already be available; loop discovery relies on
// fgDominate/fgReachable queries below.
1435     noway_assert(fgDomsComputed);
1439     hasMethodLoops         = false;
1440     loopsThisMethod        = 0;
1441     loopOverflowThisMethod = false;
1444     /* We will use the following terminology:
1445      * HEAD    - the basic block that flows into the loop ENTRY block (Currently MUST be lexically before entry).
1446                  Not part of the looping of the loop.
1447      * FIRST   - the lexically first basic block (in bbNext order) within this loop.  (May be part of a nested loop,
1448      *           but not the outer loop. ???)
1449      * TOP     - the target of the backward edge from BOTTOM. In most cases FIRST and TOP are the same.
1450      * BOTTOM  - the lexically last block in the loop (i.e. the block from which we jump to the top)
1451      * EXIT    - the loop exit or the block right after the bottom
1452      * ENTRY   - the entry in the loop (not necessarly the TOP), but there must be only one entry
1454      * We (currently) require the body of a loop to be a contiguous (in bbNext order) sequence of basic blocks.
1488     unsigned char exitCount;
1490     for (head = fgFirstBB; head->bbNext; head = head->bbNext)
1496         //  Blocks that are rarely run have a zero bbWeight and should
1497         //  never be optimized here
1499         if (top->bbWeight == BB_ZERO_WEIGHT)
// Scan the predecessors of 'top' looking for back edges (pred lexically at or
// after 'top'), each of which is a candidate loop BOTTOM.
1504         for (pred = top->bbPreds; pred; pred = pred->flNext)
1506             /* Is this a loop candidate? - We look for "back edges", i.e. an edge from BOTTOM
1507              * to TOP (note that this is an abuse of notation since this is not necessarily a back edge
1508              * as the definition says, but merely an indication that we have a loop there).
1509              * Thus, we have to be very careful and after entry discovery check that it is indeed
1510              * the only place we enter the loop (especially for non-reducible flow graphs).
1513             bottom = pred->flBlock;
1516             if (top->bbNum <= bottom->bbNum) // is this a backward edge? (from BOTTOM to TOP)
1518                 if ((bottom->bbJumpKind == BBJ_EHFINALLYRET) || (bottom->bbJumpKind == BBJ_EHFILTERRET) ||
1519                     (bottom->bbJumpKind == BBJ_EHCATCHRET) || (bottom->bbJumpKind == BBJ_CALLFINALLY) ||
1520                     (bottom->bbJumpKind == BBJ_SWITCH))
1522                     /* BBJ_EHFINALLYRET, BBJ_EHFILTERRET, BBJ_EHCATCHRET, and BBJ_CALLFINALLY can never form a loop.
1523                      * BBJ_SWITCH that has a backward jump appears only for labeled break. */
1527                 BasicBlock* loopBlock;
1529                 /* The presence of a "back edge" is an indication that a loop might be present here
1532                  * 1. A collection of STRONGLY CONNECTED nodes i.e. there is a path from any
1533                  *    node in the loop to any other node in the loop (wholly within the loop)
1534                  * 2. The loop has a unique ENTRY, i.e. there is only one way to reach a node
1535                  *    in the loop from outside the loop, and that is through the ENTRY
1538                 /* Let's find the loop ENTRY */
1540                 if (head->bbJumpKind == BBJ_ALWAYS)
1542                     if (head->bbJumpDest->bbNum <= bottom->bbNum && head->bbJumpDest->bbNum >= top->bbNum)
1544                         /* OK - we enter somewhere within the loop */
1545                         entry = head->bbJumpDest;
1547                         /* some useful asserts
1548                          * Cannot enter at the top - should have being caught by redundant jumps */
1550                         assert((entry != top) || (head->bbFlags & BBF_KEEP_BBJ_ALWAYS));
1554                         /* special case - don't consider now */
1555                         // assert (!"Loop entered in weird way!");
1559                 // Can we fall through into the loop?
1560                 else if (head->bbJumpKind == BBJ_NONE || head->bbJumpKind == BBJ_COND)
1562                     /* The ENTRY is at the TOP (a do-while loop) */
1567                     goto NO_LOOP; // head does not flow into the loop bail for now
1570                 // Now we find the "first" block -- the earliest block reachable within the loop.
1571                 // This is usually the same as "top", but can differ in rare cases where "top" is
1572                 // the entry block of a nested loop, and that nested loop branches backwards to a
1573                 // a block before "top". We find this by searching for such backwards branches
1574                 // in the loop known so far.
1575                 BasicBlock* first          = top;
1576                 BasicBlock* newFirst;
1577                 bool        blocksToSearch = true;
1578                 BasicBlock* validatedAfter = bottom->bbNext;
// Fixed-point search: keep extending 'first' earlier while some block in the
// (growing) range has a successor before the current 'first'.
1579                 while (blocksToSearch)
1581                     blocksToSearch = false;
1583                     blocksToSearch = false;
1584                     for (loopBlock = first; loopBlock != validatedAfter; loopBlock = loopBlock->bbNext)
1586                         unsigned nSucc = loopBlock->NumSucc();
1587                         for (unsigned j = 0; j < nSucc; j++)
1589                             BasicBlock* succ = loopBlock->GetSucc(j);
1590                             if ((newFirst == nullptr && succ->bbNum < first->bbNum) ||
1591                                 (newFirst != nullptr && succ->bbNum < newFirst->bbNum))
1597                     if (newFirst != nullptr)
1599                         validatedAfter = first;
1601                         blocksToSearch = true;
1605                 // Is "head" still before "first"? If not, we don't have a valid loop...
1606                 if (head->bbNum >= first->bbNum)
1609                         "Extending loop [BB%02u..BB%02u] 'first' to BB%02u captures head BB%02u. Rejecting loop.\n",
1610                         top->bbNum, bottom->bbNum, first->bbNum, head->bbNum);
1614                 /* Make sure ENTRY dominates all blocks in the loop
1615                  * This is necessary to ensure condition 2. above
1616                  * At the same time check if the loop has a single exit
1617                  * point - those loops are easier to optimize */
1619                 for (loopBlock = top; loopBlock != bottom->bbNext; loopBlock = loopBlock->bbNext)
1621                     if (!fgDominate(entry, loopBlock))
1626                     if (loopBlock == bottom)
1628                         if (bottom->bbJumpKind != BBJ_ALWAYS)
1630                             /* there is an exit at the bottom */
1632                             noway_assert(bottom->bbJumpDest == top);
1639                     BasicBlock* exitPoint;
// Classify each block's jump kind to count/record loop exits.
1641                     switch (loopBlock->bbJumpKind)
1644                         case BBJ_CALLFINALLY:
1646                         case BBJ_EHCATCHRET:
1647                             assert(loopBlock->bbJumpDest);
1648                             exitPoint = loopBlock->bbJumpDest;
1650                             if (exitPoint->bbNum < top->bbNum || exitPoint->bbNum > bottom->bbNum)
1652                                 /* exit from a block other than BOTTOM */
1661                         case BBJ_EHFINALLYRET:
1662                         case BBJ_EHFILTERRET:
1663                             /* The "try" associated with this "finally" must be in the
1664                              * same loop, so the finally block will return control inside the loop */
1669                             /* those are exits from the loop */
1677                             jumpCnt = loopBlock->bbJumpSwt->bbsCount;
1678                             BasicBlock** jumpTab;
1679                             jumpTab = loopBlock->bbJumpSwt->bbsDstTab;
1683                                 noway_assert(*jumpTab);
1684                                 exitPoint = *jumpTab;
1686                                 if (exitPoint->bbNum < top->bbNum || exitPoint->bbNum > bottom->bbNum)
1691                             } while (++jumpTab, --jumpCnt);
1695                             noway_assert(!"Unexpected bbJumpKind");
1700                 /* Make sure we can iterate the loop (i.e. there is a way back to ENTRY)
1701                  * This is to ensure condition 1. above which prevents marking fake loops
1703                  * Below is an example:
1711                  * The example above is not a loop since we bail after the first iteration
1713                  * The condition we have to check for is
1714                  *  1. ENTRY must have at least one predecessor inside the loop. Since we know that that block is
1715                  *     reachable, it can only be reached through ENTRY, therefore we have a way back to ENTRY
1717                  *  2. If we have a GOTO (BBJ_ALWAYS) outside of the loop and that block dominates the
1718                  *     loop bottom then we cannot iterate
1720                  * NOTE that this doesn't entirely satisfy condition 1. since "break" statements are not
1721                  * part of the loop nodes (as per definition they are loop exits executed only once),
1722                  * but we have no choice but to include them because we consider all blocks within TOP-BOTTOM */
1724                 for (loopBlock = top; loopBlock != bottom; loopBlock = loopBlock->bbNext)
1726                     switch (loopBlock->bbJumpKind)
1731                             if (fgDominate(loopBlock, bottom))
1740                 bool canIterateLoop = false;
1742                 for (predEntry = entry->bbPreds; predEntry; predEntry = predEntry->flNext)
1744                     if (predEntry->flBlock->bbNum >= top->bbNum && predEntry->flBlock->bbNum <= bottom->bbNum)
1746                         canIterateLoop = true;
1749                     else if (predEntry->flBlock != head)
1751                         // The entry block has multiple predecessors outside the loop; the 'head'
1752                         // block isn't the only one. We only support a single 'head', so bail.
1757                 if (!canIterateLoop)
1762                 /* Double check - make sure that all loop blocks except ENTRY
1763                  * have no predecessors outside the loop - this ensures only one loop entry and prevents
1764                  * us from considering non-loops due to incorrectly assuming that we had a back edge
1767                  * Loops of the form "while (a || b)" will be treated as 2 nested loops (with the same header)
1770                 for (loopBlock = top; loopBlock != bottom->bbNext; loopBlock = loopBlock->bbNext)
1772                     if (loopBlock == entry)
1777                     for (predTop = loopBlock->bbPreds; predTop != nullptr; predTop = predTop->flNext)
1779                         if (predTop->flBlock->bbNum < top->bbNum || predTop->flBlock->bbNum > bottom->bbNum)
1781                             // noway_assert(!"Found loop with multiple entries");
1787                 // Disqualify loops where the first block of the loop is less nested in EH than
1788                 // the bottom block. That is, we don't want to handle loops where the back edge
1789                 // goes from within an EH region to a first block that is outside that same EH
1790                 // region. Note that we *do* handle loops where the first block is the *first*
1791                 // block of a more nested EH region (since it is legal to branch to the first
1792                 // block of an immediately more nested EH region). So, for example, disqualify
1799                 //    BB10 BBJ_COND => BB02
1803                 // Here, BB10 is more nested than BB02.
1805                 if (bottom->hasTryIndex() && !bbInTryRegions(bottom->getTryIndex(), first))
1807                     JITDUMP("Loop 'first' BB%02u is in an outer EH region compared to loop 'bottom' BB%02u. Rejecting "
1809                             first->bbNum, bottom->bbNum);
1813 #if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
1814                 // Disqualify loops where the first block of the loop is a finally target.
1815                 // The main problem is when multiple loops share a 'first' block that is a finally
1816                 // target and we canonicalize the loops by adding a new loop head. In that case, we
1817                 // need to update the blocks so the finally target bit is moved to the newly created
1818                 // block, and removed from the old 'first' block. This is 'hard', so at this point
1819                 // in the RyuJIT codebase (when we don't expect to keep the "old" ARM32 code generator
1820                 // long-term), it's easier to disallow the loop than to update the flow graph to
1821                 // support this case.
1823                 if ((first->bbFlags & BBF_FINALLY_TARGET) != 0)
1825                     JITDUMP("Loop 'first' BB%02u is a finally target. Rejecting loop.\n", first->bbNum);
1828 #endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
1830                 /* At this point we have a loop - record it in the loop table
1831                  * If we found only one exit, record it in the table too
1832                  * (otherwise an exit = 0 in the loop table means multiple exits) */
1839                 optRecordLoop(head, first, top, entry, bottom, exit, exitCount);
1842                 if (!hasMethodLoops)
1844                     /* mark the method as containing natural loops */
1846                     hasMethodLoops = true;
1849                 /* increment total number of loops found */
1853                 /* keep track of the number of exits */
1854                 loopExitCountTable.record(static_cast<unsigned>(exitCount));
1855 #endif // COUNT_LOOPS
1858             /* current predecessor not good for a loop - continue with another one, if any */
1864     loopCountTable.record(loopsThisMethod);
1865     if (maxLoopsPerMethod < loopsThisMethod)
1867         maxLoopsPerMethod = loopsThisMethod;
1869     if (loopOverflowThisMethod)
1871         totalLoopOverflows++;
1873 #endif // COUNT_LOOPS
1875     // Now the loop indices are stable.  We can figure out parent/child relationships
1876     // (using table indices to name loops), and label blocks.
// Walk earlier table entries as potential parents; the table is ordered so a
// containing loop appears before the loops it contains.
1877     for (unsigned char loopInd = 1; loopInd < optLoopCount; loopInd++)
1879         for (unsigned char possibleParent = loopInd; possibleParent > 0;)
1882             if (optLoopTable[possibleParent].lpContains(optLoopTable[loopInd]))
1884                 optLoopTable[loopInd].lpParent  = possibleParent;
1885                 optLoopTable[loopInd].lpSibling = optLoopTable[possibleParent].lpChild;
1886                 optLoopTable[possibleParent].lpChild = loopInd;
1892     // Now label the blocks with the innermost loop to which they belong.  Since parents
1893     // precede children in the table, doing the labeling for each loop in order will achieve
1894     // this -- the innermost loop labeling will be done last.
1895     for (unsigned char loopInd = 0; loopInd < optLoopCount; loopInd++)
1897         BasicBlock* first  = optLoopTable[loopInd].lpFirst;
1898         BasicBlock* bottom = optLoopTable[loopInd].lpBottom;
1899         for (BasicBlock* blk = first; blk != nullptr; blk = blk->bbNext)
1901             blk->bbNatLoopNum = loopInd;
1906             assert(blk->bbNext != nullptr); // We should never reach nullptr.
1910     // Make sure that loops are canonical: that every loop has a unique "top", by creating an empty "nop"
1911     // one, if necessary, for loops containing others that share a "top."
1913     for (unsigned char loopInd = 0; loopInd < optLoopCount; loopInd++)
1915         // Traverse the outermost loops as entries into the loop nest; so skip non-outermost.
1916         if (optLoopTable[loopInd].lpParent != BasicBlock::NOT_IN_LOOP)
1922         if (optCanonicalizeLoopNest(loopInd))
// Canonicalization may have added blocks/edges; recompute flowgraph annotations.
1929         fgUpdateChangedFlowGraph();
1933     if (verbose && optLoopCount > 0)
1935         printf("\nFinal natural loop table:\n");
1936         for (unsigned loopInd = 0; loopInd < optLoopCount; loopInd++)
1938             optPrintLoopInfo(loopInd);
// Redirect the jump destination(s) of "blk": any target found as a key in
// "redirectMap" is replaced with the mapped block. Blocks with no jump
// destination are left untouched; for switches, every table entry is checked.
// NOTE(review): sampled extraction -- some case labels/braces not visible.
1945 void Compiler::optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap)
1947     BasicBlock* newJumpDest = nullptr;
1948     switch (blk->bbJumpKind)
1953         case BBJ_EHFILTERRET:
1954         case BBJ_EHFINALLYRET:
1955         case BBJ_EHCATCHRET:
1956             // These have no jump destination to update.
1961         case BBJ_CALLFINALLY:
1963             // All of these have a single jump destination to update.
1964             if (redirectMap->Lookup(blk->bbJumpDest, &newJumpDest))
1966                 blk->bbJumpDest = newJumpDest;
1972             bool redirected = false;
1973             for (unsigned i = 0; i < blk->bbJumpSwt->bbsCount; i++)
1975                 if (redirectMap->Lookup(blk->bbJumpSwt->bbsDstTab[i], &newJumpDest))
1977                     blk->bbJumpSwt->bbsDstTab[i] = newJumpDest;
1981             // If any redirections happend, invalidate the switch table map for the switch.
1984                 GetSwitchDescMap()->Remove(blk);
1994 // TODO-Cleanup: This should be a static member of the BasicBlock class.
// Copy the jump destination(s) of "from" into "to". Both blocks must already
// have the same bbJumpKind (asserted below). For switches, a fresh BBswtDesc
// and destination table are allocated so "to" does not alias "from"'s table.
1995 void Compiler::optCopyBlkDest(BasicBlock* from, BasicBlock* to)
1997     assert(from->bbJumpKind == to->bbJumpKind); // Precondition.
1999     // copy the jump destination(s) from "from" to "to".
2000     switch (to->bbJumpKind)
2004         case BBJ_CALLFINALLY:
2006             // All of these have a single jump destination to update.
2007             to->bbJumpDest = from->bbJumpDest;
2012             to->bbJumpSwt = new (this, CMK_BasicBlock) BBswtDesc();
2013             to->bbJumpSwt->bbsCount  = from->bbJumpSwt->bbsCount;
2014             to->bbJumpSwt->bbsDstTab = new (this, CMK_BasicBlock) BasicBlock*[from->bbJumpSwt->bbsCount];
2016             for (unsigned i = 0; i < from->bbJumpSwt->bbsCount; i++)
2018                 to->bbJumpSwt->bbsDstTab[i] = from->bbJumpSwt->bbsDstTab[i];
2028 // Canonicalize the loop nest rooted at parent loop 'loopInd'.
2029 // Returns 'true' if the flow graph is modified.
// Recursively canonicalizes this loop (if its top is shared with a nested
// loop) and then each child loop via the lpChild/lpSibling links.
2030 bool Compiler::optCanonicalizeLoopNest(unsigned char loopInd)
2032     bool modified = false;
2034     // Is the top of the current loop not in any nested loop?
2035     if (optLoopTable[loopInd].lpTop->bbNatLoopNum != loopInd)
2037         if (optCanonicalizeLoop(loopInd))
2043     for (unsigned char child = optLoopTable[loopInd].lpChild; child != BasicBlock::NOT_IN_LOOP;
2044          child = optLoopTable[child].lpSibling)
2046         if (optCanonicalizeLoopNest(child))
// Give loop 'loopInd' a unique "top" block. If the current top also belongs
// to a nested loop, insert a new empty BBJ_NONE block ("newT") before the
// loop's first block (in the appropriate EH region), redirect the loop bottom
// and the out-of-loop predecessors of the old top to "newT", and update the
// loop table (top/first/entry/head and affected child loops).
// NOTE(review): sampled extraction -- interior lines (early return, braces)
// are not visible here; code left byte-identical.
2055 bool Compiler::optCanonicalizeLoop(unsigned char loopInd)
2057     // Is the top uniquely part of the current loop?
2058     BasicBlock* t = optLoopTable[loopInd].lpTop;
2060     if (t->bbNatLoopNum == loopInd)
2065     JITDUMP("in optCanonicalizeLoop: L%02u has top BB%02u (bottom BB%02u) with natural loop number L%02u: need to "
2067             loopInd, t->bbNum, optLoopTable[loopInd].lpBottom->bbNum, t->bbNatLoopNum);
2069     // Otherwise, the top of this loop is also part of a nested loop.
2071     // Insert a new unique top for this loop. We must be careful to put this new
2072     // block in the correct EH region. Note that f->bbPrev might be in a different
2073     // EH region. For example:
2081     // In this case, first->bbPrev is BB07, which is in a different 'try' region.
2082     // On the other hand, the first block of multiple loops might be the first
2083     // block of a 'try' region that is completely contained in the multiple loops.
2088     // BB10 BBJ_ALWAYS => BB08
2090     // BB12 BBJ_ALWAYS => BB08
2092     // Here, we have two loops, both with BB08 as the "first" block. Block BB08
2093     // is a single-block "try" region. Neither loop "bottom" block is in the same
2094     // "try" region as BB08. This is legal because you can jump to the first block
2095     // of a try region. With EH normalization, no two "try" regions will share
2096     // this block. In this case, we need to insert a new block for the outer loop
2097     // in the same EH region as the branch from the "bottom":
2102     // BB10 BBJ_ALWAYS => BB08
2104     // BB12 BBJ_ALWAYS => BB30
2106     // Another possibility is that the "first" block of the loop nest can be the first block
2107     // of a "try" region that also has other predecessors than those in the loop, or even in
2108     // the "try" region (since blocks can target the first block of a "try" region). For example:
2112     // BB10 BBJ_ALWAYS => BB08
2114     // BB12 BBJ_ALWAYS => BB08
2117     // BB20 BBJ_ALWAYS => BB08
2119     // BB25 BBJ_ALWAYS => BB08
2121     // Here, BB08 has 4 flow graph predecessors: BB10, BB12, BB20, BB25. These are all potential loop
2122     // bottoms, for four possible nested loops. However, we require all the loop bottoms to be in the
2123     // same EH region. For loops BB08..BB10 and BB08..BB12, we need to add a new "top" block within
2124     // the try region, immediately before BB08. The bottom of the loop BB08..BB10 loop will target the
2125     // old BB08, and the bottom of the BB08..BB12 loop will target the new loop header. The other branches
2126     // (BB20, BB25) must target the new loop header, both for correctness, and to avoid the illegal
2127     // situation of branching to a non-first block of a 'try' region.
2129     // We can also have a loop nest where the "first" block is outside of a "try" region
2130     // and the back edges are inside a "try" region, for example:
2134     // BB09 try { BBJ_COND => BB02
2136     // BB15 BBJ_COND => BB02
2138     // BB21 } // end of "try"
2140     // In this case, both loop back edges were formed by "leave" instructions that were
2141     // imported into branches that were later made conditional. In this case, we don't
2142     // want to copy the EH region of the back edge, since that would create a block
2143     // outside of and disjoint with the "try" region of the back edge. However, to
2144     // simplify things, we disqualify this type of loop, so we should never see this here.
2146     BasicBlock* h = optLoopTable[loopInd].lpHead;
2147     BasicBlock* f = optLoopTable[loopInd].lpFirst;
2148     BasicBlock* b = optLoopTable[loopInd].lpBottom;
2150     // The loop must be entirely contained within a single handler region.
2151     assert(BasicBlock::sameHndRegion(f, b));
2153     // If the bottom block is in the same "try" region, then we extend the EH
2154     // region. Otherwise, we add the new block outside the "try" region.
2155     bool        extendRegion = BasicBlock::sameTryRegion(f, b);
2156     BasicBlock* newT         = fgNewBBbefore(BBJ_NONE, f, extendRegion);
2159         // We need to set the EH region manually. Set it to be the same
2160         // as the bottom block.
2161         newT->copyEHRegion(b);
// The new top is reachable from exactly the same blocks as the old top.
2164     BlockSetOps::Assign(this, newT->bbReach, t->bbReach);
2166     // Redirect the "bottom" of the current loop to "newT".
2167     BlockToBlockMap* blockMap = new (getAllocatorLoopHoist()) BlockToBlockMap(getAllocatorLoopHoist());
2168     blockMap->Set(t, newT);
2169     optRedirectBlock(b, blockMap);
2171     // Redirect non-loop preds of "t" to also go to "newT". Inner loops that also branch to "t" should continue
2172     // to do so. However, there maybe be other predecessors from outside the loop nest that need to be updated
2173     // to point to "newT". This normally wouldn't happen, since they too would be part of the loop nest. However,
2174     // they might have been prevented from participating in the loop nest due to different EH nesting, or some
2177     // Note that optRedirectBlock doesn't update the predecessors list. So, if the same 't' block is processed
2178     // multiple times while canonicalizing multiple loop nests, we'll attempt to redirect a predecessor multiple times.
2179     // This is ok, because after the first redirection, the topPredBlock branch target will no longer match the source
2180     // edge of the blockMap, so nothing will happen.
2181     for (flowList* topPred = t->bbPreds; topPred != nullptr; topPred = topPred->flNext)
2183         BasicBlock* topPredBlock = topPred->flBlock;
2185         // Skip if topPredBlock is in the loop.
2186         // Note that this uses block number to detect membership in the loop. We are adding blocks during
2187         // canonicalization, and those block numbers will be new, and larger than previous blocks. However, we work
2188         // outside-in, so we shouldn't encounter the new blocks at the loop boundaries, or in the predecessor lists.
2189         if (t->bbNum <= topPredBlock->bbNum && topPredBlock->bbNum <= b->bbNum)
2191             JITDUMP("in optCanonicalizeLoop: 'top' predecessor BB%02u is in the range of L%02u (BB%02u..BB%02u); not "
2192                     "redirecting its bottom edge\n",
2193                     topPredBlock->bbNum, loopInd, t->bbNum, b->bbNum);
2197         JITDUMP("in optCanonicalizeLoop: redirect top predecessor BB%02u to BB%02u\n", topPredBlock->bbNum,
2199         optRedirectBlock(topPredBlock, blockMap);
2202     assert(newT->bbNext == f);
// Give the new block a trivial body (a NOP statement) jumping to the old top.
2205         newT->bbJumpKind = BBJ_ALWAYS;
2206         newT->bbJumpDest = t;
2207         newT->bbTreeList = nullptr;
2208         fgInsertStmtAtEnd(newT, fgNewStmtFromTree(gtNewOperNode(GT_NOP, TYP_VOID, nullptr)));
2211     // If it had been a do-while loop (top == entry), update entry, as well.
2212     BasicBlock* origE = optLoopTable[loopInd].lpEntry;
2213     if (optLoopTable[loopInd].lpTop == origE)
2215         optLoopTable[loopInd].lpEntry = newT;
2217     optLoopTable[loopInd].lpTop   = newT;
2218     optLoopTable[loopInd].lpFirst = newT;
2220     newT->bbNatLoopNum = loopInd;
2222     JITDUMP("in optCanonicalizeLoop: made new block BB%02u [%p] the new unique top of loop %d.\n", newT->bbNum,
2223             dspPtr(newT), loopInd);
2225     // Make sure the head block still goes to the entry...
2226     if (h->bbJumpKind == BBJ_NONE && h->bbNext != optLoopTable[loopInd].lpEntry)
2228         h->bbJumpKind = BBJ_ALWAYS;
2229         h->bbJumpDest = optLoopTable[loopInd].lpEntry;
2231     else if (h->bbJumpKind == BBJ_COND && h->bbNext == newT && newT != optLoopTable[loopInd].lpEntry)
2233         BasicBlock* h2               = fgNewBBafter(BBJ_ALWAYS, h, /*extendRegion*/ true);
2234         optLoopTable[loopInd].lpHead = h2;
2235         h2->bbJumpDest               = optLoopTable[loopInd].lpEntry;
2236         h2->bbTreeList               = nullptr;
2237         fgInsertStmtAtEnd(h2, fgNewStmtFromTree(gtNewOperNode(GT_NOP, TYP_VOID, nullptr)));
2240     // If any loops nested in "loopInd" have the same head and entry as "loopInd",
2241     // it must be the case that they were do-while's (since "h" fell through to the entry).
2242     // The new node "newT" becomes the head of such loops.
2243     for (unsigned char childLoop = optLoopTable[loopInd].lpChild; childLoop != BasicBlock::NOT_IN_LOOP;
2244          childLoop = optLoopTable[childLoop].lpSibling)
2246         if (optLoopTable[childLoop].lpEntry == origE && optLoopTable[childLoop].lpHead == h &&
2247             newT->bbJumpKind == BBJ_NONE && newT->bbNext == origE)
2249             optUpdateLoopHead(childLoop, h, newT);
// Returns true if loop 'l1' contains loop 'l2': walks l2's parent chain
// (recursively) looking for l1. 'l1' must be a real loop index.
2255 bool Compiler::optLoopContains(unsigned l1, unsigned l2)
2257     assert(l1 != BasicBlock::NOT_IN_LOOP);
// Reaching NOT_IN_LOOP on l2's parent chain means l1 was never found.
2262     else if (l2 == BasicBlock::NOT_IN_LOOP)
2268         return optLoopContains(l1, optLoopTable[l2].lpParent);
// Change the head of loop 'loopInd' from block "from" to block "to", and
// propagate the same update to any child loops that shared the old head.
2272 void Compiler::optUpdateLoopHead(unsigned loopInd, BasicBlock* from, BasicBlock* to)
2274     assert(optLoopTable[loopInd].lpHead == from);
2275     optLoopTable[loopInd].lpHead = to;
2276     for (unsigned char childLoop = optLoopTable[loopInd].lpChild; childLoop != BasicBlock::NOT_IN_LOOP;
2277          childLoop = optLoopTable[childLoop].lpSibling)
2279         if (optLoopTable[childLoop].lpHead == from)
2281             optUpdateLoopHead(childLoop, from, to);
2286 /*****************************************************************************
2287  *  If the "i += const" will cause an overflow exception for the small types.
// Returns true when the iterator value at loop exit exceeds the maximum of
// the given small integer type (TYP_BYTE/UBYTE/SHORT/CHAR); 32-bit types
// return false here since their overflow is checked separately by the caller.
2290 bool jitIterSmallOverflow(int iterAtExit, var_types incrType)
2297             type_MAX = SCHAR_MAX;
2300             type_MAX = UCHAR_MAX;
2303             type_MAX = SHRT_MAX;
2306             type_MAX = USHRT_MAX;
2309         case TYP_UINT: // Detected by checking for 32bit ....
2311             return false; // ... overflow same as done for TYP_INT
2317     if (iterAtExit > type_MAX)
2327 /*****************************************************************************
2328  *  If the "i -= const" will cause an underflow exception for the small types
// Mirror of jitIterSmallOverflow for decrementing loops: returns true when
// the iterator value at exit is below the minimum of the small signed type;
// 32-bit types return false (handled by the caller's 32-bit check).
2331 bool jitIterSmallUnderflow(int iterAtExit, var_types decrType)
2338             type_MIN = SCHAR_MIN;
2341             type_MIN = SHRT_MIN;
2350         case TYP_UINT: // Detected by checking for 32bit ....
2352             return false; // ... underflow same as done for TYP_INT
2358     if (iterAtExit < type_MIN)
2368 /*****************************************************************************
2370  *  Helper for unroll loops - Computes the number of repetitions
2371  *  in a constant loop. If it cannot prove the number is constant returns false
// Inputs are the constant init/limit, the increment constant and operator,
// the iterator's (possibly small) type, the loop test operator, and whether
// the test is unsigned; on success, *iterCount receives the trip count.
// NOTE(review): sampled extraction -- the outer switch on testOper and several
// case labels/braces are not visible here; code left byte-identical.
2374 bool Compiler::optComputeLoopRep(int constInit,
2377                                  genTreeOps iterOper,
2378                                  var_types  iterOperType,
2379                                  genTreeOps testOper,
2382                                  unsigned*  iterCount)
2384     noway_assert(genActualType(iterOperType) == TYP_INT);
2387     __int64 constLimitX;
// Widen the limit into 64 bits, zero- or sign-extending by test signedness.
2392     // Using this, we can just do a signed comparison with other 32 bit values.
2395         constLimitX = (unsigned int)constLimit;
2399         constLimitX = (signed int)constLimit;
2402     switch (iterOperType)
2404 // For small types, the iteration operator will narrow these values if big
2406 #define INIT_ITER_BY_TYPE(type)                                                                                        \
2407     constInitX = (type)constInit;                                                                                      \
2408     iterInc    = (type)iterInc;
2411             INIT_ITER_BY_TYPE(signed char);
2414             INIT_ITER_BY_TYPE(unsigned char);
2417             INIT_ITER_BY_TYPE(signed short);
2420             INIT_ITER_BY_TYPE(unsigned short);
2423         // For the big types, 32 bit arithmetic is performed
2429                 constInitX = (unsigned int)constInit;
2433                 constInitX = (signed int)constInit;
2438             noway_assert(!"Bad type");
2442     /* If iterInc is zero we have an infinite loop */
2448     /* Set iterSign to +1 for positive iterInc and -1 for negative iterInc */
2449     iterSign = (iterInc > 0) ? +1 : -1;
2451     /* Initialize loopCount to zero */
2454     // If dupCond is true then the loop head contains a test which skips
2455     // this loop, if the constInit does not pass the loop test
2456     // Such a loop can execute zero times.
2457     // If dupCond is false then we have a true do-while loop which we
2458     // always execute the loop once before performing the loop test
2462         constInitX += iterInc;
2465     // bail if count is based on wrap-around math
2468         if (constLimitX < constInitX)
2473         else if (constLimitX > constInitX)
2478     /* Compute the number of repetitions */
2482             __int64 iterAtExitX;
2485             /* something like "for (i=init; i == lim; i++)" doesn't make any sense */
2489             /*  "for (i=init; i != lim; i+=const)" - this is tricky since it may
2490              *  have a constant number of iterations or loop forever -
2491              *  we have to compute (lim-init) mod iterInc to see if it is zero.
2492              *  If mod iterInc is not zero then the limit test will miss an a wrap will occur
2493              *  which is probably not what the end user wanted, but it is legal.
2498                 /* Stepping by one, i.e. Mod with 1 is always zero */
2501                 if (((constLimitX - constInitX) % iterInc) != 0)
2509                 noway_assert(iterInc < 0);
2510                 /* Stepping by -1, i.e. Mod with 1 is always zero */
2513                     if (((constInitX - constLimitX) % (-iterInc)) != 0)
2529                     if (constInitX != constLimitX)
2531                         loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1;
2534                     iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2538                         iterAtExitX = (unsigned)iterAtExitX;
2541                     // Check if iteration incr will cause overflow for small types
2542                     if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
2547                     // iterator with 32bit overflow. Bad for TYP_(U)INT
2548                     if (iterAtExitX < constLimitX)
2553                     *iterCount = loopCount;
2569                     noway_assert(!"Unknown operator for loop iterator");
2583                     if (constInitX < constLimitX)
2585                         loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1;
2588                     iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2592                         iterAtExitX = (unsigned)iterAtExitX;
2595                     // Check if iteration incr will cause overflow for small types
2596                     if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
2601                     // iterator with 32bit overflow. Bad for TYP_(U)INT
2602                     if (iterAtExitX < constLimitX)
2607                     *iterCount = loopCount;
2623                     noway_assert(!"Unknown operator for loop iterator");
2637                     if (constInitX <= constLimitX)
2639                         loopCount += (unsigned)((constLimitX - constInitX) / iterInc) + 1;
2642                     iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2646                         iterAtExitX = (unsigned)iterAtExitX;
2649                     // Check if iteration incr will cause overflow for small types
2650                     if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
2655                     // iterator with 32bit overflow. Bad for TYP_(U)INT
2656                     if (iterAtExitX <= constLimitX)
2661                     *iterCount = loopCount;
2677                     noway_assert(!"Unknown operator for loop iterator");
2691                     if (constInitX > constLimitX)
2693                         loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1;
2696                     iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2700                         iterAtExitX = (unsigned)iterAtExitX;
2703                     // Check if small types will underflow
2704                     if (jitIterSmallUnderflow((int)iterAtExitX, iterOperType))
2709                     // iterator with 32bit underflow. Bad for TYP_INT and unsigneds
2710                     if (iterAtExitX > constLimitX)
2715                     *iterCount = loopCount;
2731                     noway_assert(!"Unknown operator for loop iterator");
2745                     if (constInitX >= constLimitX)
2747                         loopCount += (unsigned)((constLimitX - constInitX) / iterInc) + 1;
2750                     iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2754                         iterAtExitX = (unsigned)iterAtExitX;
2757                     // Check if small types will underflow
2758                     if (jitIterSmallUnderflow((int)iterAtExitX, iterOperType))
2763                     // iterator with 32bit underflow. Bad for TYP_INT and unsigneds
2764                     if (iterAtExitX >= constLimitX)
2769                     *iterCount = loopCount;
2785                     noway_assert(!"Unknown operator for loop iterator");
2790             noway_assert(!"Unknown operator for loop condition");
2796 /*****************************************************************************
2798 * Look for loop unrolling candidates and unroll them
2802 #pragma warning(push)
2803 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
2805 void Compiler::optUnrollLoops()
2807 if (compCodeOpt() == SMALL_CODE)
2812 if (optLoopCount == 0)
2818 if (JitConfig.JitNoUnroll())
2824 if (optCanCloneLoops())
2832 printf("*************** In optUnrollLoops()\n");
2835 /* Look for loop unrolling candidates */
2837 /* Double loop so that after unrolling an inner loop we set change to true
2838 * and we then go back over all of the loop candidates and try to unroll
2839 * the next outer loop, until we don't unroll any loops,
2840 * then change will be false and we are done.
2844 bool change = false;
2846 for (unsigned lnum = 0; lnum < optLoopCount; lnum++)
2860 int lbeg; // initial value for iterator
2861 int llim; // limit value for iterator
2862 unsigned lvar; // iterator lclVar #
2863 int iterInc; // value to increment the iterator
2864 genTreeOps iterOper; // type of iterator increment (i.e. ASG_ADD, ASG_SUB, etc.)
2865 var_types iterOperType; // type result of the oper (for overflow instrs)
2866 genTreeOps testOper; // type of loop test (i.e. GT_LE, GT_GE, etc.)
2867 bool unsTest; // Is the comparison u/int
2869 unsigned totalIter; // total number of iterations in the constant loop
2870 unsigned loopCostSz; // Cost is size of one iteration
2871 unsigned loopFlags; // actual lpFlags
2872 unsigned requiredFlags; // required lpFlags
2874 GenTree* loopList; // new stmt list of the unrolled loop
2877 static const int ITER_LIMIT[COUNT_OPT_CODE + 1] = {
2884 noway_assert(ITER_LIMIT[SMALL_CODE] == 0);
2885 noway_assert(ITER_LIMIT[COUNT_OPT_CODE] == 0);
2887 unsigned iterLimit = (unsigned)ITER_LIMIT[compCodeOpt()];
2890 if (compStressCompile(STRESS_UNROLL_LOOPS, 50))
2896 static const int UNROLL_LIMIT_SZ[COUNT_OPT_CODE + 1] = {
2903 noway_assert(UNROLL_LIMIT_SZ[SMALL_CODE] == 0);
2904 noway_assert(UNROLL_LIMIT_SZ[COUNT_OPT_CODE] == 0);
2906 int unrollLimitSz = (unsigned)UNROLL_LIMIT_SZ[compCodeOpt()];
2909 if (compStressCompile(STRESS_UNROLL_LOOPS, 50))
2911 unrollLimitSz *= 10;
2915 loopFlags = optLoopTable[lnum].lpFlags;
2916 requiredFlags = LPFLG_DO_WHILE | LPFLG_ONE_EXIT | LPFLG_CONST;
2918 /* Ignore the loop if we don't have a do-while with a single exit
2919 that has a constant number of iterations */
2921 if ((loopFlags & requiredFlags) != requiredFlags)
2926 /* ignore if removed or marked as not unrollable */
2928 if (optLoopTable[lnum].lpFlags & (LPFLG_DONT_UNROLL | LPFLG_REMOVED))
2933 head = optLoopTable[lnum].lpHead;
2935 bottom = optLoopTable[lnum].lpBottom;
2936 noway_assert(bottom);
2938 /* The single exit must be at the bottom of the loop */
2939 noway_assert(optLoopTable[lnum].lpExit);
2940 if (optLoopTable[lnum].lpExit != bottom)
2945 /* Unrolling loops with jumps in them is not worth the headache
2946 * Later we might consider unrolling loops after un-switching */
2951 block = block->bbNext;
2952 noway_assert(block);
2954 if (block->bbJumpKind != BBJ_NONE)
2956 if (block != bottom)
2961 } while (block != bottom);
2963 /* Get the loop data:
2967 - iterator increment
2968 - increment operation type (i.e. ASG_ADD, ASG_SUB, etc...)
2969 - loop test type (i.e. GT_GE, GT_LT, etc...)
2972 lbeg = optLoopTable[lnum].lpConstInit;
2973 llim = optLoopTable[lnum].lpConstLimit();
2974 testOper = optLoopTable[lnum].lpTestOper();
2976 lvar = optLoopTable[lnum].lpIterVar();
2977 iterInc = optLoopTable[lnum].lpIterConst();
2978 iterOper = optLoopTable[lnum].lpIterOper();
2980 iterOperType = optLoopTable[lnum].lpIterOperType();
2981 unsTest = (optLoopTable[lnum].lpTestTree->gtFlags & GTF_UNSIGNED) != 0;
2983 if (lvaTable[lvar].lvAddrExposed)
2984 { // If the loop iteration variable is address-exposed then bail
2987 if (lvaTable[lvar].lvIsStructField)
2988 { // If the loop iteration variable is a promoted field from a struct then
2993 /* Locate the pre-header and initialization and increment/test statements */
2995 phdr = head->bbTreeList;
2997 loop = bottom->bbTreeList;
3000 init = head->lastStmt();
3001 noway_assert(init && (init->gtNext == nullptr));
3002 test = bottom->lastStmt();
3003 noway_assert(test && (test->gtNext == nullptr));
3004 incr = test->gtPrev;
3007 if (init->gtFlags & GTF_STMT_CMPADD)
3009 /* Must be a duplicated loop condition */
3010 noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
3013 init = init->gtPrev;
3021 /* Find the number of iterations - the function returns false if not a constant number */
3023 if (!optComputeLoopRep(lbeg, llim, iterInc, iterOper, iterOperType, testOper, unsTest, dupCond, &totalIter))
3028 /* Forget it if there are too many repetitions or not a constant loop */
3030 if (totalIter > iterLimit)
3035 noway_assert(init->gtOper == GT_STMT);
3036 init = init->gtStmt.gtStmtExpr;
3037 noway_assert(test->gtOper == GT_STMT);
3038 test = test->gtStmt.gtStmtExpr;
3039 noway_assert(incr->gtOper == GT_STMT);
3040 incr = incr->gtStmt.gtStmtExpr;
3042 // Don't unroll loops we don't understand.
3043 if (incr->gtOper == GT_ASG)
3048 /* Make sure everything looks ok */
3049 if ((init->gtOper != GT_ASG) || (init->gtOp.gtOp1->gtOper != GT_LCL_VAR) ||
3050 (init->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) || (init->gtOp.gtOp2->gtOper != GT_CNS_INT) ||
3051 (init->gtOp.gtOp2->gtIntCon.gtIconVal != lbeg) ||
3053 !((incr->gtOper == GT_ASG_ADD) || (incr->gtOper == GT_ASG_SUB)) ||
3054 (incr->gtOp.gtOp1->gtOper != GT_LCL_VAR) || (incr->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) ||
3055 (incr->gtOp.gtOp2->gtOper != GT_CNS_INT) || (incr->gtOp.gtOp2->gtIntCon.gtIconVal != iterInc) ||
3057 (test->gtOper != GT_JTRUE))
3059 noway_assert(!"Bad precondition in Compiler::optUnrollLoops()");
3063 /* heuristic - Estimated cost in code size of the unrolled loop */
3071 block = block->bbNext;
3073 /* Visit all the statements in the block */
3075 for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
3077 /* Get the expression and stop if end reached */
3079 GenTreePtr expr = stmt->gtStmtExpr;
3085 /* Calculate gtCostSz */
3086 gtSetStmtInfo(stmt);
3088 /* Update loopCostSz */
3089 loopCostSz += stmt->gtCostSz;
3091 } while (block != bottom);
3093 /* Compute the estimated increase in code size for the unrolled loop */
3095 unsigned int fixedLoopCostSz;
3096 fixedLoopCostSz = 8;
3099 unrollCostSz = (loopCostSz * totalIter) - (loopCostSz + fixedLoopCostSz);
3101 /* Don't unroll if too much code duplication would result. */
3103 if (unrollCostSz > unrollLimitSz)
3105 /* prevent this loop from being revisited */
3106 optLoopTable[lnum].lpFlags |= LPFLG_DONT_UNROLL;
3110 /* Looks like a good idea to unroll this loop, let's do it! */
3111 CLANG_FORMAT_COMMENT_ANCHOR;
3116 printf("\nUnrolling loop BB%02u", head->bbNext->bbNum);
3117 if (head->bbNext->bbNum != bottom->bbNum)
3119 printf("..BB%02u", bottom->bbNum);
3121 printf(" over V%02u from %u to %u", lvar, lbeg, llim);
3122 printf(" unrollCostSz = %d\n", unrollCostSz);
3127 /* Create the unrolled loop statement list */
3129 loopList = loopLast = nullptr;
3131 for (lval = lbeg; totalIter; totalIter--)
3140 block = block->bbNext;
3141 noway_assert(block);
3143 /* Visit all the statements in the block */
3145 for (stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
3147 /* Stop if we've reached the end of the loop */
3149 if (stmt->gtStmtExpr == incr)
3154 /* Clone/substitute the expression */
3156 expr = gtCloneExpr(stmt, 0, lvar, lval);
3158 // cloneExpr doesn't handle everything
3162 optLoopTable[lnum].lpFlags |= LPFLG_DONT_UNROLL;
3166 /* Append the expression to our list */
3170 loopLast->gtNext = expr;
3177 expr->gtPrev = loopLast;
3180 } while (block != bottom);
3182 /* update the new value for the unrolled iterator */
3196 noway_assert(!"Unrolling not implemented for this loop iterator");
3200 noway_assert(!"Unknown operator for constant loop iterator");
3205 /* Finish the linked list */
3209 loopList->gtPrev = loopLast;
3210 loopLast->gtNext = nullptr;
3213 /* Replace the body with the unrolled one */
3219 block = block->bbNext;
3220 noway_assert(block);
3221 block->bbTreeList = nullptr;
3222 block->bbJumpKind = BBJ_NONE;
3223 block->bbFlags &= ~BBF_NEEDS_GCPOLL;
3224 } while (block != bottom);
3226 bottom->bbJumpKind = BBJ_NONE;
3227 bottom->bbTreeList = loopList;
3228 bottom->bbFlags &= ~BBF_NEEDS_GCPOLL;
3229 bottom->modifyBBWeight(bottom->bbWeight / BB_LOOP_WEIGHT);
3233 fgMorphStmts(bottom, &dummy, &dummy, &dummy);
3235 /* Update bbRefs and bbPreds */
3236 /* Here head->bbNext is bottom !!! - Replace it */
3238 fgRemoveRefPred(head->bbNext, bottom);
3240 /* Now change the initialization statement in the HEAD to "lvar = lval;"
3241 * (the last value of the iterator in the loop)
3242 * and drop the jump condition since the unrolled loop will always execute */
3244 init->gtOp.gtOp2->gtIntCon.gtIconVal = lval;
3246 /* if the HEAD is a BBJ_COND drop the condition (and make HEAD a BBJ_NONE block) */
3248 if (head->bbJumpKind == BBJ_COND)
3250 phdr = head->bbTreeList;
3252 test = phdr->gtPrev;
3254 noway_assert(test && (test->gtNext == nullptr));
3255 noway_assert(test->gtOper == GT_STMT);
3256 noway_assert(test->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
3258 init = test->gtPrev;
3259 noway_assert(init && (init->gtNext == test));
3260 noway_assert(init->gtOper == GT_STMT);
3262 init->gtNext = nullptr;
3263 phdr->gtPrev = init;
3264 head->bbJumpKind = BBJ_NONE;
3265 head->bbFlags &= ~BBF_NEEDS_GCPOLL;
3267 /* Update bbRefs and bbPreds */
3269 fgRemoveRefPred(head->bbJumpDest, head);
3273 /* the loop must execute */
3274 noway_assert(head->bbJumpKind == BBJ_NONE);
3280 printf("Whole unrolled loop:\n");
3282 GenTreePtr s = loopList;
3286 noway_assert(s->gtOper == GT_STMT);
3297 /* Remember that something has changed */
3301 /* Make sure to update loop table */
3303 /* Use the LPFLG_REMOVED flag and update the bbLoopMask accordingly
3304 * (also make head and bottom NULL - to hit an assert or GPF) */
3306 optLoopTable[lnum].lpFlags |= LPFLG_REMOVED;
3307 optLoopTable[lnum].lpHead = optLoopTable[lnum].lpBottom = nullptr;
3319 fgDebugCheckBBlist();
3323 #pragma warning(pop)
3326 /*****************************************************************************
3328 * Return non-zero if there is a code path from 'topBB' to 'botBB' that will
3329 * not execute a method call.
3332 bool Compiler::optReachWithoutCall(BasicBlock* topBB, BasicBlock* botBB)
//------------------------------------------------------------------------
// optReachWithoutCall: Determine whether there may be a code path from
// 'topBB' down to 'botBB' that does not pass through a GC safe point
// (a block flagged BBF_GC_SAFE_POINT).
//
// Arguments:
//    topBB - the lexically first block of the range (the loop head)
//    botBB - the lexically last block (the source of the back-edge)
//
// Return Value:
//    true  - a call-free path may exist (this is the conservative answer,
//            also returned when dominator info is unavailable)
//    false - every path from topBB to botBB is proven to hit a GC safe point
3334 // TODO-Cleanup: Currently BBF_GC_SAFE_POINT is not set for helper calls,
3335 // as some helper calls are neither interruptible nor hijackable.
3336 // When we can determine this, then we can set BBF_GC_SAFE_POINT for
3337 // those helpers too.
// Caller must pass the blocks in lexical (bbNum) order.
3339 noway_assert(topBB->bbNum <= botBB->bbNum);
3341 // We can always check topBB and botBB for any gc safe points and early out
3343 if ((topBB->bbFlags | botBB->bbFlags) & BBF_GC_SAFE_POINT)
3348 // Otherwise we will need to rely upon the dominator sets
3350 if (!fgDomsComputed)
3352 // return a conservative answer of true when we don't have the dominator sets
3356 BasicBlock* curBB = topBB;
3359 noway_assert(curBB);
3361 // If we added a loop pre-header block then we will
3362 // have a bbNum greater than fgLastBB, and we won't have
3363 // any dominator information about this block, so skip it.
3365 if (curBB->bbNum <= fgLastBB->bbNum)
3367 noway_assert(curBB->bbNum <= botBB->bbNum);
3369 // Does this block contain a gc safe point?
3371 if (curBB->bbFlags & BBF_GC_SAFE_POINT)
3373 // Will this block always execute on the way to botBB ?
3375 // Since we are checking every block in [topBB .. botBB] and we are using
3376 // a lexical definition of a loop.
3377 //  (all that we know is that botBB is a back-edge to topBB)
3378 // Thus while walking blocks in this range we may encounter some blocks
3379 // that are not really part of the loop, and so we need to perform
3380 // some additional checks:
3382 // We will check that the current 'curBB' is reachable from 'topBB'
3383 // and that it dominates the block containing the back-edge 'botBB'
3384 // When both of these are true then we know that the gcsafe point in 'curBB'
3385 // will be encountered in the loop and we can return false
3387 if (fgDominate(curBB, botBB) && fgReachable(topBB, curBB))
3394 // If we've reached the destination block, then we're done
3403 curBB = curBB->bbNext;
3406 // If we didn't find any blocks that contained a gc safe point and
3407 // also met the fgDominate and fgReachable criteria then we must return true
3412 /*****************************************************************************
3414 * Find the loop termination test at the bottom of the loop
3417 static GenTreePtr optFindLoopTermTest(BasicBlock* bottom)
// Returns the loop-termination test, i.e. the last statement of 'bottom'.
// The statement list head's gtPrev links to the tail of the list, so the
// answer is taken directly from 'testt->gtPrev'; the walk that follows
// only asserts that this shortcut agrees with a full traversal.
3419 GenTreePtr testt = bottom->bbTreeList;
3421 assert(testt && testt->gtOper == GT_STMT);
// 'result' is the tail statement via the head's back-link.
3423 GenTreePtr result = testt->gtPrev;
// Walk forward to the actual end of the list to validate 'result'.
3426 while (testt->gtNext)
3428 testt = testt->gtNext;
3431 assert(testt == result);
3437 /*****************************************************************************
3438 * Optimize "jmp C; do{} C:while(cond);" loops to "if (cond){ do{}while(cond); }"
3441 void Compiler::fgOptWhileLoop(BasicBlock* block)
//------------------------------------------------------------------------
// fgOptWhileLoop: Convert a "jump-to-test" while loop into a do-while loop
// by duplicating (and reversing) the loop's termination test at the end of
// 'block', then retargeting 'block' to conditionally branch around the loop.
//
// Arguments:
//    block - a candidate block ending in BBJ_ALWAYS that jumps forward to
//            the loop's condition test block (bTest).
//
// Notes:
//    Only performed when the duplicated condition is cheap enough
//    (estDupCostSz vs. maxDupCostSz heuristic below); bails out early on
//    any structural mismatch. Adjusts profile weights of bTest when valid
//    profile data is available.
3443 noway_assert(!opts.MinOpts() && !opts.compDbgCode);
3444 noway_assert(compCodeOpt() != SMALL_CODE);
3447 Optimize while loops into do { } while loop
3448 Our loop hoisting logic requires do { } while loops.
3449 Specifically, we're looking for the following case:
3460 If we find this, and the condition is simple enough, we change
3461 the loop to the following:
3466 // else fall-through
3477 /* Does the BB end with an unconditional jump? */
3479 if (block->bbJumpKind != BBJ_ALWAYS || (block->bbFlags & BBF_KEEP_BBJ_ALWAYS))
3480 { // It can't be one of the ones we use for our exception magic
3484 // It has to be a forward jump
3485 // TODO-CQ: Check if we can also optimize the backwards jump as well.
3487 if (fgIsForwardBranch(block) == false)
3492 // Get hold of the jump target
3493 BasicBlock* bTest = block->bbJumpDest;
3495 // Does the block consist of 'jtrue(cond) block' ?
3496 if (bTest->bbJumpKind != BBJ_COND)
3501 // bTest must be a backwards jump to block->bbNext
3502 if (bTest->bbJumpDest != block->bbNext)
3507 // Since test is a BBJ_COND it will have a bbNext
3508 noway_assert(bTest->bbNext);
3510 // 'block' must be in the same try region as the condition, since we're going to insert
3511 // a duplicated condition in 'block', and the condition might include exception throwing code.
3512 if (!BasicBlock::sameTryRegion(block, bTest))
3517 // We're going to change 'block' to branch to bTest->bbNext, so that also better be in the
3518 // same try region (or no try region) to avoid generating illegal flow.
3519 BasicBlock* bTestNext = bTest->bbNext;
3520 if (bTestNext->hasTryIndex() && !BasicBlock::sameTryRegion(block, bTestNext))
3525 GenTreePtr condStmt = optFindLoopTermTest(bTest);
3527 // bTest must contain only a jtrue with no other stmts, we will only clone
3528 // the conditional, so any other statements will not get cloned
3529 // TODO-CQ: consider cloning the whole bTest block as inserting it after block.
3531 if (bTest->bbTreeList != condStmt)
3536 /* Get to the condition node from the statement tree */
3538 noway_assert(condStmt->gtOper == GT_STMT);
3540 GenTreePtr condTree = condStmt->gtStmt.gtStmtExpr;
3541 noway_assert(condTree->gtOper == GT_JTRUE);
3543 condTree = condTree->gtOp.gtOp1;
3545 // The condTree has to be a RelOp comparison
3546 // TODO-CQ: Check if we can also optimize the backwards jump as well.
3548 if (condTree->OperIsCompare() == false)
3553 /* We call gtPrepareCost to measure the cost of duplicating this tree */
3555 gtPrepareCost(condTree);
3556 unsigned estDupCostSz = condTree->gtCostSz;
// Default iteration estimate when no usable profile data exists.
3558 double loopIterations = (double)BB_LOOP_WEIGHT;
3560 bool allProfileWeightsAreValid = false;
3561 BasicBlock::weight_t weightBlock = block->bbWeight;
3562 BasicBlock::weight_t weightTest = bTest->bbWeight;
3563 BasicBlock::weight_t weightNext = block->bbNext->bbWeight;
3565 // If we have profile data then we calculate the number of time
3566 // the loop will iterate into loopIterations
3567 if (fgIsUsingProfileWeights())
3569 // Only rely upon the profile weight when all three of these blocks
3570 // have good profile weights
3571 if ((block->bbFlags & BBF_PROF_WEIGHT) && (bTest->bbFlags & BBF_PROF_WEIGHT) &&
3572 (block->bbNext->bbFlags & BBF_PROF_WEIGHT))
3574 allProfileWeightsAreValid = true;
3576 // If this while loop never iterates then don't bother transforming
3577 if (weightNext == 0)
3582 // with (weightNext > 0) we should also have (weightTest >= weightBlock)
3583 // if the profile weights are all valid.
3585 // weightNext is the number of time this loop iterates
3586 // weightBlock is the number of times that we enter the while loop
3587 // loopIterations is the average number of times that this loop iterates
3589 if (weightTest >= weightBlock)
3591 loopIterations = (double)block->bbNext->bbWeight / (double)block->bbWeight;
// Base budget (in code-size units) for the duplicated condition.
3596 unsigned maxDupCostSz = 32;
3598 // optFastCodeOrBlendedLoop(bTest->bbWeight) does not work here as we have not
3599 // set loop weights yet
3600 if ((compCodeOpt() == FAST_CODE) || compStressCompile(STRESS_DO_WHILE_LOOPS, 30))
3605 // If this loop iterates a lot then raise the maxDupCost
3606 if (loopIterations >= 12.0)
3610 if (loopIterations >= 96.0)
3615 // If the loop condition has a shared static helper, we really want this loop converted
3616 // as not converting the loop will disable loop hoisting, meaning the shared helper will
3617 // be executed on every loop iteration.
3618 int countOfHelpers = 0;
3619 fgWalkTreePre(&condTree, CountSharedStaticHelper, &countOfHelpers);
3621 if (countOfHelpers > 0 && compCodeOpt() != SMALL_CODE)
3623 maxDupCostSz += 24 * min(countOfHelpers, (int)(loopIterations + 1.5));
3626 // If the compare has too high cost then we don't want to dup
3628 bool costIsTooHigh = (estDupCostSz > maxDupCostSz);
3633 printf("\nDuplication of loop condition [%06u] is %s, because the cost of duplication (%i) is %s than %i,"
3634 "\n loopIterations = %7.3f, countOfHelpers = %d, validProfileWeights = %s\n",
3635 condTree->gtTreeID, costIsTooHigh ? "not done" : "performed", estDupCostSz,
3636 costIsTooHigh ? "greater" : "less or equal", maxDupCostSz, loopIterations, countOfHelpers,
3637 allProfileWeightsAreValid ? "true" : "false");
3646 /* Looks good - duplicate the condition test */
// GTF_RELOP_ZTT marks a compare that is a zero-trip-test candidate.
3648 condTree->gtFlags |= GTF_RELOP_ZTT;
// Clone and reverse the condition: the copy jumps *around* the loop body.
3650 condTree = gtCloneExpr(condTree);
3651 gtReverseCond(condTree);
3653 // Make sure clone expr copied the flag
3654 assert(condTree->gtFlags & GTF_RELOP_ZTT);
3656 condTree = gtNewOperNode(GT_JTRUE, TYP_VOID, condTree);
3658 /* Create a statement entry out of the condition and
3659 append the condition test at the end of 'block' */
3661 GenTreePtr copyOfCondStmt = fgInsertStmtAtEnd(block, condTree);
// GTF_STMT_CMPADD flags this statement as a compiler-added duplicate.
3663 copyOfCondStmt->gtFlags |= GTF_STMT_CMPADD;
3665 if (opts.compDbgInfo)
3667 copyOfCondStmt->gtStmt.gtStmtILoffsx = condStmt->gtStmt.gtStmtILoffsx;
3670 // Flag the block that received the copy as potentially having an array/vtable
3671 // reference if the block copied from did; this is a conservative guess.
3672 if (auto copyFlags = bTest->bbFlags & (BBF_HAS_VTABREF | BBF_HAS_IDX_LEN))
3674 block->bbFlags |= copyFlags;
3677 // If we have profile data for all blocks and we know that we are cloning the
3678 // bTest block into block and thus changing the control flow from block so
3679 // that it no longer goes directly to bTest anymore, we have to adjust the
3680 // weight of bTest by subtracting out the weight of block.
3682 if (allProfileWeightsAreValid)
3685 // Some additional sanity checks before adjusting the weight of bTest
3687 if ((weightNext > 0) && (weightTest >= weightBlock) && (weightTest != BB_MAX_WEIGHT))
3689 // Get the two edge that flow out of bTest
3690 flowList* edgeToNext = fgGetPredForBlock(bTest->bbNext, bTest);
3691 flowList* edgeToJump = fgGetPredForBlock(bTest->bbJumpDest, bTest);
3693 // Calculate the new weight for block bTest
3695 BasicBlock::weight_t newWeightTest =
3696 (weightTest > weightBlock) ? (weightTest - weightBlock) : BB_ZERO_WEIGHT;
3697 bTest->bbWeight = newWeightTest;
3699 if (newWeightTest == BB_ZERO_WEIGHT)
3701 bTest->bbFlags |= BBF_RUN_RARELY;
3702 // All out edge weights are set to zero
3703 edgeToNext->flEdgeWeightMin = BB_ZERO_WEIGHT;
3704 edgeToNext->flEdgeWeightMax = BB_ZERO_WEIGHT;
3705 edgeToJump->flEdgeWeightMin = BB_ZERO_WEIGHT;
3706 edgeToJump->flEdgeWeightMax = BB_ZERO_WEIGHT;
3710 // Update our edge weights
3711 edgeToNext->flEdgeWeightMin = BB_ZERO_WEIGHT;
3712 edgeToNext->flEdgeWeightMax = min(edgeToNext->flEdgeWeightMax, newWeightTest);
3713 edgeToJump->flEdgeWeightMin = BB_ZERO_WEIGHT;
3714 edgeToJump->flEdgeWeightMax = min(edgeToJump->flEdgeWeightMax, newWeightTest);
3719 /* Change the block to end with a conditional jump */
3721 block->bbJumpKind = BBJ_COND;
3722 block->bbJumpDest = bTest->bbNext;
3724 /* Mark the jump dest block as being a jump target */
3725 block->bbJumpDest->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
3727 /* Update bbRefs and bbPreds for 'block->bbNext' 'bTest' and 'bTest->bbNext' */
3729 fgAddRefPred(block->bbNext, block);
3731 fgRemoveRefPred(bTest, block);
3732 fgAddRefPred(bTest->bbNext, block);
3737 printf("\nDuplicating loop condition in BB%02u for loop (BB%02u - BB%02u)", block->bbNum, block->bbNext->bbNum,
3739 printf("\nEstimated code size expansion is %d\n ", estDupCostSz);
3741 gtDispTree(copyOfCondStmt);
3747 /*****************************************************************************
3749 * Optimize the BasicBlock layout of the method
3752 void Compiler::optOptimizeLayout()
//------------------------------------------------------------------------
// optOptimizeLayout: Optimize the BasicBlock layout of the method.
// Runs fgOptWhileLoop over each block (unless optimizing for small code),
// then recomputes edge weights and updates the flow graph if anything
// was modified. Not run under MinOpts or debuggable code.
3754 noway_assert(!opts.MinOpts() && !opts.compDbgCode);
3759 printf("*************** In optOptimizeLayout()\n");
3763 /* Check that the flowgraph data (bbNum, bbRefs, bbPreds) is up-to-date */
3764 fgDebugCheckBBlist();
// We expect to start with an unmodified flow graph; fgModified tracks
// whether fgOptWhileLoop changed anything below.
3767 noway_assert(fgModified == false);
3769 for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
3771 /* Make sure the appropriate fields are initialized */
3773 if (block->bbWeight == BB_ZERO_WEIGHT)
3775 /* Zero weighted block can't have a LOOP_HEAD flag */
3776 noway_assert(block->isLoopHead() == false)
3780 assert(block->bbLoopNum == 0);
// Skip the while-loop inversion when optimizing for code size.
3782 if (compCodeOpt() != SMALL_CODE)
3784 /* Optimize "while(cond){}" loops to "cond; do{}while(cond);" */
3786 fgOptWhileLoop(block);
3792 // Recompute the edge weight if we have modified the flow graph in fgOptWhileLoop
3793 fgComputeEdgeWeights();
3796 fgUpdateFlowGraph(true);
3798 fgUpdateFlowGraph();
3801 /*****************************************************************************
3803 * Perform loop inversion, find and classify natural loops
3806 void Compiler::optOptimizeLoops()
//------------------------------------------------------------------------
// optOptimizeLoops: Find and mark natural loops.
// After setting block weights and running optFindNaturalLoops, walks
// every block looking for back-edges (a predecessor with a higher bbNum)
// into a loop head, records the longest back-edge per head, numbers the
// loop, and marks the blocks in its lexical range via optMarkLoopBlocks.
// Not run under MinOpts or debuggable code.
3808 noway_assert(!opts.MinOpts() && !opts.compDbgCode);
3813 printf("*************** In optOptimizeLoops()\n");
3817 optSetBlockWeights();
3819 /* Were there any loops in the flow graph? */
3823 /* now that we have dominator information we can find loops */
3825 optFindNaturalLoops();
3827 unsigned loopNum = 0;
3829 /* Iterate over the flow graph, marking all loops */
3831 /* We will use the following terminology:
3832 * top - the first basic block in the loop (i.e. the head of the backward edge)
3833 * bottom - the last block in the loop (i.e. the block from which we jump to the top)
3834 * lastBottom - used when we have multiple back-edges to the same top
3841 for (top = fgFirstBB; top; top = top->bbNext)
3843 BasicBlock* foundBottom = nullptr;
3845 for (pred = top->bbPreds; pred; pred = pred->flNext)
3847 /* Is this a loop candidate? - We look for "back edges" */
3849 BasicBlock* bottom = pred->flBlock;
3851 /* is this a backward edge? (from BOTTOM to TOP) */
3853 if (top->bbNum > bottom->bbNum)
3858 /* 'top' also must have the BBF_LOOP_HEAD flag set */
3860 if (top->isLoopHead() == false)
3865 /* We only consider back-edges that are BBJ_COND or BBJ_ALWAYS for loops */
3867 if ((bottom->bbJumpKind != BBJ_COND) && (bottom->bbJumpKind != BBJ_ALWAYS))
3872 /* the top block must be able to reach the bottom block */
3873 if (!fgReachable(top, bottom))
3878 /* Found a new loop, record the longest backedge in foundBottom */
3880 if ((foundBottom == nullptr) || (bottom->bbNum > foundBottom->bbNum))
3882 foundBottom = bottom;
3890 /* Mark the loop header as such */
// bbLoopNum is an unsigned char, hence the FitsIn check below.
3891 assert(FitsIn<unsigned char>(loopNum));
3892 top->bbLoopNum = (unsigned char)loopNum;
3895 /* Mark all blocks between 'top' and 'bottom' */
3897 optMarkLoopBlocks(top, foundBottom, false);
3900 // We track at most 255 loops
3904 totalUnnatLoopOverflows++;
3911 totalUnnatLoopCount += loopNum;
3919 printf("\nFound a total of %d loops.", loopNum);
3920 printf("\nAfter loop weight marking:\n");
3921 fgDispBasicBlocks();
// Record that loop marking has been performed (checked by later phases).
3926 optLoopsMarked = true;
3930 //------------------------------------------------------------------------
3931 // optDeriveLoopCloningConditions: Derive loop cloning conditions.
3934 // loopNum - the current loop index for which conditions are derived.
3935 // context - data structure where all loop cloning info is kept.
3938 // "false" if conditions cannot be obtained. "true" otherwise.
3939 // The cloning conditions are updated in the "conditions"[loopNum] field
3940 // of the "context" parameter.
3943 // Inspect the loop cloning optimization candidates and populate the conditions necessary
3944 // for each optimization candidate. Checks if the loop stride is "> 0" if the loop
3945 // condition is "less than". If the initializer is "var" init then adds condition
3946 // "var >= 0", and if the loop is var limit then, "var >= 0" and "var <= a.len"
3947 // are added to "context". These conditions are checked in the pre-header block
3948 // and the cloning choice is made.
3951 // Callers should assume AND operation is used i.e., if all conditions are
3952 // true, then take the fast path.
3954 bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext* context)
//------------------------------------------------------------------------
// optDeriveLoopCloningConditions: Derive loop cloning conditions.
//
// Arguments:
//    loopNum - the current loop index for which conditions are derived.
//    context - data structure where all loop cloning info is kept.
//
// Return Value:
//    false if conditions cannot be obtained (invalid stride/init/limit,
//    or an unrecognized optimization candidate); true otherwise, with the
//    conditions pushed into context->EnsureConditions(loopNum) and the
//    arrays to dereference pushed into context->EnsureDerefs(loopNum).
//
// Notes:
//    Only GT_LT loop tests with positive constant stride are handled.
//    Callers should assume the conditions are AND-ed together: the fast
//    path is taken only if all of them hold.
3956 JITDUMP("------------------------------------------------------------\n");
3957 JITDUMP("Deriving cloning conditions for L%02u\n", loopNum);
3959 LoopDsc* loop = &optLoopTable[loopNum];
3960 ExpandArrayStack<LcOptInfo*>* optInfos = context->GetLoopOptInfo(loopNum);
3962 if (loop->lpTestOper() == GT_LT)
3964 // Stride conditions
3965 if (loop->lpIterConst() <= 0)
3967 JITDUMP("> Stride %d is invalid\n", loop->lpIterConst());
// Initializer conditions: a constant init must be non-negative; a
// variable init contributes a runtime "initVar >= 0" condition.
3972 if (loop->lpFlags & LPFLG_CONST_INIT)
3974 // Only allowing const init at this time.
3975 if (loop->lpConstInit < 0)
3977 JITDUMP("> Init %d is invalid\n", loop->lpConstInit);
3981 else if (loop->lpFlags & LPFLG_VAR_INIT)
3984 LC_Condition geZero(GT_GE, LC_Expr(LC_Ident(loop->lpVarInit, LC_Ident::Var)),
3985 LC_Expr(LC_Ident(0, LC_Ident::Const)));
3986 context->EnsureConditions(loopNum)->Push(geZero);
3990 JITDUMP("> Not variable init\n");
// Limit conditions: build 'ident' for the loop limit (constant, variable,
// or array length), adding "limit >= 0" / deref requirements as needed.
3996 if (loop->lpFlags & LPFLG_CONST_LIMIT)
3998 int limit = loop->lpConstLimit();
4001 JITDUMP("> limit %d is invalid\n", limit);
4004 ident = LC_Ident(limit, LC_Ident::Const);
4006 else if (loop->lpFlags & LPFLG_VAR_LIMIT)
4008 unsigned limitLcl = loop->lpVarLimit();
4009 ident = LC_Ident(limitLcl, LC_Ident::Var);
4011 LC_Condition geZero(GT_GE, LC_Expr(ident), LC_Expr(LC_Ident(0, LC_Ident::Const)));
4013 context->EnsureConditions(loopNum)->Push(geZero);
4015 else if (loop->lpFlags & LPFLG_ARRLEN_LIMIT)
4017 ArrIndex* index = new (getAllocator()) ArrIndex(getAllocator());
4018 if (!loop->lpArrLenLimit(this, index))
4020 JITDUMP("> ArrLen not matching");
4023 ident = LC_Ident(LC_Array(LC_Array::Jagged, index, LC_Array::ArrLen));
4025 // Ensure that this array must be dereference-able, before executing the actual condition.
4026 LC_Array array(LC_Array::Jagged, index, LC_Array::None);
4027 context->EnsureDerefs(loopNum)->Push(array);
4031 JITDUMP("> Undetected limit\n");
// For each optimization candidate, add "limit <= arr.len" so bounds
// checks can be removed on the fast path.
4035 for (unsigned i = 0; i < optInfos->Size(); ++i)
4037 LcOptInfo* optInfo = optInfos->GetRef(i);
4038 switch (optInfo->GetOptType())
4040 case LcOptInfo::LcJaggedArray:
4043 LcJaggedArrayOptInfo* arrIndexInfo = optInfo->AsLcJaggedArrayOptInfo();
4044 LC_Array arrLen(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::ArrLen);
4045 LC_Ident arrLenIdent = LC_Ident(arrLen);
4047 LC_Condition cond(GT_LE, LC_Expr(ident), LC_Expr(arrLenIdent));
4048 context->EnsureConditions(loopNum)->Push(cond);
4050 // Ensure that this array must be dereference-able, before executing the actual condition.
4051 LC_Array array(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::None);
4052 context->EnsureDerefs(loopNum)->Push(array);
4055 case LcOptInfo::LcMdArray:
4057 // limit <= mdArrLen
4058 LcMdArrayOptInfo* mdArrInfo = optInfo->AsLcMdArrayOptInfo();
4059 LC_Condition cond(GT_LE, LC_Expr(ident),
4060 LC_Expr(LC_Ident(LC_Array(LC_Array::MdArray,
4061 mdArrInfo->GetArrIndexForDim(getAllocator()),
4062 mdArrInfo->dim, LC_Array::None))));
4063 context->EnsureConditions(loopNum)->Push(cond);
4068 JITDUMP("Unknown opt\n");
4072 JITDUMP("Conditions: (");
4073 DBEXEC(verbose, context->PrintConditions(loopNum));
4080 //------------------------------------------------------------------------------------
4081 // optComputeDerefConditions: Derive loop cloning conditions for dereferencing arrays.
4084 // loopNum - the current loop index for which conditions are derived.
4085 // context - data structure where all loop cloning info is kept.
4088 // "false" if conditions cannot be obtained. "true" otherwise.
4089 // The deref conditions are updated in the "derefConditions"[loopNum] field
4090 // of the "context" parameter.
4092 // Definition of Deref Conditions:
4093 // To be able to check for the loop cloning condition that (limitVar <= a.len)
4094 // we should first be able to dereference "a". i.e., "a" is non-null.
4100 // for (k in 0..n) // Inner most loop is being cloned. Cloning needs to check if
4101 // // (n <= a[i][j].len) and other safer conditions to take the fast path
4104 // Now, we want to deref a[i][j] to invoke length operator on it to perform the cloning fast path check.
4105 // This involves deref of (a), (a[i]), (a[i][j]), therefore, the following should first
4106 // be true to do the deref.
4108 // (a != null) && (i < a.len) && (a[i] != null) && (j < a[i].len) && (a[i][j] != null) --> (1)
4110 // Note the short circuiting AND. Implication: these conditions should be performed in separate
4111 // blocks each of which will branch to slow path if the condition evaluates to false.
4113 // Now, imagine a situation where we have
4114 // a[x][y][k] = 20 and a[i][j][k] = 0
4115 // also in the inner most loop where x, y are parameters, then our conditions will have
4119 // in addition to the above conditions (1) to get rid of bounds check on index 'k'
4121 // But these conditions can be checked together with conditions
4122 // (i < a.len) without a need for a separate block. In summary, the conditions will be:
4125 // ((i < a.len) & (x < a.len)) && <-- Note the bitwise AND here.
4126 // (a[i] != null & a[x] != null) && <-- Note the bitwise AND here.
4127 // (j < a[i].len & y < a[x].len) && <-- Note the bitwise AND here.
4128 // (a[i][j] != null & a[x][y] != null) <-- Note the bitwise AND here.
4130 // This naturally yields a tree style pattern, where the nodes of the tree are
4131 // the array and indices respectively.
4147 // Notice that the variables in the same levels can have their conditions combined in the
4148 // same block with a bitwise AND. Whereas, the conditions in consecutive levels will be
4149 // combined with a short-circuiting AND (i.e., different basic blocks).
4152 // Construct a tree of array indices and the array which will generate the optimal
4153 // conditions for loop cloning.
4155 // a[i][j][k], b[i] and a[i][y][k] are the occurrences in the loop. Then, the tree should be:
4170 // In this method, we will construct such a tree by descending depth first into the array
4171 // index operation and forming a tree structure as we encounter the array or the index variables.
4173 // This tree structure will then be used to generate conditions like below:
4174 // (a != null) & (b != null) && // from the first level of the tree.
4176 // (i < a.len) & (i < b.len) && // from the second level of the tree. Levels can be combined.
4177 // (a[i] != null) & (b[i] != null) && // from the second level of the tree.
4179 // (j < a[i].len) & (y < a[i].len) && // from the third level.
4180 // (a[i][j] != null) & (a[i][y] != null) && // from the third level.
4185 bool Compiler::optComputeDerefConditions(unsigned loopNum, LoopCloneContext* context)
//------------------------------------------------------------------------
// optComputeDerefConditions: Derive loop cloning conditions for
// dereferencing the arrays recorded in context->EnsureDerefs(loopNum).
//
// Arguments:
//    loopNum - the current loop index for which conditions are derived.
//    context - data structure where all loop cloning info is kept.
//
// Return Value:
//    false if conditions cannot be obtained; true otherwise, with
//    per-level conditions stored in context->EnsureBlockConditions.
//
// Notes:
//    Builds a tree of LC_Deref nodes (array base variables at level 0,
//    index variables at deeper levels) so that null-checks and length
//    checks at the same level can be combined in one block, while
//    consecutive levels get short-circuiting blocks. See the long
//    comment above this function for the full derivation.
4187 ExpandArrayStack<LC_Deref*> nodes(getAllocator());
4190 // Get the dereference-able arrays.
4191 ExpandArrayStack<LC_Array>* deref = context->EnsureDerefs(loopNum);
4193 // For each array in the dereference list, construct a tree,
4194 // where the nodes are array and index variables and an edge 'u-v'
4195 // exists if a node 'v' indexes node 'u' directly as in u[v] or an edge
4196 // 'u-v-w' transitively if u[v][w] occurs.
4197 for (unsigned i = 0; i < deref->Size(); ++i)
4199 LC_Array& array = (*deref)[i];
4201 // First populate the array base variable.
4202 LC_Deref* node = LC_Deref::Find(&nodes, array.arrIndex->arrLcl);
4203 if (node == nullptr)
4205 node = new (getAllocator()) LC_Deref(array, 0 /*level*/);
4209 // For each dimension (level) for the array, populate the tree with the variable
4210 // from that dimension.
4211 unsigned rank = (unsigned)array.GetDimRank();
4212 for (unsigned i = 0; i < rank; ++i)
4214 node->EnsureChildren(getAllocator());
4215 LC_Deref* tmp = node->Find(array.arrIndex->indLcls[i]);
4218 tmp = new (getAllocator()) LC_Deref(array, node->level + 1);
4219 node->children->Push(tmp);
4222 // Descend one level down.
4226 // Keep the maxRank of all array dereferences.
4227 maxRank = max((int)rank, maxRank);
4233 for (unsigned i = 0; i < nodes.Size(); ++i)
4250 // First level will always yield the null-check, since it is made of the array base variables.
4251 // All other levels (dimensions) will yield two conditions ex: (i < a.length && a[i] != null)
4252 // So add 1 after rank * 2.
4253 unsigned condBlocks = (unsigned)maxRank * 2 + 1;
4255 // Heuristic to not create too many blocks.
4261 // Derive conditions into an 'array of level x array of conditions' i.e., levelCond[levels][conds]
4262 ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* levelCond = context->EnsureBlockConditions(loopNum, condBlocks);
4263 for (unsigned i = 0; i < nodes.Size(); ++i)
4265 nodes[i]->DeriveLevelConditions(levelCond);
4268 DBEXEC(verbose, context->PrintBlockConditions(loopNum));
4273 //----------------------------------------------------------------------------
4274 // optDebugLogLoopCloning: Insert a call to jithelper that prints a message.
4277 // block - the block in which the helper call needs to be inserted.
4278 // insertBefore - the tree before which the helper call will be inserted.
// optDebugLogLoopCloning: Insert a call to the CORINFO_HELP_DEBUG_LOG_LOOP_CLONING
// helper before 'insertBefore' in 'block'; no-op when the JitDebugLogLoopCloning
// config value is 0. The new statement is morphed in place.
4280 void Compiler::optDebugLogLoopCloning(BasicBlock* block, GenTreePtr insertBefore)
4282 if (JitConfig.JitDebugLogLoopCloning() == 0)
// (source elided here — presumably an early return; TODO confirm)
4286 GenTreePtr logCall = gtNewHelperCallNode(CORINFO_HELP_DEBUG_LOG_LOOP_CLONING, TYP_VOID);
4287 GenTreePtr stmt = fgNewStmtFromTree(logCall);
4288 fgInsertStmtBefore(block, insertBefore, stmt);
4289 fgMorphBlockStmt(block, stmt DEBUGARG("Debug log loop cloning"));
4293 //------------------------------------------------------------------------
4294 // optPerformStaticOptimizations: Perform the optimizations for the optimization
4295 // candidates gathered during the cloning phase.
4298 // loopNum - the current loop index for which the optimizations are performed.
4299 // context - data structure where all loop cloning info is kept.
4300 // dynamicPath - If true, the optimization is performed in the fast path among the
4301 // cloned loops. If false, it means this is the only path (i.e.,
4302 // there is no slow path.)
4305 // Perform the optimizations on the fast path i.e., the path in which the
4306 // optimization candidates were collected at the time of identifying them.
4307 // The candidates store all the information necessary (the tree/stmt/block
4308 // they are from) to perform the optimization.
4311 // The unoptimized path is either already cloned when this method is called or
4312 // there is no unoptimized path (got eliminated statically.) So this method
4313 // performs the optimizations assuming that the path in which the candidates
4314 // were collected is the fast path in which the optimizations will be performed.
// optPerformStaticOptimizations: Apply the optimizations recorded for the loop's
// candidates (currently: removing range checks for jagged-array accesses). The
// candidates carry the tree/stmt/block they came from, so the work is done in place
// on the fast path.
4316 void Compiler::optPerformStaticOptimizations(unsigned loopNum, LoopCloneContext* context DEBUGARG(bool dynamicPath))
4318 ExpandArrayStack<LcOptInfo*>* optInfos = context->GetLoopOptInfo(loopNum);
4319 for (unsigned i = 0; i < optInfos->Size(); ++i)
4321 LcOptInfo* optInfo = optInfos->GetRef(i);
4322 switch (optInfo->GetOptType())
4324 case LcOptInfo::LcJaggedArray:
4326 LcJaggedArrayOptInfo* arrIndexInfo = optInfo->AsLcJaggedArrayOptInfo();
// compCurBB must be the block containing the range check before removal.
4327 compCurBB = arrIndexInfo->arrIndex.useBlock;
4328 optRemoveRangeCheck(arrIndexInfo->arrIndex.bndsChks[arrIndexInfo->dim], arrIndexInfo->stmt, true,
4330 DBEXEC(dynamicPath, optDebugLogLoopCloning(arrIndexInfo->arrIndex.useBlock, arrIndexInfo->stmt));
4333 case LcOptInfo::LcMdArray:
4334 // TODO-CQ: CLONE: Implement.
4342 //----------------------------------------------------------------------------
4343 // optCanCloneLoops: Use the environment flag to determine whether loop
4344 // cloning is allowed to be performed.
4347 // Returns true in debug builds if COMPlus_JitCloneLoops flag is set.
4348 // Disabled for retail for now.
// optCanCloneLoops: Returns whether loop cloning is enabled. Defaults to on;
// the JitCloneLoops config value can override (source is elided around the
// config read — presumably a DEBUG-only block; TODO confirm).
4350 bool Compiler::optCanCloneLoops()
4352 // Enabled for retail builds now.
4353 unsigned cloneLoopsFlag = 1;
4355 cloneLoopsFlag = JitConfig.JitCloneLoops();
4357 return (cloneLoopsFlag != 0);
4360 //----------------------------------------------------------------------------
4361 // optIsLoopClonable: Determine whether this loop can be cloned.
4364 // loopInd loop index which needs to be checked if it can be cloned.
4367 // Returns true if the loop can be cloned. If it returns false
4368 // prints a message in debug as why the loop can't be cloned.
// optIsLoopClonable: Reject loops we cannot clone: loops containing a 'try' begin,
// loops whose entry (or the block after bottom) is a handler start, loops whose head
// and entry are in different EH regions, and loops whose BBJ_RETURN blocks would push
// the method over the 4-epilog limit. On success, reserves the return blocks by
// bumping fgReturnCount.
4370 bool Compiler::optIsLoopClonable(unsigned loopInd)
4372 // First, for now, make sure the loop doesn't have any embedded exception handling -- I don't want to tackle
4373 // inserting new EH regions in the exception table yet.
4374 BasicBlock* stopAt = optLoopTable[loopInd].lpBottom->bbNext;
4375 unsigned loopRetCount = 0;
4376 for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != stopAt; blk = blk->bbNext)
4378 if (blk->bbJumpKind == BBJ_RETURN)
4382 if (bbIsTryBeg(blk))
4384 JITDUMP("Loop cloning: rejecting loop %d in %s, because it has a try begin.\n", loopInd, info.compFullName);
4389 // Is the entry block a handler or filter start? If so, then if we cloned, we could create a jump
4390 // into the middle of a handler (to go to the cloned copy.) Reject.
4391 if (bbIsHandlerBeg(optLoopTable[loopInd].lpEntry))
4393 JITDUMP("Loop cloning: rejecting loop because entry block is a handler start.\n");
4397 // If the head and entry are in different EH regions, reject.
4398 if (!BasicBlock::sameEHRegion(optLoopTable[loopInd].lpHead, optLoopTable[loopInd].lpEntry))
4400 JITDUMP("Loop cloning: rejecting loop because head and entry blocks are in different EH regions.\n");
4404 // Is the first block after the last block of the loop a handler or filter start?
4405 // Usually, we create a dummy block after the orginal loop, to skip over the loop clone
4406 // and go to where the original loop did. That raises problems when we don't actually go to
4407 // that block; this is one of those cases. This could be fixed fairly easily; for example,
4408 // we could add a dummy nop block after the (cloned) loop bottom, in the same handler scope as the
4409 // loop. This is just a corner to cut to get this working faster.
4410 BasicBlock* bbAfterLoop = optLoopTable[loopInd].lpBottom->bbNext;
4411 if (bbAfterLoop != nullptr && bbIsHandlerBeg(bbAfterLoop))
4413 JITDUMP("Loop cloning: rejecting loop because next block after bottom is a handler start.\n");
4417 // We've previously made a decision whether to have separate return epilogs, or branch to one.
4418 // There's a GCInfo limitation in the x86 case, so that there can be no more than 4 separate epilogs.
4419 // (I thought this was x86-specific, but it's not if-d. On other architectures, the decision should be made as a
4420 // heuristic tradeoff; perhaps we're just choosing to live with 4 as the limit.)
4421 if (fgReturnCount + loopRetCount > 4)
4423 JITDUMP("Loop cloning: rejecting loop because it has %d returns; if added to previously-existing %d returns, "
4424 "would exceed the limit of 4.\n",
4425 loopRetCount, fgReturnCount);
4429 // Otherwise, we're going to add those return blocks.
4430 fgReturnCount += loopRetCount;
4435 /*****************************************************************************
4437 * Identify loop cloning opportunities, derive loop cloning conditions,
4438 * perform loop cloning, use the derived conditions to choose which
// optCloneLoops: Driver for loop cloning. Gathers candidates per loop, derives
// cloning + dereference conditions, statically evaluates them (cancelling or
// optimizing in place when possible), and clones the remaining loops.
4441 void Compiler::optCloneLoops()
4443 JITDUMP("\n*************** In optCloneLoops()\n");
4444 if (optLoopCount == 0 || !optCanCloneLoops())
4452 printf("Blocks/Trees at start of phase\n");
4453 fgDispBasicBlocks(true);
4457 LoopCloneContext context(optLoopCount, getAllocator());
4459 // Obtain array optimization candidates in the context.
4460 optObtainLoopCloningOpts(&context);
4462 // For each loop, derive cloning conditions for the optimization candidates.
4463 for (unsigned i = 0; i < optLoopCount; ++i)
4465 ExpandArrayStack<LcOptInfo*>* optInfos = context.GetLoopOptInfo(i);
4466 if (optInfos == nullptr)
4471 if (!optDeriveLoopCloningConditions(i, &context) || !optComputeDerefConditions(i, &context))
4473 JITDUMP("> Conditions could not be obtained\n");
4474 context.CancelLoopOptInfo(i);
4478 bool allTrue = false;
4479 bool anyFalse = false;
4480 context.EvaluateConditions(i, &allTrue, &anyFalse DEBUGARG(verbose));
// If any condition is statically false, cloning can never help this loop.
4483 context.CancelLoopOptInfo(i);
4487 // Perform static optimizations on the fast path since we always
4488 // have to take the cloned path.
4489 optPerformStaticOptimizations(i, &context DEBUGARG(false));
4491 // No need to clone.
4492 context.CancelLoopOptInfo(i);
4498 // The code in this #if has been useful in debugging loop cloning issues, by
4499 // enabling selective enablement of the loop cloning optimization according to
4502 unsigned methHash = info.compMethodHash();
4503 char* lostr = getenv("loopclonehashlo");
4504 unsigned methHashLo = 0;
4507 sscanf_s(lostr, "%x", &methHashLo);
4508 // methHashLo = (unsigned(atoi(lostr)) << 2); // So we don't have to use negative numbers.
4510 char* histr = getenv("loopclonehashhi");
4511 unsigned methHashHi = UINT32_MAX;
4514 sscanf_s(histr, "%x", &methHashHi);
4515 // methHashHi = (unsigned(atoi(histr)) << 2); // So we don't have to use negative numbers.
4517 if (methHash < methHashLo || methHash > methHashHi)
4522 for (unsigned i = 0; i < optLoopCount; ++i)
4524 if (context.GetLoopOptInfo(i) != nullptr)
4527 context.OptimizeConditions(i DEBUGARG(verbose));
4528 context.OptimizeBlockConditions(i DEBUGARG(verbose));
4529 optCloneLoop(i, &context);
4536 printf("\nAfter loop cloning:\n");
4537 fgDispBasicBlocks(/*dumpTrees*/ true);
4542 void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context)
4544 assert(loopInd < optLoopCount);
4546 JITDUMP("\nCloning loop %d: [h: %d, f: %d, t: %d, e: %d, b: %d].\n", loopInd, optLoopTable[loopInd].lpHead->bbNum,
4547 optLoopTable[loopInd].lpFirst->bbNum, optLoopTable[loopInd].lpTop->bbNum,
4548 optLoopTable[loopInd].lpEntry->bbNum, optLoopTable[loopInd].lpBottom->bbNum);
4550 // Determine the depth of the loop, so we can properly weight blocks added (outside the cloned loop blocks).
4551 unsigned depth = optLoopDepth(loopInd);
4552 unsigned ambientWeight = 1;
4553 for (unsigned j = 0; j < depth; j++)
4555 unsigned lastWeight = ambientWeight;
4556 ambientWeight *= BB_LOOP_WEIGHT;
4557 // If the multiplication overflowed, stick at max.
4558 // (Strictly speaking, a multiplication could overflow and still have a result
4559 // that is >= lastWeight...but if so, the original weight must be pretty large,
4560 // and it got bigger, so that's OK.)
4561 if (ambientWeight < lastWeight)
4563 ambientWeight = BB_MAX_WEIGHT;
4568 // If we're in a non-natural loop, the ambient weight might be higher than we computed above.
4569 // Be safe by taking the max with the head block's weight.
4570 ambientWeight = max(ambientWeight, optLoopTable[loopInd].lpHead->bbWeight);
4572 // This is the containing loop, if any -- to label any blocks we create that are outside
4573 // the loop being cloned.
4574 unsigned char ambientLoop = optLoopTable[loopInd].lpParent;
4576 // First, make sure that the loop has a unique header block, creating an empty one if necessary.
4577 optEnsureUniqueHead(loopInd, ambientWeight);
4579 // We're going to make
4591 // H2--> E (Optional; if E == T == F, let H fall through to F/T/E)
4603 BasicBlock* h = optLoopTable[loopInd].lpHead;
4604 if (h->bbJumpKind != BBJ_NONE && h->bbJumpKind != BBJ_ALWAYS)
4606 // Make a new block to be the unique entry to the loop.
4607 assert(h->bbJumpKind == BBJ_COND && h->bbNext == optLoopTable[loopInd].lpEntry);
4608 BasicBlock* newH = fgNewBBafter(BBJ_NONE, h,
4609 /*extendRegion*/ true);
4610 newH->bbWeight = (newH->isRunRarely() ? 0 : ambientWeight);
4611 BlockSetOps::Assign(this, newH->bbReach, h->bbReach);
4612 // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
4613 newH->bbNatLoopNum = ambientLoop;
4615 optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h);
4618 // First, make X2 after B, if necessary. (Not necessary if b is a BBJ_ALWAYS.)
4619 // "newPred" will be the predecessor of the blocks of the cloned loop.
4620 BasicBlock* b = optLoopTable[loopInd].lpBottom;
4621 BasicBlock* newPred = b;
4622 if (b->bbJumpKind != BBJ_ALWAYS)
4624 BasicBlock* x = b->bbNext;
4627 BasicBlock* x2 = fgNewBBafter(BBJ_ALWAYS, b, /*extendRegion*/ true);
4628 x2->bbWeight = (x2->isRunRarely() ? 0 : ambientWeight);
4630 // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
4631 x2->bbNatLoopNum = ambientLoop;
4634 BlockSetOps::Assign(this, x2->bbReach, h->bbReach);
4639 // Now we'll make "h2", after "h" to go to "e" -- unless the loop is a do-while,
4640 // so that "h" already falls through to "e" (e == t == f).
4641 BasicBlock* h2 = nullptr;
4642 if (optLoopTable[loopInd].lpHead->bbNext != optLoopTable[loopInd].lpEntry)
4644 BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, optLoopTable[loopInd].lpHead,
4645 /*extendRegion*/ true);
4646 h2->bbWeight = (h2->isRunRarely() ? 0 : ambientWeight);
4648 // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
4649 h2->bbNatLoopNum = ambientLoop;
4651 h2->bbJumpDest = optLoopTable[loopInd].lpEntry;
4652 optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h2);
4655 // Now we'll clone the blocks of the loop body.
4656 BasicBlock* newFirst = nullptr;
4657 BasicBlock* newBot = nullptr;
4659 BlockToBlockMap* blockMap = new (getAllocator()) BlockToBlockMap(getAllocator());
4660 for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != optLoopTable[loopInd].lpBottom->bbNext;
4663 BasicBlock* newBlk = fgNewBBafter(blk->bbJumpKind, newPred,
4664 /*extendRegion*/ true);
4666 BasicBlock::CloneBlockState(this, newBlk, blk);
4667 // TODO-Cleanup: The above clones the bbNatLoopNum, which is incorrect. Eventually, we should probably insert
4668 // the cloned loop in the loop table. For now, however, we'll just make these blocks be part of the surrounding
4669 // loop, if one exists -- the parent of the loop we're cloning.
4670 newBlk->bbNatLoopNum = optLoopTable[loopInd].lpParent;
4672 if (newFirst == nullptr)
4676 newBot = newBlk; // Continually overwrite to make sure we get the last one.
4678 blockMap->Set(blk, newBlk);
4681 // Perform the static optimizations on the fast path.
4682 optPerformStaticOptimizations(loopInd, context DEBUGARG(true));
4684 // Now go through the new blocks, remapping their jump targets within the loop.
4685 for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != optLoopTable[loopInd].lpBottom->bbNext;
4689 BasicBlock* newblk = nullptr;
4690 bool b = blockMap->Lookup(blk, &newblk);
4691 assert(b && newblk != nullptr);
4693 assert(blk->bbJumpKind == newblk->bbJumpKind);
4695 // First copy the jump destination(s) from "blk".
4696 optCopyBlkDest(blk, newblk);
4698 // Now redirect the new block according to "blockMap".
4699 optRedirectBlock(newblk, blockMap);
4702 assert((h->bbJumpKind == BBJ_NONE && (h->bbNext == h2 || h->bbNext == optLoopTable[loopInd].lpEntry)) ||
4703 (h->bbJumpKind == BBJ_ALWAYS));
4705 // If all the conditions are true, go to E2.
4706 BasicBlock* e2 = nullptr;
4707 bool foundIt = blockMap->Lookup(optLoopTable[loopInd].lpEntry, &e2);
4709 h->bbJumpKind = BBJ_COND;
4711 // We will create the following structure
4713 // cond0 (in h) -?> cond1
4714 // slow --> e2 (slow) always
4721 // We should always have block conditions, at the minimum, the array should be deref-able
4722 assert(context->HasBlockConditions(loopInd));
4724 // Create a unique header for the slow path.
4725 BasicBlock* slowHead = fgNewBBafter(BBJ_ALWAYS, h, true);
4726 slowHead->bbWeight = (h->isRunRarely() ? 0 : ambientWeight);
4727 slowHead->bbNatLoopNum = ambientLoop;
4728 slowHead->bbJumpDest = e2;
4730 BasicBlock* condLast = optInsertLoopChoiceConditions(context, loopInd, h, slowHead);
4731 condLast->bbJumpDest = slowHead;
4733 // If h2 is present it is already the head or replace 'h' by 'condLast'.
4736 optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, condLast);
4738 assert(foundIt && e2 != nullptr);
4740 fgUpdateChangedFlowGraph();
4743 //--------------------------------------------------------------------------------------------------
4744 // optInsertLoopChoiceConditions - Insert the loop conditions for a loop between loop head and entry
4747 // context loop cloning context variable
4748 // loopNum the loop index
4749 // head loop head for "loopNum"
4750 // slowHead the slow path loop head
4756 // Create the following structure.
4758 // Note below that the cond0 is inverted in head i.e., if true jump to cond1. This is because
4759 // condn cannot jtrue to loop head h2. It has to be from a direct pred block.
4761 // cond0 (in h) -?> cond1
4762 // slowHead --> e2 (slowHead) always
4763 // !cond1 -?> slowHead
4764 // !cond2 -?> slowHead
4766 // !condn -?> slowHead
4769 // Insert condition 0 in 'h' and create other condition blocks and insert conditions in them.
// optInsertLoopChoiceConditions: Materialize the loop cloning conditions as a chain
// of BBJ_COND blocks between 'head' and the loop entry. The condition placed in the
// header is inverted (jump-if-true to the next condition block); each subsequent
// block jumps to 'slowHead' when its condition fails. Returns the last condition
// block created.
4771 BasicBlock* Compiler::optInsertLoopChoiceConditions(LoopCloneContext* context,
4774 BasicBlock* slowHead)
4776 JITDUMP("Inserting loop cloning conditions\n");
4777 assert(context->HasBlockConditions(loopNum));
4779 BasicBlock* curCond = head;
4780 ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* levelCond = context->GetBlockConditions(loopNum);
4781 for (unsigned i = 0; i < levelCond->Size(); ++i)
4783 bool isHeaderBlock = (curCond == head);
4785 // Flip the condition if header block.
4786 context->CondToStmtInBlock(this, *((*levelCond)[i]), curCond, isHeaderBlock);
4788 // Create each condition block ensuring wiring between them.
4789 BasicBlock* tmp = fgNewBBafter(BBJ_COND, isHeaderBlock ? slowHead : curCond, true);
4790 curCond->bbJumpDest = isHeaderBlock ? tmp : slowHead;
4793 curCond->inheritWeight(head);
4794 curCond->bbNatLoopNum = head->bbNatLoopNum;
4795 JITDUMP("Created new block %02d for new level\n", curCond->bbNum);
4798 // Finally insert cloning conditions after all deref conditions have been inserted.
4799 context->CondToStmtInBlock(this, *(context->GetConditions(loopNum)), curCond, false);
// optEnsureUniqueHead: Ensure loop 'loopInd' has a unique header block. If the
// current head already dominates the entry it is the unique header; otherwise
// create a new empty BBJ_ALWAYS block 'h2' just before the top, splice it in, and
// redirect every out-of-loop predecessor of the entry to go through 'h2'.
4803 void Compiler::optEnsureUniqueHead(unsigned loopInd, unsigned ambientWeight)
4805 BasicBlock* h = optLoopTable[loopInd].lpHead;
4806 BasicBlock* t = optLoopTable[loopInd].lpTop;
4807 BasicBlock* e = optLoopTable[loopInd].lpEntry;
4808 BasicBlock* b = optLoopTable[loopInd].lpBottom;
4810 // If "h" dominates the entry block, then it is the unique header.
4811 if (fgDominate(h, e))
4816 // Otherwise, create a new empty header block, make it the pred of the entry block,
4817 // and redirect the preds of the entry block to go to this.
4819 BasicBlock* beforeTop = t->bbPrev;
4820 // Make sure that the new block is in the same region as the loop.
4821 // (We will only create loops that are entirely within a region.)
4822 BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, beforeTop, true);
4823 // This is in the containing loop.
4824 h2->bbNatLoopNum = optLoopTable[loopInd].lpParent;
4825 h2->bbWeight = (h2->isRunRarely() ? 0 : ambientWeight);
4827 // We don't care where it was put; splice it between beforeTop and top.
4828 if (beforeTop->bbNext != h2)
4830 h2->bbPrev->setNext(h2->bbNext); // Splice h2 out.
4831 beforeTop->setNext(h2); // Splice h2 in, between beforeTop and t.
4835 if (h2->bbNext != e)
4837 h2->bbJumpKind = BBJ_ALWAYS;
4840 BlockSetOps::Assign(this, h2->bbReach, e->bbReach);
4842 // Redirect paths from preds of "e" to go to "h2" instead of "e".
4843 BlockToBlockMap* blockMap = new (getAllocator()) BlockToBlockMap(getAllocator());
4844 blockMap->Set(e, h2);
4846 for (flowList* predEntry = e->bbPreds; predEntry; predEntry = predEntry->flNext)
4848 BasicBlock* predBlock = predEntry->flBlock;
4850 // Skip if predBlock is in the loop.
4851 if (t->bbNum <= predBlock->bbNum && predBlock->bbNum <= b->bbNum)
4855 optRedirectBlock(predBlock, blockMap);
4858 optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h2);
4861 /*****************************************************************************
4863 * Determine the kind of interference for the call.
// optCallInterf: Classify what kind of memory a GT_CALL may interfere with.
// Non-helper calls kill everything; known "setfield"/array-store helpers kill
// only the matching class of indirections; all other helpers kill nothing.
4866 /* static */ inline Compiler::callInterf Compiler::optCallInterf(GenTreePtr call)
4868 assert(call->gtOper == GT_CALL);
4870 // if not a helper, kills everything
4871 if (call->gtCall.gtCallType != CT_HELPER)
4876 // setfield and array address store kill all indirections
4877 switch (eeGetHelperNum(call->gtCall.gtCallMethHnd))
4879 case CORINFO_HELP_ASSIGN_REF: // Not strictly needed as we don't make a GT_CALL with this
4880 case CORINFO_HELP_CHECKED_ASSIGN_REF: // Not strictly needed as we don't make a GT_CALL with this
4881 case CORINFO_HELP_ASSIGN_BYREF: // Not strictly needed as we don't make a GT_CALL with this
4882 case CORINFO_HELP_SETFIELDOBJ:
4883 case CORINFO_HELP_ARRADDR_ST:
4885 return CALLINT_REF_INDIRS;
4887 case CORINFO_HELP_SETFIELDFLOAT:
4888 case CORINFO_HELP_SETFIELDDOUBLE:
4889 case CORINFO_HELP_SETFIELD8:
4890 case CORINFO_HELP_SETFIELD16:
4891 case CORINFO_HELP_SETFIELD32:
4892 case CORINFO_HELP_SETFIELD64:
4894 return CALLINT_SCL_INDIRS;
4896 case CORINFO_HELP_ASSIGN_STRUCT: // Not strictly needed as we don't use this in Jit32
4897 case CORINFO_HELP_MEMSET: // Not strictly needed as we don't make a GT_CALL with this
4898 case CORINFO_HELP_MEMCPY: // Not strictly needed as we don't make a GT_CALL with this
4899 case CORINFO_HELP_SETFIELDSTRUCT:
4901 return CALLINT_ALL_INDIRS;
4907 // other helpers kill nothing
4908 return CALLINT_NONE;
4911 /*****************************************************************************
4913 * See if the given tree can be computed in the given precision (which must
4914 * be smaller than the type of the tree for this to make sense). If 'doit'
4915 * is false, we merely check to see whether narrowing is possible; if we
4916 * get called with 'doit' being true, we actually perform the narrowing.
// optNarrowTree: Check whether (doit == false) or perform (doit == true) narrowing
// of 'tree' from integral type 'srct' to the smaller integral type 'dstt',
// propagating 'vnpNarrow' onto nodes whose type changes. Returns false when
// narrowing is not possible, in which case no changes are made (callers first probe
// with doit == false).
//
// NOTE(review): SOURCE is heavily elided in this function (many case labels and
// braces are missing); comments are limited to the visible code.
4919 bool Compiler::optNarrowTree(GenTreePtr tree, var_types srct, var_types dstt, ValueNumPair vnpNarrow, bool doit)
4925 noway_assert(genActualType(tree->gtType) == genActualType(srct));
4927 /* Assume we're only handling integer types */
4928 noway_assert(varTypeIsIntegral(srct));
4929 noway_assert(varTypeIsIntegral(dstt));
4931 unsigned srcSize = genTypeSize(srct);
4932 unsigned dstSize = genTypeSize(dstt);
4934 /* dstt must be smaller than srct to narrow */
4935 if (dstSize >= srcSize)
4940 /* Figure out what kind of a node we have */
4941 oper = tree->OperGet();
4942 kind = tree->OperKind();
4944 if (kind & GTK_ASGOP)
4946 noway_assert(doit == false);
4950 ValueNumPair NoVNPair = ValueNumPair();
4952 if (kind & GTK_LEAF)
4956 /* Constants can usually be narrowed by changing their value */
4957 CLANG_FORMAT_COMMENT_ANCHOR;
4959 #ifndef _TARGET_64BIT_
4964 lval = tree->gtIntConCommon.LngValue();
4993 if ((lval & lmask) != lval)
4998 tree->ChangeOperConst(GT_CNS_INT);
4999 tree->gtType = TYP_INT;
5000 tree->gtIntCon.gtIconVal = (int)lval;
5001 if (vnStore != nullptr)
5003 fgValueNumberTreeConst(tree);
5013 ival = tree->gtIntCon.gtIconVal;
5032 #ifdef _TARGET_64BIT_
5039 #endif // _TARGET_64BIT_
5044 if ((ival & imask) != ival)
5049 #ifdef _TARGET_64BIT_
5052 tree->gtType = TYP_INT;
5053 tree->gtIntCon.gtIconVal = (int)ival;
5054 if (vnStore != nullptr)
5056 fgValueNumberTreeConst(tree);
5059 #endif // _TARGET_64BIT_
5063 /* Operands that are in memory can usually be narrowed
5064 simply by changing their gtType */
5067 /* We only allow narrowing long -> int for a GT_LCL_VAR */
5068 if (dstSize == sizeof(int))
5081 noway_assert(doit == false);
5085 if (kind & (GTK_BINOP | GTK_UNOP))
5088 op1 = tree->gtOp.gtOp1;
5090 op2 = tree->gtOp.gtOp2;
5092 switch (tree->gtOper)
5095 noway_assert(genActualType(tree->gtType) == genActualType(op2->gtType));
5097 // Is op2 a small constant than can be narrowed into dstt?
5098 // if so the result of the GT_AND will also fit into 'dstt' and can be narrowed
5099 if ((op2->gtOper == GT_CNS_INT) && optNarrowTree(op2, srct, dstt, NoVNPair, false))
5101 // We will change the type of the tree and narrow op2
5105 tree->gtType = genActualType(dstt);
5106 tree->SetVNs(vnpNarrow);
5108 optNarrowTree(op2, srct, dstt, NoVNPair, true);
5109 // We may also need to cast away the upper bits of op1
5112 assert(tree->gtType == TYP_INT);
5113 op1 = gtNewCastNode(TYP_INT, op1, TYP_INT);
5115 op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
5117 tree->gtOp.gtOp1 = op1;
5128 if (tree->gtOverflow() || varTypeIsSmall(dstt))
5130 noway_assert(doit == false);
5138 noway_assert(genActualType(tree->gtType) == genActualType(op1->gtType));
5139 noway_assert(genActualType(tree->gtType) == genActualType(op2->gtType));
// Both operands must themselves be narrowable (and not active CSE candidates).
5141 if (gtIsActiveCSE_Candidate(op1) || gtIsActiveCSE_Candidate(op2) ||
5142 !optNarrowTree(op1, srct, dstt, NoVNPair, doit) || !optNarrowTree(op2, srct, dstt, NoVNPair, doit))
5144 noway_assert(doit == false);
5148 /* Simply change the type of the tree */
5152 if (tree->gtOper == GT_MUL && (tree->gtFlags & GTF_MUL_64RSLT))
5154 tree->gtFlags &= ~GTF_MUL_64RSLT;
5157 tree->gtType = genActualType(dstt);
5158 tree->SetVNs(vnpNarrow);
5166 /* Simply change the type of the tree */
5168 if (doit && (dstSize <= genTypeSize(tree->gtType)))
5170 tree->gtType = genSignedType(dstt);
5171 tree->SetVNs(vnpNarrow);
5173 /* Make sure we don't mess up the variable type */
5174 if ((oper == GT_LCL_VAR) || (oper == GT_LCL_FLD))
5176 tree->gtFlags |= GTF_VAR_CAST;
5189 /* These can always be narrowed since they only represent 0 or 1 */
5194 var_types cast = tree->CastToType();
5195 var_types oprt = op1->TypeGet();
5196 unsigned oprSize = genTypeSize(oprt);
5203 if (varTypeIsIntegralOrI(dstt) != varTypeIsIntegralOrI(oprt))
5208 if (tree->gtOverflow())
5213 /* Is this a cast from the type we're narrowing to or a smaller one? */
5215 if (oprSize <= dstSize)
5217 /* Bash the target type of the cast */
5221 dstt = genSignedType(dstt);
5223 if (oprSize == dstSize)
5225 // Same size: change the CAST into a NOP
5226 tree->ChangeOper(GT_NOP);
5227 tree->gtType = dstt;
5228 tree->gtOp.gtOp2 = nullptr;
5229 tree->gtVNPair = op1->gtVNPair; // Set to op1's ValueNumber
5233 // oprSize is smaller
5234 assert(oprSize < dstSize);
5236 // Change the CastToType in the GT_CAST node
5237 tree->CastToType() = dstt;
5239 // The result type of a GT_CAST is never a small type.
5240 // Use genActualType to widen dstt when it is a small types.
5241 tree->gtType = genActualType(dstt);
5242 tree->SetVNs(vnpNarrow);
5252 if (!gtIsActiveCSE_Candidate(op2) && optNarrowTree(op2, srct, dstt, vnpNarrow, doit))
5254 /* Simply change the type of the tree */
5258 tree->gtType = genActualType(dstt);
5259 tree->SetVNs(vnpNarrow);
5266 noway_assert(doit == false);
5274 /*****************************************************************************
5276 * The following logic figures out whether the given variable is assigned
5277 * somewhere in a list of basic blocks (or in an entire loop).
// optIsVarAssgCB: fgWalkTreePre callback used by the "is variable assigned" walks.
// Records, into the isVarAssgDsc passed via pCallbackData, which locals are stored
// to (ivaMaskVal), which kinds of indirections are written (ivaMaskInd), and the
// interference class of any call seen (ivaMaskCall).
5280 Compiler::fgWalkResult Compiler::optIsVarAssgCB(GenTreePtr* pTree, fgWalkData* data)
5282 GenTreePtr tree = *pTree;
5284 if (tree->OperKind() & GTK_ASGOP)
5286 GenTreePtr dest = tree->gtOp.gtOp1;
5287 genTreeOps destOper = dest->OperGet();
5289 isVarAssgDsc* desc = (isVarAssgDsc*)data->pCallbackData;
5290 assert(desc && desc->ivaSelf == desc);
5292 if (destOper == GT_LCL_VAR)
5294 unsigned tvar = dest->gtLclVarCommon.gtLclNum;
// Only the first lclMAX_ALLSET_TRACKED locals fit in the tracked set; beyond
// that the mask is marked incomplete rather than grown.
5295 if (tvar < lclMAX_ALLSET_TRACKED)
5297 AllVarSetOps::AddElemD(data->compiler, desc->ivaMaskVal, tvar);
5301 desc->ivaMaskIncomplete = true;
5304 if (tvar == desc->ivaVar)
5306 if (tree != desc->ivaSkip)
5312 else if (destOper == GT_LCL_FLD)
5314 /* We can't track every field of every var. Moreover, indirections
5315 may access different parts of the var as different (but
5316 overlapping) fields. So just treat them as indirect accesses */
5318 // unsigned lclNum = dest->gtLclFld.gtLclNum;
5319 // noway_assert(lvaTable[lclNum].lvAddrTaken);
5321 varRefKinds refs = varTypeIsGC(tree->TypeGet()) ? VR_IND_REF : VR_IND_SCL;
5322 desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | refs);
5324 else if (destOper == GT_CLS_VAR)
5326 desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | VR_GLB_VAR);
5328 else if (destOper == GT_IND)
5330 /* Set the proper indirection bits */
5332 varRefKinds refs = varTypeIsGC(tree->TypeGet()) ? VR_IND_REF : VR_IND_SCL;
5333 desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | refs);
5336 else if (tree->gtOper == GT_CALL)
5338 isVarAssgDsc* desc = (isVarAssgDsc*)data->pCallbackData;
5339 assert(desc && desc->ivaSelf == desc);
5341 desc->ivaMaskCall = optCallInterf(tree);
5344 return WALK_CONTINUE;
5347 /*****************************************************************************/
// optIsVarAssigned: Walk the statements of the blocks from 'beg' (visible code walks
// 'beg'; the rest of the block range handling is elided here) looking for an
// assignment to local 'var', ignoring the single tree 'skip'. Uses optIsVarAssgCB
// with an isVarAssgDsc descriptor.
5349 bool Compiler::optIsVarAssigned(BasicBlock* beg, BasicBlock* end, GenTreePtr skip, unsigned var)
5354 desc.ivaSkip = skip;
5356 desc.ivaSelf = &desc;
5359 desc.ivaMaskCall = CALLINT_NONE;
5360 AllVarSetOps::AssignNoCopy(this, desc.ivaMaskVal, AllVarSetOps::MakeEmpty(this));
5366 for (GenTreeStmt* stmt = beg->firstStmt(); stmt; stmt = stmt->gtNextStmt)
5368 noway_assert(stmt->gtOper == GT_STMT);
5369 if (fgWalkTreePre(&stmt->gtStmtExpr, optIsVarAssgCB, &desc))
5391 /*****************************************************************************/
// optIsSetAssgLoop: Determine whether loop 'lnum' assigns any of the variables in
// 'vars' or writes any of the indirection kinds in 'inds'. Caches the loop's
// assigned-variable summary (lpAsgVars/lpAsgInds/lpAsgCall) on first use via the
// LPFLG_ASGVARS_YES flag, then tests the caller's masks and the call-interference
// class against it.
5392 int Compiler::optIsSetAssgLoop(unsigned lnum, ALLVARSET_VALARG_TP vars, varRefKinds inds)
5396 /* Get hold of the loop descriptor */
5398 noway_assert(lnum < optLoopCount);
5399 loop = optLoopTable + lnum;
5401 /* Do we already know what variables are assigned within this loop? */
5403 if (!(loop->lpFlags & LPFLG_ASGVARS_YES))
5410 /* Prepare the descriptor used by the tree walker call-back */
5412 desc.ivaVar = (unsigned)-1;
5413 desc.ivaSkip = nullptr;
5415 desc.ivaSelf = &desc;
5417 AllVarSetOps::AssignNoCopy(this, desc.ivaMaskVal, AllVarSetOps::MakeEmpty(this));
5418 desc.ivaMaskInd = VR_NONE;
5419 desc.ivaMaskCall = CALLINT_NONE;
5420 desc.ivaMaskIncomplete = false;
5422 /* Now walk all the statements of the loop */
5424 beg = loop->lpHead->bbNext;
5425 end = loop->lpBottom;
5427 for (/**/; /**/; beg = beg->bbNext)
5431 for (GenTreeStmt* stmt = beg->FirstNonPhiDef(); stmt; stmt = stmt->gtNextStmt)
5433 noway_assert(stmt->gtOper == GT_STMT);
5434 fgWalkTreePre(&stmt->gtStmtExpr, optIsVarAssgCB, &desc);
5436 if (desc.ivaMaskIncomplete)
5438 loop->lpFlags |= LPFLG_ASGVARS_INC;
5448 AllVarSetOps::Assign(this, loop->lpAsgVars, desc.ivaMaskVal);
5449 loop->lpAsgInds = desc.ivaMaskInd;
5450 loop->lpAsgCall = desc.ivaMaskCall;
5452 /* Now we know what variables are assigned in the loop */
5454 loop->lpFlags |= LPFLG_ASGVARS_YES;
5457 /* Now we can finally test the caller's mask against the loop's */
5458 if (!AllVarSetOps::IsEmptyIntersection(this, loop->lpAsgVars, vars) || (loop->lpAsgInds & inds))
5463 switch (loop->lpAsgCall)
5467 /* Can't hoist if the call might have side effect on an indirection. */
5469 if (loop->lpAsgInds != VR_NONE)
5476 case CALLINT_REF_INDIRS:
5478 /* Can't hoist if the call might have side effect on an ref indirection. */
5480 if (loop->lpAsgInds & VR_IND_REF)
5487 case CALLINT_SCL_INDIRS:
5489 /* Can't hoist if the call might have side effect on an non-ref indirection. */
5491 if (loop->lpAsgInds & VR_IND_SCL)
5498 case CALLINT_ALL_INDIRS:
5500 /* Can't hoist if the call might have side effect on any indirection. */
5502 if (loop->lpAsgInds & (VR_IND_REF | VR_IND_SCL))
5511 /* Other helpers kill nothing */
5516 noway_assert(!"Unexpected lpAsgCall value");
5522 void Compiler::optPerformHoistExpr(GenTreePtr origExpr, unsigned lnum)
5527 printf("\nHoisting a copy of ");
5528 printTreeID(origExpr);
5529 printf(" into PreHeader for loop L%02u <BB%02u..BB%02u>:\n", lnum, optLoopTable[lnum].lpFirst->bbNum,
5530 optLoopTable[lnum].lpBottom->bbNum);
5531 gtDispTree(origExpr);
5536 // This loop has to be in a form that is approved for hoisting.
5537 assert(optLoopTable[lnum].lpFlags & LPFLG_HOISTABLE);
5539 // Create a copy of the expression and mark it for CSE's.
5540 GenTreePtr hoistExpr = gtCloneExpr(origExpr, GTF_MAKE_CSE);
5542 // At this point we should have a cloned expression, marked with the GTF_MAKE_CSE flag
5543 assert(hoistExpr != origExpr);
5544 assert(hoistExpr->gtFlags & GTF_MAKE_CSE);
5546 GenTreePtr hoist = hoistExpr;
5547 // The value of the expression isn't used (unless it's an assignment).
5548 if (hoistExpr->OperGet() != GT_ASG)
5550 hoist = gtUnusedValNode(hoistExpr);
5553 /* Put the statement in the preheader */
5555 fgCreateLoopPreHeader(lnum);
5557 BasicBlock* preHead = optLoopTable[lnum].lpHead;
5558 assert(preHead->bbJumpKind == BBJ_NONE);
5560 // fgMorphTree and lvaRecursiveIncRefCounts requires that compCurBB be the block that contains
5561 // (or in this case, will contain) the expression.
5562 compCurBB = preHead;
5564 // Increment the ref counts of any local vars appearing in "hoist".
5565 // Note that we need to do this before fgMorphTree() as fgMorph() could constant
5566 // fold away some of the lcl vars referenced by "hoist".
5567 lvaRecursiveIncRefCounts(hoist);
5569 hoist = fgMorphTree(hoist);
5571 GenTreePtr hoistStmt = gtNewStmt(hoist);
5572 hoistStmt->gtFlags |= GTF_STMT_CMPADD;
5574 /* simply append the statement at the end of the preHead's list */
5576 GenTreePtr treeList = preHead->bbTreeList;
5580 /* append after last statement */
5582 GenTreePtr last = treeList->gtPrev;
5583 assert(last->gtNext == nullptr);
5585 last->gtNext = hoistStmt;
5586 hoistStmt->gtPrev = last;
5587 treeList->gtPrev = hoistStmt;
5591 /* Empty pre-header - store the single statement in the block */
5593 preHead->bbTreeList = hoistStmt;
5594 hoistStmt->gtPrev = hoistStmt;
5597 hoistStmt->gtNext = nullptr;
5602 printf("This hoisted copy placed in PreHeader (BB%02u):\n", preHead->bbNum);
5607 if (fgStmtListThreaded)
5609 gtSetStmtInfo(hoistStmt);
5610 fgSetStmtSeq(hoistStmt);
5614 if (m_nodeTestData != nullptr)
5617 // What is the depth of the loop "lnum"?
5619 unsigned lnumIter = lnum;
5620 while (optLoopTable[lnumIter].lpParent != BasicBlock::NOT_IN_LOOP)
5623 lnumIter = optLoopTable[lnumIter].lpParent;
5626 NodeToTestDataMap* testData = GetNodeTestData();
5628 TestLabelAndNum tlAndN;
5629 if (testData->Lookup(origExpr, &tlAndN) && tlAndN.m_tl == TL_LoopHoist)
5631 if (tlAndN.m_num == -1)
5634 printTreeID(origExpr);
5635 printf(" was declared 'do not hoist', but is being hoisted.\n");
5638 else if (tlAndN.m_num != depth)
5641 printTreeID(origExpr);
5642 printf(" was declared as hoistable from loop at nesting depth %d; actually hoisted from loop at depth "
5644 tlAndN.m_num, depth);
5649 // We've correctly hoisted this, so remove the annotation. Later, we'll check for any remaining "must
5650 // hoist" annotations.
5651 testData->Remove(origExpr);
5652 // Now we insert an annotation to make sure that "hoistExpr" is actually CSE'd.
5653 tlAndN.m_tl = TL_CSE_Def;
5654 tlAndN.m_num = m_loopHoistCSEClass++;
5655 testData->Set(hoistExpr, tlAndN);
5661 #if LOOP_HOIST_STATS
5662 if (!m_curLoopHasHoistedExpression)
5664 m_loopsWithHoistedExpressions++;
5665 m_curLoopHasHoistedExpression = true;
5667 m_totalHoistedExpressions++;
5668 #endif // LOOP_HOIST_STATS
5671 void Compiler::optHoistLoopCode()
5673 // If we don't have any loops in the method then take an early out now.
5674 if (optLoopCount == 0)
5680 unsigned jitNoHoist = JitConfig.JitNoHoist();
5688 // The code in this #if has been useful in debugging loop cloning issues, by
5689 // enabling selective enablement of the loop cloning optimization according to
5692 unsigned methHash = info.compMethodHash();
5693 char* lostr = getenv("loophoisthashlo");
5694 unsigned methHashLo = 0;
5697 sscanf_s(lostr, "%x", &methHashLo);
5698 // methHashLo = (unsigned(atoi(lostr)) << 2); // So we don't have to use negative numbers.
5700 char* histr = getenv("loophoisthashhi");
5701 unsigned methHashHi = UINT32_MAX;
5704 sscanf_s(histr, "%x", &methHashHi);
5705 // methHashHi = (unsigned(atoi(histr)) << 2); // So we don't have to use negative numbers.
5707 if (methHash < methHashLo || methHash > methHashHi)
5709 printf("Doing loop hoisting in %s (0x%x).\n", info.compFullName, methHash);
5711 #endif // 0 -- debugging loop cloning issues
5716 printf("\n*************** In optHoistLoopCode()\n");
5717 printf("Blocks/Trees before phase\n");
5718 fgDispBasicBlocks(true);
5723 // Consider all the loop nests, in outer-to-inner order (thus hoisting expressions outside the largest loop in which
5724 // they are invariant.)
5725 LoopHoistContext hoistCtxt(this);
5726 for (unsigned lnum = 0; lnum < optLoopCount; lnum++)
5728 if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED)
5733 if (optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP)
5735 optHoistLoopNest(lnum, &hoistCtxt);
5744 printf("Blocks/Trees after optHoistLoopCode() modified flowgraph\n");
5745 fgDispBasicBlocks(true);
5749 // Make sure that the predecessor lists are accurate
5750 fgDebugCheckBBlist();
5755 // Test Data stuff..
5756 // If we have no test data, early out.
5757 if (m_nodeTestData == nullptr)
5761 NodeToTestDataMap* testData = GetNodeTestData();
5762 for (NodeToTestDataMap::KeyIterator ki = testData->Begin(); !ki.Equal(testData->End()); ++ki)
5764 TestLabelAndNum tlAndN;
5765 GenTreePtr node = ki.Get();
5766 bool b = testData->Lookup(node, &tlAndN);
5768 if (tlAndN.m_tl != TL_LoopHoist)
5772 // Otherwise, it is a loop hoist annotation.
5773 assert(tlAndN.m_num < 100); // >= 100 indicates nested static field address, should already have been moved.
5774 if (tlAndN.m_num >= 0)
5778 printf(" was declared 'must hoist', but has not been hoisted.\n");
5785 void Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt)
5787 // Do this loop, then recursively do all nested loops.
5788 CLANG_FORMAT_COMMENT_ANCHOR;
5790 #if LOOP_HOIST_STATS
5792 m_curLoopHasHoistedExpression = false;
5793 m_loopsConsidered++;
5794 #endif // LOOP_HOIST_STATS
5796 optHoistThisLoop(lnum, hoistCtxt);
5798 VNSet* hoistedInCurLoop = hoistCtxt->ExtractHoistedInCurLoop();
5800 if (optLoopTable[lnum].lpChild != BasicBlock::NOT_IN_LOOP)
5802 // Add the ones hoisted in "lnum" to "hoistedInParents" for any nested loops.
5803 // TODO-Cleanup: we should have a set abstraction for loops.
5804 if (hoistedInCurLoop != nullptr)
5806 for (VNSet::KeyIterator keys = hoistedInCurLoop->Begin(); !keys.Equal(hoistedInCurLoop->End()); ++keys)
5810 assert(!hoistCtxt->m_hoistedInParentLoops.Lookup(keys.Get(), &b));
5812 hoistCtxt->m_hoistedInParentLoops.Set(keys.Get(), true);
5816 for (unsigned child = optLoopTable[lnum].lpChild; child != BasicBlock::NOT_IN_LOOP;
5817 child = optLoopTable[child].lpSibling)
5819 optHoistLoopNest(child, hoistCtxt);
5823 // TODO-Cleanup: we should have a set abstraction for loops.
5824 if (hoistedInCurLoop != nullptr)
5826 for (VNSet::KeyIterator keys = hoistedInCurLoop->Begin(); !keys.Equal(hoistedInCurLoop->End()); ++keys)
5828 // Note that we asserted when we added these that they hadn't been members, so removing is appropriate.
5829 hoistCtxt->m_hoistedInParentLoops.Remove(keys.Get());
5835 void Compiler::optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt)
5837 LoopDsc* pLoopDsc = &optLoopTable[lnum];
5839 /* If loop was removed continue */
5841 if (pLoopDsc->lpFlags & LPFLG_REMOVED)
5846 /* Get the head and tail of the loop */
5848 BasicBlock* head = pLoopDsc->lpHead;
5849 BasicBlock* tail = pLoopDsc->lpBottom;
5850 BasicBlock* lbeg = pLoopDsc->lpEntry;
5853 // We must have a do-while loop
5854 if ((pLoopDsc->lpFlags & LPFLG_DO_WHILE) == 0)
5859 // The loop-head must dominate the loop-entry.
5860 // TODO-CQ: Couldn't we make this true if it's not?
5861 if (!fgDominate(head, lbeg))
5866 // if lbeg is the start of a new try block then we won't be able to hoist
5867 if (!BasicBlock::sameTryRegion(head, lbeg))
5872 // We don't bother hoisting when inside of a catch block
5873 if ((lbeg->bbCatchTyp != BBCT_NONE) && (lbeg->bbCatchTyp != BBCT_FINALLY))
5878 pLoopDsc->lpFlags |= LPFLG_HOISTABLE;
5880 unsigned begn = lbeg->bbNum;
5881 unsigned endn = tail->bbNum;
5883 // Ensure the per-loop sets/tables are empty.
5884 hoistCtxt->m_curLoopVnInvariantCache.RemoveAll();
5889 printf("optHoistLoopCode for loop L%02u <BB%02u..BB%02u>:\n", lnum, begn, endn);
5890 printf(" Loop body %s a call\n", pLoopDsc->lpContainsCall ? "contains" : "does not contain");
5894 VARSET_TP VARSET_INIT_NOCOPY(loopVars, VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, pLoopDsc->lpVarUseDef));
5896 pLoopDsc->lpVarInOutCount = VarSetOps::Count(this, pLoopDsc->lpVarInOut);
5897 pLoopDsc->lpLoopVarCount = VarSetOps::Count(this, loopVars);
5898 pLoopDsc->lpHoistedExprCount = 0;
5900 #ifndef _TARGET_64BIT_
5901 unsigned longVarsCount = VarSetOps::Count(this, lvaLongVars);
5903 if (longVarsCount > 0)
5905 // Since 64-bit variables take up two registers on 32-bit targets, we increase
5906 // the Counts such that each TYP_LONG variable counts twice.
5908 VARSET_TP VARSET_INIT_NOCOPY(loopLongVars, VarSetOps::Intersection(this, loopVars, lvaLongVars));
5909 VARSET_TP VARSET_INIT_NOCOPY(inOutLongVars, VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, lvaLongVars));
5914 printf("\n LONGVARS(%d)=", VarSetOps::Count(this, lvaLongVars));
5915 lvaDispVarSet(lvaLongVars);
5918 pLoopDsc->lpLoopVarCount += VarSetOps::Count(this, loopLongVars);
5919 pLoopDsc->lpVarInOutCount += VarSetOps::Count(this, inOutLongVars);
5921 #endif // !_TARGET_64BIT_
5926 printf("\n USEDEF (%d)=", VarSetOps::Count(this, pLoopDsc->lpVarUseDef));
5927 lvaDispVarSet(pLoopDsc->lpVarUseDef);
5929 printf("\n INOUT (%d)=", pLoopDsc->lpVarInOutCount);
5930 lvaDispVarSet(pLoopDsc->lpVarInOut);
5932 printf("\n LOOPVARS(%d)=", pLoopDsc->lpLoopVarCount);
5933 lvaDispVarSet(loopVars);
5938 unsigned floatVarsCount = VarSetOps::Count(this, lvaFloatVars);
5940 if (floatVarsCount > 0)
5942 VARSET_TP VARSET_INIT_NOCOPY(loopFPVars, VarSetOps::Intersection(this, loopVars, lvaFloatVars));
5943 VARSET_TP VARSET_INIT_NOCOPY(inOutFPVars, VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, lvaFloatVars));
5945 pLoopDsc->lpLoopVarFPCount = VarSetOps::Count(this, loopFPVars);
5946 pLoopDsc->lpVarInOutFPCount = VarSetOps::Count(this, inOutFPVars);
5947 pLoopDsc->lpHoistedFPExprCount = 0;
5949 pLoopDsc->lpLoopVarCount -= pLoopDsc->lpLoopVarFPCount;
5950 pLoopDsc->lpVarInOutCount -= pLoopDsc->lpVarInOutFPCount;
5955 printf(" INOUT-FP(%d)=", pLoopDsc->lpVarInOutFPCount);
5956 lvaDispVarSet(inOutFPVars);
5958 printf("\n LOOPV-FP(%d)=", pLoopDsc->lpLoopVarFPCount);
5959 lvaDispVarSet(loopFPVars);
5963 else // (floatVarsCount == 0)
5965 pLoopDsc->lpLoopVarFPCount = 0;
5966 pLoopDsc->lpVarInOutFPCount = 0;
5967 pLoopDsc->lpHoistedFPExprCount = 0;
5970 // Find the set of definitely-executed blocks.
5971 // Ideally, the definitely-executed blocks are the ones that post-dominate the entry block.
5972 // Until we have post-dominators, we'll special-case for single-exit blocks.
5973 ExpandArrayStack<BasicBlock*> defExec(getAllocatorLoopHoist());
5974 if (pLoopDsc->lpFlags & LPFLG_ONE_EXIT)
5976 assert(pLoopDsc->lpExit != nullptr);
5977 BasicBlock* cur = pLoopDsc->lpExit;
5978 // Push dominators, until we reach "entry" or exit the loop.
5979 while (cur != nullptr && pLoopDsc->lpContains(cur) && cur != pLoopDsc->lpEntry)
5984 // If we didn't reach the entry block, give up and *just* push the entry block.
5985 if (cur != pLoopDsc->lpEntry)
5989 defExec.Push(pLoopDsc->lpEntry);
5991 else // More than one exit
5993 // We'll assume that only the entry block is definitely executed.
5994 // We could in the future do better.
5995 defExec.Push(pLoopDsc->lpEntry);
5998 while (defExec.Size() > 0)
6000 // Consider in reverse order: dominator before dominatee.
6001 BasicBlock* blk = defExec.Pop();
6002 optHoistLoopExprsForBlock(blk, lnum, hoistCtxt);
6006 // Hoist any expressions in "blk" that are invariant in loop "lnum" outside of "blk" and into a PreHead for loop "lnum".
6007 void Compiler::optHoistLoopExprsForBlock(BasicBlock* blk, unsigned lnum, LoopHoistContext* hoistCtxt)
6009 LoopDsc* pLoopDsc = &optLoopTable[lnum];
6010 bool firstBlockAndBeforeSideEffect = (blk == pLoopDsc->lpEntry);
6011 unsigned blkWeight = blk->getBBWeight(this);
6016 printf(" optHoistLoopExprsForBlock BB%02u (weight=%6s) of loop L%02u <BB%02u..BB%02u>, firstBlock is %s\n",
6017 blk->bbNum, refCntWtd2str(blkWeight), lnum, pLoopDsc->lpFirst->bbNum, pLoopDsc->lpBottom->bbNum,
6018 firstBlockAndBeforeSideEffect ? "true" : "false");
6019 if (blkWeight < (BB_UNITY_WEIGHT / 10))
6021 printf(" block weight is too small to perform hoisting.\n");
6026 if (blkWeight < (BB_UNITY_WEIGHT / 10))
6028 // Block weight is too small to perform hoisting.
6032 for (GenTreeStmt* stmt = blk->FirstNonPhiDef(); stmt; stmt = stmt->gtNextStmt)
6034 GenTreePtr stmtTree = stmt->gtStmtExpr;
6036 (void)optHoistLoopExprsForTree(stmtTree, lnum, hoistCtxt, &firstBlockAndBeforeSideEffect, &hoistable);
6039 // we will try to hoist the top-level stmtTree
6040 optHoistCandidate(stmtTree, lnum, hoistCtxt);
6045 bool Compiler::optIsProfitableToHoistableTree(GenTreePtr tree, unsigned lnum)
6047 LoopDsc* pLoopDsc = &optLoopTable[lnum];
6049 bool loopContainsCall = pLoopDsc->lpContainsCall;
6052 int hoistedExprCount;
6056 if (varTypeIsFloating(tree->TypeGet()))
6058 hoistedExprCount = pLoopDsc->lpHoistedFPExprCount;
6059 loopVarCount = pLoopDsc->lpLoopVarFPCount;
6060 varInOutCount = pLoopDsc->lpVarInOutFPCount;
6062 availRegCount = CNT_CALLEE_SAVED_FLOAT;
6063 if (!loopContainsCall)
6065 availRegCount += CNT_CALLEE_TRASH_FLOAT - 1;
6068 // For ARM each double takes two FP registers
6069 // For now on ARM we won't track singles/doubles
6070 // and instead just assume that we always have doubles.
6077 hoistedExprCount = pLoopDsc->lpHoistedExprCount;
6078 loopVarCount = pLoopDsc->lpLoopVarCount;
6079 varInOutCount = pLoopDsc->lpVarInOutCount;
6081 availRegCount = CNT_CALLEE_SAVED - 1;
6082 if (!loopContainsCall)
6084 availRegCount += CNT_CALLEE_TRASH - 1;
6086 #ifndef _TARGET_64BIT_
6087 // For our 32-bit targets Long types take two registers.
6088 if (varTypeIsLong(tree->TypeGet()))
6090 availRegCount = (availRegCount + 1) / 2;
6095 // decrement the availRegCount by the count of expression that we have already hoisted.
6096 availRegCount -= hoistedExprCount;
6098 // the variables that are read/written inside the loop should
6099 // always be a subset of the InOut variables for the loop
6100 assert(loopVarCount <= varInOutCount);
6102 // When loopVarCount >= availRegCount we believe that all of the
6103 // available registers will get used to hold LclVars inside the loop.
6104 // This pessimistically assumes that each loopVar has a conflicting
6105 // lifetime with every other loopVar.
6106 // For this case we will hoist the expression only if is profitable
6107 // to place it in a stack home location (gtCostEx >= 2*IND_COST_EX)
6108 // as we believe it will be placed in the stack or one of the other
6109 // loopVars will be spilled into the stack
6111 if (loopVarCount >= availRegCount)
6113 // Don't hoist expressions that are not heavy: tree->gtCostEx < (2*IND_COST_EX)
6114 if (tree->gtCostEx < (2 * IND_COST_EX))
6120 // When varInOutCount < availRegCount we are know that there are
6121 // some available register(s) when we enter the loop body.
6122 // When varInOutCount == availRegCount there often will be a register
6123 // available when we enter the loop body, since a loop often defines a
6124 // LclVar on exit or there is often at least one LclVar that is worth
6125 // spilling to the stack to make way for this hoisted expression.
6126 // So we are willing hoist an expression with gtCostEx == MIN_CSE_COST
6128 if (varInOutCount > availRegCount)
6130 // Don't hoist expressions that barely meet CSE cost requirements: tree->gtCostEx == MIN_CSE_COST
6131 if (tree->gtCostEx <= MIN_CSE_COST + 1)
6141 // This function returns true if 'tree' is a loop invariant expression.
6142 // It also sets '*pHoistable' to true if 'tree' can be hoisted into a loop PreHeader block
6144 bool Compiler::optHoistLoopExprsForTree(
6145 GenTreePtr tree, unsigned lnum, LoopHoistContext* hoistCtxt, bool* pFirstBlockAndBeforeSideEffect, bool* pHoistable)
6147 // First do the children.
6148 // We must keep track of whether each child node was hoistable or not
6150 unsigned nChildren = tree->NumChildren();
6151 bool childrenHoistable[GenTree::MAX_CHILDREN];
6153 // Initialize the array elements for childrenHoistable[] to false
6154 for (unsigned i = 0; i < nChildren; i++)
6156 childrenHoistable[i] = false;
6159 bool treeIsInvariant = true;
6160 for (unsigned childNum = 0; childNum < nChildren; childNum++)
6162 if (!optHoistLoopExprsForTree(tree->GetChild(childNum), lnum, hoistCtxt, pFirstBlockAndBeforeSideEffect,
6163 &childrenHoistable[childNum]))
6165 treeIsInvariant = false;
6169 // If all the children of "tree" are hoistable, then "tree" itself can be hoisted
6171 bool treeIsHoistable = treeIsInvariant;
6173 // But we must see if anything else prevents "tree" from being hoisted.
6175 if (treeIsInvariant)
6177 // Tree must be a suitable CSE candidate for us to be able to hoist it.
6178 treeIsHoistable = optIsCSEcandidate(tree);
6180 // If it's a call, it must be a helper call, and be pure.
6181 // Further, if it may run a cctor, it must be labeled as "Hoistable"
6182 // (meaning it won't run a cctor because the class is not precise-init).
6183 if (treeIsHoistable && tree->OperGet() == GT_CALL)
6185 GenTreeCall* call = tree->AsCall();
6186 if (call->gtCallType != CT_HELPER)
6188 treeIsHoistable = false;
6192 CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
6193 if (!s_helperCallProperties.IsPure(helpFunc))
6195 treeIsHoistable = false;
6197 else if (s_helperCallProperties.MayRunCctor(helpFunc) && (call->gtFlags & GTF_CALL_HOISTABLE) == 0)
6199 treeIsHoistable = false;
6204 if (treeIsHoistable)
6206 if (!(*pFirstBlockAndBeforeSideEffect))
6208 // For now, we give up on an expression that might raise an exception if it is after the
6209 // first possible global side effect (and we assume we're after that if we're not in the first block).
6210 // TODO-CQ: this is when we might do loop cloning.
6212 if ((tree->gtFlags & GTF_EXCEPT) != 0)
6214 treeIsHoistable = false;
6217 // Currently we must give up on reads from static variables (even if we are in the first block).
6219 if (tree->OperGet() == GT_CLS_VAR)
6221 // TODO-CQ: test that fails if we hoist GT_CLS_VAR: JIT\Directed\Languages\ComponentPascal\pi_r.exe
6223 treeIsHoistable = false;
6227 // Is the value of the whole tree loop invariant?
6229 optVNIsLoopInvariant(tree->gtVNPair.GetLiberal(), lnum, &hoistCtxt->m_curLoopVnInvariantCache);
6231 // Is the value of the whole tree loop invariant?
6232 if (!treeIsInvariant)
6234 treeIsHoistable = false;
6238 // Check if we need to set '*pFirstBlockAndBeforeSideEffect' to false.
6239 // If we encounter a tree with a call in it
6240 // or if we see an assignment to global we set it to false.
6242 // If we are already set to false then we can skip these checks
6244 if (*pFirstBlockAndBeforeSideEffect)
6246 // For this purpose, we only care about memory side effects. We assume that expressions will
6247 // be hoisted so that they are evaluated in the same order as they would have been in the loop,
6248 // and therefore throw exceptions in the same order. (So we don't use GTF_GLOBALLY_VISIBLE_SIDE_EFFECTS
6249 // here, since that includes exceptions.)
6252 // If it's a call, it must be a helper call that does not mutate the heap.
6253 // Further, if it may run a cctor, it must be labeled as "Hoistable"
6254 // (meaning it won't run a cctor because the class is not precise-init).
6255 GenTreeCall* call = tree->AsCall();
6256 if (call->gtCallType != CT_HELPER)
6258 *pFirstBlockAndBeforeSideEffect = false;
6262 CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
6263 if (s_helperCallProperties.MutatesHeap(helpFunc))
6265 *pFirstBlockAndBeforeSideEffect = false;
6267 else if (s_helperCallProperties.MayRunCctor(helpFunc) && (call->gtFlags & GTF_CALL_HOISTABLE) == 0)
6269 *pFirstBlockAndBeforeSideEffect = false;
6273 else if (tree->OperIsAssignment())
6275 // If the LHS of the assignment has a global reference, then assume it's a global side effect.
6276 GenTreePtr lhs = tree->gtOp.gtOp1;
6277 if (lhs->gtFlags & GTF_GLOB_REF)
6279 *pFirstBlockAndBeforeSideEffect = false;
6282 else if (tree->OperIsCopyBlkOp())
6284 GenTreePtr args = tree->gtOp.gtOp1;
6285 assert(args->OperGet() == GT_LIST);
6286 if (args->gtOp.gtOp1->gtFlags & GTF_GLOB_REF)
6288 *pFirstBlockAndBeforeSideEffect = false;
6293 // If this 'tree' is hoistable then we return and the caller will
6294 // decide to hoist it as part of larger hoistable expression.
6296 if (!treeIsHoistable)
6298 // We are not hoistable so we will now hoist any hoistable children.
6300 for (unsigned childNum = 0; childNum < nChildren; childNum++)
6302 if (childrenHoistable[childNum])
6304 // We can't hoist the LHS of an assignment, isn't a real use.
6305 if (childNum == 0 && (tree->OperIsAssignment()))
6310 GenTreePtr child = tree->GetChild(childNum);
6312 // We try to hoist this 'child' tree
6313 optHoistCandidate(child, lnum, hoistCtxt);
6318 *pHoistable = treeIsHoistable;
6319 return treeIsInvariant;
6322 void Compiler::optHoistCandidate(GenTreePtr tree, unsigned lnum, LoopHoistContext* hoistCtxt)
6324 if (lnum == BasicBlock::NOT_IN_LOOP)
6326 // The hoisted expression isn't valid at any loop head so don't hoist this expression.
6330 // The outer loop also must be suitable for hoisting...
6331 if ((optLoopTable[lnum].lpFlags & LPFLG_HOISTABLE) == 0)
6336 // If the hoisted expression isn't valid at this loop head then break
6337 if (!optTreeIsValidAtLoopHead(tree, lnum))
6342 // It must pass the hoistable profitablity tests for this loop level
6343 if (!optIsProfitableToHoistableTree(tree, lnum))
6349 if (hoistCtxt->m_hoistedInParentLoops.Lookup(tree->gtVNPair.GetLiberal(), &b))
6351 // already hoisted in a parent loop, so don't hoist this expression.
6355 if (hoistCtxt->GetHoistedInCurLoop(this)->Lookup(tree->gtVNPair.GetLiberal(), &b))
6357 // already hoisted this expression in the current loop, so don't hoist this expression.
6361 // Expression can be hoisted
6362 optPerformHoistExpr(tree, lnum);
6364 // Increment lpHoistedExprCount or lpHoistedFPExprCount
6365 if (!varTypeIsFloating(tree->TypeGet()))
6367 optLoopTable[lnum].lpHoistedExprCount++;
6368 #ifndef _TARGET_64BIT_
6369 // For our 32-bit targets Long types take two registers.
6370 if (varTypeIsLong(tree->TypeGet()))
6372 optLoopTable[lnum].lpHoistedExprCount++;
6376 else // Floating point expr hoisted
6378 optLoopTable[lnum].lpHoistedFPExprCount++;
6381 // Record the hoisted expression in hoistCtxt
6382 hoistCtxt->GetHoistedInCurLoop(this)->Set(tree->gtVNPair.GetLiberal(), true);
6385 bool Compiler::optVNIsLoopInvariant(ValueNum vn, unsigned lnum, VNToBoolMap* loopVnInvariantCache)
6387 // If it is not a VN, is not loop-invariant.
6388 if (vn == ValueNumStore::NoVN)
6393 // We'll always short-circuit constants.
6394 if (vnStore->IsVNConstant(vn) || vn == vnStore->VNForVoid())
6399 // If we've done this query previously, don't repeat.
6400 bool previousRes = false;
6401 if (loopVnInvariantCache->Lookup(vn, &previousRes))
6408 if (vnStore->GetVNFunc(vn, &funcApp))
6410 if (funcApp.m_func == VNF_PhiDef)
6412 // First, make sure it's a "proper" phi -- the definition is a Phi application.
6413 VNFuncApp phiDefValFuncApp;
6414 if (!vnStore->GetVNFunc(funcApp.m_args[2], &phiDefValFuncApp) || phiDefValFuncApp.m_func != VNF_Phi)
6416 // It's not *really* a definition, rather a pass-through of some other VN.
6417 // (This could occur, say if both sides of an if-then-else diamond made the
6418 // same assignment to a variable.)
6419 res = optVNIsLoopInvariant(funcApp.m_args[2], lnum, loopVnInvariantCache);
6423 // Is the definition within the loop? If so, is not loop-invariant.
6424 unsigned lclNum = funcApp.m_args[0];
6425 unsigned ssaNum = funcApp.m_args[1];
6426 LclSsaVarDsc* ssaDef = lvaTable[lclNum].GetPerSsaData(ssaNum);
6427 res = !optLoopContains(lnum, ssaDef->m_defLoc.m_blk->bbNatLoopNum);
6430 else if (funcApp.m_func == VNF_PhiHeapDef)
6432 BasicBlock* defnBlk = reinterpret_cast<BasicBlock*>(vnStore->ConstantValue<ssize_t>(funcApp.m_args[0]));
6433 res = !optLoopContains(lnum, defnBlk->bbNatLoopNum);
6437 for (unsigned i = 0; i < funcApp.m_arity; i++)
6439 // TODO-CQ: We need to either make sure that *all* VN functions
6440 // always take VN args, or else have a list of arg positions to exempt, as implicitly
6442 if (!optVNIsLoopInvariant(funcApp.m_args[i], lnum, loopVnInvariantCache))
6452 // Non-function "new, unique" VN's may be annotated with the loop nest where
6453 // their definition occurs.
6454 BasicBlock::loopNumber vnLoopNum = vnStore->LoopOfVN(vn);
6456 if (vnLoopNum == MAX_LOOP_NUM)
6462 res = !optLoopContains(lnum, vnLoopNum);
6466 loopVnInvariantCache->Set(vn, res);
6470 bool Compiler::optTreeIsValidAtLoopHead(GenTreePtr tree, unsigned lnum)
6472 if (tree->OperIsLocal())
6474 GenTreeLclVarCommon* lclVar = tree->AsLclVarCommon();
6475 unsigned lclNum = lclVar->gtLclNum;
6477 // The lvlVar must be have an Ssa tracked lifetime
6478 if (fgExcludeFromSsa(lclNum))
6483 // If the loop does not contains the SSA def we can hoist it.
6484 if (!optLoopTable[lnum].lpContains(lvaTable[lclNum].GetPerSsaData(lclVar->GetSsaNum())->m_defLoc.m_blk))
6489 else if (tree->OperIsConst())
6493 else // If every one of the children nodes are valid at this Loop's Head.
6495 unsigned nChildren = tree->NumChildren();
6496 for (unsigned childNum = 0; childNum < nChildren; childNum++)
6498 if (!optTreeIsValidAtLoopHead(tree->GetChild(childNum), lnum))
6508 /*****************************************************************************
6510 * Creates a pre-header block for the given loop - a preheader is a BBJ_NONE
6511 * header. The pre-header will replace the current lpHead in the loop table.
6512 * The loop has to be a do-while loop. Thus, all blocks dominated by lpHead
6513 * will also be dominated by the loop-top, lpHead->bbNext.
6517 void Compiler::fgCreateLoopPreHeader(unsigned lnum)
6519 LoopDsc* pLoopDsc = &optLoopTable[lnum];
6521 /* This loop has to be a "do-while" loop */
6523 assert(pLoopDsc->lpFlags & LPFLG_DO_WHILE);
6525 /* Have we already created a loop-preheader block? */
6527 if (pLoopDsc->lpFlags & LPFLG_HAS_PREHEAD)
6532 BasicBlock* head = pLoopDsc->lpHead;
6533 BasicBlock* top = pLoopDsc->lpTop;
6534 BasicBlock* entry = pLoopDsc->lpEntry;
6536 // if 'entry' and 'head' are in different try regions then we won't be able to hoist
6537 if (!BasicBlock::sameTryRegion(head, entry))
6542 // Ensure that lpHead always dominates lpEntry
6544 noway_assert(fgDominate(head, entry));
6546 /* Get hold of the first block of the loop body */
6548 assert(top == entry);
6550 /* Allocate a new basic block */
6552 BasicBlock* preHead = bbNewBasicBlock(BBJ_NONE);
6553 preHead->bbFlags |= BBF_INTERNAL | BBF_LOOP_PREHEADER;
6555 // Must set IL code offset
6556 preHead->bbCodeOffs = top->bbCodeOffs;
6558 // Set the default value of the preHead weight in case we don't have
6559 // valid profile data and since this blocks weight is just an estimate
6560 // we clear any BBF_PROF_WEIGHT flag that we may have picked up from head.
6562 preHead->inheritWeight(head);
6563 preHead->bbFlags &= ~BBF_PROF_WEIGHT;
6568 printf("\nCreated PreHeader (BB%02u) for loop L%02u (BB%02u - BB%02u), with weight = %s\n", preHead->bbNum,
6569 lnum, top->bbNum, pLoopDsc->lpBottom->bbNum, refCntWtd2str(preHead->getBBWeight(this)));
6573 // The preheader block is part of the containing loop (if any).
6574 preHead->bbNatLoopNum = pLoopDsc->lpParent;
6576 if (fgIsUsingProfileWeights() && (head->bbJumpKind == BBJ_COND))
6578 if ((head->bbWeight == 0) || (head->bbNext->bbWeight == 0))
6580 preHead->bbWeight = 0;
6581 preHead->bbFlags |= BBF_RUN_RARELY;
6585 bool allValidProfileWeights = ((head->bbFlags & BBF_PROF_WEIGHT) != 0) &&
6586 ((head->bbJumpDest->bbFlags & BBF_PROF_WEIGHT) != 0) &&
6587 ((head->bbNext->bbFlags & BBF_PROF_WEIGHT) != 0);
6589 if (allValidProfileWeights)
6591 double loopEnteredCount;
6592 double loopSkippedCount;
6594 if (fgHaveValidEdgeWeights)
6596 flowList* edgeToNext = fgGetPredForBlock(head->bbNext, head);
6597 flowList* edgeToJump = fgGetPredForBlock(head->bbJumpDest, head);
6598 noway_assert(edgeToNext != nullptr);
6599 noway_assert(edgeToJump != nullptr);
6602 ((double)edgeToNext->flEdgeWeightMin + (double)edgeToNext->flEdgeWeightMax) / 2.0;
6604 ((double)edgeToJump->flEdgeWeightMin + (double)edgeToJump->flEdgeWeightMax) / 2.0;
6608 loopEnteredCount = (double)head->bbNext->bbWeight;
6609 loopSkippedCount = (double)head->bbJumpDest->bbWeight;
6612 double loopTakenRatio = loopEnteredCount / (loopEnteredCount + loopSkippedCount);
6614 // Calculate a good approximation of the preHead's block weight
6615 unsigned preHeadWeight = (unsigned)(((double)head->bbWeight * loopTakenRatio) + 0.5);
6616 preHead->setBBWeight(max(preHeadWeight, 1));
6617 noway_assert(!preHead->isRunRarely());
6622 // Link in the preHead block.
6623 fgInsertBBbefore(top, preHead);
6625 // Ideally we would re-run SSA and VN if we optimized by doing loop hoisting.
6626 // However, that is too expensive at this point. Instead, we update the phi
6627 // node block references, if we created pre-header block due to hoisting.
6628 // This is sufficient because any definition participating in SSA that flowed
6629 // into the phi via the loop header block will now flow through the preheader
6630 // block from the header block.
6632 for (GenTreePtr stmt = top->bbTreeList; stmt; stmt = stmt->gtNext)
6634 GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
6635 if (tree->OperGet() != GT_ASG)
6639 GenTreePtr op2 = tree->gtGetOp2();
6640 if (op2->OperGet() != GT_PHI)
6644 GenTreeArgList* args = op2->gtGetOp1()->AsArgList();
6645 while (args != nullptr)
6647 GenTreePhiArg* phiArg = args->Current()->AsPhiArg();
6648 if (phiArg->gtPredBB == head)
6650 phiArg->gtPredBB = preHead;
6652 args = args->Rest();
6656 // The handler can't begin at the top of the loop. If it did, it would be incorrect
6657 // to set the handler index on the pre header without updating the exception table.
6658 noway_assert(!top->hasHndIndex() || fgFirstBlockOfHandler(top) != top);
6660 // Update the EH table to make the hoisted block part of the loop's EH block.
6661 fgExtendEHRegionBefore(top);
6663 // TODO-CQ: set dominators for this block, to allow loop optimizations requiring them
6664 // (e.g: hoisting expression in a loop with the same 'head' as this one)
6666 /* Update the loop entry */
6668 pLoopDsc->lpHead = preHead;
6669 pLoopDsc->lpFlags |= LPFLG_HAS_PREHEAD;
6671 /* The new block becomes the 'head' of the loop - update bbRefs and bbPreds
6672 All predecessors of 'beg', (which is the entry in the loop)
6673 now have to jump to 'preHead', unless they are dominated by 'head' */
6675 preHead->bbRefs = 0;
6676 fgAddRefPred(preHead, head);
6677 bool checkNestedLoops = false;
6679 for (flowList* pred = top->bbPreds; pred; pred = pred->flNext)
6681 BasicBlock* predBlock = pred->flBlock;
6683 if (fgDominate(top, predBlock))
6685 // note: if 'top' dominates predBlock, 'head' dominates predBlock too
6686 // (we know that 'head' dominates 'top'), but using 'top' instead of
6687 // 'head' in the test allows us to not enter here if 'predBlock == head'
6689 if (predBlock != pLoopDsc->lpBottom)
6691 noway_assert(predBlock != head);
6692 checkNestedLoops = true;
6697 switch (predBlock->bbJumpKind)
6700 noway_assert(predBlock == head);
6704 if (predBlock == head)
6706 noway_assert(predBlock->bbJumpDest != top);
6712 case BBJ_EHCATCHRET:
6713 noway_assert(predBlock->bbJumpDest == top);
6714 predBlock->bbJumpDest = preHead;
6715 preHead->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
6717 if (predBlock == head)
6719 // This is essentially the same case of predBlock being a BBJ_NONE. We may not be
6720 // able to make this a BBJ_NONE if it's an internal block (for example, a leave).
6721 // Just break, pred will be removed after switch.
6725 fgRemoveRefPred(top, predBlock);
6726 fgAddRefPred(preHead, predBlock);
6732 jumpCnt = predBlock->bbJumpSwt->bbsCount;
6733 BasicBlock** jumpTab;
6734 jumpTab = predBlock->bbJumpSwt->bbsDstTab;
6739 if ((*jumpTab) == top)
6741 (*jumpTab) = preHead;
6743 fgRemoveRefPred(top, predBlock);
6744 fgAddRefPred(preHead, predBlock);
6745 preHead->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
6747 } while (++jumpTab, --jumpCnt);
6750 noway_assert(!"Unexpected bbJumpKind");
6755 noway_assert(!fgGetPredForBlock(top, preHead));
6756 fgRemoveRefPred(top, head);
6757 fgAddRefPred(top, preHead);
6760 If we found at least one back-edge in the flowgraph pointing to the top/entry of the loop
6761 (other than the back-edge of the loop we are considering) then we likely have nested
6762 do-while loops with the same entry block and inserting the preheader block changes the head
6763 of all the nested loops. Now we will update this piece of information in the loop table, and
6764 mark all nested loops as having a preheader (the preheader block can be shared among all nested
6765 do-while loops with the same entry block).
6767 if (checkNestedLoops)
6769 for (unsigned l = 0; l < optLoopCount; l++)
6771 if (optLoopTable[l].lpHead == head)
6773 noway_assert(l != lnum); // pLoopDsc->lpHead was already changed from 'head' to 'preHead'
6774 noway_assert(optLoopTable[l].lpEntry == top);
6775 optUpdateLoopHead(l, optLoopTable[l].lpHead, preHead);
6776 optLoopTable[l].lpFlags |= LPFLG_HAS_PREHEAD;
6780 printf("Same PreHeader (BB%02u) can be used for loop L%02u (BB%02u - BB%02u)\n\n", preHead->bbNum,
6781 l, top->bbNum, optLoopTable[l].lpBottom->bbNum);
// Returns true if 'blk' is the entry block of any loop still present in the
// loop table. Walks from the block's innermost natural loop outward through
// the lpParent chain; on success the matching loop number is returned through
// '*pLnum'.
bool Compiler::optBlockIsLoopEntry(BasicBlock* blk, unsigned* pLnum)
for (unsigned lnum = blk->bbNatLoopNum; lnum != BasicBlock::NOT_IN_LOOP; lnum = optLoopTable[lnum].lpParent)
// Skip loops that have been removed from the table.
if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED)
// Found an enclosing loop whose entry block is 'blk'.
if (optLoopTable[lnum].lpEntry == blk)
// Computes side-effect summaries for every loop in the loop table: the
// variable in/out and use/def sets, the lpContainsCall flag, and (via
// optComputeLoopNestSideEffects) modified-field/element-type and heap-havoc
// information. Also builds lvaFloatVars from the tracked locals and, on
// 32-bit targets, lvaLongVars.
void Compiler::optComputeLoopSideEffects()
// First pass: reset every loop's summary sets/flags.
for (lnum = 0; lnum < optLoopCount; lnum++)
VarSetOps::AssignNoCopy(this, optLoopTable[lnum].lpVarInOut, VarSetOps::MakeEmpty(this));
VarSetOps::AssignNoCopy(this, optLoopTable[lnum].lpVarUseDef, VarSetOps::MakeEmpty(this));
optLoopTable[lnum].lpContainsCall = false;
// Second pass: walk only live, outermost loops; the nest walk covers all
// loops nested within each outermost one.
for (lnum = 0; lnum < optLoopCount; lnum++)
if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED)
if (optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP)
{ // Is outermost...
optComputeLoopNestSideEffects(lnum);
// Collect the set of tracked floating-point locals (used elsewhere by the
// loop optimizer), and on 32-bit targets the tracked TYP_LONG locals.
VarSetOps::AssignNoCopy(this, lvaFloatVars, VarSetOps::MakeEmpty(this));
#ifndef _TARGET_64BIT_
VarSetOps::AssignNoCopy(this, lvaLongVars, VarSetOps::MakeEmpty(this));
for (unsigned i = 0; i < lvaCount; i++)
LclVarDsc* varDsc = &lvaTable[i];
if (varDsc->lvTracked)
if (varTypeIsFloating(varDsc->lvType))
VarSetOps::AddElemD(this, lvaFloatVars, varDsc->lvVarIndex);
#ifndef _TARGET_64BIT_
else if (varTypeIsLong(varDsc->lvType))
VarSetOps::AddElemD(this, lvaLongVars, varDsc->lvVarIndex);
6853 void Compiler::optComputeLoopNestSideEffects(unsigned lnum)
6855 assert(optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP); // Requires: lnum is outermost.
6856 BasicBlock* botNext = optLoopTable[lnum].lpBottom->bbNext;
6857 for (BasicBlock* bbInLoop = optLoopTable[lnum].lpFirst; bbInLoop != botNext; bbInLoop = bbInLoop->bbNext)
6859 optComputeLoopSideEffectsOfBlock(bbInLoop);
// Computes the side effects of the single block 'blk' and records them on the
// block's innermost loop and all of its ancestor loops: variable liveness,
// call containment, modified fields / array element types, and heap havoc.
void Compiler::optComputeLoopSideEffectsOfBlock(BasicBlock* blk)
unsigned mostNestedLoop = blk->bbNatLoopNum;
assert(mostNestedLoop != BasicBlock::NOT_IN_LOOP);
// Fold this block's liveness sets into every containing loop.
AddVariableLivenessAllContainingLoops(mostNestedLoop, blk);
bool heapHavoc = false; // True ==> there's a call or a memory store that has arbitrary heap effects.
// Now iterate over the remaining statements, and their trees.
for (GenTreePtr stmts = blk->FirstNonPhiDef(); (stmts != nullptr); stmts = stmts->gtNext)
for (GenTreePtr tree = stmts->gtStmt.gtStmtList; (tree != nullptr); tree = tree->gtNext)
genTreeOps oper = tree->OperGet();
// Even after we set heapHavoc we still may want to know if a loop contains calls
if (oper == GT_CALL)
// Record that this loop contains a call
AddContainsCallAllContainingLoops(mostNestedLoop);
// If we just set lpContainsCall or it was previously set
if (optLoopTable[mostNestedLoop].lpContainsCall)
// We can early exit after both heapHavoc and lpContainsCall are both set to true.
// We are just looking for GT_CALL nodes after heapHavoc was set.
// otherwise heapHavoc is not set
// This body is a distillation of the heap-side effect code of value numbering.
// We also do a very limited analysis if byref PtrTo values, to cover some cases
// that the compiler creates.
if (GenTree::OperIsAssignment(oper))
GenTreePtr lhs = tree->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true);
if (lhs->OperGet() == GT_IND)
GenTreePtr arg = lhs->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true);
FieldSeqNode* fldSeqArrElem = nullptr;
// Volatile indirection stores are treated conservatively.
if ((tree->gtFlags & GTF_IND_VOLATILE) != 0)
if (arg->TypeGet() == TYP_BYREF && arg->OperGet() == GT_LCL_VAR)
// If it's a local byref for which we recorded a value number, use that...
GenTreeLclVar* argLcl = arg->AsLclVar();
if (!fgExcludeFromSsa(argLcl->GetLclNum()))
lvaTable[argLcl->GetLclNum()].GetPerSsaData(argLcl->GetSsaNum())->m_vnPair.GetLiberal();
if (argVN != ValueNumStore::NoVN && vnStore->GetVNFunc(argVN, &funcApp) &&
funcApp.m_func == VNF_PtrToArrElem)
assert(vnStore->IsVNHandle(funcApp.m_args[0]));
CORINFO_CLASS_HANDLE elemType =
CORINFO_CLASS_HANDLE(vnStore->ConstantValue<size_t>(funcApp.m_args[0]));
// Store through a pointer-to-array-element: only that element type is modified.
AddModifiedElemTypeAllContainingLoops(mostNestedLoop, elemType);
// Don't set heapHavoc below.
// Is the LHS an array index expression?
else if (lhs->ParseArrayElemForm(this, &arrInfo, &fldSeqArrElem))
// We actually ignore "fldSeq" -- any modification to an S[], at any
// field of "S", will lose all information about the array type.
CORINFO_CLASS_HANDLE elemTypeEq = EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType);
AddModifiedElemTypeAllContainingLoops(mostNestedLoop, elemTypeEq);
// We are only interested in IsFieldAddr()'s fldSeq out parameter.
GenTreePtr obj = nullptr; // unused
GenTreePtr staticOffset = nullptr; // unused
FieldSeqNode* fldSeq = nullptr;
if (arg->IsFieldAddr(this, &obj, &staticOffset, &fldSeq) &&
(fldSeq != FieldSeqStore::NotAField()))
// Get the first (object) field from field seq. Heap[field] will yield the "field map".
assert(fldSeq != nullptr);
if (fldSeq->IsFirstElemFieldSeq())
fldSeq = fldSeq->m_next;
assert(fldSeq != nullptr);
// Store to a known field: record only that field as modified.
AddModifiedFieldAllContainingLoops(mostNestedLoop, fldSeq->m_fieldHnd);
else if (lhs->OperIsBlk())
GenTreeLclVarCommon* lclVarTree;
if (!tree->DefinesLocal(this, &lclVarTree, &isEntire))
// For now, assume arbitrary side effects on the heap...
else if (lhs->OperGet() == GT_CLS_VAR)
// Static field store: record the class variable's field handle.
AddModifiedFieldAllContainingLoops(mostNestedLoop, lhs->gtClsVar.gtClsVarHnd);
// Otherwise, must be local lhs form. I should assert that.
else if (lhs->OperGet() == GT_LCL_VAR)
GenTreeLclVar* lhsLcl = lhs->AsLclVar();
GenTreePtr rhs = tree->gtOp.gtOp2;
ValueNum rhsVN = rhs->gtVNPair.GetLiberal();
// If we gave the RHS a value number, propagate it.
if (rhsVN != ValueNumStore::NoVN)
rhsVN = vnStore->VNNormVal(rhsVN);
if (!fgExcludeFromSsa(lhsLcl->GetLclNum()))
lvaTable[lhsLcl->GetLclNum()]
.GetPerSsaData(lhsLcl->GetSsaNum())
->m_vnPair.SetLiberal(rhsVN);
else // not GenTree::OperIsAssignment(oper)
tree->gtVNPair = tree->gtOp.gtOp2->gtVNPair;
// Is it an addr of a array index expression?
GenTreePtr addrArg = tree->gtOp.gtOp1;
if (addrArg->OperGet() == GT_IND)
// Is the LHS an array index expression?
if (addrArg->gtFlags & GTF_IND_ARR_INDEX)
bool b = GetArrayInfoMap()->Lookup(addrArg, &arrInfo);
CORINFO_CLASS_HANDLE elemType =
EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType);
// Label the address with a PtrToArrElem VN so later stores through it
// can be recognized (see the TYP_BYREF GT_LCL_VAR case above).
tree->gtVNPair.SetBoth(
vnStore->VNForFunc(TYP_BYREF, VNF_PtrToArrElem,
vnStore->VNForHandle(ssize_t(elemType), GTF_ICON_CLASS_HDL),
// The rest are dummy arguments.
vnStore->VNForNull(), vnStore->VNForNull(),
vnStore->VNForNull()));
// Interlocked operations mutate the heap arbitrarily.
case GT_LOCKADD: // Binop
case GT_XADD: // Binop
case GT_XCHG: // Binop
case GT_CMPXCHG: // Specialop
GenTreeCall* call = tree->AsCall();
// Record that this loop contains a call
AddContainsCallAllContainingLoops(mostNestedLoop);
if (call->gtCallType == CT_HELPER)
CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
if (s_helperCallProperties.MutatesHeap(helpFunc))
else if (s_helperCallProperties.MayRunCctor(helpFunc))
// If the call is labeled as "Hoistable", then we've checked the
// class that would be constructed, and it is not precise-init, so
// the cctor will not be run by this call. Otherwise, it might be,
// and might have arbitrary side effects.
if ((tree->gtFlags & GTF_CALL_HOISTABLE) == 0)
// All other gtOper node kinds, leave 'heapHavoc' unchanged (i.e. false)
// Record that all loops containing this block have heap havoc effects.
unsigned lnum = mostNestedLoop;
while (lnum != BasicBlock::NOT_IN_LOOP)
optLoopTable[lnum].lpLoopHasHeapHavoc = true;
lnum = optLoopTable[lnum].lpParent;
7109 // Marks the containsCall information to "lnum" and any parent loops.
7110 void Compiler::AddContainsCallAllContainingLoops(unsigned lnum)
7112 assert(0 <= lnum && lnum < optLoopCount);
7113 while (lnum != BasicBlock::NOT_IN_LOOP)
7115 optLoopTable[lnum].lpContainsCall = true;
7116 lnum = optLoopTable[lnum].lpParent;
7120 // Adds the variable liveness information for 'blk' to 'this' LoopDsc
7121 void Compiler::LoopDsc::AddVariableLiveness(Compiler* comp, BasicBlock* blk)
7123 VarSetOps::UnionD(comp, this->lpVarInOut, blk->bbLiveIn);
7124 VarSetOps::UnionD(comp, this->lpVarInOut, blk->bbLiveOut);
7126 VarSetOps::UnionD(comp, this->lpVarUseDef, blk->bbVarUse);
7127 VarSetOps::UnionD(comp, this->lpVarUseDef, blk->bbVarDef);
7130 // Adds the variable liveness information for 'blk' to "lnum" and any parent loops.
7131 void Compiler::AddVariableLivenessAllContainingLoops(unsigned lnum, BasicBlock* blk)
7133 assert(0 <= lnum && lnum < optLoopCount);
7134 while (lnum != BasicBlock::NOT_IN_LOOP)
7136 optLoopTable[lnum].AddVariableLiveness(this, blk);
7137 lnum = optLoopTable[lnum].lpParent;
7141 // Adds "fldHnd" to the set of modified fields of "lnum" and any parent loops.
7142 void Compiler::AddModifiedFieldAllContainingLoops(unsigned lnum, CORINFO_FIELD_HANDLE fldHnd)
7144 assert(0 <= lnum && lnum < optLoopCount);
7145 while (lnum != BasicBlock::NOT_IN_LOOP)
7147 optLoopTable[lnum].AddModifiedField(this, fldHnd);
7148 lnum = optLoopTable[lnum].lpParent;
7152 // Adds "elemType" to the set of modified array element types of "lnum" and any parent loops.
7153 void Compiler::AddModifiedElemTypeAllContainingLoops(unsigned lnum, CORINFO_CLASS_HANDLE elemClsHnd)
7155 assert(0 <= lnum && lnum < optLoopCount);
7156 while (lnum != BasicBlock::NOT_IN_LOOP)
7158 optLoopTable[lnum].AddModifiedElemType(this, elemClsHnd);
7159 lnum = optLoopTable[lnum].lpParent;
/*****************************************************************************
 *
 *  Helper passed to fgWalkTreePre() (see optRemoveTree) to decrement the
 *  LclVar usage counts of a tree that is being deleted.
 *  The 'keepList' is either a single tree or a list of trees that are formed
 *  by one or more GT_COMMA nodes. It is the kept side-effects as returned by
 *  the gtExtractSideEffList method; any node found in it is skipped entirely.
 */

Compiler::fgWalkResult Compiler::optRemoveTreeVisitor(GenTreePtr* pTree, fgWalkData* data)
GenTreePtr tree = *pTree;
Compiler* comp = data->compiler;
GenTreePtr keepList = (GenTreePtr)(data->pCallbackData);
// We may have a non-NULL side effect list that is being kept
GenTreePtr keptTree = keepList;
// Walk the comma-linked keep-list looking for the current tree.
while (keptTree->OperGet() == GT_COMMA)
assert(keptTree->OperKind() & GTK_SMPOP);
GenTreePtr op1 = keptTree->gtOp.gtOp1;
GenTreePtr op2 = keptTree->gtGetOp2();
// For the GT_COMMA case the op1 is part of the original CSE tree
// that is being kept because it contains some side-effect
// This tree and all of its sub trees are being kept.
return WALK_SKIP_SUBTREES;
// For the GT_COMMA case the op2 are the remaining side-effects of the original CSE tree
// which can again be another GT_COMMA or the final side-effect part
// Final (non-comma) entry of the keep-list: compare it against 'tree'.
if (tree == keptTree)
// This tree and all of its sub trees are being kept.
return WALK_SKIP_SUBTREES;
// This node is being removed from the graph of GenTreePtr
// Look for any local variable references
if (tree->gtOper == GT_LCL_VAR && comp->lvaLocalVarRefCounted)
/* This variable ref is going away, decrease its ref counts */
lclNum = tree->gtLclVarCommon.gtLclNum;
assert(lclNum < comp->lvaCount);
varDsc = comp->lvaTable + lclNum;
// make sure it's been initialized
assert(comp->compCurBB != nullptr);
assert(comp->compCurBB->bbWeight <= BB_MAX_WEIGHT);
/* Decrement its lvRefCnt and lvRefCntWtd */
// Use getBBWeight to determine the proper block weight.
// This impacts the block weights when we have IBC data.
varDsc->decRefCnts(comp->compCurBB->getBBWeight(comp), comp);
return WALK_CONTINUE;
7239 /*****************************************************************************
7241 * Routine called to decrement the LclVar ref counts when removing a tree
7242 * during the remove RangeCheck phase.
7243 * This method will decrement the refcounts for any LclVars used below 'deadTree',
7244 * unless the node is found in the 'keepList' (which are saved side effects)
7245 * The keepList is communicated using the walkData.pCallbackData field
7246 * Also the compCurBB must be set to the current BasicBlock which contains
7247 * 'deadTree' as we need to fetch the block weight when decrementing the ref counts.
7250 void Compiler::optRemoveTree(GenTreePtr deadTree, GenTreePtr keepList)
7252 // We communicate this value using the walkData.pCallbackData field
7254 fgWalkTreePre(&deadTree, optRemoveTreeVisitor, (void*)keepList);
/*****************************************************************************
 *
 *  Given an array index node (a GT_COMMA whose op1 is a bounds check), mark
 *  it as not needing a range check: the bounds check is replaced by its kept
 *  side effects (or a NOP), and local ref counts are updated accordingly.
 */

void Compiler::optRemoveRangeCheck(
    GenTreePtr tree, GenTreePtr stmt, bool updateCSEcounts, unsigned sideEffFlags, bool forceRemove)
noway_assert(!"can't remove range checks without REARRANGE_ADDS right now");
noway_assert(stmt->gtOper == GT_STMT);
noway_assert(tree->gtOper == GT_COMMA);
noway_assert(tree->gtOp.gtOp1->OperIsBoundsCheck());
noway_assert(forceRemove || optIsRangeCheckRemovable(tree->gtOp.gtOp1));
GenTreeBoundsChk* bndsChk = tree->gtOp.gtOp1->AsBoundsChk();
printf("Before optRemoveRangeCheck:\n");
GenTreePtr sideEffList = nullptr;
// Extract any side effects of the bounds check so they are preserved.
gtExtractSideEffList(tree->gtOp.gtOp1, &sideEffList, sideEffFlags);
// Decrement the ref counts for any LclVars that are being deleted
optRemoveTree(tree->gtOp.gtOp1, sideEffList);
// Just replace the bndsChk with a NOP as an operand to the GT_COMMA, if there are no side effects.
tree->gtOp.gtOp1 = (sideEffList != nullptr) ? sideEffList : gtNewNothingNode();
// TODO-CQ: We should also remove the GT_COMMA, but in any case we can no longer CSE the GT_COMMA.
tree->gtFlags |= GTF_DONT_CSE;
/* Recalculate the gtCostSz, etc... */
gtSetStmtInfo(stmt);
/* Re-thread the nodes if necessary */
if (fgStmtListThreaded)
printf("After optRemoveRangeCheck:\n");
/*****************************************************************************
 *  Return the scale in an array reference, given a pointer to the
 *  multiplication (GT_MUL or GT_LSH) node. The index expression is returned
 *  through 'pIndex'.
 */

ssize_t Compiler::optGetArrayRefScaleAndIndex(GenTreePtr mul, GenTreePtr* pIndex DEBUGARG(bool bRngChk))
assert(mul->gtOper == GT_MUL || mul->gtOper == GT_LSH);
assert(mul->gtOp.gtOp2->IsCnsIntOrI());
ssize_t scale = mul->gtOp.gtOp2->gtIntConCommon.IconValue();
// For a left shift, the scale is 1 << shift-amount.
if (mul->gtOper == GT_LSH)
scale = ((ssize_t)1) << scale;
GenTreePtr index = mul->gtOp.gtOp1;
if (index->gtOper == GT_MUL && index->gtOp.gtOp2->IsCnsIntOrI())
// case of two cascading multiplications for constant int (e.g. * 20 morphed to * 5 * 4):
// When index->gtOper is GT_MUL and index->gtOp.gtOp2->gtOper is GT_CNS_INT (i.e. * 5),
// we can bump up the scale from 4 to 5*4, and then change index to index->gtOp.gtOp1.
// Otherwise, we cannot optimize it. We will simply keep the original scale and index.
scale *= index->gtOp.gtOp2->gtIntConCommon.IconValue();
index = index->gtOp.gtOp1;
// With a range check in place the index must not be a comma (bounds check) tree.
assert(!bRngChk || index->gtOper != GT_COMMA);
/*****************************************************************************
 *  Find the last assignment to the local variable in the block. Return the
 *  RHS or NULL. If any local variable in the RHS has been killed in
 *  intervening code, return NULL. If the variable being searched for is killed
 *  in the intervening code, return NULL. 'pKilledInOut' accumulates the set of
 *  tracked locals assigned while scanning backward; '*pLhsRhsKilledAfterInit'
 *  is set when the searched local or the RHS is killed after the init.
 */

GenTreePtr Compiler::optFindLocalInit(BasicBlock* block,
                                      VARSET_TP* pKilledInOut,
                                      bool*      pLhsRhsKilledAfterInit)
assert(pKilledInOut);
assert(pLhsRhsKilledAfterInit);
*pLhsRhsKilledAfterInit = false;
unsigned LclNum = local->gtLclVarCommon.gtLclNum;
GenTreePtr list = block->bbTreeList;
if (list == nullptr)
GenTreePtr rhs = nullptr;
GenTreePtr stmt = list;
// Scan the block's statements backward, starting from the last one
// (bbTreeList's gtPrev is the last statement).
stmt = stmt->gtPrev;
if (stmt == nullptr)
GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
// If we encounter an assignment to a local variable,
if ((tree->OperKind() & GTK_ASGOP) && tree->gtOp.gtOp1->gtOper == GT_LCL_VAR)
// And the assigned variable equals the input local,
if (tree->gtOp.gtOp1->gtLclVarCommon.gtLclNum == LclNum)
// If the assignment is '=' and it is not a conditional, then return rhs.
if (tree->gtOper == GT_ASG && !(tree->gtFlags & GTF_COLON_COND))
rhs = tree->gtOp.gtOp2;
// If the assignment is 'op=' or a conditional equal, then the search ends here,
// as we found a kill to the input local.
*pLhsRhsKilledAfterInit = true;
assert(rhs == nullptr);
// Record any other tracked local assigned in between as killed.
LclVarDsc* varDsc = optIsTrackedLocal(tree->gtOp.gtOp1);
if (varDsc == nullptr)
VarSetOps::AddElemD(this, *pKilledInOut, varDsc->lvVarIndex);
} while (stmt != list);
// If any local in the RHS is killed in intervening code, or RHS has an indirection, return NULL.
varRefKinds rhsRefs = VR_NONE;
VARSET_TP VARSET_INIT_NOCOPY(rhsLocals, VarSetOps::UninitVal());
bool b = lvaLclVarRefs(rhs, nullptr, &rhsRefs, &rhsLocals);
if (!b || !VarSetOps::IsEmptyIntersection(this, rhsLocals, *pKilledInOut) || (rhsRefs != VR_NONE))
// If RHS has been indirectly referenced, consider it a write and a kill.
*pLhsRhsKilledAfterInit = true;
/*****************************************************************************
 *
 *  Return true if "op1" is guaranteed to be less then or equal to "op2".
 *  'add1'/'add2' are constant adjustments applied to op1/op2 respectively.
 */

bool Compiler::optIsNoMore(GenTreePtr op1, GenTreePtr op2, int add1, int add2)
// Two constants: fold them into the adjustments and compare directly.
if (op1->gtOper == GT_CNS_INT && op2->gtOper == GT_CNS_INT)
add1 += op1->gtIntCon.gtIconVal;
add2 += op2->gtIntCon.gtIconVal;
/* Check for +/- constant on either operand */
if (op1->gtOper == GT_ADD && op1->gtOp.gtOp2->gtOper == GT_CNS_INT)
add1 += op1->gtOp.gtOp2->gtIntCon.gtIconVal;
op1 = op1->gtOp.gtOp1;
if (op2->gtOper == GT_ADD && op2->gtOp.gtOp2->gtOper == GT_CNS_INT)
add2 += op2->gtOp.gtOp2->gtIntCon.gtIconVal;
op2 = op2->gtOp.gtOp1;
/* We only allow local variable references */
if (op1->gtOper != GT_LCL_VAR)
if (op2->gtOper != GT_LCL_VAR)
// Both sides must reference the same (single-def) local for the
// comparison of the adjustments to be meaningful.
if (op1->gtLclVarCommon.gtLclNum != op2->gtLclVarCommon.gtLclNum)
/* NOTE: Caller ensures that this variable has only one def */
// printf("limit [%d]:\n", add1); gtDispTree(op1);
// printf("size [%d]:\n", add2); gtDispTree(op2);
return (bool)(add1 <= add2);
7510 //------------------------------------------------------------------------------
7511 // optObtainLoopCloningOpts: Identify optimization candidates and update
7512 // the "context" for array optimizations.
7515 // context - data structure where all loop cloning info is kept. The
7516 // optInfo fields of the context are updated with the
7517 // identified optimization candidates.
7519 void Compiler::optObtainLoopCloningOpts(LoopCloneContext* context)
7521 for (unsigned i = 0; i < optLoopCount; i++)
7523 JITDUMP("Considering loop %d to clone for optimizations.\n", i);
7524 if (optIsLoopClonable(i))
7526 if (!(optLoopTable[i].lpFlags & LPFLG_REMOVED))
7528 optIdentifyLoopOptInfo(i, context);
7531 JITDUMP("------------------------------------------------------------\n");
//------------------------------------------------------------------------
// optIdentifyLoopOptInfo: Identify loop optimization candidates and also
//      check if the loop is suitable for the optimizations performed.
//
// Arguments:
//      loopNum     -   the current loop index for which conditions are derived.
//      context     -   data structure where all loop cloning candidates will be
//                      updated.
//
// Return Value:
//      If the loop is not suitable for the optimizations, return false - context
//      should not contain any optimization candidate for the loop if false.
//      Else return true.
//
// Operation:
//      Check if the loop is well formed for this optimization and identify the
//      optimization candidates and update the "context" parameter with all the
//      contextual information necessary to perform the optimization later.
//
bool Compiler::optIdentifyLoopOptInfo(unsigned loopNum, LoopCloneContext* context)
noway_assert(loopNum < optLoopCount);
LoopDsc* pLoop = &optLoopTable[loopNum];
// The loop must be a recognized iterator loop.
if (!(pLoop->lpFlags & LPFLG_ITER))
JITDUMP("> No iter flag on loop %d.\n", loopNum);
unsigned ivLclNum = pLoop->lpIterVar();
// An address-exposed iteration variable can be modified through aliases.
if (lvaVarAddrExposed(ivLclNum))
JITDUMP("> Rejected V%02u as iter var because is address-exposed.\n", ivLclNum);
BasicBlock* head = pLoop->lpHead;
BasicBlock* end = pLoop->lpBottom;
BasicBlock* beg = head->bbNext;
// The bottom block must conditionally branch back to the loop's begin block.
if (end->bbJumpKind != BBJ_COND)
JITDUMP("> Couldn't find termination test.\n");
if (end->bbJumpDest != beg)
JITDUMP("> Branch at loop 'end' not looping to 'begin'.\n");
// TODO-CQ: CLONE: Mark increasing or decreasing loops.
if ((pLoop->lpIterOper() != GT_ASG_ADD && pLoop->lpIterOper() != GT_ADD) || (pLoop->lpIterConst() != 1))
JITDUMP("> Loop iteration operator not matching\n");
if ((pLoop->lpFlags & LPFLG_CONST_LIMIT) == 0 && (pLoop->lpFlags & LPFLG_VAR_LIMIT) == 0 &&
(pLoop->lpFlags & LPFLG_ARRLEN_LIMIT) == 0)
JITDUMP("> Loop limit is neither constant, variable or array length\n");
// The test direction must agree with the iterator update direction.
if (!(((pLoop->lpTestOper() == GT_LT || pLoop->lpTestOper() == GT_LE) &&
(pLoop->lpIterOper() == GT_ADD || pLoop->lpIterOper() == GT_ASG_ADD)) ||
((pLoop->lpTestOper() == GT_GT || pLoop->lpTestOper() == GT_GE) &&
(pLoop->lpIterOper() == GT_SUB || pLoop->lpIterOper() == GT_ASG_SUB))))
JITDUMP("> Loop test (%s) doesn't agree with the direction (%s) of the pLoop->\n",
GenTree::NodeName(pLoop->lpTestOper()), GenTree::NodeName(pLoop->lpIterOper()));
// Require a zero-trip-test protected (inverted) loop so entry is guarded.
if (!(pLoop->lpTestTree->OperKind() & GTK_RELOP) || !(pLoop->lpTestTree->gtFlags & GTF_RELOP_ZTT))
JITDUMP("> Loop inversion NOT present, loop test [%06u] may not protect entry from head.\n",
pLoop->lpTestTree->gtTreeID);
GenTreePtr op1 = pLoop->lpIterator();
noway_assert((op1->gtOper == GT_LCL_VAR) && (op1->gtLclVarCommon.gtLclNum == ivLclNum));
JITDUMP("Checking blocks BB%02d..BB%02d for optimization candidates\n", beg->bbNum,
end->bbNext ? end->bbNext->bbNum : 0);
// Walk every statement of every block in the loop looking for array-index
// patterns that can be optimized by cloning.
LoopCloneVisitorInfo info(context, loopNum, nullptr);
for (BasicBlock* block = beg; block != end->bbNext; block = block->bbNext)
for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
fgWalkTreePre(&stmt->gtStmt.gtStmtExpr, optCanOptimizeByLoopCloningVisitor, &info, false, false);
//---------------------------------------------------------------------------------------------------------------
//  optExtractArrIndex: Try to extract the array index from "tree".
//
//  Arguments:
//      tree        the tree to be checked if it is the array [] operation.
//      result      the extracted GT_INDEX information is updated in result.
//      lhsNum      for the root level (function is recursive) callers should be BAD_VAR_NUM.
//
//  Return Value:
//      Returns true if array index can be extracted, else, return false. See assumption about
//      what will be extracted. The "result" variable's rank parameter is advanced for every
//      dimension of [] encountered.
//
//  Operation:
//      Given a "tree" extract the GT_INDEX node in "result" as ArrIndex. In FlowGraph morph
//      we have converted a GT_INDEX tree into a scaled index base offset expression. We need
//      to reconstruct this to be able to know if this is an array access.
//
//  Assumption:
//      The method extracts only if the array base and indices are GT_LCL_VAR.
//
//  TODO-CQ: CLONE: After morph make sure this method extracts values before morph.
//
//  [000000001AF828D8] ---XG-------                  indir     int
//  [000000001AF872C8] ------------                        const     long   16 Fseq[#FirstElem]
//  [000000001AF87340] ------------                     +         byref
//  [000000001AF87160] -------N----                                 const     long   2
//  [000000001AF871D8] ------------                              <<        long
//  [000000001AF870C0] ------------                                 cast      long <- int
//  [000000001AF86F30] i-----------                                    lclVar    int    V04 loc0
//  [000000001AF87250] ------------                           +         byref
//  [000000001AF86EB8] ------------                              lclVar    ref    V01 arg1
//  [000000001AF87468] ---XG-------                  comma     int
//  [000000001AF87020] ---X--------                     arrBndsChk void
//  [000000001AF86FA8] ---X--------                        arrLen    int
//  [000000001AF827E8] ------------                           lclVar    ref    V01 arg1
//  [000000001AF82860] ------------                        lclVar    int    V04 loc0
//  [000000001AF829F0] -A-XG-------               =         int
//  [000000001AF82978] D------N----                  lclVar    int    V06 tmp0
//
bool Compiler::optExtractArrIndex(GenTreePtr tree, ArrIndex* result, unsigned lhsNum)
// The morphed array access is a comma of (bounds check, indirection).
if (tree->gtOper != GT_COMMA)
GenTreePtr before = tree->gtGetOp1();
if (before->gtOper != GT_ARR_BOUNDS_CHECK)
GenTreeBoundsChk* arrBndsChk = before->AsBoundsChk();
// The array base and the index of the bounds check must both be locals.
if (arrBndsChk->gtArrLen->gtGetOp1()->gtOper != GT_LCL_VAR)
if (arrBndsChk->gtIndex->gtOper != GT_LCL_VAR)
unsigned arrLcl = arrBndsChk->gtArrLen->gtGetOp1()->gtLclVarCommon.gtLclNum;
// When recursing, the array base must match the local assigned one level up.
if (lhsNum != BAD_VAR_NUM && arrLcl != lhsNum)
unsigned indLcl = arrBndsChk->gtIndex->gtLclVarCommon.gtLclNum;
GenTreePtr after = tree->gtGetOp2();
if (after->gtOper != GT_IND)
// It used to be the case that arrBndsChks for struct types would fail the previous check because
// after->gtOper was an address (for a block op).  In order to avoid asmDiffs we will for now
// return false if the type of 'after' is a struct type.  (This was causing us to clone loops
// that we were not previously cloning.)
// TODO-1stClassStructs: Remove this check to enable optimization of array bounds checks for struct
// types.
if (varTypeIsStruct(after))
// Match the scaled-index-base-offset shape: (base + (index << scale)) + offset.
GenTreePtr sibo = after->gtGetOp1();
if (sibo->gtOper != GT_ADD)
GenTreePtr sib = sibo->gtGetOp1();
GenTreePtr ofs = sibo->gtGetOp2();
if (ofs->gtOper != GT_CNS_INT)
if (sib->gtOper != GT_ADD)
GenTreePtr si = sib->gtGetOp2();
GenTreePtr base = sib->gtGetOp1();
if (si->gtOper != GT_LSH)
// The addressing base must be the same array local as the bounds check's.
if (base->OperGet() != GT_LCL_VAR || base->gtLclVarCommon.gtLclNum != arrLcl)
GenTreePtr scale = si->gtGetOp2();
GenTreePtr index = si->gtGetOp1();
if (scale->gtOper != GT_CNS_INT)
#ifdef _TARGET_AMD64_
// On 64-bit the 32-bit index appears under a widening cast.
if (index->gtOper != GT_CAST)
GenTreePtr indexVar = index->gtGetOp1();
GenTreePtr indexVar = index;
// The index must be the same local that the bounds check tested.
if (indexVar->gtOper != GT_LCL_VAR || indexVar->gtLclVarCommon.gtLclNum != indLcl)
// At the root level, record the array base; every level pushes its index
// and bounds-check tree, advancing the rank.
if (lhsNum == BAD_VAR_NUM)
result->arrLcl = arrLcl;
result->indLcls.Push(indLcl);
result->bndsChks.Push(tree);
result->useBlock = compCurBB;
//---------------------------------------------------------------------------------------------------------------
//  optReconstructArrIndex: Reconstruct array index.
//
//  Arguments:
//      tree        the tree to be checked if it is an array [][][] operation.
//      result      the extracted GT_INDEX information.
//      lhsNum      for the root level (function is recursive) callers should be BAD_VAR_NUM.
//
//  Return Value:
//      Returns true if array index can be extracted, else, return false. "rank" field in
//      "result" contains the array access depth. The "indLcls" fields contain the indices.
//
//  Operation:
//      Recursively look for a list of array indices. In the example below, we encounter,
//      V03 = ((V05 = V00[V01]), (V05[V02])) which corresponds to access of V00[V01][V02]
//      The return value would then be:
//      ArrIndex result { arrLcl: V00, indLcls: [V01, V02], rank: 2 }
//
//      V00[V01][V02] would be morphed as:
//
//      [000000001B366848] ---XG-------                        indir     int
//      [000000001B36BC50] ------------                                 V05 + (V02 << 2) + 16
//      [000000001B36C200] ---XG-------                     comma     int
//      [000000001B36BDB8] ---X--------                        arrBndsChk(V05, V02)
//      [000000001B36C278] -A-XG-------                  comma     int
//      [000000001B366730] R--XG-------                           indir     ref
//      [000000001B36C2F0] ------------                                 V00 + (V01 << 3) + 24
//      [000000001B36C818] ---XG-------                        comma     ref
//      [000000001B36C458] ---X--------                           arrBndsChk(V00, V01)
//      [000000001B36BB60] -A-XG-------                     =         ref
//      [000000001B36BAE8] D------N----                     lclVar    ref    V05 tmp2
//      [000000001B36A668] -A-XG-------               =         int
//      [000000001B36A5F0] D------N----                  lclVar    int    V03 tmp0
//
//  Assumption:
//      The method extracts only if the array base and indices are GT_LCL_VAR.
//
bool Compiler::optReconstructArrIndex(GenTreePtr tree, ArrIndex* result, unsigned lhsNum)
// If we can extract "tree" (which is a top level comma) return.
if (optExtractArrIndex(tree, result, lhsNum))
// We have a comma (check if array base expr is computed in "before"), descend further.
else if (tree->OperGet() == GT_COMMA)
GenTreePtr before = tree->gtGetOp1();
// "before" should evaluate an array base for the "after" indexing.
if (before->OperGet() != GT_ASG)
GenTreePtr lhs = before->gtGetOp1();
GenTreePtr rhs = before->gtGetOp2();
// "rhs" should contain an GT_INDEX
if (!lhs->IsLocal() || !optReconstructArrIndex(rhs, result, lhsNum))
// The local assigned from the inner dimension becomes the array base for the
// next (outer) dimension's extraction.
unsigned lhsNum = lhs->gtLclVarCommon.gtLclNum;
GenTreePtr after = tree->gtGetOp2();
// Pass the "lhsNum", so we can verify if indeed it is used as the array base.
return optExtractArrIndex(after, result, lhsNum);
7854 Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloningVisitor(GenTreePtr* pTree, Compiler::fgWalkData* data)
7856 return data->compiler->optCanOptimizeByLoopCloning(*pTree, (LoopCloneVisitorInfo*)data->pCallbackData);
7859 //-------------------------------------------------------------------------
7860 // optIsStackLocalInvariant: Is stack local invariant in loop.
7863 // loopNum The loop in which the variable is tested for invariance.
7864 // lclNum The local that is tested for invariance in the loop.
7867 // Returns true if the variable is loop invariant in loopNum.
7869 bool Compiler::optIsStackLocalInvariant(unsigned loopNum, unsigned lclNum)
7871 if (lvaVarAddrExposed(lclNum))
7875 if (optIsVarAssgLoop(loopNum, lclNum))
7882 //----------------------------------------------------------------------------------------------
7883 // optCanOptimizeByLoopCloning: Check if the tree can be optimized by loop cloning and if so,
7884 // identify as potential candidate and update the loop context.
7887 // tree The tree encountered during the tree walk.
7888 // info Supplies information about the current block or stmt in which the tree is.
7889 // Also supplies the "context" pointer for updating with loop cloning
7890 // candidates. Also supplies loopNum.
7893 // If array index can be reconstructed, check if the iter var of the loop matches the
7894 // array index var in some dim. Also ensure other index vars before the identified
7895 // dim are loop invariant.
7898 // Skip sub trees if the optimization candidate is identified or else continue walking
Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTreePtr tree, LoopCloneVisitorInfo* info)
    ArrIndex arrIndex(getAllocator());

    // Check if array index can be optimized.
    if (optReconstructArrIndex(tree, &arrIndex, BAD_VAR_NUM))
        assert(tree->gtOper == GT_COMMA);
        JITDUMP("Found ArrIndex at tree ");
        printf(" which is equivalent to: ");
        // The array base local must be invariant in the loop, or the cloned
        // bounds checks would not be valid.
        if (!optIsStackLocalInvariant(info->loopNum, arrIndex.arrLcl))
            return WALK_SKIP_SUBTREES;

        // Walk the dimensions and see if iterVar of the loop is used as index.
        for (unsigned dim = 0; dim < arrIndex.rank; ++dim)
            // Is index variable also used as the loop iter var.
            if (arrIndex.indLcls[dim] == optLoopTable[info->loopNum].lpIterVar())
                // Check the previous indices are all loop invariant.
                for (unsigned dim2 = 0; dim2 < dim; ++dim2)
                    if (optIsVarAssgLoop(info->loopNum, arrIndex.indLcls[dim2]))
                        JITDUMP("V%02d is assigned in loop\n", arrIndex.indLcls[dim2]);
                        return WALK_SKIP_SUBTREES;
                JITDUMP("Loop %d can be cloned for ArrIndex ", info->loopNum);
                JITDUMP(" on dim %d\n", dim);
                // Update the loop context: record this jagged-array access as a
                // cloning candidate for the loop.
                info->context->EnsureLoopOptInfo(info->loopNum)
                    ->Push(new (this, CMK_LoopOpt) LcJaggedArrayOptInfo(arrIndex, dim, info->stmt));
            JITDUMP("Induction V%02d is not used as index on dim %d\n", optLoopTable[info->loopNum].lpIterVar(),
        return WALK_SKIP_SUBTREES;
    else if (tree->gtOper == GT_ARR_ELEM)
        // TODO-CQ: CLONE: Implement.
        return WALK_SKIP_SUBTREES;
    return WALK_CONTINUE;
// Callback payload for optValidRangeCheckIndex: carries the Compiler instance,
// plus a bValidIndex flag that the walk clears when it finds a non-local,
// non-constant component in the index expression (see its uses below).
struct optRangeCheckDsc
    Compiler* pCompiler;
7972 Walk to make sure that only locals and constants are contained in the index
Compiler::fgWalkResult Compiler::optValidRangeCheckIndex(GenTreePtr* pTree, fgWalkData* data)
    GenTreePtr tree = *pTree;
    optRangeCheckDsc* pData = (optRangeCheckDsc*)data->pCallbackData;

    // Any indirection (heap load, static field, object field, local field)
    // could be racily mutated between the check and the access, so the index
    // is not valid for range-check removal.
    if (tree->gtOper == GT_IND || tree->gtOper == GT_CLS_VAR || tree->gtOper == GT_FIELD || tree->gtOper == GT_LCL_FLD)
        pData->bValidIndex = false;

    if (tree->gtOper == GT_LCL_VAR)
        // An address-exposed local can likewise be modified through aliases.
        if (pData->pCompiler->lvaTable[tree->gtLclVarCommon.gtLclNum].lvAddrExposed)
            pData->bValidIndex = false;

    return WALK_CONTINUE;
Returns true if a range check can legally be removed (for the moment it checks
that the array is a local array, not subject to race conditions, and that the
index is either a constant or a local variable).
bool Compiler::optIsRangeCheckRemovable(GenTreePtr tree)
    noway_assert(tree->gtOper == GT_ARR_BOUNDS_CHECK);
    GenTreeBoundsChk* bndsChk = tree->AsBoundsChk();
    GenTreePtr pArray = bndsChk->GetArray();
    // With no array node available, the length itself must be a constant.
    if (pArray == nullptr && !bndsChk->gtArrLen->IsCnsIntOrI())
    GenTreePtr pIndex = bndsChk->gtIndex;

    // The length must be a constant (the pArray == NULL case) or the array reference must be a local.
    // Otherwise we can be targeted by malicious race-conditions.
    if (pArray != nullptr)
        if (pArray->gtOper != GT_LCL_VAR)
            printf("Can't remove range check if the array isn't referenced with a local\n");
        noway_assert(pArray->gtType == TYP_REF);
        noway_assert(pArray->gtLclVarCommon.gtLclNum < lvaCount);
        if (lvaTable[pArray->gtLclVarCommon.gtLclNum].lvAddrExposed)
            // If the array address has been taken, don't do the optimization
            // (this restriction can be lowered a bit, but i don't think it's worth it)
            CLANG_FORMAT_COMMENT_ANCHOR;
            printf("Can't remove range check if the array has its address taken\n");

    // Walk the index expression: only locals and constants may appear in it
    // (optValidRangeCheckIndex clears bValidIndex otherwise).
    optRangeCheckDsc Data;
    Data.pCompiler = this;
    Data.bValidIndex = true;
    fgWalkTreePre(&pIndex, optValidRangeCheckIndex, &Data);

    if (!Data.bValidIndex)
        printf("Can't remove range check with this index");
8074 /******************************************************************************
8076 * Replace x==null with (x|x)==0 if x is a GC-type.
8077 * This will stress code-gen and the emitter to make sure they support such trees.
void Compiler::optOptimizeBoolsGcStress(BasicBlock* condBlock)
    // Only fire under the STRESS_OPT_BOOLS_GC stress mode.
    if (!compStressCompile(STRESS_OPT_BOOLS_GC, 20))
    noway_assert(condBlock->bbJumpKind == BBJ_COND);
    GenTreePtr condStmt = condBlock->bbTreeList->gtPrev->gtStmt.gtStmtExpr;

    noway_assert(condStmt->gtOper == GT_JTRUE);

    GenTreePtr comparand = optIsBoolCond(condStmt, &relop, &isBool);

    // Only GC-typed comparands are interesting for this stress transform.
    if (comparand == nullptr || !varTypeIsGC(comparand->TypeGet()))
    // Cloning an expression with side effects would duplicate those effects.
    if (comparand->gtFlags & (GTF_ASG | GTF_CALL | GTF_ORDER_SIDEEFF))
    GenTreePtr comparandClone = gtCloneExpr(comparand);

    // Bump up the ref-counts of any variables in 'comparandClone'
    compCurBB = condBlock;
    fgWalkTreePre(&comparandClone, Compiler::lvaIncRefCntsCB, (void*)this, true);

    noway_assert(relop->gtOp.gtOp1 == comparand);
    // Randomly pick OR vs AND so both (x|x)==0 and (x&x)==0 get stress coverage.
    genTreeOps oper = compStressCompile(STRESS_OPT_BOOLS_GC, 50) ? GT_OR : GT_AND;
    relop->gtOp.gtOp1 = gtNewOperNode(oper, TYP_I_IMPL, comparand, comparandClone);

    // Comparand type is already checked, and we have const int, there is no harm
    // morphing it into a TYP_I_IMPL.
    noway_assert(relop->gtOp.gtOp2->gtOper == GT_CNS_INT);
    relop->gtOp.gtOp2->gtType = TYP_I_IMPL;
8127 /******************************************************************************
8128 * Function used by folding of boolean conditionals
8129 * Given a GT_JTRUE node, checks that it is a boolean comparison of the form
8130 * "if (boolVal ==/!= 0/1)". This is translated into a GT_EQ node with "op1"
8131 * being a boolean lclVar and "op2" the const 0/1.
8132 * On success, the comparand (ie. boolVal) is returned. Else NULL.
8133 * compPtr returns the compare node (i.e. GT_EQ or GT_NE node)
8134 * boolPtr returns whether the comparand is a boolean value (must be 0 or 1).
 * When the returned boolPtr is true, if the comparison was against a 1 (i.e. true)
 * value then we morph the tree by reversing the GT_EQ/GT_NE and changing the 1 to 0.
GenTree* Compiler::optIsBoolCond(GenTree* condBranch, GenTree** compPtr, bool* boolPtr)
    bool isBool = false;

    noway_assert(condBranch->gtOper == GT_JTRUE);
    GenTree* cond = condBranch->gtOp.gtOp1;

    /* The condition must be "!= 0" or "== 0" */

    if ((cond->gtOper != GT_EQ) && (cond->gtOper != GT_NE))
    /* Return the compare node to the caller */

    /* Get hold of the comparands */

    GenTree* opr1 = cond->gtOp.gtOp1;
    GenTree* opr2 = cond->gtOp.gtOp2;

    // The second operand must be the integer constant 0 or 1.
    if (opr2->gtOper != GT_CNS_INT)
    if (!opr2->IsIntegralConst(0) && !opr2->IsIntegralConst(1))
    ssize_t ival2 = opr2->gtIntCon.gtIconVal;

    /* Is the value a boolean?
     * We can either have a boolean expression (marked GTF_BOOLEAN) or
     * a local variable that is marked as being boolean (lvIsBoolean) */

    if (opr1->gtFlags & GTF_BOOLEAN)
    else if ((opr1->gtOper == GT_CNS_INT) && (opr1->IsIntegralConst(0) || opr1->IsIntegralConst(1)))
    else if (opr1->gtOper == GT_LCL_VAR)
        /* is it a boolean local variable */

        unsigned lclNum = opr1->gtLclVarCommon.gtLclNum;
        noway_assert(lclNum < lvaCount);

        if (lvaTable[lclNum].lvIsBoolean)
    /* Was our comparison against the constant 1 (i.e. true) */
        // If this is a boolean expression tree we can reverse the relop
        // and change the true to false.
        gtReverseCond(cond);
        opr2->gtIntCon.gtIconVal = 0;
/*****************************************************************************
 *
 *  Fold pairs of adjacent BBJ_COND blocks whose conditions are simple
 *  boolean tests into a single conditional branch (e.g. two branches to a
 *  common target become one branch on t1|t2), then remove the second block
 *  and compact the flowgraph.
 */
void Compiler::optOptimizeBools()
    printf("*************** In optOptimizeBools()\n");
    printf("Blocks/Trees before phase\n");
    fgDispBasicBlocks(true);
    for (BasicBlock* b1 = fgFirstBB; b1; b1 = b1->bbNext)
        /* We're only interested in conditional jumps here */

        if (b1->bbJumpKind != BBJ_COND)
        /* If there is no next block, we're done */

        BasicBlock* b2 = b1->bbNext;
        /* The next block must not be marked as BBF_DONT_REMOVE */
        if (b2->bbFlags & BBF_DONT_REMOVE)
        /* The next block also needs to be a condition */

        if (b2->bbJumpKind != BBJ_COND)
            optOptimizeBoolsGcStress(b1);
        bool sameTarget; // Do b1 and b2 have the same bbJumpDest?

        if (b1->bbJumpDest == b2->bbJumpDest)
            /* Given the following sequence of blocks :
               we will try to fold it to :
               B1: brtrue(t1|t2, BX)
        else if (b1->bbJumpDest == b2->bbNext) /*b1->bbJumpDest->bbNum == n1+2*/
            /* Given the following sequence of blocks :
               we will try to fold it to :
               B1: brtrue((!t1)&&t2, B3)
        /* The second block must contain a single statement */

        GenTreePtr s2 = b2->bbTreeList;
        if (s2->gtPrev != s2)
        noway_assert(s2->gtOper == GT_STMT);
        GenTreePtr t2 = s2->gtStmt.gtStmtExpr;
        noway_assert(t2->gtOper == GT_JTRUE);

        /* Find the condition for the first block */

        GenTreePtr s1 = b1->bbTreeList->gtPrev;

        noway_assert(s1->gtOper == GT_STMT);
        GenTreePtr t1 = s1->gtStmt.gtStmtExpr;
        noway_assert(t1->gtOper == GT_JTRUE);

        // b2 must be reachable only through b1; otherwise deleting it would
        // change behavior for the other predecessors.
        if (b2->countOfInEdges() > 1)

        /* Find the branch conditions of b1 and b2 */

        GenTreePtr c1 = optIsBoolCond(t1, &t1, &bool1);
        GenTreePtr c2 = optIsBoolCond(t2, &t2, &bool2);

        // NOTE(review): '&&' binds tighter than '||', so these asserts check
        // "(gtOper == GT_EQ) || (gtOper == GT_NE && gtOp1 == cN)" — the gtOp1
        // check is skipped on the GT_EQ path. Parentheses around the '||' were
        // probably intended; debug-only, no effect on generated code.
        noway_assert(t1->gtOper == GT_EQ || t1->gtOper == GT_NE && t1->gtOp.gtOp1 == c1);
        noway_assert(t2->gtOper == GT_EQ || t2->gtOper == GT_NE && t2->gtOp.gtOp1 == c2);

        // Leave out floats where the bit-representation is more complicated
        // - there are two representations for 0.
        //
        if (varTypeIsFloating(c1->TypeGet()) || varTypeIsFloating(c2->TypeGet()))

        // Make sure the types involved are of the same sizes
        if (genTypeSize(c1->TypeGet()) != genTypeSize(c2->TypeGet()))
        if (genTypeSize(t1->TypeGet()) != genTypeSize(t2->TypeGet()))
#ifdef _TARGET_ARMARCH_
        // Skip the small operand which we cannot encode.
        if (varTypeIsSmall(c1->TypeGet()))
        /* The second condition must not contain side effects */

        if (c2->gtFlags & GTF_GLOB_EFFECT)
        /* The second condition must not be too expensive */

        if (c2->gtCostEx > 12)
        var_types foldType = c1->TypeGet();
        if (varTypeIsGC(foldType))
            // GC refs are folded as native-int bit patterns.
            foldType = TYP_I_IMPL;
        /* Both conditions must be the same */

        if (t1->gtOper != t2->gtOper)
        if (t1->gtOper == GT_EQ)
            /* t1:c1==0 t2:c2==0 ==> Branch to BX if either value is 0
               So we will branch to BX if (c1&c2)==0 */
            /* t1:c1!=0 t2:c2!=0 ==> Branch to BX if either value is non-0
               So we will branch to BX if (c1|c2)!=0 */
        /* The b1 condition must be the reverse of the b2 condition */

        if (t1->gtOper == t2->gtOper)
        if (t1->gtOper == GT_EQ)
            /* t1:c1==0 t2:c2!=0 ==> Branch to BX if both values are non-0
               So we will branch to BX if (c1&c2)!=0 */
            /* t1:c1!=0 t2:c2==0 ==> Branch to BX if both values are 0
               So we will branch to BX if (c1|c2)==0 */
        // Anding requires both values to be 0 or 1

        if ((foldOp == GT_AND) && (!bool1 || !bool2))
        //
        // Now update the trees
        //
        GenTreePtr cmpOp1 = gtNewOperNode(foldOp, foldType, c1, c2);

        /* When we 'OR'/'AND' two booleans, the result is boolean as well */
        cmpOp1->gtFlags |= GTF_BOOLEAN;
        t1->gtOp.gtOp1 = cmpOp1;
        t1->gtOp.gtOp2->gtType = foldType; // Could have been varTypeIsGC()
#if FEATURE_SET_FLAGS
        // For comparisons against zero we will have the GTF_SET_FLAGS set
        // and this can cause an assert to fire in fgMoveOpsLeft(GenTreePtr tree)
        // during the CSE phase.
        //
        // So make sure to clear any GTF_SET_FLAGS bit on these operations
        // as they are no longer feeding directly into a comparisons against zero

        // Make sure that the GTF_SET_FLAGS bit is cleared.
        // Fix 388436 ARM JitStress WP7
        c1->gtFlags &= ~GTF_SET_FLAGS;
        c2->gtFlags &= ~GTF_SET_FLAGS;

        // The new top level node that we just created does feed directly into
        // a comparison against zero, so set the GTF_SET_FLAGS bit so that
        // we generate an instruction that sets the flags, which allows us
        // to omit the cmp with zero instruction.

        // Request that the codegen for cmpOp1 sets the condition flags
        // when it generates the code for cmpOp1.
        //
        cmpOp1->gtRequestSetFlags();

        flowList* edge1 = fgGetPredForBlock(b1->bbJumpDest, b1);

        /* Modify the target of the conditional jump and update bbRefs and bbPreds */

        edge2 = fgGetPredForBlock(b2->bbJumpDest, b2);
        edge2 = fgGetPredForBlock(b2->bbNext, b2);
        fgRemoveRefPred(b1->bbJumpDest, b1);
        b1->bbJumpDest = b2->bbJumpDest;
        fgAddRefPred(b2->bbJumpDest, b1);
        noway_assert(edge1 != nullptr);
        noway_assert(edge2 != nullptr);

        // Fold both incoming edge weights into edge1, guarding against
        // overflow of the summed max weights.
        BasicBlock::weight_t edgeSumMin = edge1->flEdgeWeightMin + edge2->flEdgeWeightMin;
        BasicBlock::weight_t edgeSumMax = edge1->flEdgeWeightMax + edge2->flEdgeWeightMax;
        if ((edgeSumMax >= edge1->flEdgeWeightMax) && (edgeSumMax >= edge2->flEdgeWeightMax))
            edge1->flEdgeWeightMin = edgeSumMin;
            edge1->flEdgeWeightMax = edgeSumMax;
            // Overflow: fall back to the widest possible weight range.
            edge1->flEdgeWeightMin = BB_ZERO_WEIGHT;
            edge1->flEdgeWeightMax = BB_MAX_WEIGHT;
        /* Get rid of the second block (which is a BBJ_COND) */

        noway_assert(b1->bbJumpKind == BBJ_COND);
        noway_assert(b2->bbJumpKind == BBJ_COND);
        noway_assert(b1->bbJumpDest == b2->bbJumpDest);
        noway_assert(b1->bbNext == b2);
        noway_assert(b2->bbNext);
        b2->bbFlags |= BBF_REMOVED;

        // If b2 was the last block of a try or handler, update the EH table.

        ehUpdateForDeletedBlock(b2);

        /* Update bbRefs and bbPreds */

        /* Replace pred 'b2' for 'b2->bbNext' with 'b1'
         * Remove pred 'b2' for 'b2->bbJumpDest' */

        fgReplacePred(b2->bbNext, b2, b1);

        fgRemoveRefPred(b2->bbJumpDest, b2);

        /* Update the block numbers and try again */

        // Update loop table
        fgUpdateLoopsAfterCompacting(b1, b2);
        printf("Folded %sboolean conditions of BB%02u and BB%02u to :\n", c2->OperIsLeaf() ? "" : "non-leaf ",
               b1->bbNum, b2->bbNum);
    fgDebugCheckBBlist();