1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
10 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
11 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
17 #pragma warning(disable : 4701)
20 /*****************************************************************************/
// Static counters for range-check statistics: optRangeChkRmv counts removed
// range checks, optRangeChkAll counts all range checks seen. (Presumably
// compiled only under a stats/measurement build flag — elided in this view.)
24 unsigned Compiler::optRangeChkRmv = 0;
26 unsigned Compiler::optRangeChkAll = 0;
29 /*****************************************************************************/
// optInit: per-method initialization of the optimizer's state — resets loop
// marking, call counters, assertion-propagation state, and CSE bookkeeping.
// NOTE(review): several statements/braces are elided in this view.
31 void Compiler::optInit()
33     optLoopsMarked = false;
36     /* Initialize the # of tracked loops to 0 */
38     /* Keep track of the number of calls and indirect calls made by this method */
40     optIndirectCallCount = 0;
41     optNativeCallCount   = 0;
// Assertion-prop starts with no assertions and no dependency bitvectors.
42     optAssertionCount = 0;
43     optAssertionDep   = nullptr;
// CSE bookkeeping: no candidates yet; UINT_MAX marks "CSE phase not started".
45     optCSECandidateTotal = 0;
46     optCSEstart          = UINT_MAX;
48 #endif // FEATURE_ANYCSE
// DataFlow constructor: simply records the owning Compiler instance.
51 DataFlow::DataFlow(Compiler* pCompiler) : m_pCompiler(pCompiler)
55 /*****************************************************************************
// optSetBlockWeights: adjust basic-block weights using dominator/reachability
// info. Blocks unreachable from the first block are marked run-rarely; when
// not using profile weights, blocks that do not dominate a BBJ_RETURN block
// get their weight halved. Requires dominators to be computed.
// NOTE(review): loop body braces and some statements are elided in this view.
59 void Compiler::optSetBlockWeights()
61     noway_assert(!opts.MinOpts() && !opts.compDbgCode);
62     assert(fgDomsComputed);
68     bool firstBBdomsRets = true;
72     for (block = fgFirstBB; (block != nullptr); block = block->bbNext)
74         /* Blocks that can't be reached via the first block are rarely executed */
75         if (!fgReachable(fgFirstBB, block))
77             block->bbSetRunRarely();
80         if (block->bbWeight != BB_ZERO_WEIGHT)
82             // Calculate our bbWeight:
84             //  o BB_UNITY_WEIGHT if we dominate all BBJ_RETURN blocks
85             //  o otherwise BB_UNITY_WEIGHT / 2
87             bool domsRets = true; // Assume that we will dominate
89             for (BasicBlockList* retBlocks = fgReturnBlocks; retBlocks != nullptr; retBlocks = retBlocks->next)
91                 if (!fgDominate(block, retBlocks->block))
98             if (block == fgFirstBB)
100                 firstBBdomsRets = domsRets;
103             // If we are not using profile weight then we lower the weight
104             // of blocks that do not dominate a return block
106             if (firstBBdomsRets && (fgIsUsingProfileWeights() == false) && (domsRets == false))
// Halve the weight; the assert checks the result did not drop to zero.
111                 block->modifyBBWeight(block->bbWeight / 2);
112                 noway_assert(block->bbWeight);
118     if (changed && verbose)
120         printf("\nAfter optSetBlockWeights:\n");
125     /* Check that the flowgraph data (bbNum, bbRefs, bbPreds) is up-to-date */
126     fgDebugCheckBBlist();
130 /*****************************************************************************
132 * Marks the blocks between 'begBlk' and 'endBlk' as part of a loop.
// optMarkLoopBlocks: scale up the bbWeight of every block in the lexical range
// [begBlk, endBlk] that is genuinely part of the loop (mutually reachable with
// the loop head). Blocks that dominate/reach a backedge get the full
// BB_LOOP_WEIGHT multiplier; others get half. Blocks with real profile
// weights (BBF_PROF_WEIGHT) are left unchanged.
// NOTE(review): braces and some statements are elided in this view.
135 void Compiler::optMarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk, bool excludeEndBlk)
137     /* Calculate the 'loopWeight',
138        this is the amount to increase each block in the loop
139        Our heuristic is that loops are weighted eight times more
140        than straight line code.
141        Thus we increase each block by 7 times the weight of
142        the loop header block,
143        if the loops are all properly formed gives us:
144        (assuming that BB_LOOP_WEIGHT is 8)
146          1 -- non loop basic block
147          8 -- single loop nesting
148         64 -- double loop nesting
149        512 -- triple loop nesting
// Preconditions: begBlk lexically precedes endBlk, is a loop head, and can
// reach endBlk.
153     noway_assert(begBlk->bbNum <= endBlk->bbNum);
154     noway_assert(begBlk->isLoopHead());
155     noway_assert(fgReachable(begBlk, endBlk));
160         printf("\nMarking loop L%02u", begBlk->bbLoopNum);
164     noway_assert(!opts.MinOpts());
166     /* Build list of backedges for block begBlk */
167     flowList* backedgeList = nullptr;
169     for (flowList* pred = begBlk->bbPreds; pred != nullptr; pred = pred->flNext)
171         /* Is this a backedge? */
// A predecessor with bbNum >= begBlk->bbNum lies lexically at/after the loop
// head, so its edge into begBlk is treated as a backedge.
172         if (pred->flBlock->bbNum >= begBlk->bbNum)
174             flowList* flow = new (this, CMK_FlowList) flowList();
176 #if MEASURE_BLOCK_SIZE
178             genFlowNodeSize += sizeof(flowList);
179 #endif // MEASURE_BLOCK_SIZE
// Prepend onto the local backedge list.
181             flow->flNext  = backedgeList;
182             flow->flBlock = pred->flBlock;
187     /* At least one backedge must have been found (the one from endBlk) */
188     noway_assert(backedgeList);
190     BasicBlock* curBlk = begBlk;
194         noway_assert(curBlk);
196         // For curBlk to be part of a loop that starts at begBlk
197         // curBlk must be reachable from begBlk and (since this is a loop)
198         // likewise begBlk must be reachable from curBlk.
201         if (fgReachable(curBlk, begBlk) && fgReachable(begBlk, curBlk))
203             /* If this block reaches any of the backedge blocks we set reachable */
204             /* If this block dominates any of the backedge blocks we set dominates */
205             bool reachable = false;
206             bool dominates = false;
208             for (flowList* tmp = backedgeList; tmp != nullptr; tmp = tmp->flNext)
210                 BasicBlock* backedge = tmp->flBlock;
212                 if (!curBlk->isRunRarely())
214                     reachable |= fgReachable(curBlk, backedge);
215                     dominates |= fgDominate(curBlk, backedge);
// Both properties established — no need to examine further backedges.
217                 if (dominates && reachable)
226                 noway_assert(curBlk->bbWeight > BB_ZERO_WEIGHT);
230                 if ((curBlk->bbFlags & BBF_PROF_WEIGHT) != 0)
232                     // We have real profile weights, so we aren't going to change this blocks weight
233                     weight = curBlk->bbWeight;
// Full multiplier when the block dominates+reaches a backedge; half otherwise
// (the selecting conditions are elided in this view).
239                     weight = curBlk->bbWeight * BB_LOOP_WEIGHT;
243                     weight = curBlk->bbWeight * (BB_LOOP_WEIGHT / 2);
247                 // The multiplication may have caused us to overflow
249                 if (weight < curBlk->bbWeight)
251                     // The multiplication caused us to overflow
252                     weight = BB_MAX_WEIGHT;
255                 // Set the new weight
257                 curBlk->modifyBBWeight(weight);
262                 printf("\n    BB%02u(wt=%s)", curBlk->bbNum, refCntWtd2str(curBlk->getBBWeight(this)));
268         /* Stop if we've reached the last block in the loop */
270         if (curBlk == endBlk)
275         curBlk = curBlk->bbNext;
277         /* If we are excluding the endBlk then stop if we've reached endBlk */
279         if (excludeEndBlk && (curBlk == endBlk))
286 /*****************************************************************************
288 * Unmark the blocks between 'begBlk' and 'endBlk' as part of a loop.
// optUnmarkLoopBlocks: inverse of optMarkLoopBlocks — restore the bbWeight of
// blocks in [begBlk, endBlk] by dividing out BB_LOOP_WEIGHT, used when a
// previously recognized loop no longer exists. Only proceeds when exactly one
// BBJ_COND/BBJ_ALWAYS backedge remains; max-weight and profile-weighted
// blocks are left untouched.
// NOTE(review): braces and some statements are elided in this view.
291 void Compiler::optUnmarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk)
293     /* A set of blocks that were previously marked as a loop are now
294        to be unmarked, since we have decided that for some reason this
295        loop no longer exists.
296        Basically we are just reseting the blocks bbWeight to their
300     noway_assert(begBlk->bbNum <= endBlk->bbNum);
301     noway_assert(begBlk->isLoopHead());
303     noway_assert(!opts.MinOpts());
// Count qualifying backedges into begBlk; unmarking is only safe with one.
306     unsigned backEdgeCount = 0;
308     for (flowList* pred = begBlk->bbPreds; pred != nullptr; pred = pred->flNext)
310         curBlk = pred->flBlock;
312         /* is this a backward edge? (from curBlk to begBlk) */
314         if (begBlk->bbNum > curBlk->bbNum)
319         /* We only consider back-edges that are BBJ_COND or BBJ_ALWAYS for loops */
321         if ((curBlk->bbJumpKind != BBJ_COND) && (curBlk->bbJumpKind != BBJ_ALWAYS))
329     /* Only unmark the loop blocks if we have exactly one loop back edge */
330     if (backEdgeCount != 1)
335             if (backEdgeCount > 0)
337                 printf("\nNot removing loop L%02u, due to an additional back edge", begBlk->bbLoopNum);
339             else if (backEdgeCount == 0)
341                 printf("\nNot removing loop L%02u, due to no back edge", begBlk->bbLoopNum);
347     noway_assert(backEdgeCount == 1);
348     noway_assert(fgReachable(begBlk, endBlk));
353         printf("\nUnmarking loop L%02u", begBlk->bbLoopNum);
360         noway_assert(curBlk);
362         // For curBlk to be part of a loop that starts at begBlk
363         // curBlk must be reachable from begBlk and (since this is a loop)
364         // likewise begBlk must be reachable from curBlk.
366         if (!curBlk->isRunRarely() && fgReachable(curBlk, begBlk) && fgReachable(begBlk, curBlk))
368             unsigned weight = curBlk->bbWeight;
370             // Don't unmark blocks that are set to BB_MAX_WEIGHT
371             // Don't unmark blocks when we are using profile weights
373             if (!curBlk->isMaxBBWeight() && ((curBlk->bbFlags & BBF_PROF_WEIGHT) == 0))
374             {
375                 if (!fgDominate(curBlk, endBlk))
381                     /* Merging of blocks can disturb the Dominates
382                        information (see RAID #46649) */
383                     if (weight < BB_LOOP_WEIGHT)
389                     // We can overflow here so check for it
390                     if (weight < curBlk->bbWeight)
392                         weight = BB_MAX_WEIGHT;
// Divide out the loop multiplier to restore the pre-marking weight.
395                 assert(weight >= BB_LOOP_WEIGHT);
397                 curBlk->modifyBBWeight(weight / BB_LOOP_WEIGHT);
403                 printf("\n    BB%02u(wt=%s)", curBlk->bbNum, refCntWtd2str(curBlk->getBBWeight(this)));
407         /* Stop if we've reached the last block in the loop */
409         if (curBlk == endBlk)
414         curBlk = curBlk->bbNext;
416         /* Stop if we go past the last block in the loop, as it may have been deleted */
417         if (curBlk->bbNum > endBlk->bbNum)
424 /*****************************************************************************************************
426 * Function called to update the loop table and bbWeight before removing a block
// optUpdateLoopsBeforeRemoveBlock: fix up the loop table and block weights
// before 'block' is removed from the flowgraph. Marks loops LPFLG_REMOVED
// when the block was their entry/bottom (or the entry's only predecessor),
// clears exit info, moves the loop head if needed, and optionally unmarks
// loop blocks for a backedge that is going away.
// NOTE(review): braces, case labels and some statements are elided here.
429 void Compiler::optUpdateLoopsBeforeRemoveBlock(BasicBlock* block, bool skipUnmarkLoop)
436     noway_assert(!opts.MinOpts());
438     bool removeLoop = false;
440     /* If an unreachable block was part of a loop entry or bottom then the loop is unreachable */
441     /* Special case: the block was the head of a loop - or pointing to a loop entry */
443     for (unsigned loopNum = 0; loopNum < optLoopCount; loopNum++)
445         /* Some loops may have been already removed by
446          * loop unrolling or conditional folding */
448         if (optLoopTable[loopNum].lpFlags & LPFLG_REMOVED)
// Removing the loop's entry or bottom block destroys the loop entirely.
453         if (block == optLoopTable[loopNum].lpEntry || block == optLoopTable[loopNum].lpBottom)
455             optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED;
462             printf("\nUpdateLoopsBeforeRemoveBlock Before: ");
463             optPrintLoopInfo(loopNum);
467         /* If the loop is still in the table
468          * any block in the loop must be reachable !!! */
470         noway_assert(optLoopTable[loopNum].lpEntry != block);
471         noway_assert(optLoopTable[loopNum].lpBottom != block);
// Losing the recorded exit invalidates the single-exit property.
473         if (optLoopTable[loopNum].lpExit == block)
475             optLoopTable[loopNum].lpExit = nullptr;
476             optLoopTable[loopNum].lpFlags &= ~LPFLG_ONE_EXIT;
480         /* If this points to the actual entry in the loop
481          * then the whole loop may become unreachable */
// Inspect this block's successors (fall-through, jump, or switch targets)
// for edges into the loop entry.
483         switch (block->bbJumpKind)
486             BasicBlock** jumpTab;
490                 if (block->bbNext == optLoopTable[loopNum].lpEntry)
495                 if (block->bbJumpKind == BBJ_NONE)
503                 noway_assert(block->bbJumpDest);
504                 if (block->bbJumpDest == optLoopTable[loopNum].lpEntry)
511                 jumpCnt = block->bbJumpSwt->bbsCount;
512                 jumpTab = block->bbJumpSwt->bbsDstTab;
516                     noway_assert(*jumpTab);
517                     if ((*jumpTab) == optLoopTable[loopNum].lpEntry)
521                 } while (++jumpTab, --jumpCnt);
530             /* Check if the entry has other predecessors outside the loop
531              * TODO: Replace this when predecessors are available */
// Linear scan of all blocks as a stand-in for predecessor lists: look for
// any block outside the loop's lexical range that targets the entry.
533             BasicBlock* auxBlock;
534             for (auxBlock = fgFirstBB; auxBlock; auxBlock = auxBlock->bbNext)
536                 /* Ignore blocks in the loop */
538                 if (auxBlock->bbNum > optLoopTable[loopNum].lpHead->bbNum &&
539                     auxBlock->bbNum <= optLoopTable[loopNum].lpBottom->bbNum)
544                 switch (auxBlock->bbJumpKind)
547                     BasicBlock** jumpTab;
551                         if (auxBlock->bbNext == optLoopTable[loopNum].lpEntry)
556                         if (auxBlock->bbJumpKind == BBJ_NONE)
564                         noway_assert(auxBlock->bbJumpDest);
565                         if (auxBlock->bbJumpDest == optLoopTable[loopNum].lpEntry)
572                         jumpCnt = auxBlock->bbJumpSwt->bbsCount;
573                         jumpTab = auxBlock->bbJumpSwt->bbsDstTab;
577                             noway_assert(*jumpTab);
578                             if ((*jumpTab) == optLoopTable[loopNum].lpEntry)
582                         } while (++jumpTab, --jumpCnt);
// No outside predecessor found — the entry (and so the loop) dies with block.
592                 optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED;
595         else if (optLoopTable[loopNum].lpHead == block)
597             /* The loop has a new head - Just update the loop table */
598             optLoopTable[loopNum].lpHead = block->bbPrev;
604             printf("\nUpdateLoopsBeforeRemoveBlock After: ");
605             optPrintLoopInfo(loopNum);
// If the removed block carried the loop's only backedge, unmark the loop's
// blocks (guarded by dominator-info freshness checks).
610     if ((skipUnmarkLoop == false) && ((block->bbJumpKind == BBJ_ALWAYS) || (block->bbJumpKind == BBJ_COND)) &&
611         (block->bbJumpDest->isLoopHead()) && (block->bbJumpDest->bbNum <= block->bbNum) && fgDomsComputed &&
612         (fgCurBBEpochSize == fgDomBBcount + 1) && fgReachable(block->bbJumpDest, block))
614         optUnmarkLoopBlocks(block->bbJumpDest, block);
620 /*****************************************************************************
622 * Given the beginBlock of the loop, return the index of this loop
// optFindLoopNumberFromBeginBlock: return the loop-table index of the loop
// whose head's successor is 'begBlk'; asserts if no such loop exists.
626 unsigned Compiler::optFindLoopNumberFromBeginBlock(BasicBlock* begBlk)
630     for (lnum = 0; lnum < optLoopCount; lnum++)
// The block after the loop head is the loop's begin block.
632         if (optLoopTable[lnum].lpHead->bbNext == begBlk)
639     noway_assert(!"Loop number not found.");
644 /*****************************************************************************
646 * Print loop info in an uniform way.
// optPrintLoopInfo (detailed overload): dump one loop's blocks (first/top/
// bottom/head/entry), exit count/block and parent in a uniform format.
// NOTE(review): several parameters of the signature are elided in this view.
649 void Compiler::optPrintLoopInfo(unsigned      loopInd,
654                                 BasicBlock*   lpBottom,
655                                 unsigned char lpExitCnt,
659     noway_assert(lpHead);
662     // NOTE: we take "loopInd" as an argument instead of using the one
663     // stored in begBlk->bbLoopNum because sometimes begBlk->bbLoopNum
664     // has not be set correctly. For example, in optRecordLoop().
665     // However, in most of the cases, loops should have been recorded.
666     // Therefore the correct way is to call the Compiler::optPrintLoopInfo(unsigned lnum)
667     // version of this method.
669     printf("L%02u, from BB%02u", loopInd, lpFirst->bbNum);
670     if (lpTop != lpFirst)
672         printf(" (loop top is BB%02u)", lpTop->bbNum);
675     printf(" to BB%02u (Head=BB%02u, Entry=BB%02u, ExitCnt=%d", lpBottom->bbNum, lpHead->bbNum, lpEntry->bbNum,
680         printf(" at BB%02u", lpExit->bbNum);
683     if (parentLoop != BasicBlock::NOT_IN_LOOP)
685         printf(", parent loop = L%02u", parentLoop);
690 /*****************************************************************************
692 * Print loop information given the index of the loop in the loop table.
// optPrintLoopInfo(lnum): convenience overload — look up the loop descriptor
// by table index and forward all its fields to the detailed overload.
695 void Compiler::optPrintLoopInfo(unsigned lnum)
697     noway_assert(lnum < optLoopCount);
699     LoopDsc* ldsc = &optLoopTable[lnum]; // lnum is the INDEX to the loop table.
701     optPrintLoopInfo(lnum, ldsc->lpHead, ldsc->lpFirst, ldsc->lpTop, ldsc->lpEntry, ldsc->lpBottom, ldsc->lpExitCnt,
702                      ldsc->lpExit, ldsc->lpParent);
707 //------------------------------------------------------------------------
708 // optPopulateInitInfo: Populate loop init info in the loop table.
711 // init - the tree that is supposed to initialize the loop iterator.
712 // iterVar - loop iteration variable.
715 // "false" if the loop table could not be populated with the loop iterVar init info.
718 // The 'init' tree is checked if its lhs is a local and rhs is either
719 // a const or a local.
// optPopulateInitInfo: record the loop-iterator initialization in the loop
// table. Accepts "iterVar = <int const>" (LPFLG_CONST_INIT) or
// "iterVar = <local>" (LPFLG_VAR_INIT); returns false for any other shape.
721 bool Compiler::optPopulateInitInfo(unsigned loopInd, GenTreePtr init, unsigned iterVar)
723     // Operator should be =
724     if (init->gtOper != GT_ASG)
729     GenTreePtr lhs = init->gtOp.gtOp1;
730     GenTreePtr rhs = init->gtOp.gtOp2;
731     // LHS has to be local and should equal iterVar.
732     if (lhs->gtOper != GT_LCL_VAR || lhs->gtLclVarCommon.gtLclNum != iterVar)
737     // RHS can be constant or local var.
738     // TODO-CQ: CLONE: Add arr length for descending loops.
739     if (rhs->gtOper == GT_CNS_INT && rhs->TypeGet() == TYP_INT)
741         optLoopTable[loopInd].lpFlags |= LPFLG_CONST_INIT;
742         optLoopTable[loopInd].lpConstInit = (int)rhs->gtIntCon.gtIconVal;
744     else if (rhs->gtOper == GT_LCL_VAR)
746         optLoopTable[loopInd].lpFlags |= LPFLG_VAR_INIT;
747         optLoopTable[loopInd].lpVarInit = rhs->gtLclVarCommon.gtLclNum;
756 //----------------------------------------------------------------------------------
757 // optCheckIterInLoopTest: Check if iter var is used in loop test.
760 // test "jtrue" tree or an asg of the loop iter termination condition
761 // from/to blocks (beg, end) which are part of the loop.
762 // iterVar loop iteration variable.
763 // loopInd loop index.
766 // The test tree is parsed to check if "iterVar" matches the lhs of the condition
767 // and the rhs limit is extracted from the "test" tree. The limit information is
768 // added to the loop table.
771 // "false" if the loop table could not be populated with the loop test info or
772 // if the test condition doesn't involve iterVar.
// optCheckIterInLoopTest: verify the loop termination condition uses the
// iteration variable, classify the limit (const / loop-invariant local /
// array length) into loop-table flags, and record the relop test tree.
// NOTE(review): some statements/returns are elided in this view.
774 bool Compiler::optCheckIterInLoopTest(
775     unsigned loopInd, GenTreePtr test, BasicBlock* from, BasicBlock* to, unsigned iterVar)
777     // Obtain the relop from the "test" tree.
779     if (test->gtOper == GT_JTRUE)
781         relop = test->gtGetOp1();
// Otherwise the test was evaluated into a temp; take the assignment's RHS.
785         assert(test->gtOper == GT_ASG);
786         relop = test->gtGetOp2();
789     noway_assert(relop->OperKind() & GTK_RELOP);
791     GenTreePtr opr1 = relop->gtOp.gtOp1;
792     GenTreePtr opr2 = relop->gtOp.gtOp2;
797     // Make sure op1 or op2 is the iterVar.
798     if (opr1->gtOper == GT_LCL_VAR && opr1->gtLclVarCommon.gtLclNum == iterVar)
803     else if (opr2->gtOper == GT_LCL_VAR && opr2->gtLclVarCommon.gtLclNum == iterVar)
// Only TYP_INT iterators are handled.
813     if (iterOp->gtType != TYP_INT)
818     // Mark the iterator node.
819     iterOp->gtFlags |= GTF_VAR_ITERATOR;
821     // Check what type of limit we have - constant, variable or arr-len.
822     if (limitOp->gtOper == GT_CNS_INT)
824         optLoopTable[loopInd].lpFlags |= LPFLG_CONST_LIMIT;
// A local limit only qualifies if it is not assigned anywhere in the loop.
826     else if (limitOp->gtOper == GT_LCL_VAR && !optIsVarAssigned(from, to, nullptr, limitOp->gtLclVarCommon.gtLclNum))
828         optLoopTable[loopInd].lpFlags |= LPFLG_VAR_LIMIT;
830     else if (limitOp->gtOper == GT_ARR_LENGTH)
832         optLoopTable[loopInd].lpFlags |= LPFLG_ARRLEN_LIMIT;
838     // Save the type of the comparison between the iterator and the limit.
839     optLoopTable[loopInd].lpTestTree = relop;
843 //----------------------------------------------------------------------------------
844 // optIsLoopIncrTree: Check if loop is a tree of form v += 1 or v = v + 1
847 // incr The incr tree to be checked. Whether incr tree is
848 // oper-equal(+=, -=...) type nodes or v=v+1 type ASG nodes.
851 // The test tree is parsed to check if "iterVar" matches the lhs of the condition
852 // and the rhs limit is extracted from the "test" tree. The limit information is
853 // added to the loop table.
856 // iterVar local num if the iterVar is found, otherwise BAD_VAR_NUM.
// optIsLoopIncrTree: pattern-match "v += c" / "v = v op c" update trees and
// return the updated local's number, or BAD_VAR_NUM when the tree does not
// match (e.g. when the increment is not an int constant).
858 unsigned Compiler::optIsLoopIncrTree(GenTreePtr incr)
861     genTreeOps updateOper;
862     unsigned   iterVar = incr->IsLclVarUpdateTree(&incrVal, &updateOper);
863     if (iterVar != BAD_VAR_NUM)
865         // We have v = v op y type asg node.
878         // Increment should be by a const int.
879         // TODO-CQ: CLONE: allow variable increments.
880         if ((incrVal->gtOper != GT_CNS_INT) || (incrVal->TypeGet() != TYP_INT))
889 //----------------------------------------------------------------------------------
890 // optComputeIterInfo: Check tree is loop increment of a lcl that is loop-invariant.
893 // from, to - are blocks (beg, end) which are part of the loop.
894 // incr - tree that increments the loop iterator. v+=1 or v=v+1.
895 // pIterVar - see return value.
898 // Returns true if iterVar "v" can be returned in "pIterVar", otherwise returns
902 // Check if the "incr" tree is a "v=v+1 or v+=1" type tree and make sure it is not
903 // assigned in the loop.
// optComputeIterInfo: confirm 'incr' is a loop-increment tree and that its
// iterator local is not otherwise assigned within [from, to]; on success the
// iterator's local number is returned through pIterVar.
905 bool Compiler::optComputeIterInfo(GenTreePtr incr, BasicBlock* from, BasicBlock* to, unsigned* pIterVar)
908     unsigned iterVar = optIsLoopIncrTree(incr);
909     if (iterVar == BAD_VAR_NUM)
// A second assignment to the iterator disqualifies the loop.
913     if (optIsVarAssigned(from, to, incr, iterVar))
915         JITDUMP("iterVar is assigned in loop\n");
923 //----------------------------------------------------------------------------------
924 // optIsLoopTestEvalIntoTemp:
925 // Pattern match if the test tree is computed into a tmp
926 // and the "tmp" is used as jump condition for loop termination.
929 // testStmt - is the JTRUE statement that is of the form: jmpTrue (Vtmp != 0)
930 // where Vtmp contains the actual loop test result.
931 // newStmt - contains the statement that is the actual test stmt involving
932 // the loop iterator.
935 // Returns true if a new test tree can be obtained.
938 // Scan if the current stmt is a jtrue with (Vtmp != 0) as condition
939 // Then returns the rhs for def of Vtmp as the "test" node.
942 // This method just retrieves what it thinks is the "test" node,
943 // the callers are expected to verify that "iterVar" is used in the test.
// optIsLoopTestEvalIntoTemp: pattern-match "jmpTrue (Vtmp != 0)" where the
// previous statement assigns a comparison into Vtmp; if matched, return the
// statement holding that real test through newTest. The caller still has to
// verify the iterator participates in the returned test.
945 bool Compiler::optIsLoopTestEvalIntoTemp(GenTreePtr testStmt, GenTreePtr* newTest)
947     GenTreePtr test = testStmt->gtStmt.gtStmtExpr;
949     if (test->gtOper != GT_JTRUE)
954     GenTreePtr relop = test->gtGetOp1();
955     noway_assert(relop->OperIsCompare());
957     GenTreePtr opr1 = relop->gtOp.gtOp1;
958     GenTreePtr opr2 = relop->gtOp.gtOp2;
960     // Make sure we have jtrue (vtmp != 0)
961     if ((relop->OperGet() == GT_NE) && (opr1->OperGet() == GT_LCL_VAR) && (opr2->OperGet() == GT_CNS_INT) &&
962         opr2->IsIntegralConst(0))
964         // Get the previous statement to get the def (rhs) of Vtmp to see
965         // if the "test" is evaluated into Vtmp.
966         GenTreePtr prevStmt = testStmt->gtPrev;
967         if (prevStmt == nullptr)
972         GenTreePtr tree = prevStmt->gtStmt.gtStmtExpr;
973         if (tree->OperGet() == GT_ASG)
975             GenTreePtr lhs = tree->gtOp.gtOp1;
976             GenTreePtr rhs = tree->gtOp.gtOp2;
978             // Return as the new test node.
// The def's LHS local must be the same temp used in the JTRUE compare.
979             if (lhs->gtOper == GT_LCL_VAR && lhs->AsLclVarCommon()->GetLclNum() == opr1->AsLclVarCommon()->GetLclNum())
981                 if (rhs->OperIsCompare())
992 //----------------------------------------------------------------------------------
993 // optExtractInitTestIncr:
994 // Extract the "init", "test" and "incr" nodes of the loop.
997 // head - Loop head block
998 // bottom - Loop bottom block
999 // top - Loop top block
1000 // ppInit - The init stmt of the loop if found.
1001 // ppTest - The test stmt of the loop if found.
1002 // ppIncr - The incr stmt of the loop if found.
1005 // The results are put in "ppInit", "ppTest" and "ppIncr" if the method
1006 // returns true. Returns false if the information can't be extracted.
1009 // Check if the "test" stmt is last stmt in the loop "bottom". If found good,
1010 // "test" stmt is found. Try to find the "incr" stmt. Check previous stmt of
1011 // "test" to get the "incr" stmt. If it is not found it could be a loop of the
1014 // +-------<-----------------<-----------+
1017 // BBinit(head) -> BBcond(top) -> BBLoopBody(bottom) ---^
1019 // Check if the "incr" tree is present in the loop "top" node as the last stmt.
1020 // Also check if the "test" tree is assigned to a tmp node and the tmp is used
1021 // in the jtrue condition.
1024 // This method just retrieves what it thinks is the "test" node,
1025 // the callers are expected to verify that "iterVar" is used in the test.
// optExtractInitTestIncr: locate the loop's "init", "test" and "incr" trees.
// The test is expected to be the last statement of 'bottom'; the incr is
// either the statement before it, or the last statement of 'top'; the init
// is the last statement of 'head' (skipping a duplicated loop condition).
// Returns false (returns are elided in this view) when any piece is missing.
1027 bool Compiler::optExtractInitTestIncr(
1028     BasicBlock* head, BasicBlock* bottom, BasicBlock* top, GenTreePtr* ppInit, GenTreePtr* ppTest, GenTreePtr* ppIncr)
1030     assert(ppInit != nullptr);
1031     assert(ppTest != nullptr);
1032     assert(ppIncr != nullptr);
1034     // Check if last two statements in the loop body are the increment of the iterator
1035     // and the loop termination test.
// bbTreeList->gtPrev is the last statement of the block (circular stmt list).
1036     noway_assert(bottom->bbTreeList != nullptr);
1037     GenTreePtr test = bottom->bbTreeList->gtPrev;
1038     noway_assert(test != nullptr && test->gtNext == nullptr);
1041     if (optIsLoopTestEvalIntoTemp(test, &newTest))
1046     // Check if we have the incr tree before the test tree, if we don't,
1047     // check if incr is part of the loop "top".
1048     GenTreePtr incr = test->gtPrev;
1049     if (incr == nullptr || optIsLoopIncrTree(incr->gtStmt.gtStmtExpr) == BAD_VAR_NUM)
1051         if (top == nullptr || top->bbTreeList == nullptr || top->bbTreeList->gtPrev == nullptr)
1056         // If the prev stmt to loop test is not incr, then check if we have loop test evaluated into a tmp.
1057         GenTreePtr topLast = top->bbTreeList->gtPrev;
1058         if (optIsLoopIncrTree(topLast->gtStmt.gtStmtExpr) != BAD_VAR_NUM)
1068     assert(test != incr);
1070     // Find the last statement in the loop pre-header which we expect to be the initialization of
1071     // the loop iterator.
1072     GenTreePtr phdr = head->bbTreeList;
1073     if (phdr == nullptr)
1078     GenTreePtr init = phdr->gtPrev;
1079     noway_assert(init != nullptr && (init->gtNext == nullptr));
1081     // If it is a duplicated loop condition, skip it.
1082     if (init->gtFlags & GTF_STMT_CMPADD)
1084         // Must be a duplicated loop condition.
1085         noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
1086         init = init->gtPrev;
1087         noway_assert(init != nullptr);
1090     noway_assert(init->gtOper == GT_STMT);
1091     noway_assert(test->gtOper == GT_STMT);
1092     noway_assert(incr->gtOper == GT_STMT);
// Hand back the expression trees (not the GT_STMT wrappers).
1094     *ppInit = init->gtStmt.gtStmtExpr;
1095     *ppTest = test->gtStmt.gtStmtExpr;
1096     *ppIncr = incr->gtStmt.gtStmtExpr;
1101 /*****************************************************************************
1103 * Record the loop in the loop table.
// optRecordLoop: insert a newly discovered natural loop into optLoopTable,
// keeping the table ordered so containing loops precede contained ones.
// Also tries to recognize a for-like iterator pattern (init/test/incr) and,
// when found, sets LPFLG_ITER / LPFLG_CONST and related fields.
// NOTE(review): several parameters, braces and statements are elided here.
1106 void Compiler::optRecordLoop(BasicBlock*   head,
1112                              unsigned char exitCnt)
1114     // Record this loop in the table, if there's room.
1116     assert(optLoopCount <= MAX_LOOP_NUM);
1117     if (optLoopCount == MAX_LOOP_NUM)
// Table full — remember the overflow and give up on recording this loop.
1120         loopOverflowThisMethod = true;
1125     // Assumed preconditions on the loop we're adding.
1126     assert(first->bbNum <= top->bbNum);
1127     assert(top->bbNum <= entry->bbNum);
1128     assert(entry->bbNum <= bottom->bbNum);
1129     assert(head->bbNum < top->bbNum || head->bbNum > bottom->bbNum);
1131     // If the new loop contains any existing ones, add it in the right place.
1132     unsigned char loopInd = optLoopCount;
1133     for (unsigned char prevPlus1 = optLoopCount; prevPlus1 > 0; prevPlus1--)
1135         unsigned char prev = prevPlus1 - 1;
1136         if (optLoopTable[prev].lpContainedBy(first, bottom))
1141     // Move up any loops if necessary.
1142     for (unsigned j = optLoopCount; j > loopInd; j--)
1144         optLoopTable[j] = optLoopTable[j - 1];
// Sanity: every later loop must be either disjoint from, or contained in,
// the new loop.
1148     for (unsigned i = loopInd + 1; i < optLoopCount; i++)
1150         // The loop is well-formed.
1151         assert(optLoopTable[i].lpWellFormed());
1152         // Check for disjoint.
1153         if (optLoopTable[i].lpDisjoint(first, bottom))
1157         // Otherwise, assert complete containment (of optLoopTable[i] in new loop).
1158         assert(optLoopTable[i].lpContainedBy(first, bottom));
// Fill in the new descriptor.
1162     optLoopTable[loopInd].lpHead    = head;
1163     optLoopTable[loopInd].lpFirst   = first;
1164     optLoopTable[loopInd].lpTop     = top;
1165     optLoopTable[loopInd].lpBottom  = bottom;
1166     optLoopTable[loopInd].lpEntry   = entry;
1167     optLoopTable[loopInd].lpExit    = exit;
1168     optLoopTable[loopInd].lpExitCnt = exitCnt;
1170     optLoopTable[loopInd].lpParent  = BasicBlock::NOT_IN_LOOP;
1171     optLoopTable[loopInd].lpChild   = BasicBlock::NOT_IN_LOOP;
1172     optLoopTable[loopInd].lpSibling = BasicBlock::NOT_IN_LOOP;
1174     optLoopTable[loopInd].lpFlags = 0;
1176     // We haven't yet recorded any side effects.
1177     optLoopTable[loopInd].lpLoopHasHeapHavoc       = false;
1178     optLoopTable[loopInd].lpFieldsModified         = nullptr;
1179     optLoopTable[loopInd].lpArrayElemTypesModified = nullptr;
1181     // If DO-WHILE loop mark it as such.
1182     if (head->bbNext == entry)
1184         optLoopTable[loopInd].lpFlags |= LPFLG_DO_WHILE;
1187     // If single exit loop mark it as such.
1191         optLoopTable[loopInd].lpFlags |= LPFLG_ONE_EXIT;
1195     // Try to find loops that have an iterator (i.e. for-like loops) "for (init; test; incr){ ... }"
1196     // We have the following restrictions:
1197     //     1. The loop condition must be a simple one i.e. only one JTRUE node
1198     //     2. There must be a loop iterator (a local var) that is
1199     //        incremented (decremented or lsh, rsh, mul) with a constant value
1200     //     3. The iterator is incremented exactly once
1201     //     4. The loop condition must use the iterator.
1203     if (bottom->bbJumpKind == BBJ_COND)
1208         if (!optExtractInitTestIncr(head, bottom, top, &init, &test, &incr))
1213         unsigned iterVar = BAD_VAR_NUM;
1214         if (!optComputeIterInfo(incr, head->bbNext, bottom, &iterVar))
1219         // Make sure the "iterVar" initialization is never skipped,
1220         // i.e. HEAD dominates the ENTRY.
1221         if (!fgDominate(head, entry))
1226         if (!optPopulateInitInfo(loopInd, init, iterVar))
1231         // Check that the iterator is used in the loop condition.
1232         if (!optCheckIterInLoopTest(loopInd, test, head->bbNext, bottom, iterVar))
1237         // We know the loop has an iterator at this point ->flag it as LPFLG_ITER
1238         // Record the iterator, the pointer to the test node
1239         // and the initial value of the iterator (constant or local var)
1240         optLoopTable[loopInd].lpFlags |= LPFLG_ITER;
1243         optLoopTable[loopInd].lpIterTree = incr;
1246         // Save the initial value of the iterator - can be lclVar or constant
1247         // Flag the loop accordingly.
1253         simpleTestLoopCount++;
1256         // Check if a constant iteration loop.
1257         if ((optLoopTable[loopInd].lpFlags & LPFLG_CONST_INIT) && (optLoopTable[loopInd].lpFlags & LPFLG_CONST_LIMIT))
1259             // This is a constant loop.
1260             optLoopTable[loopInd].lpFlags |= LPFLG_CONST;
1262             constIterLoopCount++;
1269             printf("\nConstant loop initializer:\n");
1272             printf("\nConstant loop body:\n");
1274             BasicBlock* block = head;
1277                 block = block->bbNext;
1278                 for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
1280                     if (stmt->gtStmt.gtStmtExpr == incr)
1285                     gtDispTree(stmt->gtStmt.gtStmtExpr);
1287             } while (block != bottom);
1293     DBEXEC(verbose, optPrintLoopRecording(loopInd));
1298 //------------------------------------------------------------------------
1299 // optPrintLoopRecording: Print a recording of the loop.
1302 // loopInd - loop index.
// optPrintLoopRecording: debug dump of a just-recorded loop — its blocks,
// and (when LPFLG_ITER is set) the iterator, its init value, the test
// operator and the limit (constant or local).
1304 void Compiler::optPrintLoopRecording(unsigned loopInd)
1306     printf("Recorded loop %s", (loopInd != optLoopCount ? "(extended) " : ""));
1307     optPrintLoopInfo(optLoopCount, // Not necessarily the loop index, but the number of loops that have been added.
1308                      optLoopTable[loopInd].lpHead, optLoopTable[loopInd].lpFirst, optLoopTable[loopInd].lpTop,
1309                      optLoopTable[loopInd].lpEntry, optLoopTable[loopInd].lpBottom, optLoopTable[loopInd].lpExitCnt,
1310                      optLoopTable[loopInd].lpExit);
1312     // If an iterator loop print the iterator and the initialization.
1313     if (optLoopTable[loopInd].lpFlags & LPFLG_ITER)
1315         printf(" [over V%02u", optLoopTable[loopInd].lpIterVar());
1317         printf(GenTree::NodeName(optLoopTable[loopInd].lpIterOper()));
1319         printf("%d )", optLoopTable[loopInd].lpIterConst());
1321         if (optLoopTable[loopInd].lpFlags & LPFLG_CONST_INIT)
1323             printf(" from %d", optLoopTable[loopInd].lpConstInit);
1325         if (optLoopTable[loopInd].lpFlags & LPFLG_VAR_INIT)
1327             printf(" from V%02u", optLoopTable[loopInd].lpVarInit);
1330         // If a simple test condition print operator and the limits */
1331         printf(GenTree::NodeName(optLoopTable[loopInd].lpTestOper()));
1333         if (optLoopTable[loopInd].lpFlags & LPFLG_CONST_LIMIT)
1335             printf("%d ", optLoopTable[loopInd].lpConstLimit());
1338         if (optLoopTable[loopInd].lpFlags & LPFLG_VAR_LIMIT)
1340             printf("V%02u ", optLoopTable[loopInd].lpVarLimit());
// optCheckPreds: debug-only consistency check of the predecessor lists —
// every recorded predecessor must be a live block whose jump kind actually
// produces an edge to the successor block.
1349 void Compiler::optCheckPreds()
1352     BasicBlock* blockPred;
1355     for (block = fgFirstBB; block; block = block->bbNext)
1357         for (pred = block->bbPreds; pred; pred = pred->flNext)
1359             // make sure this pred is part of the BB list
1360             for (blockPred = fgFirstBB; blockPred; blockPred = blockPred->bbNext)
1362                 if (blockPred == pred->flBlock)
1367             noway_assert(blockPred);
// Validate that the predecessor's jump kind really targets 'block'.
1368             switch (blockPred->bbJumpKind)
1371                     if (blockPred->bbJumpDest == block)
1377                     noway_assert(blockPred->bbNext == block);
1379                 case BBJ_EHFILTERRET:
1381                 case BBJ_EHCATCHRET:
1382                     noway_assert(blockPred->bbJumpDest == block);
1393 /*****************************************************************************
1394 * Find the natural loops, using dominators. Note that the test for
1395 * a loop is slightly different from the standard one, because we have
1396 * not done a depth first reordering of the basic blocks.
// Detect natural loops using dominator information (fgDomsComputed must be set),
// record them in optLoopTable via optRecordLoop, build the loop parent/child/
// sibling relationships, label each block with its innermost loop number
// (bbNatLoopNum), and finally canonicalize the loop nest so every loop has a
// unique "top" block.
1399 void Compiler::optFindNaturalLoops()
1404 printf("*************** In optFindNaturalLoops()\n");
1410 flowList* predEntry;
// Dominators are a hard prerequisite: entry-dominance checks below rely on them.
1412 noway_assert(fgDomsComputed);
1416 hasMethodLoops = false;
1417 loopsThisMethod = 0;
1418 loopOverflowThisMethod = false;
1421 /* We will use the following terminology:
1422 * HEAD - the basic block that flows into the loop ENTRY block (Currently MUST be lexically before entry).
1423 Not part of the looping of the loop.
1424 * FIRST - the lexically first basic block (in bbNext order) within this loop. (May be part of a nested loop,
1425 * but not the outer loop. ???)
1426 * TOP - the target of the backward edge from BOTTOM. In most cases FIRST and TOP are the same.
1427 * BOTTOM - the lexically last block in the loop (i.e. the block from which we jump to the top)
1428 * EXIT - the loop exit or the block right after the bottom
1429 * ENTRY - the entry in the loop (not necessarily the TOP), but there must be only one entry
1431 * We (currently) require the body of a loop to be a contiguous (in bbNext order) sequence of basic blocks.
1465 unsigned char exitCount;
// Outer scan: try every block as a potential HEAD (the block just before TOP).
1467 for (head = fgFirstBB; head->bbNext; head = head->bbNext)
1473 // Blocks that are rarely run have a zero bbWeight and should
1474 // never be optimized here
1476 if (top->bbWeight == BB_ZERO_WEIGHT)
// A back edge into 'top' from a lexically-later block marks a loop candidate.
1481 for (pred = top->bbPreds; pred; pred = pred->flNext)
1483 /* Is this a loop candidate? - We look for "back edges", i.e. an edge from BOTTOM
1484 * to TOP (note that this is an abuse of notation since this is not necessarily a back edge
1485 * as the definition says, but merely an indication that we have a loop there).
1486 * Thus, we have to be very careful and after entry discovery check that it is indeed
1487 * the only place we enter the loop (especially for non-reducible flow graphs).
1490 bottom = pred->flBlock;
1493 if (top->bbNum <= bottom->bbNum) // is this a backward edge? (from BOTTOM to TOP)
1495 if ((bottom->bbJumpKind == BBJ_EHFINALLYRET) || (bottom->bbJumpKind == BBJ_EHFILTERRET) ||
1496 (bottom->bbJumpKind == BBJ_EHCATCHRET) || (bottom->bbJumpKind == BBJ_CALLFINALLY) ||
1497 (bottom->bbJumpKind == BBJ_SWITCH))
1499 /* BBJ_EHFINALLYRET, BBJ_EHFILTERRET, BBJ_EHCATCHRET, and BBJ_CALLFINALLY can never form a loop.
1500 * BBJ_SWITCH that has a backward jump appears only for labeled break. */
1504 BasicBlock* loopBlock;
1506 /* The presence of a "back edge" is an indication that a loop might be present here
1509 * 1. A collection of STRONGLY CONNECTED nodes i.e. there is a path from any
1510 * node in the loop to any other node in the loop (wholly within the loop)
1511 * 2. The loop has a unique ENTRY, i.e. there is only one way to reach a node
1512 * in the loop from outside the loop, and that is through the ENTRY
1515 /* Let's find the loop ENTRY */
1517 if (head->bbJumpKind == BBJ_ALWAYS)
1519 if (head->bbJumpDest->bbNum <= bottom->bbNum && head->bbJumpDest->bbNum >= top->bbNum)
1521 /* OK - we enter somewhere within the loop */
1522 entry = head->bbJumpDest;
1524 /* some useful asserts
1525 * Cannot enter at the top - should have been caught by redundant jumps */
1527 assert((entry != top) || (head->bbFlags & BBF_KEEP_BBJ_ALWAYS));
1531 /* special case - don't consider now */
1532 // assert (!"Loop entered in weird way!");
1536 // Can we fall through into the loop?
1537 else if (head->bbJumpKind == BBJ_NONE || head->bbJumpKind == BBJ_COND)
1539 /* The ENTRY is at the TOP (a do-while loop) */
1544 goto NO_LOOP; // head does not flow into the loop bail for now
1547 // Now we find the "first" block -- the earliest block reachable within the loop.
1548 // This is usually the same as "top", but can differ in rare cases where "top" is
1549 // the entry block of a nested loop, and that nested loop branches backwards to
1550 // a block before "top". We find this by searching for such backwards branches
1551 // in the loop known so far.
1552 BasicBlock* first = top;
1553 BasicBlock* newFirst;
1554 bool blocksToSearch = true;
// 'validatedAfter' marks the end of the range already scanned; the while loop
// re-scans whenever extending 'first' backwards exposes new blocks.
1555 BasicBlock* validatedAfter = bottom->bbNext;
1556 while (blocksToSearch)
1558 blocksToSearch = false;
1560 blocksToSearch = false;
1561 for (loopBlock = first; loopBlock != validatedAfter; loopBlock = loopBlock->bbNext)
1563 unsigned nSucc = loopBlock->NumSucc();
1564 for (unsigned j = 0; j < nSucc; j++)
1566 BasicBlock* succ = loopBlock->GetSucc(j);
// Track the lexically-earliest successor seen so far that precedes 'first'.
1567 if ((newFirst == nullptr && succ->bbNum < first->bbNum) ||
1568 (newFirst != nullptr && succ->bbNum < newFirst->bbNum))
1574 if (newFirst != nullptr)
1576 validatedAfter = first;
1578 blocksToSearch = true;
1582 // Is "head" still before "first"? If not, we don't have a valid loop...
1583 if (head->bbNum >= first->bbNum)
1586 "Extending loop [BB%02u..BB%02u] 'first' to BB%02u captures head BB%02u. Rejecting loop.\n",
1587 top->bbNum, bottom->bbNum, first->bbNum, head->bbNum);
1591 /* Make sure ENTRY dominates all blocks in the loop
1592 * This is necessary to ensure condition 2. above
1593 * At the same time check if the loop has a single exit
1594 * point - those loops are easier to optimize */
1596 for (loopBlock = top; loopBlock != bottom->bbNext; loopBlock = loopBlock->bbNext)
1598 if (!fgDominate(entry, loopBlock))
1603 if (loopBlock == bottom)
1605 if (bottom->bbJumpKind != BBJ_ALWAYS)
1607 /* there is an exit at the bottom */
// The back edge itself must target 'top'; anything else is malformed.
1609 noway_assert(bottom->bbJumpDest == top);
1616 BasicBlock* exitPoint;
1618 switch (loopBlock->bbJumpKind)
1621 case BBJ_CALLFINALLY:
1623 case BBJ_EHCATCHRET:
1624 assert(loopBlock->bbJumpDest);
1625 exitPoint = loopBlock->bbJumpDest;
// A jump target outside [top..bottom] is a loop exit.
1627 if (exitPoint->bbNum < top->bbNum || exitPoint->bbNum > bottom->bbNum)
1629 /* exit from a block other than BOTTOM */
1638 case BBJ_EHFINALLYRET:
1639 case BBJ_EHFILTERRET:
1640 /* The "try" associated with this "finally" must be in the
1641 * same loop, so the finally block will return control inside the loop */
1646 /* those are exits from the loop */
// Switch: every jump-table target outside the loop range counts as an exit.
1654 jumpCnt = loopBlock->bbJumpSwt->bbsCount;
1655 BasicBlock** jumpTab;
1656 jumpTab = loopBlock->bbJumpSwt->bbsDstTab;
1660 noway_assert(*jumpTab);
1661 exitPoint = *jumpTab;
1663 if (exitPoint->bbNum < top->bbNum || exitPoint->bbNum > bottom->bbNum)
1668 } while (++jumpTab, --jumpCnt);
1672 noway_assert(!"Unexpected bbJumpKind");
1677 /* Make sure we can iterate the loop (i.e. there is a way back to ENTRY)
1678 * This is to ensure condition 1. above which prevents marking fake loops
1680 * Below is an example:
1688 * The example above is not a loop since we bail after the first iteration
1690 * The condition we have to check for is
1691 * 1. ENTRY must have at least one predecessor inside the loop. Since we know that that block is
1692 * reachable, it can only be reached through ENTRY, therefore we have a way back to ENTRY
1694 * 2. If we have a GOTO (BBJ_ALWAYS) outside of the loop and that block dominates the
1695 * loop bottom then we cannot iterate
1697 * NOTE that this doesn't entirely satisfy condition 1. since "break" statements are not
1698 * part of the loop nodes (as per definition they are loop exits executed only once),
1699 * but we have no choice but to include them because we consider all blocks within TOP-BOTTOM */
1701 for (loopBlock = top; loopBlock != bottom; loopBlock = loopBlock->bbNext)
1703 switch (loopBlock->bbJumpKind)
1708 if (fgDominate(loopBlock, bottom))
1717 bool canIterateLoop = false;
// Condition 1: ENTRY must have at least one in-loop predecessor.
1719 for (predEntry = entry->bbPreds; predEntry; predEntry = predEntry->flNext)
1721 if (predEntry->flBlock->bbNum >= top->bbNum && predEntry->flBlock->bbNum <= bottom->bbNum)
1723 canIterateLoop = true;
1726 else if (predEntry->flBlock != head)
1728 // The entry block has multiple predecessors outside the loop; the 'head'
1729 // block isn't the only one. We only support a single 'head', so bail.
1734 if (!canIterateLoop)
1739 /* Double check - make sure that all loop blocks except ENTRY
1740 * have no predecessors outside the loop - this ensures only one loop entry and prevents
1741 * us from considering non-loops due to incorrectly assuming that we had a back edge
1744 * Loops of the form "while (a || b)" will be treated as 2 nested loops (with the same header)
1747 for (loopBlock = top; loopBlock != bottom->bbNext; loopBlock = loopBlock->bbNext)
1749 if (loopBlock == entry)
1754 for (predTop = loopBlock->bbPreds; predTop != nullptr; predTop = predTop->flNext)
1756 if (predTop->flBlock->bbNum < top->bbNum || predTop->flBlock->bbNum > bottom->bbNum)
1758 // noway_assert(!"Found loop with multiple entries");
1764 // Disqualify loops where the first block of the loop is less nested in EH than
1765 // the bottom block. That is, we don't want to handle loops where the back edge
1766 // goes from within an EH region to a first block that is outside that same EH
1767 // region. Note that we *do* handle loops where the first block is the *first*
1768 // block of a more nested EH region (since it is legal to branch to the first
1769 // block of an immediately more nested EH region). So, for example, disqualify
1776 // BB10 BBJ_COND => BB02
1780 // Here, BB10 is more nested than BB02.
1782 if (bottom->hasTryIndex() && !bbInTryRegions(bottom->getTryIndex(), first))
1784 JITDUMP("Loop 'first' BB%02u is in an outer EH region compared to loop 'bottom' BB%02u. Rejecting "
1786 first->bbNum, bottom->bbNum);
1790 #if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
1791 // Disqualify loops where the first block of the loop is a finally target.
1792 // The main problem is when multiple loops share a 'first' block that is a finally
1793 // target and we canonicalize the loops by adding a new loop head. In that case, we
1794 // need to update the blocks so the finally target bit is moved to the newly created
1795 // block, and removed from the old 'first' block. This is 'hard', so at this point
1796 // in the RyuJIT codebase (when we don't expect to keep the "old" ARM32 code generator
1797 // long-term), it's easier to disallow the loop than to update the flow graph to
1798 // support this case.
1800 if ((first->bbFlags & BBF_FINALLY_TARGET) != 0)
1802 JITDUMP("Loop 'first' BB%02u is a finally target. Rejecting loop.\n", first->bbNum)
1805 #endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
1807 /* At this point we have a loop - record it in the loop table
1808 * If we found only one exit, record it in the table too
1809 * (otherwise an exit = 0 in the loop table means multiple exits) */
1816 optRecordLoop(head, first, top, entry, bottom, exit, exitCount);
1819 if (!hasMethodLoops)
1821 /* mark the method as containing natural loops */
1823 hasMethodLoops = true;
1826 /* increment total number of loops found */
1830 /* keep track of the number of exits */
1831 loopExitCountTable.record(static_cast<unsigned>(exitCount));
1832 #endif // COUNT_LOOPS
1835 /* current predecessor not good for a loop - continue with another one, if any */
1841 loopCountTable.record(loopsThisMethod);
1842 if (maxLoopsPerMethod < loopsThisMethod)
1844 maxLoopsPerMethod = loopsThisMethod;
1846 if (loopOverflowThisMethod)
1848 totalLoopOverflows++;
1850 #endif // COUNT_LOOPS
1852 // Now the loop indices are stable. We can figure out parent/child relationships
1853 // (using table indices to name loops), and label blocks.
1854 for (unsigned char loopInd = 1; loopInd < optLoopCount; loopInd++)
// Walk backwards from loopInd: the nearest earlier containing loop is the parent.
1856 for (unsigned char possibleParent = loopInd; possibleParent > 0;)
1859 if (optLoopTable[possibleParent].lpContains(optLoopTable[loopInd]))
1861 optLoopTable[loopInd].lpParent = possibleParent;
1862 optLoopTable[loopInd].lpSibling = optLoopTable[possibleParent].lpChild;
1863 optLoopTable[possibleParent].lpChild = loopInd;
1869 // Now label the blocks with the innermost loop to which they belong. Since parents
1870 // precede children in the table, doing the labeling for each loop in order will achieve
1871 // this -- the innermost loop labeling will be done last.
1872 for (unsigned char loopInd = 0; loopInd < optLoopCount; loopInd++)
1874 BasicBlock* first = optLoopTable[loopInd].lpFirst;
1875 BasicBlock* bottom = optLoopTable[loopInd].lpBottom;
1876 for (BasicBlock* blk = first; blk != nullptr; blk = blk->bbNext)
1878 blk->bbNatLoopNum = loopInd;
1883 assert(blk->bbNext != nullptr); // We should never reach nullptr.
1887 // Make sure that loops are canonical: that every loop has a unique "top", by creating an empty "nop"
1888 // one, if necessary, for loops containing others that share a "top."
1890 for (unsigned char loopInd = 0; loopInd < optLoopCount; loopInd++)
1892 // Traverse the outermost loops as entries into the loop nest; so skip non-outermost.
1893 if (optLoopTable[loopInd].lpParent != BasicBlock::NOT_IN_LOOP)
1899 if (optCanonicalizeLoopNest(loopInd))
// Canonicalization inserted blocks; recompute derived flow-graph info.
1906 fgUpdateChangedFlowGraph();
1910 if (verbose && optLoopCount > 0)
1912 printf("\nFinal natural loop table:\n");
1913 for (unsigned loopInd = 0; loopInd < optLoopCount; loopInd++)
1915 optPrintLoopInfo(loopInd);
// Retarget the jump destination(s) of 'blk' according to 'redirectMap'
// (old target -> new target). Targets not present in the map are left
// unchanged. Note: this does NOT update predecessor lists (see callers).
1922 void Compiler::optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap)
1924 BasicBlock* newJumpDest = nullptr;
1925 switch (blk->bbJumpKind)
1930 case BBJ_EHFILTERRET:
1931 case BBJ_EHFINALLYRET:
1932 case BBJ_EHCATCHRET:
1933 // These have no jump destination to update.
1938 case BBJ_CALLFINALLY:
1940 // All of these have a single jump destination to update.
1941 if (redirectMap->Lookup(blk->bbJumpDest, &newJumpDest))
1943 blk->bbJumpDest = newJumpDest;
// Switch: rewrite each entry of the jump table that the map covers.
1949 bool redirected = false;
1950 for (unsigned i = 0; i < blk->bbJumpSwt->bbsCount; i++)
1952 if (redirectMap->Lookup(blk->bbJumpSwt->bbsDstTab[i], &newJumpDest))
1954 blk->bbJumpSwt->bbsDstTab[i] = newJumpDest;
1958 // If any redirections happened, invalidate the switch table map for the switch.
1961 GetSwitchDescMap()->Remove(blk);
1971 // TODO-Cleanup: This should be a static member of the BasicBlock class.
// Copy the jump destination(s) of 'from' onto 'to'. Both blocks must
// already have the same bbJumpKind. For switches, a fresh BBswtDesc and
// destination table are allocated so 'to' does not alias 'from's table.
1972 void Compiler::optCopyBlkDest(BasicBlock* from, BasicBlock* to)
1974 assert(from->bbJumpKind == to->bbJumpKind); // Precondition.
1976 // copy the jump destination(s) from "from" to "to".
1977 switch (to->bbJumpKind)
1981 case BBJ_CALLFINALLY:
1983 // All of these have a single jump destination to update.
1984 to->bbJumpDest = from->bbJumpDest;
// Deep-copy the switch descriptor: new descriptor, new table, copied entries.
1989 to->bbJumpSwt = new (this, CMK_BasicBlock) BBswtDesc();
1990 to->bbJumpSwt->bbsCount = from->bbJumpSwt->bbsCount;
1991 to->bbJumpSwt->bbsDstTab = new (this, CMK_BasicBlock) BasicBlock*[from->bbJumpSwt->bbsCount];
1993 for (unsigned i = 0; i < from->bbJumpSwt->bbsCount; i++)
1995 to->bbJumpSwt->bbsDstTab[i] = from->bbJumpSwt->bbsDstTab[i];
2005 // Canonicalize the loop nest rooted at parent loop 'loopInd'.
2006 // Returns 'true' if the flow graph is modified.
// Canonicalizes this loop (if its top is shared with a nested loop) and
// then recurses over all child loops via the lpChild/lpSibling links.
2007 bool Compiler::optCanonicalizeLoopNest(unsigned char loopInd)
2009 bool modified = false;
2011 // Is the top of the current loop not in any nested loop?
2012 if (optLoopTable[loopInd].lpTop->bbNatLoopNum != loopInd)
2014 if (optCanonicalizeLoop(loopInd))
// Recurse into each child loop of this nest.
2020 for (unsigned char child = optLoopTable[loopInd].lpChild; child != BasicBlock::NOT_IN_LOOP;
2021 child = optLoopTable[child].lpSibling)
2023 if (optCanonicalizeLoopNest(child))
// Give loop 'loopInd' a unique "top" block. If the current top is shared
// with a nested loop (t->bbNatLoopNum != loopInd), insert a new empty
// BBJ_NONE block 'newT' in the correct EH region, redirect the loop's
// bottom and the out-of-nest predecessors of the old top to it, and update
// the loop table (top/first/entry/head) accordingly. Returns true when the
// flow graph was modified.
2032 bool Compiler::optCanonicalizeLoop(unsigned char loopInd)
2034 // Is the top uniquely part of the current loop?
2035 BasicBlock* t = optLoopTable[loopInd].lpTop;
2037 if (t->bbNatLoopNum == loopInd)
2042 JITDUMP("in optCanonicalizeLoop: L%02u has top BB%02u (bottom BB%02u) with natural loop number L%02u: need to "
2044 loopInd, t->bbNum, optLoopTable[loopInd].lpBottom->bbNum, t->bbNatLoopNum);
2046 // Otherwise, the top of this loop is also part of a nested loop.
2048 // Insert a new unique top for this loop. We must be careful to put this new
2049 // block in the correct EH region. Note that f->bbPrev might be in a different
2050 // EH region. For example:
2058 // In this case, first->bbPrev is BB07, which is in a different 'try' region.
2059 // On the other hand, the first block of multiple loops might be the first
2060 // block of a 'try' region that is completely contained in the multiple loops.
2065 // BB10 BBJ_ALWAYS => BB08
2067 // BB12 BBJ_ALWAYS => BB08
2069 // Here, we have two loops, both with BB08 as the "first" block. Block BB08
2070 // is a single-block "try" region. Neither loop "bottom" block is in the same
2071 // "try" region as BB08. This is legal because you can jump to the first block
2072 // of a try region. With EH normalization, no two "try" regions will share
2073 // this block. In this case, we need to insert a new block for the outer loop
2074 // in the same EH region as the branch from the "bottom":
2079 // BB10 BBJ_ALWAYS => BB08
2081 // BB12 BBJ_ALWAYS => BB30
2083 // Another possibility is that the "first" block of the loop nest can be the first block
2084 // of a "try" region that also has other predecessors than those in the loop, or even in
2085 // the "try" region (since blocks can target the first block of a "try" region). For example:
2089 // BB10 BBJ_ALWAYS => BB08
2091 // BB12 BBJ_ALWAYS => BB08
2094 // BB20 BBJ_ALWAYS => BB08
2096 // BB25 BBJ_ALWAYS => BB08
2098 // Here, BB08 has 4 flow graph predecessors: BB10, BB12, BB20, BB25. These are all potential loop
2099 // bottoms, for four possible nested loops. However, we require all the loop bottoms to be in the
2100 // same EH region. For loops BB08..BB10 and BB08..BB12, we need to add a new "top" block within
2101 // the try region, immediately before BB08. The bottom of the loop BB08..BB10 loop will target the
2102 // old BB08, and the bottom of the BB08..BB12 loop will target the new loop header. The other branches
2103 // (BB20, BB25) must target the new loop header, both for correctness, and to avoid the illegal
2104 // situation of branching to a non-first block of a 'try' region.
2106 // We can also have a loop nest where the "first" block is outside of a "try" region
2107 // and the back edges are inside a "try" region, for example:
2111 // BB09 try { BBJ_COND => BB02
2113 // BB15 BBJ_COND => BB02
2115 // BB21 } // end of "try"
2117 // In this case, both loop back edges were formed by "leave" instructions that were
2118 // imported into branches that were later made conditional. In this case, we don't
2119 // want to copy the EH region of the back edge, since that would create a block
2120 // outside of and disjoint with the "try" region of the back edge. However, to
2121 // simplify things, we disqualify this type of loop, so we should never see this here.
2123 BasicBlock* h = optLoopTable[loopInd].lpHead;
2124 BasicBlock* f = optLoopTable[loopInd].lpFirst;
2125 BasicBlock* b = optLoopTable[loopInd].lpBottom;
2127 // The loop must be entirely contained within a single handler region.
2128 assert(BasicBlock::sameHndRegion(f, b));
2130 // If the bottom block is in the same "try" region, then we extend the EH
2131 // region. Otherwise, we add the new block outside the "try" region.
2132 bool extendRegion = BasicBlock::sameTryRegion(f, b);
2133 BasicBlock* newT = fgNewBBbefore(BBJ_NONE, f, extendRegion);
2136 // We need to set the EH region manually. Set it to be the same
2137 // as the bottom block.
2138 newT->copyEHRegion(b);
// The new top is reachable from exactly the same blocks as the old one.
2141 BlockSetOps::Assign(this, newT->bbReach, t->bbReach);
2143 // Redirect the "bottom" of the current loop to "newT".
2144 BlockToBlockMap* blockMap = new (getAllocatorLoopHoist()) BlockToBlockMap(getAllocatorLoopHoist());
2145 blockMap->Set(t, newT);
2146 optRedirectBlock(b, blockMap);
2148 // Redirect non-loop preds of "t" to also go to "newT". Inner loops that also branch to "t" should continue
2149 // to do so. However, there may be other predecessors from outside the loop nest that need to be updated
2150 // to point to "newT". This normally wouldn't happen, since they too would be part of the loop nest. However,
2151 // they might have been prevented from participating in the loop nest due to different EH nesting, or some
2154 // Note that optRedirectBlock doesn't update the predecessors list. So, if the same 't' block is processed
2155 // multiple times while canonicalizing multiple loop nests, we'll attempt to redirect a predecessor multiple times.
2156 // This is ok, because after the first redirection, the topPredBlock branch target will no longer match the source
2157 // edge of the blockMap, so nothing will happen.
2158 for (flowList* topPred = t->bbPreds; topPred != nullptr; topPred = topPred->flNext)
2160 BasicBlock* topPredBlock = topPred->flBlock;
2162 // Skip if topPredBlock is in the loop.
2163 // Note that this uses block number to detect membership in the loop. We are adding blocks during
2164 // canonicalization, and those block numbers will be new, and larger than previous blocks. However, we work
2165 // outside-in, so we shouldn't encounter the new blocks at the loop boundaries, or in the predecessor lists.
2166 if (t->bbNum <= topPredBlock->bbNum && topPredBlock->bbNum <= b->bbNum)
2168 JITDUMP("in optCanonicalizeLoop: 'top' predecessor BB%02u is in the range of L%02u (BB%02u..BB%02u); not "
2169 "redirecting its bottom edge\n",
2170 topPredBlock->bbNum, loopInd, t->bbNum, b->bbNum);
2174 JITDUMP("in optCanonicalizeLoop: redirect top predecessor BB%02u to BB%02u\n", topPredBlock->bbNum,
2176 optRedirectBlock(topPredBlock, blockMap);
2179 assert(newT->bbNext == f);
// Make the new top an explicit jump to the old top, with a GT_NOP body so
// the block is non-empty.
2182 newT->bbJumpKind = BBJ_ALWAYS;
2183 newT->bbJumpDest = t;
2184 newT->bbTreeList = nullptr;
2185 fgInsertStmtAtEnd(newT, fgNewStmtFromTree(gtNewOperNode(GT_NOP, TYP_VOID, nullptr)));
2188 // If it had been a do-while loop (top == entry), update entry, as well.
2189 BasicBlock* origE = optLoopTable[loopInd].lpEntry;
2190 if (optLoopTable[loopInd].lpTop == origE)
2192 optLoopTable[loopInd].lpEntry = newT;
2194 optLoopTable[loopInd].lpTop = newT;
2195 optLoopTable[loopInd].lpFirst = newT;
2197 newT->bbNatLoopNum = loopInd;
2199 JITDUMP("in optCanonicalizeLoop: made new block BB%02u [%p] the new unique top of loop %d.\n", newT->bbNum,
2200 dspPtr(newT), loopInd);
2202 // Make sure the head block still goes to the entry...
2203 if (h->bbJumpKind == BBJ_NONE && h->bbNext != optLoopTable[loopInd].lpEntry)
2205 h->bbJumpKind = BBJ_ALWAYS;
2206 h->bbJumpDest = optLoopTable[loopInd].lpEntry;
// Fall-through of a conditional head now lands on newT instead of the entry:
// insert an intermediate always-jump block h2 and make it the new head.
2208 else if (h->bbJumpKind == BBJ_COND && h->bbNext == newT && newT != optLoopTable[loopInd].lpEntry)
2210 BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, h, /*extendRegion*/ true);
2211 optLoopTable[loopInd].lpHead = h2;
2212 h2->bbJumpDest = optLoopTable[loopInd].lpEntry;
2213 h2->bbTreeList = nullptr;
2214 fgInsertStmtAtEnd(h2, fgNewStmtFromTree(gtNewOperNode(GT_NOP, TYP_VOID, nullptr)));
2217 // If any loops nested in "loopInd" have the same head and entry as "loopInd",
2218 // it must be the case that they were do-while's (since "h" fell through to the entry).
2219 // The new node "newT" becomes the head of such loops.
2220 for (unsigned char childLoop = optLoopTable[loopInd].lpChild; childLoop != BasicBlock::NOT_IN_LOOP;
2221 childLoop = optLoopTable[childLoop].lpSibling)
2223 if (optLoopTable[childLoop].lpEntry == origE && optLoopTable[childLoop].lpHead == h &&
2224 newT->bbJumpKind == BBJ_NONE && newT->bbNext == origE)
2226 optUpdateLoopHead(childLoop, h, newT);
// Returns true if loop 'l1' contains loop 'l2' (walking l2's lpParent
// chain upward until it reaches l1 or runs out of ancestors).
2232 bool Compiler::optLoopContains(unsigned l1, unsigned l2)
2234 assert(l1 != BasicBlock::NOT_IN_LOOP);
// l2 has no enclosing loop at all, so l1 cannot contain it.
2239 else if (l2 == BasicBlock::NOT_IN_LOOP)
// Otherwise, ask whether l1 contains l2's parent (recursive ancestor walk).
2245 return optLoopContains(l1, optLoopTable[l2].lpParent);
// Change the recorded head of loop 'loopInd' from 'from' to 'to', and
// propagate the same update to every child loop that shared that head.
2249 void Compiler::optUpdateLoopHead(unsigned loopInd, BasicBlock* from, BasicBlock* to)
2251 assert(optLoopTable[loopInd].lpHead == from);
2252 optLoopTable[loopInd].lpHead = to;
2253 for (unsigned char childLoop = optLoopTable[loopInd].lpChild; childLoop != BasicBlock::NOT_IN_LOOP;
2254 childLoop = optLoopTable[childLoop].lpSibling)
2256 if (optLoopTable[childLoop].lpHead == from)
2258 optUpdateLoopHead(childLoop, from, to);
2263 /*****************************************************************************
2264 * Returns true if "i += const" will cause an overflow exception for the small types.
2267 bool jitIterSmallOverflow(int iterAtExit, var_types incrType)
// Pick the maximum representable value for the (narrow) increment type.
2274 type_MAX = SCHAR_MAX;
2277 type_MAX = UCHAR_MAX;
2280 type_MAX = SHRT_MAX;
2283 type_MAX = USHRT_MAX;
2286 case TYP_UINT: // Detected by checking for 32bit ....
2288 return false; // ... overflow same as done for TYP_INT
// Overflow iff the exit value of the iterator exceeds the type's maximum.
2294 if (iterAtExit > type_MAX)
2304 /*****************************************************************************
2305 * If the "i -= const" will cause an underflow exception for the small types
2308 bool jitIterSmallUnderflow(int iterAtExit, var_types decrType)
// Pick the minimum representable value for the (narrow) decrement type.
2315 type_MIN = SCHAR_MIN;
2318 type_MIN = SHRT_MIN;
2327 case TYP_UINT: // Detected by checking for 32bit ....
2329 return false; // ... underflow same as done for TYP_INT
// Underflow iff the exit value of the iterator is below the type's minimum.
2335 if (iterAtExit < type_MIN)
2345 /*****************************************************************************
2347 * Helper for unroll loops - Computes the number of repetitions
2348 * in a constant loop. If it cannot prove the number is constant returns false
// Computes the trip count into *iterCount for a loop of the form
// "for (i = constInit; i testOper constLimit; i iterOper= iterInc)".
// All arithmetic is widened to __int64 so 32-bit wrap can be detected;
// small-type iterators are narrowed first, and jitIterSmallOverflow /
// jitIterSmallUnderflow reject counts whose final value would trap.
// The visible cases below repeat the same count-then-validate pattern,
// one arm per test operator (==/!=, <, <=, >, >=).
2351 bool Compiler::optComputeLoopRep(int constInit,
2354 genTreeOps iterOper,
2355 var_types iterOperType,
2356 genTreeOps testOper,
2359 unsigned* iterCount)
2361 noway_assert(genActualType(iterOperType) == TYP_INT);
2364 __int64 constLimitX;
2369 // Using this, we can just do a signed comparison with other 32 bit values.
2372 constLimitX = (unsigned int)constLimit;
2376 constLimitX = (signed int)constLimit;
2379 switch (iterOperType)
2381 // For small types, the iteration operator will narrow these values if big
2383 #define INIT_ITER_BY_TYPE(type) \
2384 constInitX = (type)constInit; \
2385 iterInc = (type)iterInc;
2388 INIT_ITER_BY_TYPE(signed char);
2391 INIT_ITER_BY_TYPE(unsigned char);
2394 INIT_ITER_BY_TYPE(signed short);
2397 INIT_ITER_BY_TYPE(unsigned short);
2400 // For the big types, 32 bit arithmetic is performed
2406 constInitX = (unsigned int)constInit;
2410 constInitX = (signed int)constInit;
2415 noway_assert(!"Bad type");
2419 /* If iterInc is zero we have an infinite loop */
2425 /* Set iterSign to +1 for positive iterInc and -1 for negative iterInc */
2426 iterSign = (iterInc > 0) ? +1 : -1;
2428 /* Initialize loopCount to zero */
2431 // If dupCond is true then the loop head contains a test which skips
2432 // this loop, if the constInit does not pass the loop test
2433 // Such a loop can execute zero times.
2434 // If dupCond is false then we have a true do-while loop which we
2435 // always execute the loop once before performing the loop test
2439 constInitX += iterInc;
2442 // bail if count is based on wrap-around math
2445 if (constLimitX < constInitX)
2450 else if (constLimitX > constInitX)
2455 /* Compute the number of repetitions */
2459 __int64 iterAtExitX;
2462 /* something like "for (i=init; i == lim; i++)" doesn't make any sense */
2466 /* "for (i=init; i != lim; i+=const)" - this is tricky since it may
2467 * have a constant number of iterations or loop forever -
2468 * we have to compute (lim-init) mod iterInc to see if it is zero.
2469 * If mod iterInc is not zero then the limit test will miss and a wrap will occur
2470 * which is probably not what the end user wanted, but it is legal.
2475 /* Stepping by one, i.e. Mod with 1 is always zero */
2478 if (((constLimitX - constInitX) % iterInc) != 0)
2486 noway_assert(iterInc < 0);
2487 /* Stepping by -1, i.e. Mod with 1 is always zero */
2490 if (((constInitX - constLimitX) % (-iterInc)) != 0)
// Arm for the != test: count iterations, then validate the exit value.
2506 if (constInitX != constLimitX)
2508 loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1;
2511 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2515 iterAtExitX = (unsigned)iterAtExitX;
2518 // Check if iteration incr will cause overflow for small types
2519 if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
2524 // iterator with 32bit overflow. Bad for TYP_(U)INT
2525 if (iterAtExitX < constLimitX)
2530 *iterCount = loopCount;
2546 noway_assert(!"Unknown operator for loop iterator");
// Arm for the < test.
2560 if (constInitX < constLimitX)
2562 loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1;
2565 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2569 iterAtExitX = (unsigned)iterAtExitX;
2572 // Check if iteration incr will cause overflow for small types
2573 if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
2578 // iterator with 32bit overflow. Bad for TYP_(U)INT
2579 if (iterAtExitX < constLimitX)
2584 *iterCount = loopCount;
2600 noway_assert(!"Unknown operator for loop iterator");
// Arm for the <= test (note: no -iterSign adjustment; bound is inclusive).
2614 if (constInitX <= constLimitX)
2616 loopCount += (unsigned)((constLimitX - constInitX) / iterInc) + 1;
2619 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2623 iterAtExitX = (unsigned)iterAtExitX;
2626 // Check if iteration incr will cause overflow for small types
2627 if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
2632 // iterator with 32bit overflow. Bad for TYP_(U)INT
2633 if (iterAtExitX <= constLimitX)
2638 *iterCount = loopCount;
2654 noway_assert(!"Unknown operator for loop iterator");
// Arm for the > test (descending loop; validates with the underflow helper).
2668 if (constInitX > constLimitX)
2670 loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1;
2673 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2677 iterAtExitX = (unsigned)iterAtExitX;
2680 // Check if small types will underflow
2681 if (jitIterSmallUnderflow((int)iterAtExitX, iterOperType))
2686 // iterator with 32bit underflow. Bad for TYP_INT and unsigneds
2687 if (iterAtExitX > constLimitX)
2692 *iterCount = loopCount;
2708 noway_assert(!"Unknown operator for loop iterator");
// Arm for the >= test (descending loop; inclusive bound).
2722 if (constInitX >= constLimitX)
2724 loopCount += (unsigned)((constLimitX - constInitX) / iterInc) + 1;
2727 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2731 iterAtExitX = (unsigned)iterAtExitX;
2734 // Check if small types will underflow
2735 if (jitIterSmallUnderflow((int)iterAtExitX, iterOperType))
2740 // iterator with 32bit underflow. Bad for TYP_INT and unsigneds
2741 if (iterAtExitX >= constLimitX)
2746 *iterCount = loopCount;
2762 noway_assert(!"Unknown operator for loop iterator");
2767 noway_assert(!"Unknown operator for loop condition");
2773 /*****************************************************************************
2775 * Look for loop unrolling candidates and unroll them
2779 #pragma warning(push)
2780 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
2782 void Compiler::optUnrollLoops()
2784 if (compCodeOpt() == SMALL_CODE)
2789 if (optLoopCount == 0)
2795 if (JitConfig.JitNoUnroll())
2801 if (optCanCloneLoops())
2809 printf("*************** In optUnrollLoops()\n");
2812 /* Look for loop unrolling candidates */
2814 /* Double loop so that after unrolling an inner loop we set change to true
2815 * and we then go back over all of the loop candidates and try to unroll
2816 * the next outer loop, until we don't unroll any loops,
2817 * then change will be false and we are done.
2821 bool change = false;
2823 for (unsigned lnum = 0; lnum < optLoopCount; lnum++)
2837 int lbeg; // initial value for iterator
2838 int llim; // limit value for iterator
2839 unsigned lvar; // iterator lclVar #
2840 int iterInc; // value to increment the iterator
2841 genTreeOps iterOper; // type of iterator increment (i.e. ASG_ADD, ASG_SUB, etc.)
2842 var_types iterOperType; // type result of the oper (for overflow instrs)
2843 genTreeOps testOper; // type of loop test (i.e. GT_LE, GT_GE, etc.)
2844 bool unsTest; // Is the comparison u/int
2846 unsigned totalIter; // total number of iterations in the constant loop
2847 unsigned loopCostSz; // Cost is size of one iteration
2848 unsigned loopFlags; // actual lpFlags
2849 unsigned requiredFlags; // required lpFlags
2851 GenTree* loopList; // new stmt list of the unrolled loop
2854 static const int ITER_LIMIT[COUNT_OPT_CODE + 1] = {
2861 noway_assert(ITER_LIMIT[SMALL_CODE] == 0);
2862 noway_assert(ITER_LIMIT[COUNT_OPT_CODE] == 0);
2864 unsigned iterLimit = (unsigned)ITER_LIMIT[compCodeOpt()];
2867 if (compStressCompile(STRESS_UNROLL_LOOPS, 50))
2873 static const int UNROLL_LIMIT_SZ[COUNT_OPT_CODE + 1] = {
2880 noway_assert(UNROLL_LIMIT_SZ[SMALL_CODE] == 0);
2881 noway_assert(UNROLL_LIMIT_SZ[COUNT_OPT_CODE] == 0);
2883 int unrollLimitSz = (unsigned)UNROLL_LIMIT_SZ[compCodeOpt()];
2886 if (compStressCompile(STRESS_UNROLL_LOOPS, 50))
2888 unrollLimitSz *= 10;
2892 loopFlags = optLoopTable[lnum].lpFlags;
2893 requiredFlags = LPFLG_DO_WHILE | LPFLG_ONE_EXIT | LPFLG_CONST;
2895 /* Ignore the loop if we don't have a do-while with a single exit
2896 that has a constant number of iterations */
2898 if ((loopFlags & requiredFlags) != requiredFlags)
2903 /* ignore if removed or marked as not unrollable */
2905 if (optLoopTable[lnum].lpFlags & (LPFLG_DONT_UNROLL | LPFLG_REMOVED))
2910 head = optLoopTable[lnum].lpHead;
2912 bottom = optLoopTable[lnum].lpBottom;
2913 noway_assert(bottom);
2915 /* The single exit must be at the bottom of the loop */
2916 noway_assert(optLoopTable[lnum].lpExit);
2917 if (optLoopTable[lnum].lpExit != bottom)
2922 /* Unrolling loops with jumps in them is not worth the headache
2923 * Later we might consider unrolling loops after un-switching */
2928 block = block->bbNext;
2929 noway_assert(block);
2931 if (block->bbJumpKind != BBJ_NONE)
2933 if (block != bottom)
2938 } while (block != bottom);
2940 /* Get the loop data:
2944 - iterator increment
2945 - increment operation type (i.e. ASG_ADD, ASG_SUB, etc...)
2946 - loop test type (i.e. GT_GE, GT_LT, etc...)
2949 lbeg = optLoopTable[lnum].lpConstInit;
2950 llim = optLoopTable[lnum].lpConstLimit();
2951 testOper = optLoopTable[lnum].lpTestOper();
2953 lvar = optLoopTable[lnum].lpIterVar();
2954 iterInc = optLoopTable[lnum].lpIterConst();
2955 iterOper = optLoopTable[lnum].lpIterOper();
2957 iterOperType = optLoopTable[lnum].lpIterOperType();
2958 unsTest = (optLoopTable[lnum].lpTestTree->gtFlags & GTF_UNSIGNED) != 0;
2960 if (lvaTable[lvar].lvAddrExposed)
2961 { // If the loop iteration variable is address-exposed then bail
2964 if (lvaTable[lvar].lvIsStructField)
2965 { // If the loop iteration variable is a promoted field from a struct then
2970 /* Locate the pre-header and initialization and increment/test statements */
2972 phdr = head->bbTreeList;
2974 loop = bottom->bbTreeList;
2977 init = head->lastStmt();
2978 noway_assert(init && (init->gtNext == nullptr));
2979 test = bottom->lastStmt();
2980 noway_assert(test && (test->gtNext == nullptr));
2981 incr = test->gtPrev;
2984 if (init->gtFlags & GTF_STMT_CMPADD)
2986 /* Must be a duplicated loop condition */
2987 noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
2990 init = init->gtPrev;
2998 /* Find the number of iterations - the function returns false if not a constant number */
3000 if (!optComputeLoopRep(lbeg, llim, iterInc, iterOper, iterOperType, testOper, unsTest, dupCond, &totalIter))
3005 /* Forget it if there are too many repetitions or not a constant loop */
3007 if (totalIter > iterLimit)
3012 noway_assert(init->gtOper == GT_STMT);
3013 init = init->gtStmt.gtStmtExpr;
3014 noway_assert(test->gtOper == GT_STMT);
3015 test = test->gtStmt.gtStmtExpr;
3016 noway_assert(incr->gtOper == GT_STMT);
3017 incr = incr->gtStmt.gtStmtExpr;
3019 // Don't unroll loops we don't understand.
3020 if (incr->gtOper == GT_ASG)
3025 /* Make sure everything looks ok */
3026 if ((init->gtOper != GT_ASG) || (init->gtOp.gtOp1->gtOper != GT_LCL_VAR) ||
3027 (init->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) || (init->gtOp.gtOp2->gtOper != GT_CNS_INT) ||
3028 (init->gtOp.gtOp2->gtIntCon.gtIconVal != lbeg) ||
3030 !((incr->gtOper == GT_ASG_ADD) || (incr->gtOper == GT_ASG_SUB)) ||
3031 (incr->gtOp.gtOp1->gtOper != GT_LCL_VAR) || (incr->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) ||
3032 (incr->gtOp.gtOp2->gtOper != GT_CNS_INT) || (incr->gtOp.gtOp2->gtIntCon.gtIconVal != iterInc) ||
3034 (test->gtOper != GT_JTRUE))
3036 noway_assert(!"Bad precondition in Compiler::optUnrollLoops()");
3040 /* heuristic - Estimated cost in code size of the unrolled loop */
3048 block = block->bbNext;
3050 /* Visit all the statements in the block */
3052 for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
3054 /* Get the expression and stop if end reached */
3056 GenTreePtr expr = stmt->gtStmtExpr;
3062 /* Calculate gtCostSz */
3063 gtSetStmtInfo(stmt);
3065 /* Update loopCostSz */
3066 loopCostSz += stmt->gtCostSz;
3068 } while (block != bottom);
3070 /* Compute the estimated increase in code size for the unrolled loop */
3072 unsigned int fixedLoopCostSz;
3073 fixedLoopCostSz = 8;
3076 unrollCostSz = (loopCostSz * totalIter) - (loopCostSz + fixedLoopCostSz);
3078 /* Don't unroll if too much code duplication would result. */
3080 if (unrollCostSz > unrollLimitSz)
3082 /* prevent this loop from being revisited */
3083 optLoopTable[lnum].lpFlags |= LPFLG_DONT_UNROLL;
3087 /* Looks like a good idea to unroll this loop, let's do it! */
3088 CLANG_FORMAT_COMMENT_ANCHOR;
3093 printf("\nUnrolling loop BB%02u", head->bbNext->bbNum);
3094 if (head->bbNext->bbNum != bottom->bbNum)
3096 printf("..BB%02u", bottom->bbNum);
3098 printf(" over V%02u from %u to %u", lvar, lbeg, llim);
3099 printf(" unrollCostSz = %d\n", unrollCostSz);
3104 /* Create the unrolled loop statement list */
3106 loopList = loopLast = nullptr;
3108 for (lval = lbeg; totalIter; totalIter--)
3117 block = block->bbNext;
3118 noway_assert(block);
3120 /* Visit all the statements in the block */
3122 for (stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
3124 /* Stop if we've reached the end of the loop */
3126 if (stmt->gtStmtExpr == incr)
3131 /* Clone/substitute the expression */
3133 expr = gtCloneExpr(stmt, 0, lvar, lval);
3135 // cloneExpr doesn't handle everything
3139 optLoopTable[lnum].lpFlags |= LPFLG_DONT_UNROLL;
3143 /* Append the expression to our list */
3147 loopLast->gtNext = expr;
3154 expr->gtPrev = loopLast;
3157 } while (block != bottom);
3159 /* update the new value for the unrolled iterator */
3173 noway_assert(!"Unrolling not implemented for this loop iterator");
3177 noway_assert(!"Unknown operator for constant loop iterator");
3182 /* Finish the linked list */
3186 loopList->gtPrev = loopLast;
3187 loopLast->gtNext = nullptr;
3190 /* Replace the body with the unrolled one */
3196 block = block->bbNext;
3197 noway_assert(block);
3198 block->bbTreeList = nullptr;
3199 block->bbJumpKind = BBJ_NONE;
3200 block->bbFlags &= ~BBF_NEEDS_GCPOLL;
3201 } while (block != bottom);
3203 bottom->bbJumpKind = BBJ_NONE;
3204 bottom->bbTreeList = loopList;
3205 bottom->bbFlags &= ~BBF_NEEDS_GCPOLL;
3206 bottom->modifyBBWeight(bottom->bbWeight / BB_LOOP_WEIGHT);
3210 fgMorphStmts(bottom, &dummy, &dummy, &dummy);
3212 /* Update bbRefs and bbPreds */
3213 /* Here head->bbNext is bottom !!! - Replace it */
3215 fgRemoveRefPred(head->bbNext, bottom);
3217 /* Now change the initialization statement in the HEAD to "lvar = lval;"
3218 * (the last value of the iterator in the loop)
3219 * and drop the jump condition since the unrolled loop will always execute */
3221 init->gtOp.gtOp2->gtIntCon.gtIconVal = lval;
3223 /* if the HEAD is a BBJ_COND drop the condition (and make HEAD a BBJ_NONE block) */
3225 if (head->bbJumpKind == BBJ_COND)
3227 phdr = head->bbTreeList;
3229 test = phdr->gtPrev;
3231 noway_assert(test && (test->gtNext == nullptr));
3232 noway_assert(test->gtOper == GT_STMT);
3233 noway_assert(test->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
3235 init = test->gtPrev;
3236 noway_assert(init && (init->gtNext == test));
3237 noway_assert(init->gtOper == GT_STMT);
3239 init->gtNext = nullptr;
3240 phdr->gtPrev = init;
3241 head->bbJumpKind = BBJ_NONE;
3242 head->bbFlags &= ~BBF_NEEDS_GCPOLL;
3244 /* Update bbRefs and bbPreds */
3246 fgRemoveRefPred(head->bbJumpDest, head);
3250 /* the loop must execute */
3251 noway_assert(head->bbJumpKind == BBJ_NONE);
3257 printf("Whole unrolled loop:\n");
3259 GenTreePtr s = loopList;
3263 noway_assert(s->gtOper == GT_STMT);
3274 /* Remember that something has changed */
3278 /* Make sure to update loop table */
3280 /* Use the LPFLG_REMOVED flag and update the bbLoopMask accordingly
3281 * (also make head and bottom NULL - to hit an assert or GPF) */
3283 optLoopTable[lnum].lpFlags |= LPFLG_REMOVED;
3284 optLoopTable[lnum].lpHead = optLoopTable[lnum].lpBottom = nullptr;
3296 fgDebugCheckBBlist();
3300 #pragma warning(pop)
3303 /*****************************************************************************
3305 * Return non-zero if there is a code path from 'topBB' to 'botBB' that will
3306 * not execute a method call.
3309 bool Compiler::optReachWithoutCall(BasicBlock* topBB, BasicBlock* botBB)
3311 // TODO-Cleanup: Currently BBF_GC_SAFE_POINT is not set for helper calls,
3312 // as some helper calls are neither interruptible nor hijackable.
3313 // When we can determine this, then we can set BBF_GC_SAFE_POINT for
3314 // those helpers too.
// The caller must pass a lexically ordered pair: blocks are walked via
// bbNext, so topBB must precede botBB in block-number order.
3316 noway_assert(topBB->bbNum <= botBB->bbNum);
3318 // We can always check topBB and botBB for any gc safe points and early out
3320 if ((topBB->bbFlags | botBB->bbFlags) & BBF_GC_SAFE_POINT)
3325 // Otherwise we will need to rely upon the dominator sets
3327 if (!fgDomsComputed)
3329 // return a conservative answer of true when we don't have the dominator sets
3333 BasicBlock* curBB = topBB;
3336 noway_assert(curBB);
3338 // If we added a loop pre-header block then we will
3339 // have a bbNum greater than fgLastBB, and we won't have
3340 // any dominator information about this block, so skip it.
3342 if (curBB->bbNum <= fgLastBB->bbNum)
3344 noway_assert(curBB->bbNum <= botBB->bbNum);
3346 // Does this block contain a gc safe point?
3348 if (curBB->bbFlags & BBF_GC_SAFE_POINT)
3350 // Will this block always execute on the way to botBB ?
3352 // Since we are checking every block in [topBB .. botBB] and we are using
3353 // a lexical definition of a loop.
3354 // (all that we know is that botBB is a back-edge to topBB)
3355 // Thus while walking blocks in this range we may encounter some blocks
3356 // that are not really part of the loop, and so we need to perform
3357 // some additional checks:
3359 // We will check that the current 'curBB' is reachable from 'topBB'
3360 // and that it dominates the block containing the back-edge 'botBB'
3361 // When both of these are true then we know that the gcsafe point in 'curBB'
3362 // will be encountered in the loop and we can return false
3364 if (fgDominate(curBB, botBB) && fgReachable(topBB, curBB))
3371 // If we've reached the destination block, then we're done
3380 curBB = curBB->bbNext;
3383 // If we didn't find any blocks that contained a gc safe point and
3384 // also met the fgDominate and fgReachable criteria then we must return true
3389 /*****************************************************************************
3391 * Find the loop termination test at the bottom of the loop
// Returns the last GT_STMT in 'bottom's statement list, which is expected to
// hold the loop's termination test (the caller validates its shape).
3394 static GenTreePtr optFindLoopTermTest(BasicBlock* bottom)
3396 GenTreePtr testt = bottom->bbTreeList;
// The statement list must be non-empty and made of GT_STMT nodes.
3398 assert(testt && testt->gtOper == GT_STMT);
// The head statement's gtPrev link points at the last statement of the
// block, so we can fetch the termination test in O(1).
3400 GenTreePtr result = testt->gtPrev;
// Walk forward to the end of the list to double-check that gtPrev of the
// head really is the final statement.
3403 while (testt->gtNext)
3405 testt = testt->gtNext;
3408 assert(testt == result);
3414 /*****************************************************************************
3415 * Optimize "jmp C; do{} C:while(cond);" loops to "if (cond){ do{}while(cond); }"
// by duplicating the loop condition at the loop entry ("loop inversion").
3418 void Compiler::fgOptWhileLoop(BasicBlock* block)
3420 noway_assert(!opts.MinOpts() && !opts.compDbgCode);
3421 noway_assert(compCodeOpt() != SMALL_CODE);
3424 Optimize while loops into do { } while loop
3425 Our loop hoisting logic requires do { } while loops.
3426 Specifically, we're looking for the following case:
3437 If we find this, and the condition is simple enough, we change
3438 the loop to the following:
3443 // else fall-through
3454 /* Does the BB end with an unconditional jump? */
3456 if (block->bbJumpKind != BBJ_ALWAYS || (block->bbFlags & BBF_KEEP_BBJ_ALWAYS))
3457 { // It can't be one of the ones we use for our exception magic
3461 // It has to be a forward jump
3462 // TODO-CQ: Check if we can also optimize the backwards jump as well.
3464 if (fgIsForwardBranch(block) == false)
3469 // Get hold of the jump target
3470 BasicBlock* bTest = block->bbJumpDest;
3472 // Does the block consist of 'jtrue(cond) block' ?
3473 if (bTest->bbJumpKind != BBJ_COND)
3478 // bTest must be a backwards jump to block->bbNext
3479 if (bTest->bbJumpDest != block->bbNext)
3484 // Since test is a BBJ_COND it will have a bbNext
3485 noway_assert(bTest->bbNext);
3487 // 'block' must be in the same try region as the condition, since we're going to insert
3488 // a duplicated condition in 'block', and the condition might include exception throwing code.
3489 if (!BasicBlock::sameTryRegion(block, bTest))
3494 // We're going to change 'block' to branch to bTest->bbNext, so that also better be in the
3495 // same try region (or no try region) to avoid generating illegal flow.
3496 BasicBlock* bTestNext = bTest->bbNext;
3497 if (bTestNext->hasTryIndex() && !BasicBlock::sameTryRegion(block, bTestNext))
3502 GenTreePtr condStmt = optFindLoopTermTest(bTest);
3504 // bTest must contain only a jtrue with no other stmts, because we will only clone
3505 // the conditional, so any other statements will not get cloned
3506 // TODO-CQ: consider cloning the whole bTest block and inserting it after block.
3508 if (bTest->bbTreeList != condStmt)
3513 /* Get to the condition node from the statement tree */
3515 noway_assert(condStmt->gtOper == GT_STMT);
3517 GenTreePtr condTree = condStmt->gtStmt.gtStmtExpr;
3518 noway_assert(condTree->gtOper == GT_JTRUE);
// Drop the GT_JTRUE wrapper; condTree is now the comparison itself.
3520 condTree = condTree->gtOp.gtOp1;
3522 // The condTree has to be a RelOp comparison
3523 // TODO-CQ: Check if we can also optimize the backwards jump as well.
3525 if (condTree->OperIsCompare() == false)
3530 /* We call gtPrepareCost to measure the cost of duplicating this tree */
3532 gtPrepareCost(condTree);
3533 unsigned estDupCostSz = condTree->gtCostSz;
// Default iteration estimate when no usable profile data exists.
3535 double loopIterations = (double)BB_LOOP_WEIGHT;
3537 bool allProfileWeightsAreValid = false;
3538 BasicBlock::weight_t weightBlock = block->bbWeight;
3539 BasicBlock::weight_t weightTest = bTest->bbWeight;
3540 BasicBlock::weight_t weightNext = block->bbNext->bbWeight;
3542 // If we have profile data then we calculate the number of times
3543 // the loop will iterate into loopIterations
3544 if (fgIsUsingProfileWeights())
3546 // Only rely upon the profile weight when all three of these blocks
3547 // have good profile weights
3548 if ((block->bbFlags & BBF_PROF_WEIGHT) && (bTest->bbFlags & BBF_PROF_WEIGHT) &&
3549 (block->bbNext->bbFlags & BBF_PROF_WEIGHT))
3551 allProfileWeightsAreValid = true;
3553 // If this while loop never iterates then don't bother transforming
3554 if (weightNext == 0)
3559 // with (weightNext > 0) we should also have (weightTest >= weightBlock)
3560 // if the profile weights are all valid.
3562 // weightNext is the number of times this loop iterates
3563 // weightBlock is the number of times that we enter the while loop
3564 // loopIterations is the average number of times that this loop iterates
3566 if (weightTest >= weightBlock)
3568 loopIterations = (double)block->bbNext->bbWeight / (double)block->bbWeight;
// Budget (in gtCostSz units) for the duplicated condition.
3573 unsigned maxDupCostSz = 32;
3575 // optFastCodeOrBlendedLoop(bTest->bbWeight) does not work here as we have not
3576 // set loop weights yet
3577 if ((compCodeOpt() == FAST_CODE) || compStressCompile(STRESS_DO_WHILE_LOOPS, 30))
3582 // If this loop iterates a lot then raise the maxDupCost
3583 if (loopIterations >= 12.0)
3587 if (loopIterations >= 96.0)
3592 // If the loop condition has a shared static helper, we really want this loop converted
3593 // as not converting the loop will disable loop hoisting, meaning the shared helper will
3594 // be executed on every loop iteration.
3595 int countOfHelpers = 0;
3596 fgWalkTreePre(&condTree, CountSharedStaticHelper, &countOfHelpers);
3598 if (countOfHelpers > 0 && compCodeOpt() != SMALL_CODE)
3600 maxDupCostSz += 24 * min(countOfHelpers, (int)(loopIterations + 1.5));
3603 // If the compare has too high cost then we don't want to dup
3605 bool costIsTooHigh = (estDupCostSz > maxDupCostSz);
3610 printf("\nDuplication of loop condition [%06u] is %s, because the cost of duplication (%i) is %s than %i,"
3611 "\n loopIterations = %7.3f, countOfHelpers = %d, validProfileWeights = %s\n",
3612 condTree->gtTreeID, costIsTooHigh ? "not done" : "performed", estDupCostSz,
3613 costIsTooHigh ? "greater" : "less or equal", maxDupCostSz, loopIterations, countOfHelpers,
3614 allProfileWeightsAreValid ? "true" : "false");
3623 /* Looks good - duplicate the condition test */
3625 condTree->gtFlags |= GTF_RELOP_ZTT;
// Clone the comparison and reverse its sense: the copy at the loop
// entry must branch PAST the loop when the original would fall in.
3627 condTree = gtCloneExpr(condTree);
3628 gtReverseCond(condTree);
3630 // Make sure clone expr copied the flag
3631 assert(condTree->gtFlags & GTF_RELOP_ZTT);
3633 condTree = gtNewOperNode(GT_JTRUE, TYP_VOID, condTree);
3635 /* Create a statement entry out of the condition and
3636 append the condition test at the end of 'block' */
3638 GenTreePtr copyOfCondStmt = fgInsertStmtAtEnd(block, condTree);
// GTF_STMT_CMPADD marks this statement as a compiler-added duplicate.
3640 copyOfCondStmt->gtFlags |= GTF_STMT_CMPADD;
3642 #ifdef DEBUGGING_SUPPORT
3643 if (opts.compDbgInfo)
3645 copyOfCondStmt->gtStmt.gtStmtILoffsx = condStmt->gtStmt.gtStmtILoffsx;
3649 // Flag the block that received the copy as potentially having an array/vtable
3650 // reference if the block copied from did; this is a conservative guess.
3651 if (auto copyFlags = bTest->bbFlags & (BBF_HAS_VTABREF | BBF_HAS_IDX_LEN))
3653 block->bbFlags |= copyFlags;
3656 // If we have profile data for all blocks and we know that we are cloning the
3657 // bTest block into block and thus changing the control flow from block so
3658 // that it no longer goes directly to bTest anymore, we have to adjust the
3659 // weight of bTest by subtracting out the weight of block.
3661 if (allProfileWeightsAreValid)
3664 // Some additional sanity checks before adjusting the weight of bTest
3666 if ((weightNext > 0) && (weightTest >= weightBlock) && (weightTest != BB_MAX_WEIGHT))
3668 // Get the two edges that flow out of bTest
3669 flowList* edgeToNext = fgGetPredForBlock(bTest->bbNext, bTest);
3670 flowList* edgeToJump = fgGetPredForBlock(bTest->bbJumpDest, bTest);
3672 // Calculate the new weight for block bTest
3674 BasicBlock::weight_t newWeightTest =
3675 (weightTest > weightBlock) ? (weightTest - weightBlock) : BB_ZERO_WEIGHT;
3676 bTest->bbWeight = newWeightTest;
3678 if (newWeightTest == BB_ZERO_WEIGHT)
3680 bTest->bbFlags |= BBF_RUN_RARELY;
3681 // All out edge weights are set to zero
3682 edgeToNext->flEdgeWeightMin = BB_ZERO_WEIGHT;
3683 edgeToNext->flEdgeWeightMax = BB_ZERO_WEIGHT;
3684 edgeToJump->flEdgeWeightMin = BB_ZERO_WEIGHT;
3685 edgeToJump->flEdgeWeightMax = BB_ZERO_WEIGHT;
3689 // Update our edge weights
3690 edgeToNext->flEdgeWeightMin = BB_ZERO_WEIGHT;
3691 edgeToNext->flEdgeWeightMax = min(edgeToNext->flEdgeWeightMax, newWeightTest);
3692 edgeToJump->flEdgeWeightMin = BB_ZERO_WEIGHT;
3693 edgeToJump->flEdgeWeightMax = min(edgeToJump->flEdgeWeightMax, newWeightTest);
3698 /* Change the block to end with a conditional jump */
3700 block->bbJumpKind = BBJ_COND;
3701 block->bbJumpDest = bTest->bbNext;
3703 /* Mark the jump dest block as being a jump target */
3704 block->bbJumpDest->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
3706 /* Update bbRefs and bbPreds for 'block->bbNext' 'bTest' and 'bTest->bbNext' */
3708 fgAddRefPred(block->bbNext, block);
3710 fgRemoveRefPred(bTest, block);
3711 fgAddRefPred(bTest->bbNext, block);
3716 printf("\nDuplicating loop condition in BB%02u for loop (BB%02u - BB%02u)", block->bbNum, block->bbNext->bbNum,
3718 printf("\nEstimated code size expansion is %d\n ", estDupCostSz);
3720 gtDispTree(copyOfCondStmt);
3726 /*****************************************************************************
3728 * Optimize the BasicBlock layout of the method
// Runs loop inversion (fgOptWhileLoop) over every block, then updates the
// flow graph (and edge weights, if the graph was modified).
3731 void Compiler::optOptimizeLayout()
3733 noway_assert(!opts.MinOpts() && !opts.compDbgCode);
3738 printf("*************** In optOptimizeLayout()\n");
3742 /* Check that the flowgraph data (bbNum, bbRefs, bbPreds) is up-to-date */
3743 fgDebugCheckBBlist();
3746 noway_assert(fgModified == false);
3748 for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
3750 /* Make sure the appropriate fields are initialized */
3752 if (block->bbWeight == BB_ZERO_WEIGHT)
3754 /* Zero weighted block can't have a LOOP_HEAD flag */
3755 noway_assert(block->isLoopHead() == false);
// Loop numbering has not been assigned yet at this point.
3759 assert(block->bbLoopNum == 0);
3761 if (compCodeOpt() != SMALL_CODE)
3763 /* Optimize "while(cond){}" loops to "cond; do{}while(cond);" */
3765 fgOptWhileLoop(block);
3771 // Recompute the edge weight if we have modified the flow graph in fgOptWhileLoop
3772 fgComputeEdgeWeights();
3775 fgUpdateFlowGraph(true);
3777 fgUpdateFlowGraph();
3780 /*****************************************************************************
3782 * Perform loop inversion, find and classify natural loops
3785 void Compiler::optOptimizeLoops()
3787 noway_assert(!opts.MinOpts() && !opts.compDbgCode);
3792 printf("*************** In optOptimizeLoops()\n");
3796 optSetBlockWeights();
3798 /* Were there any loops in the flow graph? */
3802 /* now that we have dominator information we can find loops */
3804 optFindNaturalLoops();
3806 unsigned loopNum = 0;
3808 /* Iterate over the flow graph, marking all loops */
3810 /* We will use the following terminology:
3811 * top - the first basic block in the loop (i.e. the head of the backward edge)
3812 * bottom - the last block in the loop (i.e. the block from which we jump to the top)
3813 * lastBottom - used when we have multiple back-edges to the same top
3820 for (top = fgFirstBB; top; top = top->bbNext)
3822 BasicBlock* foundBottom = nullptr;
// Scan all predecessors of 'top' looking for back-edges into it.
3824 for (pred = top->bbPreds; pred; pred = pred->flNext)
3826 /* Is this a loop candidate? - We look for "back edges" */
3828 BasicBlock* bottom = pred->flBlock;
3830 /* is this a backward edge? (from BOTTOM to TOP) */
3832 if (top->bbNum > bottom->bbNum)
3837 /* 'top' also must have the BBF_LOOP_HEAD flag set */
3839 if (top->isLoopHead() == false)
3844 /* We only consider back-edges that are BBJ_COND or BBJ_ALWAYS for loops */
3846 if ((bottom->bbJumpKind != BBJ_COND) && (bottom->bbJumpKind != BBJ_ALWAYS))
3851 /* the top block must be able to reach the bottom block */
3852 if (!fgReachable(top, bottom))
3857 /* Found a new loop, record the longest backedge in foundBottom */
3859 if ((foundBottom == nullptr) || (bottom->bbNum > foundBottom->bbNum))
3861 foundBottom = bottom;
3869 /* Mark the loop header as such */
// bbLoopNum is an unsigned char, hence the 255-loop limit below.
3870 assert(FitsIn<unsigned char>(loopNum));
3871 top->bbLoopNum = (unsigned char)loopNum;
3874 /* Mark all blocks between 'top' and 'bottom' */
3876 optMarkLoopBlocks(top, foundBottom, false);
3879 // We track at most 255 loops
3883 totalUnnatLoopOverflows++;
3890 totalUnnatLoopCount += loopNum;
3898 printf("\nFound a total of %d loops.", loopNum);
3899 printf("\nAfter loop weight marking:\n");
3900 fgDispBasicBlocks();
3905 optLoopsMarked = true;
3909 //------------------------------------------------------------------------
3910 // optDeriveLoopCloningConditions: Derive loop cloning conditions.
3913 // loopNum - the current loop index for which conditions are derived.
3914 // context - data structure where all loop cloning info is kept.
3917 // "false" if conditions cannot be obtained. "true" otherwise.
3918 // The cloning conditions are updated in the "conditions"[loopNum] field
3919 // of the "context" parameter.
3922 // Inspect the loop cloning optimization candidates and populate the conditions necessary
3923 // for each optimization candidate. Checks if the loop stride is "> 0" if the loop
3924 // condition is "less than". If the initializer is "var" init then adds condition
3925 // "var >= 0", and if the loop is var limit then, "var >= 0" and "var <= a.len"
3926 // are added to "context". These conditions are checked in the pre-header block
3927 // and the cloning choice is made.
3930 // Callers should assume AND operation is used i.e., if all conditions are
3931 // true, then take the fast path.
3933 bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext* context)
3935 JITDUMP("------------------------------------------------------------\n");
3936 JITDUMP("Deriving cloning conditions for L%02u\n", loopNum);
3938 LoopDsc* loop = &optLoopTable[loopNum];
3939 ExpandArrayStack<LcOptInfo*>* optInfos = context->GetLoopOptInfo(loopNum);
// Only "< limit" loops are handled here; other test operators fall
// outside this visible path.
3941 if (loop->lpTestOper() == GT_LT)
3943 // Stride conditions
3944 if (loop->lpIterConst() <= 0)
3946 JITDUMP("> Stride %d is invalid\n", loop->lpIterConst());
// Init conditions: a constant init must be non-negative; a variable
// init contributes a dynamic "init >= 0" check instead.
3951 if (loop->lpFlags & LPFLG_CONST_INIT)
3953 // Only allowing const init at this time.
3954 if (loop->lpConstInit < 0)
3956 JITDUMP("> Init %d is invalid\n", loop->lpConstInit);
3960 else if (loop->lpFlags & LPFLG_VAR_INIT)
// Emit cloning condition: initVar >= 0.
3963 LC_Condition geZero(GT_GE, LC_Expr(LC_Ident(loop->lpVarInit, LC_Ident::Var)),
3964 LC_Expr(LC_Ident(0, LC_Ident::Const)));
3965 context->EnsureConditions(loopNum)->Push(geZero);
3969 JITDUMP("> Not variable init\n");
// Limit conditions: 'ident' ends up describing the loop limit, whether
// a constant, a variable (with a "limit >= 0" check), or an array length.
3975 if (loop->lpFlags & LPFLG_CONST_LIMIT)
3977 int limit = loop->lpConstLimit();
3980 JITDUMP("> limit %d is invalid\n", limit);
3983 ident = LC_Ident(limit, LC_Ident::Const);
3985 else if (loop->lpFlags & LPFLG_VAR_LIMIT)
3987 unsigned limitLcl = loop->lpVarLimit();
3988 ident = LC_Ident(limitLcl, LC_Ident::Var);
3990 LC_Condition geZero(GT_GE, LC_Expr(ident), LC_Expr(LC_Ident(0, LC_Ident::Const)));
3992 context->EnsureConditions(loopNum)->Push(geZero);
3994 else if (loop->lpFlags & LPFLG_ARRLEN_LIMIT)
3996 ArrIndex* index = new (getAllocator()) ArrIndex(getAllocator());
3997 if (!loop->lpArrLenLimit(this, index))
3999 JITDUMP("> ArrLen not matching");
4002 ident = LC_Ident(LC_Array(LC_Array::Jagged, index, LC_Array::ArrLen));
4004 // Ensure that this array must be dereference-able, before executing the actual condition.
4005 LC_Array array(LC_Array::Jagged, index, LC_Array::None);
4006 context->EnsureDerefs(loopNum)->Push(array);
4010 JITDUMP("> Undetected limit\n");
// For each optimization candidate, add "limit <= arr.len" so the fast
// path can elide the bounds checks inside the loop.
4014 for (unsigned i = 0; i < optInfos->Size(); ++i)
4016 LcOptInfo* optInfo = optInfos->GetRef(i);
4017 switch (optInfo->GetOptType())
4019 case LcOptInfo::LcJaggedArray:
4022 LcJaggedArrayOptInfo* arrIndexInfo = optInfo->AsLcJaggedArrayOptInfo();
4023 LC_Array arrLen(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::ArrLen);
4024 LC_Ident arrLenIdent = LC_Ident(arrLen);
4026 LC_Condition cond(GT_LE, LC_Expr(ident), LC_Expr(arrLenIdent));
4027 context->EnsureConditions(loopNum)->Push(cond);
4029 // Ensure that this array must be dereference-able, before executing the actual condition.
4030 LC_Array array(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::None);
4031 context->EnsureDerefs(loopNum)->Push(array);
4034 case LcOptInfo::LcMdArray:
4036 // limit <= mdArrLen
4037 LcMdArrayOptInfo* mdArrInfo = optInfo->AsLcMdArrayOptInfo();
4038 LC_Condition cond(GT_LE, LC_Expr(ident),
4039 LC_Expr(LC_Ident(LC_Array(LC_Array::MdArray,
4040 mdArrInfo->GetArrIndexForDim(getAllocator()),
4041 mdArrInfo->dim, LC_Array::None))));
4042 context->EnsureConditions(loopNum)->Push(cond);
4047 JITDUMP("Unknown opt\n");
4051 JITDUMP("Conditions: (");
4052 DBEXEC(verbose, context->PrintConditions(loopNum));
4059 //------------------------------------------------------------------------------------
4060 // optComputeDerefConditions: Derive loop cloning conditions for dereferencing arrays.
4063 // loopNum - the current loop index for which conditions are derived.
4064 // context - data structure where all loop cloning info is kept.
4067 // "false" if conditions cannot be obtained. "true" otherwise.
4068 // The deref conditions are updated in the "derefConditions"[loopNum] field
4069 // of the "context" parameter.
4071 // Definition of Deref Conditions:
4072 // To be able to check for the loop cloning condition that (limitVar <= a.len)
4073 // we should first be able to dereference "a". i.e., "a" is non-null.
4079 // for (k in 0..n) // Inner most loop is being cloned. Cloning needs to check if
4080 // // (n <= a[i][j].len) and other safer conditions to take the fast path
4083 // Now, we want to deref a[i][j] to invoke length operator on it to perform the cloning fast path check.
4084 // This involves deref of (a), (a[i]), (a[i][j]), therefore, the following should first
4085 // be true to do the deref.
4087 // (a != null) && (i < a.len) && (a[i] != null) && (j < a[i].len) && (a[i][j] != null) --> (1)
4089 // Note the short circuiting AND. Implication: these conditions should be performed in separate
4090 // blocks each of which will branch to slow path if the condition evaluates to false.
4092 // Now, imagine a situation where we have
4093 // a[x][y][k] = 20 and a[i][j][k] = 0
4094 // also in the inner most loop where x, y are parameters, then our conditions will have
4098 // in addition to the above conditions (1) to get rid of bounds check on index 'k'
4100 // But these conditions can be checked together with conditions
4101 // (i < a.len) without a need for a separate block. In summary, the conditions will be:
4104 // ((i < a.len) & (x < a.len)) && <-- Note the bitwise AND here.
4105 // (a[i] != null & a[x] != null) && <-- Note the bitwise AND here.
4106 // (j < a[i].len & y < a[x].len) && <-- Note the bitwise AND here.
4107 // (a[i][j] != null & a[x][y] != null) <-- Note the bitwise AND here.
4109 // This naturally yields a tree style pattern, where the nodes of the tree are
4110 // the array and indices respectively.
4126 // Notice that the variables in the same levels can have their conditions combined in the
4127 // same block with a bitwise AND. Whereas, the conditions in consecutive levels will be
4128 // combined with a short-circuiting AND (i.e., different basic blocks).
4131 // Construct a tree of array indices and the array which will generate the optimal
4132 // conditions for loop cloning.
4134 // a[i][j][k], b[i] and a[i][y][k] are the occurrences in the loop. Then, the tree should be:
4149 // In this method, we will construct such a tree by descending depth first into the array
4150 // index operation and forming a tree structure as we encounter the array or the index variables.
4152 // This tree structure will then be used to generate conditions like below:
4153 // (a != null) & (b != null) && // from the first level of the tree.
4155 // (i < a.len) & (i < b.len) && // from the second level of the tree. Levels can be combined.
4156 // (a[i] != null) & (b[i] != null) && // from the second level of the tree.
4158 // (j < a[i].len) & (y < a[i].len) && // from the third level.
4159 // (a[i][j] != null) & (a[i][y] != null) && // from the third level.
4164 bool Compiler::optComputeDerefConditions(unsigned loopNum, LoopCloneContext* context)
4166 ExpandArrayStack<LC_Deref*> nodes(getAllocator());
4169 // Get the dereference-able arrays.
4170 ExpandArrayStack<LC_Array>* deref = context->EnsureDerefs(loopNum);
4172 // For each array in the dereference list, construct a tree,
4173 // where the nodes are array and index variables and an edge 'u-v'
4174 // exists if a node 'v' indexes node 'u' directly as in u[v] or an edge
4175 // 'u-v-w' transitively if u[v][w] occurs.
4176 for (unsigned i = 0; i < deref->Size(); ++i)
4178 LC_Array& array = (*deref)[i];
4180 // First populate the array base variable.
4181 LC_Deref* node = LC_Deref::Find(&nodes, array.arrIndex->arrLcl);
4182 if (node == nullptr)
4184 node = new (getAllocator()) LC_Deref(array, 0 /*level*/);
4188 // For each dimension (level) for the array, populate the tree with the variable
4189 // from that dimension.
4190 unsigned rank = (unsigned)array.GetDimRank();
// Note: this inner 'i' intentionally shadows the outer loop's 'i';
// it walks the index locals of the current array occurrence.
4191 for (unsigned i = 0; i < rank; ++i)
4193 node->EnsureChildren(getAllocator());
4194 LC_Deref* tmp = node->Find(array.arrIndex->indLcls[i]);
4197 tmp = new (getAllocator()) LC_Deref(array, node->level + 1);
4198 node->children->Push(tmp);
4201 // Descend one level down.
4205 // Keep the maxRank of all array dereferences.
4206 maxRank = max((int)rank, maxRank);
4212 for (unsigned i = 0; i < nodes.Size(); ++i)
4229 // First level will always yield the null-check, since it is made of the array base variables.
4230 // All other levels (dimensions) will yield two conditions ex: (i < a.length && a[i] != null)
4231 // So add 1 after rank * 2.
4232 unsigned condBlocks = (unsigned)maxRank * 2 + 1;
4234 // Heuristic to not create too many blocks;
4240 // Derive conditions into an 'array of level x array of conditions' i.e., levelCond[levels][conds]
4241 ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* levelCond = context->EnsureBlockConditions(loopNum, condBlocks);
4242 for (unsigned i = 0; i < nodes.Size(); ++i)
4244 nodes[i]->DeriveLevelConditions(levelCond);
4247 DBEXEC(verbose, context->PrintBlockConditions(loopNum));
4252 //----------------------------------------------------------------------------
4253 // optDebugLogLoopCloning: Insert a call to jithelper that prints a message.
4256 // block - the block in which the helper call needs to be inserted.
4257 // insertBefore - the tree before which the helper call will be inserted.
// No-op unless the JitDebugLogLoopCloning config knob is enabled.
4259 void Compiler::optDebugLogLoopCloning(BasicBlock* block, GenTreePtr insertBefore)
4261 if (JitConfig.JitDebugLogLoopCloning() == 0)
// Build a void call to the logging helper, wrap it in a statement,
// splice it in before 'insertBefore', and morph it in place.
4265 GenTreePtr logCall = gtNewHelperCallNode(CORINFO_HELP_DEBUG_LOG_LOOP_CLONING, TYP_VOID);
4266 GenTreePtr stmt = fgNewStmtFromTree(logCall);
4267 fgInsertStmtBefore(block, insertBefore, stmt);
4268 fgMorphBlockStmt(block, stmt DEBUGARG("Debug log loop cloning"));
4272 //------------------------------------------------------------------------
4273 // optPerformStaticOptimizations: Perform the optimizations for the optimization
4274 // candidates gathered during the cloning phase.
4277 // loopNum - the current loop index for which the optimizations are performed.
4278 // context - data structure where all loop cloning info is kept.
4279 // dynamicPath - If true, the optimization is performed in the fast path among the
4280 // cloned loops. If false, it means this is the only path (i.e.,
4281 // there is no slow path.)
4284 // Perform the optimizations on the fast path i.e., the path in which the
4285 // optimization candidates were collected at the time of identifying them.
4286 // The candidates store all the information necessary (the tree/stmt/block
4287 // they are from) to perform the optimization.
4290 // The unoptimized path is either already cloned when this method is called or
4291 // there is no unoptimized path (got eliminated statically.) So this method
4292 // performs the optimizations assuming that the path in which the candidates
4293 // were collected is the fast path in which the optimizations will be performed.
4295 void Compiler::optPerformStaticOptimizations(unsigned loopNum, LoopCloneContext* context DEBUGARG(bool dynamicPath))
// Walk every optimization candidate recorded for this loop and apply it.
4297 ExpandArrayStack<LcOptInfo*>* optInfos = context->GetLoopOptInfo(loopNum);
4298 for (unsigned i = 0; i < optInfos->Size(); ++i)
4300 LcOptInfo* optInfo = optInfos->GetRef(i);
4301 switch (optInfo->GetOptType())
4303 case LcOptInfo::LcJaggedArray:
// Jagged-array candidate: remove the bounds check for the recorded
// dimension. compCurBB must be set to the block containing the check
// before calling optRemoveRangeCheck.
4305 LcJaggedArrayOptInfo* arrIndexInfo = optInfo->AsLcJaggedArrayOptInfo();
4306 compCurBB = arrIndexInfo->arrIndex.useBlock;
4307 optRemoveRangeCheck(arrIndexInfo->arrIndex.bndsChks[arrIndexInfo->dim], arrIndexInfo->stmt, true,
// In debug, when we are on the dynamically-chosen fast path, emit a log call.
4309 DBEXEC(dynamicPath, optDebugLogLoopCloning(arrIndexInfo->arrIndex.useBlock, arrIndexInfo->stmt));
4312 case LcOptInfo::LcMdArray:
4313 // TODO-CQ: CLONE: Implement.
4321 //----------------------------------------------------------------------------
4322 // optCanCloneLoops: Use the environment flag to determine whether loop
4323 // cloning is allowed to be performed.
4326 // Returns true in debug builds if COMPlus_JitCloneLoops flag is set.
4327 // Disabled for retail for now.
// NOTE(review): the header comment above says "disabled for retail", but the
// code below defaults the flag to 1 ("Enabled for retail builds now") — the
// header looks stale relative to the implementation.
4329 bool Compiler::optCanCloneLoops()
4331 // Enabled for retail builds now.
4332 unsigned cloneLoopsFlag = 1;
// The JitCloneLoops config value overrides the default when consulted.
4334 cloneLoopsFlag = JitConfig.JitCloneLoops();
4336 return (cloneLoopsFlag != 0);
4339 //----------------------------------------------------------------------------
4340 // optIsLoopClonable: Determine whether this loop can be cloned.
4343 // loopInd loop index which needs to be checked if it can be cloned.
4346 // Returns true if the loop can be cloned. If it returns false
4347 // prints a message in debug as why the loop can't be cloned.
4349 bool Compiler::optIsLoopClonable(unsigned loopInd)
4351 // First, for now, make sure the loop doesn't have any embedded exception handling -- I don't want to tackle
4352 // inserting new EH regions in the exception table yet.
4353 BasicBlock* stopAt = optLoopTable[loopInd].lpBottom->bbNext;
4354 unsigned loopRetCount = 0;
// Scan the loop body: count BBJ_RETURN blocks (needed for the epilog limit
// check below) and reject the loop if it contains a 'try' entry.
4355 for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != stopAt; blk = blk->bbNext)
4357 if (blk->bbJumpKind == BBJ_RETURN)
4361 if (bbIsTryBeg(blk))
4363 JITDUMP("Loop cloning: rejecting loop %d in %s, because it has a try begin.\n", loopInd, info.compFullName);
4368 // Is the entry block a handler or filter start? If so, then if we cloned, we could create a jump
4369 // into the middle of a handler (to go to the cloned copy.) Reject.
4370 if (bbIsHandlerBeg(optLoopTable[loopInd].lpEntry))
4372 JITDUMP("Loop cloning: rejecting loop because entry block is a handler start.\n");
4376 // If the head and entry are in different EH regions, reject.
4377 if (!BasicBlock::sameEHRegion(optLoopTable[loopInd].lpHead, optLoopTable[loopInd].lpEntry))
4379 JITDUMP("Loop cloning: rejecting loop because head and entry blocks are in different EH regions.\n");
4383 // Is the first block after the last block of the loop a handler or filter start?
4384 // Usually, we create a dummy block after the original loop, to skip over the loop clone
4385 // and go to where the original loop did. That raises problems when we don't actually go to
4386 // that block; this is one of those cases. This could be fixed fairly easily; for example,
4387 // we could add a dummy nop block after the (cloned) loop bottom, in the same handler scope as the
4388 // loop. This is just a corner to cut to get this working faster.
4389 BasicBlock* bbAfterLoop = optLoopTable[loopInd].lpBottom->bbNext;
4390 if (bbAfterLoop != nullptr && bbIsHandlerBeg(bbAfterLoop))
4392 JITDUMP("Loop cloning: rejecting loop because next block after bottom is a handler start.\n");
4396 // We've previously made a decision whether to have separate return epilogs, or branch to one.
4397 // There's a GCInfo limitation in the x86 case, so that there can be no more than 4 separate epilogs.
4398 // (I thought this was x86-specific, but it's not if-d. On other architectures, the decision should be made as a
4399 // heuristic tradeoff; perhaps we're just choosing to live with 4 as the limit.)
4400 if (fgReturnCount + loopRetCount > 4)
4402 JITDUMP("Loop cloning: rejecting loop because it has %d returns; if added to previously-existing %d returns, "
4403 "would exceed the limit of 4.\n",
4404 loopRetCount, fgReturnCount);
4408 // Otherwise, we're going to add those return blocks.
// Cloning duplicates the loop's return blocks, so account for them now.
4409 fgReturnCount += loopRetCount;
4414 /*****************************************************************************
4416 * Identify loop cloning opportunities, derive loop cloning conditions,
4417 * perform loop cloning, use the derived conditions to choose which
4420 void Compiler::optCloneLoops()
4422 JITDUMP("\n*************** In optCloneLoops()\n");
// Bail out early when there are no loops or cloning is disabled by config.
4423 if (optLoopCount == 0 || !optCanCloneLoops())
4431 printf("Blocks/Trees at start of phase\n");
4432 fgDispBasicBlocks(true);
4436 LoopCloneContext context(optLoopCount, getAllocator());
4438 // Obtain array optimization candidates in the context.
4439 optObtainLoopCloningOpts(&context);
4441 // For each loop, derive cloning conditions for the optimization candidates.
4442 for (unsigned i = 0; i < optLoopCount; ++i)
4444 ExpandArrayStack<LcOptInfo*>* optInfos = context.GetLoopOptInfo(i);
4445 if (optInfos == nullptr)
// If either the cloning conditions or the dereference conditions cannot be
// derived, cancel all candidates for this loop.
4450 if (!optDeriveLoopCloningConditions(i, &context) || !optComputeDerefConditions(i, &context))
4452 JITDUMP("> Conditions could not be obtained\n");
4453 context.CancelLoopOptInfo(i);
// Statically evaluate the derived conditions: if they can be decided at
// jit time, we can either optimize in place or skip cloning entirely.
4457 bool allTrue = false;
4458 bool anyFalse = false;
4459 context.EvaluateConditions(i, &allTrue, &anyFalse DEBUGARG(verbose));
4462 context.CancelLoopOptInfo(i);
4466 // Perform static optimizations on the fast path since we always
4467 // have to take the cloned path.
4468 optPerformStaticOptimizations(i, &context DEBUGARG(false));
4470 // No need to clone.
4471 context.CancelLoopOptInfo(i);
4477 // The code in this #if has been useful in debugging loop cloning issues, by
4478 // enabling selective enablement of the loop cloning optimization according to
// Debug-only method-hash range filter: the loopclonehashlo/loopclonehashhi
// environment variables (hex) bound which methods get cloned.
4481 unsigned methHash = info.compMethodHash();
4482 char* lostr = getenv("loopclonehashlo");
4483 unsigned methHashLo = 0;
4486 sscanf_s(lostr, "%x", &methHashLo);
4487 // methHashLo = (unsigned(atoi(lostr)) << 2); // So we don't have to use negative numbers.
4489 char* histr = getenv("loopclonehashhi");
4490 unsigned methHashHi = UINT32_MAX;
4493 sscanf_s(histr, "%x", &methHashHi);
4494 // methHashHi = (unsigned(atoi(histr)) << 2); // So we don't have to use negative numbers.
4496 if (methHash < methHashLo || methHash > methHashHi)
// Second pass: clone every loop whose candidate info survived the
// condition-derivation pass above.
4501 for (unsigned i = 0; i < optLoopCount; ++i)
4503 if (context.GetLoopOptInfo(i) != nullptr)
4506 context.OptimizeConditions(i DEBUGARG(verbose));
4507 context.OptimizeBlockConditions(i DEBUGARG(verbose));
4508 optCloneLoop(i, &context);
4515 printf("\nAfter loop cloning:\n");
4516 fgDispBasicBlocks(/*dumpTrees*/ true);
// optCloneLoop: Clone the blocks of loop 'loopInd', wire up the choice
// conditions between the original (fast) and cloned (slow) copies, and
// update the flow graph. 'context' supplies the derived block conditions.
4521 void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context)
4523 assert(loopInd < optLoopCount);
4525 JITDUMP("\nCloning loop %d: [h: %d, f: %d, t: %d, e: %d, b: %d].\n", loopInd, optLoopTable[loopInd].lpHead->bbNum,
4526 optLoopTable[loopInd].lpFirst->bbNum, optLoopTable[loopInd].lpTop->bbNum,
4527 optLoopTable[loopInd].lpEntry->bbNum, optLoopTable[loopInd].lpBottom->bbNum);
4529 // Determine the depth of the loop, so we can properly weight blocks added (outside the cloned loop blocks).
4530 unsigned depth = optLoopDepth(loopInd);
4531 unsigned ambientWeight = 1;
// ambientWeight = BB_LOOP_WEIGHT ^ depth, saturating at BB_MAX_WEIGHT.
4532 for (unsigned j = 0; j < depth; j++)
4534 unsigned lastWeight = ambientWeight;
4535 ambientWeight *= BB_LOOP_WEIGHT;
4536 // If the multiplication overflowed, stick at max.
4537 // (Strictly speaking, a multiplication could overflow and still have a result
4538 // that is >= lastWeight...but if so, the original weight must be pretty large,
4539 // and it got bigger, so that's OK.)
4540 if (ambientWeight < lastWeight)
4542 ambientWeight = BB_MAX_WEIGHT;
4547 // If we're in a non-natural loop, the ambient weight might be higher than we computed above.
4548 // Be safe by taking the max with the head block's weight.
4549 ambientWeight = max(ambientWeight, optLoopTable[loopInd].lpHead->bbWeight);
4551 // This is the containing loop, if any -- to label any blocks we create that are outside
4552 // the loop being cloned.
4553 unsigned char ambientLoop = optLoopTable[loopInd].lpParent;
4555 // First, make sure that the loop has a unique header block, creating an empty one if necessary.
4556 optEnsureUniqueHead(loopInd, ambientWeight);
4558 // We're going to make
4570 // H2--> E (Optional; if E == T == F, let H fall through to F/T/E)
4582 BasicBlock* h = optLoopTable[loopInd].lpHead;
4583 if (h->bbJumpKind != BBJ_NONE && h->bbJumpKind != BBJ_ALWAYS)
4585 // Make a new block to be the unique entry to the loop.
4586 assert(h->bbJumpKind == BBJ_COND && h->bbNext == optLoopTable[loopInd].lpEntry);
4587 BasicBlock* newH = fgNewBBafter(BBJ_NONE, h,
4588 /*extendRegion*/ true);
4589 newH->bbWeight = (newH->isRunRarely() ? 0 : ambientWeight);
4590 BlockSetOps::Assign(this, newH->bbReach, h->bbReach);
4591 // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
4592 newH->bbNatLoopNum = ambientLoop;
4594 optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h);
4597 // First, make X2 after B, if necessary. (Not necessary if b is a BBJ_ALWAYS.)
4598 // "newPred" will be the predecessor of the blocks of the cloned loop.
4599 BasicBlock* b = optLoopTable[loopInd].lpBottom;
4600 BasicBlock* newPred = b;
4601 if (b->bbJumpKind != BBJ_ALWAYS)
4603 BasicBlock* x = b->bbNext;
4606 BasicBlock* x2 = fgNewBBafter(BBJ_ALWAYS, b, /*extendRegion*/ true);
4607 x2->bbWeight = (x2->isRunRarely() ? 0 : ambientWeight);
4609 // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
4610 x2->bbNatLoopNum = ambientLoop;
4613 BlockSetOps::Assign(this, x2->bbReach, h->bbReach);
4618 // Now we'll make "h2", after "h" to go to "e" -- unless the loop is a do-while,
4619 // so that "h" already falls through to "e" (e == t == f).
4620 BasicBlock* h2 = nullptr;
4621 if (optLoopTable[loopInd].lpHead->bbNext != optLoopTable[loopInd].lpEntry)
// NOTE(review): this inner 'h2' declaration shadows the 'h2' initialized to
// nullptr just above, so the outer 'h2' (used in the assert on h->bbNext
// below) remains nullptr even when this branch runs — confirm whether the
// inner 'BasicBlock*' was intended here.
4623 BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, optLoopTable[loopInd].lpHead,
4624 /*extendRegion*/ true);
4625 h2->bbWeight = (h2->isRunRarely() ? 0 : ambientWeight);
4627 // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
4628 h2->bbNatLoopNum = ambientLoop;
4630 h2->bbJumpDest = optLoopTable[loopInd].lpEntry;
4631 optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h2);
4634 // Now we'll clone the blocks of the loop body.
4635 BasicBlock* newFirst = nullptr;
4636 BasicBlock* newBot = nullptr;
4638 BlockToBlockMap* blockMap = new (getAllocator()) BlockToBlockMap(getAllocator());
// Clone each block of the loop [lpFirst .. lpBottom], appending after
// 'newPred' and recording the old->new mapping for jump redirection below.
4639 for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != optLoopTable[loopInd].lpBottom->bbNext;
4642 BasicBlock* newBlk = fgNewBBafter(blk->bbJumpKind, newPred,
4643 /*extendRegion*/ true);
4645 BasicBlock::CloneBlockState(this, newBlk, blk);
4646 // TODO-Cleanup: The above clones the bbNatLoopNum, which is incorrect. Eventually, we should probably insert
4647 // the cloned loop in the loop table. For now, however, we'll just make these blocks be part of the surrounding
4648 // loop, if one exists -- the parent of the loop we're cloning.
4649 newBlk->bbNatLoopNum = optLoopTable[loopInd].lpParent;
4651 if (newFirst == nullptr)
4655 newBot = newBlk; // Continually overwrite to make sure we get the last one.
4657 blockMap->Set(blk, newBlk);
4660 // Perform the static optimizations on the fast path.
4661 optPerformStaticOptimizations(loopInd, context DEBUGARG(true));
4663 // Now go through the new blocks, remapping their jump targets within the loop.
4664 for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != optLoopTable[loopInd].lpBottom->bbNext;
4668 BasicBlock* newblk = nullptr;
4669 bool b = blockMap->Lookup(blk, &newblk);
4670 assert(b && newblk != nullptr);
4672 assert(blk->bbJumpKind == newblk->bbJumpKind);
4674 // First copy the jump destination(s) from "blk".
4675 optCopyBlkDest(blk, newblk);
4677 // Now redirect the new block according to "blockMap".
4678 optRedirectBlock(newblk, blockMap);
4681 assert((h->bbJumpKind == BBJ_NONE && (h->bbNext == h2 || h->bbNext == optLoopTable[loopInd].lpEntry)) ||
4682 (h->bbJumpKind == BBJ_ALWAYS));
4684 // If all the conditions are true, go to E2.
4685 BasicBlock* e2 = nullptr;
4686 bool foundIt = blockMap->Lookup(optLoopTable[loopInd].lpEntry, &e2);
4688 h->bbJumpKind = BBJ_COND;
4690 // We will create the following structure
4692 // cond0 (in h)  -?> cond1
4693 // slow          --> e2 (slow) always
4700 // We should always have block conditions, at the minimum, the array should be deref-able
4701 assert(context->HasBlockConditions(loopInd));
4703 // Create a unique header for the slow path.
4704 BasicBlock* slowHead = fgNewBBafter(BBJ_ALWAYS, h, true);
4705 slowHead->bbWeight = (h->isRunRarely() ? 0 : ambientWeight);
4706 slowHead->bbNatLoopNum = ambientLoop;
4707 slowHead->bbJumpDest = e2;
4709 BasicBlock* condLast = optInsertLoopChoiceConditions(context, loopInd, h, slowHead);
4710 condLast->bbJumpDest = slowHead;
4712 // If h2 is present it is already the head or replace 'h' by 'condLast'.
4715 optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, condLast);
4717 assert(foundIt && e2 != nullptr);
4719 fgUpdateChangedFlowGraph();
4722 //--------------------------------------------------------------------------------------------------
4723 // optInsertLoopChoiceConditions - Insert the loop conditions for a loop between loop head and entry
4726 // context loop cloning context variable
4727 // loopNum the loop index
4728 // head loop head for "loopNum"
4729 // slowHead the slow path loop head
4735 // Create the following structure.
4737 // Note below that the cond0 is inverted in head i.e., if true jump to cond1. This is because
4738 // condn cannot jtrue to loop head h2. It has to be from a direct pred block.
4740 // cond0 (in h)  -?> cond1
4741 // slowHead      --> e2 (slowHead) always
4742 // !cond1        -?> slowHead
4743 // !cond2        -?> slowHead
4745 // !condn        -?> slowHead
4748 // Insert condition 0 in 'h' and create other condition blocks and insert conditions in them.
4750 BasicBlock* Compiler::optInsertLoopChoiceConditions(LoopCloneContext* context,
4753 BasicBlock* slowHead)
4755 JITDUMP("Inserting loop cloning conditions\n");
4756 assert(context->HasBlockConditions(loopNum));
4758 BasicBlock* curCond = head;
// One pass per condition level: emit that level's conditions into the
// current block, then chain a fresh BBJ_COND block for the next level.
4759 ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* levelCond = context->GetBlockConditions(loopNum);
4760 for (unsigned i = 0; i < levelCond->Size(); ++i)
4762 bool isHeaderBlock = (curCond == head);
4764 // Flip the condition if header block.
4765 context->CondToStmtInBlock(this, *((*levelCond)[i]), curCond, isHeaderBlock);
4767 // Create each condition block ensuring wiring between them.
4768 BasicBlock* tmp = fgNewBBafter(BBJ_COND, isHeaderBlock ? slowHead : curCond, true);
4769 curCond->bbJumpDest = isHeaderBlock ? tmp : slowHead;
4772 curCond->inheritWeight(head);
4773 curCond->bbNatLoopNum = head->bbNatLoopNum;
4774 JITDUMP("Created new block %02d for new level\n", curCond->bbNum);
4777 // Finally insert cloning conditions after all deref conditions have been inserted.
4778 context->CondToStmtInBlock(this, *(context->GetConditions(loopNum)), curCond, false);
// optEnsureUniqueHead: Guarantee loop 'loopInd' has a unique header block
// that dominates the entry; if not, create an empty header with weight
// 'ambientWeight' and retarget all non-loop predecessors of the entry to it.
4782 void Compiler::optEnsureUniqueHead(unsigned loopInd, unsigned ambientWeight)
4784 BasicBlock* h = optLoopTable[loopInd].lpHead;
4785 BasicBlock* t = optLoopTable[loopInd].lpTop;
4786 BasicBlock* e = optLoopTable[loopInd].lpEntry;
4787 BasicBlock* b = optLoopTable[loopInd].lpBottom;
4789 // If "h" dominates the entry block, then it is the unique header.
4790 if (fgDominate(h, e))
4795 // Otherwise, create a new empty header block, make it the pred of the entry block,
4796 // and redirect the preds of the entry block to go to this.
4798 BasicBlock* beforeTop = t->bbPrev;
4799 // Make sure that the new block is in the same region as the loop.
4800 // (We will only create loops that are entirely within a region.)
4801 BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, beforeTop, true);
4802 // This is in the containing loop.
4803 h2->bbNatLoopNum = optLoopTable[loopInd].lpParent;
4804 h2->bbWeight = (h2->isRunRarely() ? 0 : ambientWeight);
4806 // We don't care where it was put; splice it between beforeTop and top.
4807 if (beforeTop->bbNext != h2)
4809 h2->bbPrev->setNext(h2->bbNext); // Splice h2 out.
4810 beforeTop->setNext(h2);          // Splice h2 in, between beforeTop and t.
4814 if (h2->bbNext != e)
4816 h2->bbJumpKind = BBJ_ALWAYS;
4819 BlockSetOps::Assign(this, h2->bbReach, e->bbReach);
4821 // Redirect paths from preds of "e" to go to "h2" instead of "e".
4822 BlockToBlockMap* blockMap = new (getAllocator()) BlockToBlockMap(getAllocator());
4823 blockMap->Set(e, h2);
4825 for (flowList* predEntry = e->bbPreds; predEntry; predEntry = predEntry->flNext)
4827 BasicBlock* predBlock = predEntry->flBlock;
4829 // Skip if predBlock is in the loop.
// Loop membership test uses block-number ordering: blocks numbered within
// [top, bottom] are treated as inside the loop.
4830 if (t->bbNum <= predBlock->bbNum && predBlock->bbNum <= b->bbNum)
4834 optRedirectBlock(predBlock, blockMap);
4837 optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h2);
4840 /*****************************************************************************
4842 * Determine the kind of interference for the call.
4845 /* static */ inline Compiler::callInterf Compiler::optCallInterf(GenTreePtr call)
4847 assert(call->gtOper == GT_CALL);
4849 // if not a helper, kills everything
4850 if (call->gtCall.gtCallType != CT_HELPER)
4855 // setfield and array address store kill all indirections
4856 switch (eeGetHelperNum(call->gtCall.gtCallMethHnd))
// Object-reference stores: only GC-ref indirections are affected.
4858 case CORINFO_HELP_ASSIGN_REF:         // Not strictly needed as we don't make a GT_CALL with this
4859 case CORINFO_HELP_CHECKED_ASSIGN_REF: // Not strictly needed as we don't make a GT_CALL with this
4860 case CORINFO_HELP_ASSIGN_BYREF:       // Not strictly needed as we don't make a GT_CALL with this
4861 case CORINFO_HELP_SETFIELDOBJ:
4862 case CORINFO_HELP_ARRADDR_ST:
4864 return CALLINT_REF_INDIRS;
// Scalar field stores: only non-GC (scalar) indirections are affected.
4866 case CORINFO_HELP_SETFIELDFLOAT:
4867 case CORINFO_HELP_SETFIELDDOUBLE:
4868 case CORINFO_HELP_SETFIELD8:
4869 case CORINFO_HELP_SETFIELD16:
4870 case CORINFO_HELP_SETFIELD32:
4871 case CORINFO_HELP_SETFIELD64:
4873 return CALLINT_SCL_INDIRS;
// Struct/memory block stores may touch both ref and scalar indirections.
4875 case CORINFO_HELP_ASSIGN_STRUCT: // Not strictly needed as we don't use this in Jit32
4876 case CORINFO_HELP_MEMSET:        // Not strictly needed as we don't make a GT_CALL with this
4877 case CORINFO_HELP_MEMCPY:        // Not strictly needed as we don't make a GT_CALL with this
4878 case CORINFO_HELP_SETFIELDSTRUCT:
4880 return CALLINT_ALL_INDIRS;
4886 // other helpers kill nothing
4887 return CALLINT_NONE;
4890 /*****************************************************************************
4892 * See if the given tree can be computed in the given precision (which must
4893 * be smaller than the type of the tree for this to make sense). If 'doit'
4894 * is false, we merely check to see whether narrowing is possible; if we
4895 * get called with 'doit' being true, we actually perform the narrowing.
4898 bool Compiler::optNarrowTree(GenTreePtr tree, var_types srct, var_types dstt, ValueNumPair vnpNarrow, bool doit)
4904 noway_assert(genActualType(tree->gtType) == genActualType(srct));
4906 /* Assume we're only handling integer types */
4907 noway_assert(varTypeIsIntegral(srct));
4908 noway_assert(varTypeIsIntegral(dstt));
4910 unsigned srcSize = genTypeSize(srct);
4911 unsigned dstSize = genTypeSize(dstt);
4913 /* dstt must be smaller than srct to narrow */
4914 if (dstSize >= srcSize)
4919 /* Figure out what kind of a node we have */
4920 oper = tree->OperGet();
4921 kind = tree->OperKind();
// Assignment operators are never narrowed; this path is check-only.
4923 if (kind & GTK_ASGOP)
4925 noway_assert(doit == false);
4929 ValueNumPair NoVNPair = ValueNumPair();
4931 if (kind & GTK_LEAF)
4935 /* Constants can usually be narrowed by changing their value */
4936 CLANG_FORMAT_COMMENT_ANCHOR;
4938 #ifndef _TARGET_64BIT_
// 32-bit targets: a long constant narrows by rewriting it as an int constant,
// provided its value fits under the mask for the destination type.
4943 lval = tree->gtIntConCommon.LngValue();
4972 if ((lval & lmask) != lval)
4977 tree->ChangeOperConst(GT_CNS_INT);
4978 tree->gtType = TYP_INT;
4979 tree->gtIntCon.gtIconVal = (int)lval;
4980 if (vnStore != nullptr)
// Re-derive the value number for the rewritten constant node.
4982 fgValueNumberTreeConst(tree);
4992 ival = tree->gtIntCon.gtIconVal;
5011 #ifdef _TARGET_64BIT_
5018 #endif // _TARGET_64BIT_
5023 if ((ival & imask) != ival)
5028 #ifdef _TARGET_64BIT_
5031 tree->gtType = TYP_INT;
5032 tree->gtIntCon.gtIconVal = (int)ival;
5033 if (vnStore != nullptr)
5035 fgValueNumberTreeConst(tree);
5038 #endif // _TARGET_64BIT_
5042 /* Operands that are in memory can usually be narrowed
5043 simply by changing their gtType */
5046 /* We only allow narrowing long -> int for a GT_LCL_VAR */
5047 if (dstSize == sizeof(int))
5060 noway_assert(doit == false);
5064 if (kind & (GTK_BINOP | GTK_UNOP))
5067 op1 = tree->gtOp.gtOp1;
5069 op2 = tree->gtOp.gtOp2;
5071 switch (tree->gtOper)
5074 noway_assert(genActualType(tree->gtType) == genActualType(op2->gtType));
5076 // Is op2 a small constant than can be narrowed into dstt?
5077 // if so the result of the GT_AND will also fit into 'dstt' and can be narrowed
5078 if ((op2->gtOper == GT_CNS_INT) && optNarrowTree(op2, srct, dstt, NoVNPair, false))
5080 // We will change the type of the tree and narrow op2
5084 tree->gtType = genActualType(dstt);
5085 tree->SetVNs(vnpNarrow);
5087 optNarrowTree(op2, srct, dstt, NoVNPair, true);
5088 // We may also need to cast away the upper bits of op1
5091 assert(tree->gtType == TYP_INT);
5092 op1 = gtNewCastNode(TYP_INT, op1, TYP_INT);
// Mark the fabricated cast as already morphed so it isn't re-morphed.
5094 op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
5096 tree->gtOp.gtOp1 = op1;
// Overflow-checked ops and small destination types can't be narrowed here.
5107 if (tree->gtOverflow() || varTypeIsSmall(dstt))
5109 noway_assert(doit == false);
5117 noway_assert(genActualType(tree->gtType) == genActualType(op1->gtType));
5118 noway_assert(genActualType(tree->gtType) == genActualType(op2->gtType));
// Both operands must themselves be narrowable (and not live CSE candidates)
// for the binary node to narrow.
5120 if (gtIsActiveCSE_Candidate(op1) || gtIsActiveCSE_Candidate(op2) ||
5121 !optNarrowTree(op1, srct, dstt, NoVNPair, doit) || !optNarrowTree(op2, srct, dstt, NoVNPair, doit))
5123 noway_assert(doit == false);
5127 /* Simply change the type of the tree */
5131 if (tree->gtOper == GT_MUL && (tree->gtFlags & GTF_MUL_64RSLT))
5133 tree->gtFlags &= ~GTF_MUL_64RSLT;
5136 tree->gtType = genActualType(dstt);
5137 tree->SetVNs(vnpNarrow);
5145 /* Simply change the type of the tree */
5147 if (doit && (dstSize <= genTypeSize(tree->gtType)))
5149 tree->gtType = genSignedType(dstt);
5150 tree->SetVNs(vnpNarrow);
5152 /* Make sure we don't mess up the variable type */
5153 if ((oper == GT_LCL_VAR) || (oper == GT_LCL_FLD))
5155 tree->gtFlags |= GTF_VAR_CAST;
5168 /* These can always be narrowed since they only represent 0 or 1 */
5173 var_types cast = tree->CastToType();
5174 var_types oprt = op1->TypeGet();
5175 unsigned oprSize = genTypeSize(oprt);
// Can't narrow across the integral/non-integral divide, or with overflow checks.
5182 if (varTypeIsIntegralOrI(dstt) != varTypeIsIntegralOrI(oprt))
5187 if (tree->gtOverflow())
5192 /* Is this a cast from the type we're narrowing to or a smaller one? */
5194 if (oprSize <= dstSize)
5196 /* Bash the target type of the cast */
5200 dstt = genSignedType(dstt);
5202 if (oprSize == dstSize)
5204 // Same size: change the CAST into a NOP
5205 tree->ChangeOper(GT_NOP);
5206 tree->gtType = dstt;
5207 tree->gtOp.gtOp2 = nullptr;
5208 tree->gtVNPair = op1->gtVNPair; // Set to op1's ValueNumber
5212 // oprSize is smaller
5213 assert(oprSize < dstSize);
5215 // Change the CastToType in the GT_CAST node
5216 tree->CastToType() = dstt;
5218 // The result type of a GT_CAST is never a small type.
5219 // Use genActualType to widen dstt when it is a small types.
5220 tree->gtType = genActualType(dstt);
5221 tree->SetVNs(vnpNarrow);
5231 if (!gtIsActiveCSE_Candidate(op2) && optNarrowTree(op2, srct, dstt, vnpNarrow, doit))
5233 /* Simply change the type of the tree */
5237 tree->gtType = genActualType(dstt);
5238 tree->SetVNs(vnpNarrow);
5245 noway_assert(doit == false);
5253 /*****************************************************************************
5255 * The following logic figures out whether the given variable is assigned
5256 * somewhere in a list of basic blocks (or in an entire loop).
// Tree-walk callback: records which locals are assigned (in ivaMaskVal),
// which indirection kinds are written (ivaMaskInd), and call interference
// (ivaMaskCall); aborts the walk when the tracked variable ivaVar is hit.
5259 Compiler::fgWalkResult Compiler::optIsVarAssgCB(GenTreePtr* pTree, fgWalkData* data)
5261 GenTreePtr tree = *pTree;
5263 if (tree->OperKind() & GTK_ASGOP)
5265 GenTreePtr dest = tree->gtOp.gtOp1;
5266 genTreeOps destOper = dest->OperGet();
5268 isVarAssgDsc* desc = (isVarAssgDsc*)data->pCallbackData;
5269 assert(desc && desc->ivaSelf == desc);
5271 if (destOper == GT_LCL_VAR)
5273 unsigned tvar = dest->gtLclVarCommon.gtLclNum;
// Locals past the tracked-set limit are noted as "incomplete" rather
// than added to the mask.
5274 if (tvar < lclMAX_ALLSET_TRACKED)
5276 AllVarSetOps::AddElemD(data->compiler, desc->ivaMaskVal, tvar);
5280 desc->ivaMaskIncomplete = true;
// Found an assignment to the variable we're querying (unless this is the
// tree the caller asked us to skip).
5283 if (tvar == desc->ivaVar)
5285 if (tree != desc->ivaSkip)
5291 else if (destOper == GT_LCL_FLD)
5293 /* We can't track every field of every var. Moreover, indirections
5294 may access different parts of the var as different (but
5295 overlapping) fields. So just treat them as indirect accesses */
5297 // unsigned lclNum = dest->gtLclFld.gtLclNum;
5298 // noway_assert(lvaTable[lclNum].lvAddrTaken);
5300 varRefKinds refs = varTypeIsGC(tree->TypeGet()) ? VR_IND_REF : VR_IND_SCL;
5301 desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | refs);
5303 else if (destOper == GT_CLS_VAR)
5305 desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | VR_GLB_VAR);
5307 else if (destOper == GT_IND)
5309 /* Set the proper indirection bits */
5311 varRefKinds refs = varTypeIsGC(tree->TypeGet()) ? VR_IND_REF : VR_IND_SCL;
5312 desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | refs);
5315 else if (tree->gtOper == GT_CALL)
5317 isVarAssgDsc* desc = (isVarAssgDsc*)data->pCallbackData;
5318 assert(desc && desc->ivaSelf == desc);
// Record the strongest interference kind seen across all calls.
5320 desc->ivaMaskCall = optCallInterf(tree);
5323 return WALK_CONTINUE;
5326 /*****************************************************************************/
// optIsVarAssigned: Walk the statements of blocks [beg .. end] looking for an
// assignment to local 'var', ignoring the single tree 'skip'; the walk uses
// optIsVarAssgCB with a descriptor tracking just this variable.
5328 bool Compiler::optIsVarAssigned(BasicBlock* beg, BasicBlock* end, GenTreePtr skip, unsigned var)
5333 desc.ivaSkip = skip;
5335 desc.ivaSelf = &desc;
5338 desc.ivaMaskCall = CALLINT_NONE;
5339 AllVarSetOps::AssignNoCopy(this, desc.ivaMaskVal, AllVarSetOps::MakeEmpty(this));
5345 for (GenTreeStmt* stmt = beg->firstStmt(); stmt; stmt = stmt->gtNextStmt)
5347 noway_assert(stmt->gtOper == GT_STMT);
// A non-zero walk result means the callback aborted on a matching assignment.
5348 if (fgWalkTreePre(&stmt->gtStmtExpr, optIsVarAssgCB, &desc))
5370 /*****************************************************************************/
// optIsSetAssgLoop: Return whether any variable in 'vars' (or any indirection
// in 'inds') may be assigned within loop 'lnum'. The per-loop assignment sets
// are computed lazily and cached in the loop descriptor (LPFLG_ASGVARS_YES).
5371 int Compiler::optIsSetAssgLoop(unsigned lnum, ALLVARSET_VALARG_TP vars, varRefKinds inds)
5375 /* Get hold of the loop descriptor */
5377 noway_assert(lnum < optLoopCount);
5378 loop = optLoopTable + lnum;
5380 /* Do we already know what variables are assigned within this loop? */
5382 if (!(loop->lpFlags & LPFLG_ASGVARS_YES))
5389 /* Prepare the descriptor used by the tree walker call-back */
// ivaVar = -1: we are collecting all assignments, not querying one variable.
5391 desc.ivaVar = (unsigned)-1;
5392 desc.ivaSkip = nullptr;
5394 desc.ivaSelf = &desc;
5396 AllVarSetOps::AssignNoCopy(this, desc.ivaMaskVal, AllVarSetOps::MakeEmpty(this));
5397 desc.ivaMaskInd = VR_NONE;
5398 desc.ivaMaskCall = CALLINT_NONE;
5399 desc.ivaMaskIncomplete = false;
5401 /* Now walk all the statements of the loop */
5403 beg = loop->lpHead->bbNext;
5404 end = loop->lpBottom;
5406 for (/**/; /**/; beg = beg->bbNext)
5410 for (GenTreeStmt* stmt = beg->FirstNonPhiDef(); stmt; stmt = stmt->gtNextStmt)
5412 noway_assert(stmt->gtOper == GT_STMT);
5413 fgWalkTreePre(&stmt->gtStmtExpr, optIsVarAssgCB, &desc);
// If some locals exceeded the trackable set, record that the cached
// mask is an under-approximation.
5415 if (desc.ivaMaskIncomplete)
5417 loop->lpFlags |= LPFLG_ASGVARS_INC;
// Cache the computed sets on the loop descriptor.
5427 AllVarSetOps::Assign(this, loop->lpAsgVars, desc.ivaMaskVal);
5428 loop->lpAsgInds = desc.ivaMaskInd;
5429 loop->lpAsgCall = desc.ivaMaskCall;
5431 /* Now we know what variables are assigned in the loop */
5433 loop->lpFlags |= LPFLG_ASGVARS_YES;
5436 /* Now we can finally test the caller's mask against the loop's */
5437 if (!AllVarSetOps::IsEmptyIntersection(this, loop->lpAsgVars, vars) || (loop->lpAsgInds & inds))
// Calls in the loop may also interfere with the queried indirections.
5442 switch (loop->lpAsgCall)
5446 /* Can't hoist if the call might have side effect on an indirection. */
5448 if (loop->lpAsgInds != VR_NONE)
5455 case CALLINT_REF_INDIRS:
5457 /* Can't hoist if the call might have side effect on an ref indirection. */
5459 if (loop->lpAsgInds & VR_IND_REF)
5466 case CALLINT_SCL_INDIRS:
5468 /* Can't hoist if the call might have side effect on an non-ref indirection. */
5470 if (loop->lpAsgInds & VR_IND_SCL)
5477 case CALLINT_ALL_INDIRS:
5479 /* Can't hoist if the call might have side effect on any indirection. */
5481 if (loop->lpAsgInds & (VR_IND_REF | VR_IND_SCL))
5490 /* Other helpers kill nothing */
5495 noway_assert(!"Unexpected lpAsgCall value");
// optPerformHoistExpr: hoist a copy of 'origExpr' out of loop 'lnum'.
//
// Clones 'origExpr' (marking the clone with GTF_MAKE_CSE so the later CSE phase
// can common it with the occurrences left in the loop body), wraps the clone as
// an unused-value node unless it is an assignment, morphs it, and appends it as
// the last statement of the loop's pre-header block — creating the pre-header
// first via fgCreateLoopPreHeader. The original expression in the loop is left
// in place; CSE is expected to replace it with a use of the hoisted value.
//
// Arguments:
//    origExpr - the loop-invariant expression to hoist (stays in the loop)
//    lnum     - index into optLoopTable of the loop being hoisted from
5501 void Compiler::optPerformHoistExpr(GenTreePtr origExpr, unsigned lnum)
5506 printf("\nHoisting a copy of ");
5507 printTreeID(origExpr);
5508 printf(" into PreHeader for loop L%02u <BB%02u..BB%02u>:\n", lnum, optLoopTable[lnum].lpFirst->bbNum,
5509 optLoopTable[lnum].lpBottom->bbNum);
5510 gtDispTree(origExpr);
5515 // This loop has to be in a form that is approved for hoisting.
5516 assert(optLoopTable[lnum].lpFlags & LPFLG_HOISTABLE);
5518 // Create a copy of the expression and mark it for CSE's.
5519 GenTreePtr hoistExpr = gtCloneExpr(origExpr, GTF_MAKE_CSE);
5521 // At this point we should have a cloned expression, marked with the GTF_MAKE_CSE flag
5522 assert(hoistExpr != origExpr);
5523 assert(hoistExpr->gtFlags & GTF_MAKE_CSE);
5525 GenTreePtr hoist = hoistExpr;
5526 // The value of the expression isn't used (unless it's an assignment).
5527 if (hoistExpr->OperGet() != GT_ASG)
5529 hoist = gtUnusedValNode(hoistExpr);
5532 /* Put the statement in the preheader */
5534 fgCreateLoopPreHeader(lnum);
// After fgCreateLoopPreHeader, lpHead is the (possibly new) pre-header block.
5536 BasicBlock* preHead = optLoopTable[lnum].lpHead;
5537 assert(preHead->bbJumpKind == BBJ_NONE);
5539 // fgMorphTree and lvaRecursiveIncRefCounts requires that compCurBB be the block that contains
5540 // (or in this case, will contain) the expression.
5541 compCurBB = preHead;
5543 // Increment the ref counts of any local vars appearing in "hoist".
5544 // Note that we need to do this before fgMorphTree() as fgMorph() could constant
5545 // fold away some of the lcl vars referenced by "hoist".
5546 lvaRecursiveIncRefCounts(hoist);
5548 hoist = fgMorphTree(hoist);
5550 GenTreePtr hoistStmt = gtNewStmt(hoist);
5551 hoistStmt->gtFlags |= GTF_STMT_CMPADD;
5553 /* simply append the statement at the end of the preHead's list */
5555 GenTreePtr treeList = preHead->bbTreeList;
// Non-empty pre-header: splice after the existing tail. In this doubly-linked
// statement list the head's gtPrev points at the last statement.
5559 /* append after last statement */
5561 GenTreePtr last = treeList->gtPrev;
5562 assert(last->gtNext == nullptr);
5564 last->gtNext = hoistStmt;
5565 hoistStmt->gtPrev = last;
5566 treeList->gtPrev = hoistStmt;
5570 /* Empty pre-header - store the single statement in the block */
5572 preHead->bbTreeList = hoistStmt;
// Single-element circular-prev convention: a lone statement is its own gtPrev.
5573 hoistStmt->gtPrev = hoistStmt;
5576 hoistStmt->gtNext = nullptr;
5581 printf("This hoisted copy placed in PreHeader (BB%02u):\n", preHead->bbNum);
// Re-thread costs/ordering for the new statement if the linear node order exists.
5586 if (fgStmtListThreaded)
5588 gtSetStmtInfo(hoistStmt);
5589 fgSetStmtSeq(hoistStmt);
// JIT test-data bookkeeping (debug-only infrastructure): verify any
// "do not hoist" / expected-nesting-depth annotations attached to origExpr,
// then convert the annotation into a CSE-definition expectation on the clone.
5593 if (m_nodeTestData != nullptr)
5596 // What is the depth of the loop "lnum"?
5598 unsigned lnumIter = lnum;
5599 while (optLoopTable[lnumIter].lpParent != BasicBlock::NOT_IN_LOOP)
5602 lnumIter = optLoopTable[lnumIter].lpParent;
5605 NodeToTestDataMap* testData = GetNodeTestData();
5607 TestLabelAndNum tlAndN;
5608 if (testData->Lookup(origExpr, &tlAndN) && tlAndN.m_tl == TL_LoopHoist)
// m_num == -1 encodes "must not hoist"; reaching here is a test failure.
5610 if (tlAndN.m_num == -1)
5613 printTreeID(origExpr);
5614 printf(" was declared 'do not hoist', but is being hoisted.\n");
5617 else if (tlAndN.m_num != depth)
5620 printTreeID(origExpr);
5621 printf(" was declared as hoistable from loop at nesting depth %d; actually hoisted from loop at depth "
5623 tlAndN.m_num, depth);
5628 // We've correctly hoisted this, so remove the annotation. Later, we'll check for any remaining "must
5629 // hoist" annotations.
5630 testData->Remove(origExpr);
5631 // Now we insert an annotation to make sure that "hoistExpr" is actually CSE'd.
5632 tlAndN.m_tl = TL_CSE_Def;
5633 tlAndN.m_num = m_loopHoistCSEClass++;
5634 testData->Set(hoistExpr, tlAndN);
5640 #if LOOP_HOIST_STATS
// Count each loop at most once in m_loopsWithHoistedExpressions.
5641 if (!m_curLoopHasHoistedExpression)
5643 m_loopsWithHoistedExpressions++;
5644 m_curLoopHasHoistedExpression = true;
5646 m_totalHoistedExpressions++;
5647 #endif // LOOP_HOIST_STATS
// optHoistLoopCode: phase driver for loop-invariant code hoisting.
//
// Visits every outermost loop in optLoopTable (skipping removed loops) and
// recursively hoists invariant expressions via optHoistLoopNest. Afterwards,
// in checked builds, validates that every "must hoist" test-data annotation
// was actually satisfied.
5650 void Compiler::optHoistLoopCode()
5652 // If we don't have any loops in the method then take an early out now.
5653 if (optLoopCount == 0)
// JitNoHoist config knob allows disabling this phase. (NOTE(review): the code
// consuming 'jitNoHoist' is elided in this view — presumably an early return.)
5659 unsigned jitNoHoist = JitConfig.JitNoHoist();
5667 // The code in this #if has been useful in debugging loop cloning issues, by
5668 // enabling selective enablement of the loop cloning optimization according to
// Method-hash range filter: hoist only when the method hash falls inside the
// [loophoisthashlo, loophoisthashhi] window given via environment variables.
5671 unsigned methHash = info.compMethodHash();
5672 char* lostr = getenv("loophoisthashlo");
5673 unsigned methHashLo = 0;
5676 sscanf_s(lostr, "%x", &methHashLo);
5677 // methHashLo = (unsigned(atoi(lostr)) << 2); // So we don't have to use negative numbers.
5679 char* histr = getenv("loophoisthashhi");
5680 unsigned methHashHi = UINT32_MAX;
5683 sscanf_s(histr, "%x", &methHashHi);
5684 // methHashHi = (unsigned(atoi(histr)) << 2); // So we don't have to use negative numbers.
5686 if (methHash < methHashLo || methHash > methHashHi)
5688 printf("Doing loop hoisting in %s (0x%x).\n", info.compFullName, methHash);
5690 #endif // 0 -- debugging loop cloning issues
5695 printf("\n*************** In optHoistLoopCode()\n");
5696 printf("Blocks/Trees before phase\n");
5697 fgDispBasicBlocks(true);
5702 // Consider all the loop nests, in outer-to-inner order (thus hoisting expressions outside the largest loop in which
5703 // they are invariant.)
5704 LoopHoistContext hoistCtxt(this);
5705 for (unsigned lnum = 0; lnum < optLoopCount; lnum++)
5707 if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED)
// Only start recursion at outermost loops; optHoistLoopNest walks children.
5712 if (optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP)
5714 optHoistLoopNest(lnum, &hoistCtxt);
5723 printf("Blocks/Trees after optHoistLoopCode() modified flowgraph\n");
5724 fgDispBasicBlocks(true);
5728 // Make sure that the predecessor lists are accurate
5729 fgDebugCheckBBlist();
5734 // Test Data stuff..
5735 // If we have no test data, early out.
5736 if (m_nodeTestData == nullptr)
// Scan remaining annotations: any TL_LoopHoist entry with m_num >= 0 marks a
// tree that was declared "must hoist" but was not hoisted — a test failure.
5740 NodeToTestDataMap* testData = GetNodeTestData();
5741 for (NodeToTestDataMap::KeyIterator ki = testData->Begin(); !ki.Equal(testData->End()); ++ki)
5743 TestLabelAndNum tlAndN;
5744 GenTreePtr node = ki.Get();
5745 bool b = testData->Lookup(node, &tlAndN);
5747 if (tlAndN.m_tl != TL_LoopHoist)
5751 // Otherwise, it is a loop hoist annotation.
5752 assert(tlAndN.m_num < 100); // >= 100 indicates nested static field address, should already have been moved.
5753 if (tlAndN.m_num >= 0)
5757 printf(" was declared 'must hoist', but has not been hoisted.\n");
// optHoistLoopNest: hoist from loop 'lnum', then recurse into its nested loops.
//
// The VNs hoisted in this loop are temporarily added to
// hoistCtxt->m_hoistedInParentLoops while the children are processed (so a
// child loop does not re-hoist an expression its parent already hoisted), and
// removed again afterwards.
//
// Arguments:
//    lnum      - loop table index of the loop to process
//    hoistCtxt - shared per-phase context carrying the hoisted-VN sets
5764 void Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt)
5766 // Do this loop, then recursively do all nested loops.
5767 CLANG_FORMAT_COMMENT_ANCHOR;
5769 #if LOOP_HOIST_STATS
5771 m_curLoopHasHoistedExpression = false;
5772 m_loopsConsidered++;
5773 #endif // LOOP_HOIST_STATS
5775 optHoistThisLoop(lnum, hoistCtxt);
// Take ownership of the set of VNs hoisted in this loop; the context's
// "current loop" slot is reset for the child loops.
5777 VNSet* hoistedInCurLoop = hoistCtxt->ExtractHoistedInCurLoop();
5779 if (optLoopTable[lnum].lpChild != BasicBlock::NOT_IN_LOOP)
5781 // Add the ones hoisted in "lnum" to "hoistedInParents" for any nested loops.
5782 // TODO-Cleanup: we should have a set abstraction for loops.
5783 if (hoistedInCurLoop != nullptr)
5785 for (VNSet::KeyIterator keys = hoistedInCurLoop->Begin(); !keys.Equal(hoistedInCurLoop->End()); ++keys)
5789 assert(!hoistCtxt->m_hoistedInParentLoops.Lookup(keys.Get(), &b));
5791 hoistCtxt->m_hoistedInParentLoops.Set(keys.Get(), true);
// Recurse over the sibling-linked list of child loops.
5795 for (unsigned child = optLoopTable[lnum].lpChild; child != BasicBlock::NOT_IN_LOOP;
5796 child = optLoopTable[child].lpSibling)
5798 optHoistLoopNest(child, hoistCtxt);
// Undo the temporary additions now that all children are done.
5802 // TODO-Cleanup: we should have a set abstraction for loops.
5803 if (hoistedInCurLoop != nullptr)
5805 for (VNSet::KeyIterator keys = hoistedInCurLoop->Begin(); !keys.Equal(hoistedInCurLoop->End()); ++keys)
5807 // Note that we asserted when we added these that they hadn't been members, so removing is appropriate.
5808 hoistCtxt->m_hoistedInParentLoops.Remove(keys.Get());
// optHoistThisLoop: attempt hoisting for a single loop 'lnum'.
//
// First applies legality screens (loop not removed, do-while shape, head
// dominates entry, same try region, not inside a catch). Then computes the
// register-pressure inputs used by the profitability heuristic (counts of
// int/FP/long variables live into and used inside the loop), finds the set of
// blocks that are definitely executed on every iteration, and processes those
// blocks dominator-first via optHoistLoopExprsForBlock.
5814 void Compiler::optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt)
5816 LoopDsc* pLoopDsc = &optLoopTable[lnum];
5818 /* If loop was removed continue */
5820 if (pLoopDsc->lpFlags & LPFLG_REMOVED)
5825 /* Get the head and tail of the loop */
5827 BasicBlock* head = pLoopDsc->lpHead;
5828 BasicBlock* tail = pLoopDsc->lpBottom;
5829 BasicBlock* lbeg = pLoopDsc->lpEntry;
5832 // We must have a do-while loop
5833 if ((pLoopDsc->lpFlags & LPFLG_DO_WHILE) == 0)
5838 // The loop-head must dominate the loop-entry.
5839 // TODO-CQ: Couldn't we make this true if it's not?
5840 if (!fgDominate(head, lbeg))
5845 // if lbeg is the start of a new try block then we won't be able to hoist
5846 if (!BasicBlock::sameTryRegion(head, lbeg))
5851 // We don't bother hoisting when inside of a catch block
5852 if ((lbeg->bbCatchTyp != BBCT_NONE) && (lbeg->bbCatchTyp != BBCT_FINALLY))
// All screens passed: mark the loop so optHoistCandidate/optPerformHoistExpr
// may target it.
5857 pLoopDsc->lpFlags |= LPFLG_HOISTABLE;
5859 unsigned begn = lbeg->bbNum;
5860 unsigned endn = tail->bbNum;
5862 // Ensure the per-loop sets/tables are empty.
5863 hoistCtxt->m_curLoopVnInvariantCache.RemoveAll();
5868 printf("optHoistLoopCode for loop L%02u <BB%02u..BB%02u>:\n", lnum, begn, endn);
5869 printf(" Loop body %s a call\n", pLoopDsc->lpContainsCall ? "contains" : "does not contain");
// loopVars = variables both live across the loop boundary (in/out) and
// used or defined inside the loop body.
5873 VARSET_TP VARSET_INIT_NOCOPY(loopVars, VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, pLoopDsc->lpVarUseDef));
5875 pLoopDsc->lpVarInOutCount = VarSetOps::Count(this, pLoopDsc->lpVarInOut);
5876 pLoopDsc->lpLoopVarCount = VarSetOps::Count(this, loopVars);
5877 pLoopDsc->lpHoistedExprCount = 0;
5879 #ifndef _TARGET_64BIT_
5880 unsigned longVarsCount = VarSetOps::Count(this, lvaLongVars);
5882 if (longVarsCount > 0)
5884 // Since 64-bit variables take up two registers on 32-bit targets, we increase
5885 // the Counts such that each TYP_LONG variable counts twice.
5887 VARSET_TP VARSET_INIT_NOCOPY(loopLongVars, VarSetOps::Intersection(this, loopVars, lvaLongVars));
5888 VARSET_TP VARSET_INIT_NOCOPY(inOutLongVars, VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, lvaLongVars));
5893 printf("\n LONGVARS(%d)=", VarSetOps::Count(this, lvaLongVars));
5894 lvaDispVarSet(lvaLongVars);
5897 pLoopDsc->lpLoopVarCount += VarSetOps::Count(this, loopLongVars);
5898 pLoopDsc->lpVarInOutCount += VarSetOps::Count(this, inOutLongVars);
5900 #endif // !_TARGET_64BIT_
5905 printf("\n USEDEF (%d)=", VarSetOps::Count(this, pLoopDsc->lpVarUseDef));
5906 lvaDispVarSet(pLoopDsc->lpVarUseDef);
5908 printf("\n INOUT (%d)=", pLoopDsc->lpVarInOutCount);
5909 lvaDispVarSet(pLoopDsc->lpVarInOut);
5911 printf("\n LOOPVARS(%d)=", pLoopDsc->lpLoopVarCount);
5912 lvaDispVarSet(loopVars);
// Split the counts into FP and non-FP so the int and float register files can
// be pressure-checked independently in optIsProfitableToHoistableTree.
5917 unsigned floatVarsCount = VarSetOps::Count(this, lvaFloatVars);
5919 if (floatVarsCount > 0)
5921 VARSET_TP VARSET_INIT_NOCOPY(loopFPVars, VarSetOps::Intersection(this, loopVars, lvaFloatVars));
5922 VARSET_TP VARSET_INIT_NOCOPY(inOutFPVars, VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, lvaFloatVars));
5924 pLoopDsc->lpLoopVarFPCount = VarSetOps::Count(this, loopFPVars);
5925 pLoopDsc->lpVarInOutFPCount = VarSetOps::Count(this, inOutFPVars);
5926 pLoopDsc->lpHoistedFPExprCount = 0;
5928 pLoopDsc->lpLoopVarCount -= pLoopDsc->lpLoopVarFPCount;
5929 pLoopDsc->lpVarInOutCount -= pLoopDsc->lpVarInOutFPCount;
5934 printf(" INOUT-FP(%d)=", pLoopDsc->lpVarInOutFPCount);
5935 lvaDispVarSet(inOutFPVars);
5937 printf("\n LOOPV-FP(%d)=", pLoopDsc->lpLoopVarFPCount);
5938 lvaDispVarSet(loopFPVars);
5942 else // (floatVarsCount == 0)
5944 pLoopDsc->lpLoopVarFPCount = 0;
5945 pLoopDsc->lpVarInOutFPCount = 0;
5946 pLoopDsc->lpHoistedFPExprCount = 0;
5949 // Find the set of definitely-executed blocks.
5950 // Ideally, the definitely-executed blocks are the ones that post-dominate the entry block.
5951 // Until we have post-dominators, we'll special-case for single-exit blocks.
5952 ExpandArrayStack<BasicBlock*> defExec(getAllocatorLoopHoist());
5953 if (pLoopDsc->lpFlags & LPFLG_ONE_EXIT)
// Single-exit loop: blocks dominating the sole exit (within the loop) run on
// every complete iteration. Walk the dominator chain from exit back to entry.
5955 assert(pLoopDsc->lpExit != nullptr);
5956 BasicBlock* cur = pLoopDsc->lpExit;
5957 // Push dominators, until we reach "entry" or exit the loop.
5958 while (cur != nullptr && pLoopDsc->lpContains(cur) && cur != pLoopDsc->lpEntry)
5963 // If we didn't reach the entry block, give up and *just* push the entry block.
5964 if (cur != pLoopDsc->lpEntry)
5968 defExec.Push(pLoopDsc->lpEntry);
5970 else // More than one exit
5972 // We'll assume that only the entry block is definitely executed.
5973 // We could in the future do better.
5974 defExec.Push(pLoopDsc->lpEntry);
// Popping the stack yields dominators before dominatees (entry first).
5977 while (defExec.Size() > 0)
5979 // Consider in reverse order: dominator before dominatee.
5980 BasicBlock* blk = defExec.Pop();
5981 optHoistLoopExprsForBlock(blk, lnum, hoistCtxt);
5985 // Hoist any expressions in "blk" that are invariant in loop "lnum" outside of "blk" and into a PreHead for loop "lnum".
//
// Skips blocks whose weight is below a tenth of unity (too cold to pay the
// pre-header cost), then walks each statement tree recursively via
// optHoistLoopExprsForTree; a fully-hoistable top-level tree is offered to
// optHoistCandidate directly, while partially-hoistable trees have their
// hoistable subtrees offered during the recursion.
5986 void Compiler::optHoistLoopExprsForBlock(BasicBlock* blk, unsigned lnum, LoopHoistContext* hoistCtxt)
5988 LoopDsc* pLoopDsc = &optLoopTable[lnum];
// Side-effect ordering only matters in the entry block: in later blocks we
// conservatively assume a side effect may already have happened.
5989 bool firstBlockAndBeforeSideEffect = (blk == pLoopDsc->lpEntry);
5990 unsigned blkWeight = blk->getBBWeight(this);
5995 printf(" optHoistLoopExprsForBlock BB%02u (weight=%6s) of loop L%02u <BB%02u..BB%02u>, firstBlock is %s\n",
5996 blk->bbNum, refCntWtd2str(blkWeight), lnum, pLoopDsc->lpFirst->bbNum, pLoopDsc->lpBottom->bbNum,
5997 firstBlockAndBeforeSideEffect ? "true" : "false");
5998 if (blkWeight < (BB_UNITY_WEIGHT / 10))
6000 printf(" block weight is too small to perform hoisting.\n");
6005 if (blkWeight < (BB_UNITY_WEIGHT / 10))
6007 // Block weight is too small to perform hoisting.
// Skip PHI definitions; they are not hoisting candidates.
6011 for (GenTreeStmt* stmt = blk->FirstNonPhiDef(); stmt; stmt = stmt->gtNextStmt)
6013 GenTreePtr stmtTree = stmt->gtStmtExpr;
6015 (void)optHoistLoopExprsForTree(stmtTree, lnum, hoistCtxt, &firstBlockAndBeforeSideEffect, &hoistable);
6018 // we will try to hoist the top-level stmtTree
6019 optHoistCandidate(stmtTree, lnum, hoistCtxt);
// optIsProfitableToHoistableTree: register-pressure heuristic deciding whether
// hoisting 'tree' out of loop 'lnum' is likely to pay off.
//
// Estimates the registers available across the loop (callee-saved, plus
// callee-trash if the loop makes no calls), charges previously-hoisted
// expressions against that budget, and then requires a higher execution cost
// (gtCostEx) from the candidate when loop variables are expected to consume
// all available registers. FP trees are measured against the FP register file
// and FP variable counts; integer trees against the integer ones.
6024 bool Compiler::optIsProfitableToHoistableTree(GenTreePtr tree, unsigned lnum)
6026 LoopDsc* pLoopDsc = &optLoopTable[lnum];
6028 bool loopContainsCall = pLoopDsc->lpContainsCall;
6031 int hoistedExprCount;
6035 if (varTypeIsFloating(tree->TypeGet()))
6037 hoistedExprCount = pLoopDsc->lpHoistedFPExprCount;
6038 loopVarCount = pLoopDsc->lpLoopVarFPCount;
6039 varInOutCount = pLoopDsc->lpVarInOutFPCount;
6041 availRegCount = CNT_CALLEE_SAVED_FLOAT;
6042 if (!loopContainsCall)
// No calls in the loop body: caller-saved FP registers survive too
// (one is reserved, hence the -1).
6044 availRegCount += CNT_CALLEE_TRASH_FLOAT - 1;
6047 // For ARM each double takes two FP registers
6048 // For now on ARM we won't track singles/doubles
6049 // and instead just assume that we always have doubles.
// Non-FP (integer/GC-ref) tree: use the integer register file counts.
6056 hoistedExprCount = pLoopDsc->lpHoistedExprCount;
6057 loopVarCount = pLoopDsc->lpLoopVarCount;
6058 varInOutCount = pLoopDsc->lpVarInOutCount;
6060 availRegCount = CNT_CALLEE_SAVED - 1;
6061 if (!loopContainsCall)
6063 availRegCount += CNT_CALLEE_TRASH - 1;
6065 #ifndef _TARGET_64BIT_
6066 // For our 32-bit targets Long types take two registers.
6067 if (varTypeIsLong(tree->TypeGet()))
// Halve (rounding up) the budget since each long value needs a pair.
6069 availRegCount = (availRegCount + 1) / 2;
6074 // decrement the availRegCount by the count of expression that we have already hoisted.
6075 availRegCount -= hoistedExprCount;
6077 // the variables that are read/written inside the loop should
6078 // always be a subset of the InOut variables for the loop
6079 assert(loopVarCount <= varInOutCount);
6081 // When loopVarCount >= availRegCount we believe that all of the
6082 // available registers will get used to hold LclVars inside the loop.
6083 // This pessimistically assumes that each loopVar has a conflicting
6084 // lifetime with every other loopVar.
6085 // For this case we will hoist the expression only if is profitable
6086 // to place it in a stack home location (gtCostEx >= 2*IND_COST_EX)
6087 // as we believe it will be placed in the stack or one of the other
6088 // loopVars will be spilled into the stack
6090 if (loopVarCount >= availRegCount)
6092 // Don't hoist expressions that are not heavy: tree->gtCostEx < (2*IND_COST_EX)
6093 if (tree->gtCostEx < (2 * IND_COST_EX))
6099 // When varInOutCount < availRegCount we are know that there are
6100 // some available register(s) when we enter the loop body.
6101 // When varInOutCount == availRegCount there often will be a register
6102 // available when we enter the loop body, since a loop often defines a
6103 // LclVar on exit or there is often at least one LclVar that is worth
6104 // spilling to the stack to make way for this hoisted expression.
6105 // So we are willing hoist an expression with gtCostEx == MIN_CSE_COST
6107 if (varInOutCount > availRegCount)
6109 // Don't hoist expressions that barely meet CSE cost requirements: tree->gtCostEx == MIN_CSE_COST
6110 if (tree->gtCostEx <= MIN_CSE_COST + 1)
6120 // This function returns true if 'tree' is a loop invariant expression.
6121 // It also sets '*pHoistable' to true if 'tree' can be hoisted into a loop PreHeader block
//
// Recursive post-order walk: a tree is invariant only if all of its children
// are hoistable; it is hoistable if, additionally, it is a CSE candidate, any
// call is a pure (non-cctor-running, or explicitly hoistable) helper, its
// liberal value number is invariant in the loop, and — unless we are still
// before the first possible side effect in the entry block — it cannot throw.
// When the tree itself is not hoistable, its maximal hoistable subtrees are
// handed to optHoistCandidate here. The walk also tracks when the first
// possible global side effect (call, global store, copy-block to a global)
// has been passed, via *pFirstBlockAndBeforeSideEffect.
6123 bool Compiler::optHoistLoopExprsForTree(
6124 GenTreePtr tree, unsigned lnum, LoopHoistContext* hoistCtxt, bool* pFirstBlockAndBeforeSideEffect, bool* pHoistable)
6126 // First do the children.
6127 // We must keep track of whether each child node was hoistable or not
6129 unsigned nChildren = tree->NumChildren();
6130 bool childrenHoistable[GenTree::MAX_CHILDREN];
6132 // Initialize the array elements for childrenHoistable[] to false
6133 for (unsigned i = 0; i < nChildren; i++)
6135 childrenHoistable[i] = false;
6138 bool treeIsInvariant = true;
6139 for (unsigned childNum = 0; childNum < nChildren; childNum++)
6141 if (!optHoistLoopExprsForTree(tree->GetChild(childNum), lnum, hoistCtxt, pFirstBlockAndBeforeSideEffect,
6142 &childrenHoistable[childNum]))
6144 treeIsInvariant = false;
6148 // If all the children of "tree" are hoistable, then "tree" itself can be hoisted
6150 bool treeIsHoistable = treeIsInvariant;
6152 // But we must see if anything else prevents "tree" from being hoisted.
6154 if (treeIsInvariant)
6156 // Tree must be a suitable CSE candidate for us to be able to hoist it.
6157 treeIsHoistable = optIsCSEcandidate(tree);
6159 // If it's a call, it must be a helper call, and be pure.
6160 // Further, if it may run a cctor, it must be labeled as "Hoistable"
6161 // (meaning it won't run a cctor because the class is not precise-init).
6162 if (treeIsHoistable && tree->OperGet() == GT_CALL)
6164 GenTreeCall* call = tree->AsCall();
6165 if (call->gtCallType != CT_HELPER)
6167 treeIsHoistable = false;
6171 CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
6172 if (!s_helperCallProperties.IsPure(helpFunc))
6174 treeIsHoistable = false;
6176 else if (s_helperCallProperties.MayRunCctor(helpFunc) && (call->gtFlags & GTF_CALL_HOISTABLE) == 0)
6178 treeIsHoistable = false;
6183 if (treeIsHoistable)
6185 if (!(*pFirstBlockAndBeforeSideEffect))
6187 // For now, we give up on an expression that might raise an exception if it is after the
6188 // first possible global side effect (and we assume we're after that if we're not in the first block).
6189 // TODO-CQ: this is when we might do loop cloning.
6191 if ((tree->gtFlags & GTF_EXCEPT) != 0)
6193 treeIsHoistable = false;
6196 // Currently we must give up on reads from static variables (even if we are in the first block).
6198 if (tree->OperGet() == GT_CLS_VAR)
6200 // TODO-CQ: test that fails if we hoist GT_CLS_VAR: JIT\Directed\Languages\ComponentPascal\pi_r.exe
6202 treeIsHoistable = false;
6206 // Is the value of the whole tree loop invariant?
// NOTE(review): in this view the receiver of this call is elided — presumably
// the result is assigned to 'treeIsInvariant' (line missing from the dump);
// confirm against the full source.
6208 optVNIsLoopInvariant(tree->gtVNPair.GetLiberal(), lnum, &hoistCtxt->m_curLoopVnInvariantCache);
6210 // Is the value of the whole tree loop invariant?
6211 if (!treeIsInvariant)
6213 treeIsHoistable = false;
6217 // Check if we need to set '*pFirstBlockAndBeforeSideEffect' to false.
6218 // If we encounter a tree with a call in it
6219 // or if we see an assignment to global we set it to false.
6221 // If we are already set to false then we can skip these checks
6223 if (*pFirstBlockAndBeforeSideEffect)
6225 // For this purpose, we only care about memory side effects. We assume that expressions will
6226 // be hoisted so that they are evaluated in the same order as they would have been in the loop,
6227 // and therefore throw exceptions in the same order. (So we don't use GTF_GLOBALLY_VISIBLE_SIDE_EFFECTS
6228 // here, since that includes exceptions.)
6229 if (tree->gtFlags & GTF_CALL)
6231 *pFirstBlockAndBeforeSideEffect = false;
6233 else if (tree->OperIsAssignment())
6235 // If the LHS of the assignment has a global reference, then assume it's a global side effect.
6236 GenTreePtr lhs = tree->gtOp.gtOp1;
6237 if (lhs->gtFlags & GTF_GLOB_REF)
6239 *pFirstBlockAndBeforeSideEffect = false;
6242 else if (tree->OperIsCopyBlkOp())
6244 GenTreePtr args = tree->gtOp.gtOp1;
6245 assert(args->OperGet() == GT_LIST);
6246 if (args->gtOp.gtOp1->gtFlags & GTF_GLOB_REF)
6248 *pFirstBlockAndBeforeSideEffect = false;
6253 // If this 'tree' is hoistable then we return and the caller will
6254 // decide to hoist it as part of larger hoistable expression.
6256 if (!treeIsHoistable)
6258 // We are not hoistable so we will now hoist any hoistable children.
6260 for (unsigned childNum = 0; childNum < nChildren; childNum++)
6262 if (childrenHoistable[childNum])
6264 // We can't hoist the LHS of an assignment, isn't a real use.
6265 if (childNum == 0 && (tree->OperIsAssignment()))
6270 GenTreePtr child = tree->GetChild(childNum);
6272 // We try to hoist this 'child' tree
6273 optHoistCandidate(child, lnum, hoistCtxt);
6278 *pHoistable = treeIsHoistable;
6279 return treeIsInvariant;
// optHoistCandidate: final gatekeeper for hoisting 'tree' out of loop 'lnum'.
//
// Rejects the candidate when the loop is not hoistable, the tree is not valid
// at the loop head, the profitability heuristic fails, or the tree's VN was
// already hoisted in this loop or an enclosing one. Otherwise performs the
// hoist, bumps the per-loop hoisted-expression counters used by the
// profitability heuristic, and records the VN as hoisted in this loop.
6282 void Compiler::optHoistCandidate(GenTreePtr tree, unsigned lnum, LoopHoistContext* hoistCtxt)
6284 if (lnum == BasicBlock::NOT_IN_LOOP)
6286 // The hoisted expression isn't valid at any loop head so don't hoist this expression.
6290 // The outer loop also must be suitable for hoisting...
6291 if ((optLoopTable[lnum].lpFlags & LPFLG_HOISTABLE) == 0)
6296 // If the hoisted expression isn't valid at this loop head then break
6297 if (!optTreeIsValidAtLoopHead(tree, lnum))
6302 // It must pass the hoistable profitablity tests for this loop level
6303 if (!optIsProfitableToHoistableTree(tree, lnum))
// De-duplicate by liberal value number against enclosing loops...
6309 if (hoistCtxt->m_hoistedInParentLoops.Lookup(tree->gtVNPair.GetLiberal(), &b))
6311 // already hoisted in a parent loop, so don't hoist this expression.
// ...and against expressions already hoisted from this same loop.
6315 if (hoistCtxt->GetHoistedInCurLoop(this)->Lookup(tree->gtVNPair.GetLiberal(), &b))
6317 // already hoisted this expression in the current loop, so don't hoist this expression.
6321 // Expression can be hoisted
6322 optPerformHoistExpr(tree, lnum);
6324 // Increment lpHoistedExprCount or lpHoistedFPExprCount
6325 if (!varTypeIsFloating(tree->TypeGet()))
6327 optLoopTable[lnum].lpHoistedExprCount++;
6328 #ifndef _TARGET_64BIT_
6329 // For our 32-bit targets Long types take two registers.
6330 if (varTypeIsLong(tree->TypeGet()))
6332 optLoopTable[lnum].lpHoistedExprCount++;
6336 else // Floating point expr hoisted
6338 optLoopTable[lnum].lpHoistedFPExprCount++;
6341 // Record the hoisted expression in hoistCtxt
6342 hoistCtxt->GetHoistedInCurLoop(this)->Set(tree->gtVNPair.GetLiberal(), true);
// optVNIsLoopInvariant: is value number 'vn' invariant with respect to loop 'lnum'?
//
// Constants (and void) are trivially invariant. A PhiDef/PhiHeapDef VN is
// invariant iff its defining block lies outside the loop; any other function
// VN is invariant iff all of its arguments are. Non-function, non-constant
// ("new, unique") VNs are conservatively treated as variant. Results are
// memoized per-loop in 'loopVnInvariantCache'.
6345 bool Compiler::optVNIsLoopInvariant(ValueNum vn, unsigned lnum, VNToBoolMap* loopVnInvariantCache)
6347 // If it is not a VN, is not loop-invariant.
6348 if (vn == ValueNumStore::NoVN)
6353 // We'll always short-circuit constants.
6354 if (vnStore->IsVNConstant(vn) || vn == vnStore->VNForVoid())
6359 // If we've done this query previously, don't repeat.
6360 bool previousRes = false;
6361 if (loopVnInvariantCache->Lookup(vn, &previousRes))
6368 if (vnStore->GetVNFunc(vn, &funcApp))
6370 if (funcApp.m_func == VNF_PhiDef)
6372 // First, make sure it's a "proper" phi -- the definition is a Phi application.
6373 VNFuncApp phiDefValFuncApp;
6374 if (!vnStore->GetVNFunc(funcApp.m_args[2], &phiDefValFuncApp) || phiDefValFuncApp.m_func != VNF_Phi)
6376 // It's not *really* a definition, rather a pass-through of some other VN.
6377 // (This could occur, say if both sides of an if-then-else diamond made the
6378 // same assignment to a variable.)
6379 res = optVNIsLoopInvariant(funcApp.m_args[2], lnum, loopVnInvariantCache)
6383 // Is the definition within the loop? If so, is not loop-invariant.
6384 unsigned lclNum = funcApp.m_args[0];
6385 unsigned ssaNum = funcApp.m_args[1];
6386 LclSsaVarDsc* ssaDef = lvaTable[lclNum].GetPerSsaData(ssaNum);
6387 res = !optLoopContains(lnum, ssaDef->m_defLoc.m_blk->bbNatLoopNum);
6390 else if (funcApp.m_func == VNF_PhiHeapDef)
// Heap phi: the defining block is encoded as a constant in the first arg.
6392 BasicBlock* defnBlk = reinterpret_cast<BasicBlock*>(vnStore->ConstantValue<ssize_t>(funcApp.m_args[0]));
6393 res = !optLoopContains(lnum, defnBlk->bbNatLoopNum);
// Ordinary function VN: invariant only if every argument VN is invariant.
6397 for (unsigned i = 0; i < funcApp.m_arity; i++)
6399 // TODO-CQ: We need to either make sure that *all* VN functions
6400 // always take VN args, or else have a list of arg positions to exempt, as implicitly
6402 if (!optVNIsLoopInvariant(funcApp.m_args[i], lnum, loopVnInvariantCache))
6412 // Otherwise, assume non-function "new, unique" VN's are not loop invariant.
// Memoize before returning.
6416 loopVnInvariantCache->Set(vn, res);
// optTreeIsValidAtLoopHead: would 'tree' compute the same value if evaluated
// at the head of loop 'lnum'?
//
// True for constants; for locals, true only when the local is SSA-tracked and
// its reaching SSA definition lies outside the loop; for interior nodes, true
// when every child is valid at the loop head.
6420 bool Compiler::optTreeIsValidAtLoopHead(GenTreePtr tree, unsigned lnum)
6422 if (tree->OperIsLocal())
6424 GenTreeLclVarCommon* lclVar = tree->AsLclVarCommon();
6425 unsigned lclNum = lclVar->gtLclNum;
6427 // The lvlVar must be have an Ssa tracked lifetime
6428 if (fgExcludeFromSsa(lclNum))
6433 // If the loop does not contains the SSA def we can hoist it.
6434 if (!optLoopTable[lnum].lpContains(lvaTable[lclNum].GetPerSsaData(lclVar->GetSsaNum())->m_defLoc.m_blk))
6439 else if (tree->OperIsConst())
6443 else // If every one of the children nodes are valid at this Loop's Head.
6445 unsigned nChildren = tree->NumChildren();
6446 for (unsigned childNum = 0; childNum < nChildren; childNum++)
6448 if (!optTreeIsValidAtLoopHead(tree->GetChild(childNum), lnum))
6458 /*****************************************************************************
6460 * Creates a pre-header block for the given loop - a preheader is a BBJ_NONE
6461 * header. The pre-header will replace the current lpHead in the loop table.
6462 * The loop has to be a do-while loop. Thus, all blocks dominated by lpHead
6463 * will also be dominated by the loop-top, lpHead->bbNext.
// *
// * After inserting the pre-header this routine also: estimates its weight
// * (from edge/profile data when available), retargets non-loop predecessors
// * of the loop top to jump to the pre-header, fixes up SSA phi predecessor
// * blocks, extends the EH region, and shares the pre-header with any sibling
// * loops in the table that had the same head and entry.
6467 void Compiler::fgCreateLoopPreHeader(unsigned lnum)
6469 LoopDsc* pLoopDsc = &optLoopTable[lnum];
6471 /* This loop has to be a "do-while" loop */
6473 assert(pLoopDsc->lpFlags & LPFLG_DO_WHILE);
6475 /* Have we already created a loop-preheader block? */
6477 if (pLoopDsc->lpFlags & LPFLG_HAS_PREHEAD)
6482 BasicBlock* head = pLoopDsc->lpHead;
6483 BasicBlock* top = pLoopDsc->lpTop;
6484 BasicBlock* entry = pLoopDsc->lpEntry;
6486 // if 'entry' and 'head' are in different try regions then we won't be able to hoist
6487 if (!BasicBlock::sameTryRegion(head, entry))
6492 // Ensure that lpHead always dominates lpEntry
6494 noway_assert(fgDominate(head, entry));
6496 /* Get hold of the first block of the loop body */
6498 assert(top == entry);
6500 /* Allocate a new basic block */
6502 BasicBlock* preHead = bbNewBasicBlock(BBJ_NONE);
6503 preHead->bbFlags |= BBF_INTERNAL | BBF_LOOP_PREHEADER;
6505 // Must set IL code offset
6506 preHead->bbCodeOffs = top->bbCodeOffs;
6508 // Set the default value of the preHead weight in case we don't have
6509 // valid profile data and since this blocks weight is just an estimate
6510 // we clear any BBF_PROF_WEIGHT flag that we may have picked up from head.
6512 preHead->inheritWeight(head);
6513 preHead->bbFlags &= ~BBF_PROF_WEIGHT;
6518 printf("\nCreated PreHeader (BB%02u) for loop L%02u (BB%02u - BB%02u), with weight = %s\n", preHead->bbNum,
6519 lnum, top->bbNum, pLoopDsc->lpBottom->bbNum, refCntWtd2str(preHead->getBBWeight(this)));
6523 // The preheader block is part of the containing loop (if any).
6524 preHead->bbNatLoopNum = pLoopDsc->lpParent;
// With profile data and a conditional head, refine the default weight:
// the pre-header executes only when the branch at 'head' enters the loop.
6526 if (fgIsUsingProfileWeights() && (head->bbJumpKind == BBJ_COND))
6528 if ((head->bbWeight == 0) || (head->bbNext->bbWeight == 0))
6530 preHead->bbWeight = 0;
6531 preHead->bbFlags |= BBF_RUN_RARELY;
6535 bool allValidProfileWeights = ((head->bbFlags & BBF_PROF_WEIGHT) != 0) &&
6536 ((head->bbJumpDest->bbFlags & BBF_PROF_WEIGHT) != 0) &&
6537 ((head->bbNext->bbFlags & BBF_PROF_WEIGHT) != 0);
6539 if (allValidProfileWeights)
6541 double loopEnteredCount;
6542 double loopSkippedCount;
// Prefer edge weights (averaging the min/max bounds) over raw block weights.
6544 if (fgHaveValidEdgeWeights)
6546 flowList* edgeToNext = fgGetPredForBlock(head->bbNext, head);
6547 flowList* edgeToJump = fgGetPredForBlock(head->bbJumpDest, head);
6548 noway_assert(edgeToNext != nullptr);
6549 noway_assert(edgeToJump != nullptr);
6552 ((double)edgeToNext->flEdgeWeightMin + (double)edgeToNext->flEdgeWeightMax) / 2.0;
6554 ((double)edgeToJump->flEdgeWeightMin + (double)edgeToJump->flEdgeWeightMax) / 2.0;
6558 loopEnteredCount = (double)head->bbNext->bbWeight;
6559 loopSkippedCount = (double)head->bbJumpDest->bbWeight;
6562 double loopTakenRatio = loopEnteredCount / (loopEnteredCount + loopSkippedCount);
6564 // Calculate a good approximation of the preHead's block weight
6565 unsigned preHeadWeight = (unsigned)(((double)head->bbWeight * loopTakenRatio) + 0.5);
6566 preHead->setBBWeight(max(preHeadWeight, 1));
6567 noway_assert(!preHead->isRunRarely());
6572 // Link in the preHead block.
6573 fgInsertBBbefore(top, preHead);
6575 // Ideally we would re-run SSA and VN if we optimized by doing loop hoisting.
6576 // However, that is too expensive at this point. Instead, we update the phi
6577 // node block references, if we created pre-header block due to hoisting.
6578 // This is sufficient because any definition participating in SSA that flowed
6579 // into the phi via the loop header block will now flow through the preheader
6580 // block from the header block.
6582 for (GenTreePtr stmt = top->bbTreeList; stmt; stmt = stmt->gtNext)
6584 GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
// Phi definitions are GT_ASG whose RHS is GT_PHI; stop at the first non-phi.
6585 if (tree->OperGet() != GT_ASG)
6589 GenTreePtr op2 = tree->gtGetOp2();
6590 if (op2->OperGet() != GT_PHI)
6594 GenTreeArgList* args = op2->gtGetOp1()->AsArgList();
6595 while (args != nullptr)
6597 GenTreePhiArg* phiArg = args->Current()->AsPhiArg();
6598 if (phiArg->gtPredBB == head)
6600 phiArg->gtPredBB = preHead;
6602 args = args->Rest();
6606 // The handler can't begin at the top of the loop. If it did, it would be incorrect
6607 // to set the handler index on the pre header without updating the exception table.
6608 noway_assert(!top->hasHndIndex() || fgFirstBlockOfHandler(top) != top);
6610 // Update the EH table to make the hoisted block part of the loop's EH block.
6611 fgExtendEHRegionBefore(top);
6613 // TODO-CQ: set dominators for this block, to allow loop optimizations requiring them
6614 // (e.g: hoisting expression in a loop with the same 'head' as this one)
6616 /* Update the loop entry */
6618 pLoopDsc->lpHead = preHead;
6619 pLoopDsc->lpFlags |= LPFLG_HAS_PREHEAD;
6621 /* The new block becomes the 'head' of the loop - update bbRefs and bbPreds
6622 All predecessors of 'beg', (which is the entry in the loop)
6623 now have to jump to 'preHead', unless they are dominated by 'head' */
6625 preHead->bbRefs = 0;
6626 fgAddRefPred(preHead, head);
6627 bool checkNestedLoops = false;
// Retarget every predecessor of 'top' that is NOT inside the loop (i.e. not
// dominated by 'top') so it reaches the pre-header instead.
6629 for (flowList* pred = top->bbPreds; pred; pred = pred->flNext)
6631 BasicBlock* predBlock = pred->flBlock;
6633 if (fgDominate(top, predBlock))
6635 // note: if 'top' dominates predBlock, 'head' dominates predBlock too
6636 // (we know that 'head' dominates 'top'), but using 'top' instead of
6637 // 'head' in the test allows us to not enter here if 'predBlock == head'
// A back-edge from a block other than this loop's bottom suggests sibling
// do-while loops sharing this entry; handled after the pred walk.
6639 if (predBlock != pLoopDsc->lpBottom)
6641 noway_assert(predBlock != head);
6642 checkNestedLoops = true;
6647 switch (predBlock->bbJumpKind)
// BBJ_NONE fall-through predecessor can only be 'head' itself.
6650 noway_assert(predBlock == head);
6654 if (predBlock == head)
6656 noway_assert(predBlock->bbJumpDest != top);
6662 case BBJ_EHCATCHRET:
// Redirect the jump to the pre-header and move the pred edge.
6663 noway_assert(predBlock->bbJumpDest == top);
6664 predBlock->bbJumpDest = preHead;
6665 preHead->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
6667 if (predBlock == head)
6669 // This is essentially the same case of predBlock being a BBJ_NONE. We may not be
6670 // able to make this a BBJ_NONE if it's an internal block (for example, a leave).
6671 // Just break, pred will be removed after switch.
6675 fgRemoveRefPred(top, predBlock);
6676 fgAddRefPred(preHead, predBlock);
// Switch predecessor: rewrite every jump-table entry that targets 'top'.
6682 jumpCnt = predBlock->bbJumpSwt->bbsCount;
6683 BasicBlock** jumpTab;
6684 jumpTab = predBlock->bbJumpSwt->bbsDstTab;
6689 if ((*jumpTab) == top)
6691 (*jumpTab) = preHead;
6693 fgRemoveRefPred(top, predBlock);
6694 fgAddRefPred(preHead, predBlock);
6695 preHead->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
6697 } while (++jumpTab, --jumpCnt);
6700 noway_assert(!"Unexpected bbJumpKind");
// 'head' now falls through to preHead, which falls through to 'top'.
6705 noway_assert(!fgGetPredForBlock(top, preHead));
6706 fgRemoveRefPred(top, head);
6707 fgAddRefPred(top, preHead);
6710 If we found at least one back-edge in the flowgraph pointing to the top/entry of the loop
6711 (other than the back-edge of the loop we are considering) then we likely have nested
6712 do-while loops with the same entry block and inserting the preheader block changes the head
6713 of all the nested loops. Now we will update this piece of information in the loop table, and
6714 mark all nested loops as having a preheader (the preheader block can be shared among all nested
6715 do-while loops with the same entry block).
6717 if (checkNestedLoops)
6719 for (unsigned l = 0; l < optLoopCount; l++)
6721 if (optLoopTable[l].lpHead == head)
6723 noway_assert(l != lnum); // pLoopDsc->lpHead was already changed from 'head' to 'preHead'
6724 noway_assert(optLoopTable[l].lpEntry == top);
6725 optUpdateLoopHead(l, optLoopTable[l].lpHead, preHead);
6726 optLoopTable[l].lpFlags |= LPFLG_HAS_PREHEAD;
6730 printf("Same PreHeader (BB%02u) can be used for loop L%02u (BB%02u - BB%02u)\n\n", preHead->bbNum,
6731 l, top->bbNum, optLoopTable[l].lpBottom->bbNum);
// optBlockIsLoopEntry: is 'blk' the entry block of some (non-removed) loop?
//
// Walks outward from blk's innermost natural loop through parent loops; on a
// match, *pLnum presumably receives the matching loop index (the assignment
// line is elided in this view — confirm against the full source).
6739 bool Compiler::optBlockIsLoopEntry(BasicBlock* blk, unsigned* pLnum)
6741 unsigned lnum = blk->bbNatLoopNum;
6742 while (lnum != BasicBlock::NOT_IN_LOOP)
6744 if (optLoopTable[lnum].lpEntry == blk)
6749 lnum = optLoopTable[lnum].lpParent;
// optComputeLoopSideEffects: Compute side-effect summaries for all loops.
// Pass 1 clears each loop's variable in/out and use/def sets and its
// "contains call" flag. Pass 2 computes the side effects of each outermost,
// non-removed loop nest (inner loops are covered by their outermost ancestor).
// Finally, builds lvaFloatVars (and, on 32-bit targets, lvaLongVars) from the
// tracked locals in lvaTable.
6754 void Compiler::optComputeLoopSideEffects()
// Pass 1: reset the per-loop side-effect sets/flags.
6757 for (lnum = 0; lnum < optLoopCount; lnum++)
6759 VarSetOps::AssignNoCopy(this, optLoopTable[lnum].lpVarInOut, VarSetOps::MakeEmpty(this));
6760 VarSetOps::AssignNoCopy(this, optLoopTable[lnum].lpVarUseDef, VarSetOps::MakeEmpty(this));
6761 optLoopTable[lnum].lpContainsCall = false;
// Pass 2: walk only outermost loops; optComputeLoopNestSideEffects handles
// the whole nest (skipping loops marked LPFLG_REMOVED).
6764 for (lnum = 0; lnum < optLoopCount; lnum++)
6766 if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED)
6771 if (optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP)
6772 { // Is outermost...
6773 optComputeLoopNestSideEffects(lnum);
// Build the set of tracked floating-point locals (and long locals on 32-bit,
// where TYP_LONG is not register-sized).
6777 VarSetOps::AssignNoCopy(this, lvaFloatVars, VarSetOps::MakeEmpty(this));
6778 #ifndef _TARGET_64BIT_
6779 VarSetOps::AssignNoCopy(this, lvaLongVars, VarSetOps::MakeEmpty(this));
6782 for (unsigned i = 0; i < lvaCount; i++)
6784 LclVarDsc* varDsc = &lvaTable[i];
6785 if (varDsc->lvTracked)
6787 if (varTypeIsFloating(varDsc->lvType))
6789 VarSetOps::AddElemD(this, lvaFloatVars, varDsc->lvVarIndex);
6791 #ifndef _TARGET_64BIT_
6792 else if (varTypeIsLong(varDsc->lvType))
6794 VarSetOps::AddElemD(this, lvaLongVars, varDsc->lvVarIndex);
// optComputeLoopNestSideEffects: Compute side effects for an entire loop nest.
// 'lnum' must be an outermost loop; every block from the nest's lpFirst up to
// (but not including) the block after lpBottom is processed, which covers all
// blocks of the contained inner loops as well.
6801 void Compiler::optComputeLoopNestSideEffects(unsigned lnum)
6803 assert(optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP); // Requires: lnum is outermost.
6804 BasicBlock* botNext = optLoopTable[lnum].lpBottom->bbNext;
6805 for (BasicBlock* bbInLoop = optLoopTable[lnum].lpFirst; bbInLoop != botNext; bbInLoop = bbInLoop->bbNext)
6807 optComputeLoopSideEffectsOfBlock(bbInLoop);
// optComputeLoopSideEffectsOfBlock: Record the side effects of one block into
// the loop table entries of every loop containing it. Effects tracked:
//  - variable liveness (live-in/out, use/def) for all containing loops;
//  - whether any containing loop makes a call (lpContainsCall);
//  - which fields / array element types are modified;
//  - whether the block forces arbitrary "heap havoc" on all containing loops.
// This is a distillation of the heap side-effect analysis done by value
// numbering; it does a very limited analysis of byref PtrTo values.
6811 void Compiler::optComputeLoopSideEffectsOfBlock(BasicBlock* blk)
6813 unsigned mostNestedLoop = blk->bbNatLoopNum;
6814 assert(mostNestedLoop != BasicBlock::NOT_IN_LOOP);
6816 AddVariableLivenessAllContainingLoops(mostNestedLoop, blk);
6818 bool heapHavoc = false; // True ==> there's a call or a memory store that has arbitrary heap effects.
6820 // Now iterate over the remaining statements, and their trees.
6821 for (GenTreePtr stmts = blk->FirstNonPhiDef(); (stmts != nullptr); stmts = stmts->gtNext)
6823 for (GenTreePtr tree = stmts->gtStmt.gtStmtList; (tree != nullptr); tree = tree->gtNext)
6825 genTreeOps oper = tree->OperGet();
6827 // Even after we set heapHavoc we still may want to know if a loop contains calls
6830 if (oper == GT_CALL)
6832 // Record that this loop contains a call
6833 AddContainsCallAllContainingLoops(mostNestedLoop)
6836 // If we just set lpContainsCall or it was previously set
6837 if (optLoopTable[mostNestedLoop].lpContainsCall)
6839 // We can early exit after both heapHavoc and lpContainsCall are both set to true.
6843 // We are just looking for GT_CALL nodes after heapHavoc was set.
6847 // otherwise heapHavoc is not set
6850 // This body is a distillation of the heap-side effect code of value numbering.
6851 // We also do a very limited analysis if byref PtrTo values, to cover some cases
6852 // that the compiler creates.
6854 if (GenTree::OperIsAssignment(oper))
6856 GenTreePtr lhs = tree->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true);
6858 if (lhs->OperGet() == GT_IND)
6860 GenTreePtr arg = lhs->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true);
6861 FieldSeqNode* fldSeqArrElem = nullptr;
// Volatile stores conservatively havoc the heap (handling elided here).
6863 if ((tree->gtFlags & GTF_IND_VOLATILE) != 0)
6871 if (arg->TypeGet() == TYP_BYREF && arg->OperGet() == GT_LCL_VAR)
6873 // If it's a local byref for which we recorded a value number, use that...
6874 GenTreeLclVar* argLcl = arg->AsLclVar();
6875 if (!fgExcludeFromSsa(argLcl->GetLclNum()))
6878 lvaTable[argLcl->GetLclNum()].GetPerSsaData(argLcl->GetSsaNum())->m_vnPair.GetLiberal();
6880 if (argVN != ValueNumStore::NoVN && vnStore->GetVNFunc(argVN, &funcApp) &&
6881 funcApp.m_func == VNF_PtrToArrElem)
6883 assert(vnStore->IsVNHandle(funcApp.m_args[0]));
6884 CORINFO_CLASS_HANDLE elemType =
6885 CORINFO_CLASS_HANDLE(vnStore->ConstantValue<size_t>(funcApp.m_args[0]));
6886 AddModifiedElemTypeAllContainingLoops(mostNestedLoop, elemType);
6887 // Don't set heapHavoc below.
6894 // Is the LHS an array index expression?
6895 else if (lhs->ParseArrayElemForm(this, &arrInfo, &fldSeqArrElem))
6897 // We actually ignore "fldSeq" -- any modification to an S[], at any
6898 // field of "S", will lose all information about the array type.
6899 CORINFO_CLASS_HANDLE elemTypeEq = EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType);
6900 AddModifiedElemTypeAllContainingLoops(mostNestedLoop, elemTypeEq);
6904 // We are only interested in IsFieldAddr()'s fldSeq out parameter.
6906 GenTreePtr obj = nullptr; // unused
6907 GenTreePtr staticOffset = nullptr; // unused
6908 FieldSeqNode* fldSeq = nullptr;
6910 if (arg->IsFieldAddr(this, &obj, &staticOffset, &fldSeq) &&
6911 (fldSeq != FieldSeqStore::NotAField()))
6913 // Get the first (object) field from field seq. Heap[field] will yield the "field map".
6914 assert(fldSeq != nullptr);
6915 if (fldSeq->IsFirstElemFieldSeq())
6917 fldSeq = fldSeq->m_next;
6918 assert(fldSeq != nullptr);
6921 AddModifiedFieldAllContainingLoops(mostNestedLoop, fldSeq->m_fieldHnd);
// Static field store: record the class-var handle as a modified field.
6929 else if (lhs->OperGet() == GT_CLS_VAR)
6931 AddModifiedFieldAllContainingLoops(mostNestedLoop, lhs->gtClsVar.gtClsVarHnd);
6933 // Otherwise, must be local lhs form. I should assert that.
6934 else if (lhs->OperGet() == GT_LCL_VAR)
6936 GenTreeLclVar* lhsLcl = lhs->AsLclVar();
6937 GenTreePtr rhs = tree->gtOp.gtOp2;
6938 ValueNum rhsVN = rhs->gtVNPair.GetLiberal();
6939 // If we gave the RHS a value number, propagate it.
6940 if (rhsVN != ValueNumStore::NoVN)
6942 rhsVN = vnStore->VNNormVal(rhsVN);
6943 if (!fgExcludeFromSsa(lhsLcl->GetLclNum()))
6945 lvaTable[lhsLcl->GetLclNum()]
6946 .GetPerSsaData(lhsLcl->GetSsaNum())
6947 ->m_vnPair.SetLiberal(rhsVN);
6952 else // not GenTree::OperIsAssignment(oper)
// Propagate the value number through wrapper nodes (enclosing case elided).
6957 tree->gtVNPair = tree->gtOp.gtOp2->gtVNPair;
6961 // Is it an addr of a array index expression?
6963 GenTreePtr addrArg = tree->gtOp.gtOp1;
6964 if (addrArg->OperGet() == GT_IND)
6966 // Is the LHS an array index expression?
6967 if (addrArg->gtFlags & GTF_IND_ARR_INDEX)
6970 bool b = GetArrayInfoMap()->Lookup(addrArg, &arrInfo);
// Give the address a PtrToArrElem VN so later stores through it can be
// attributed to the element type rather than havocking the whole heap.
6972 CORINFO_CLASS_HANDLE elemType =
6973 EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType);
6974 tree->gtVNPair.SetBoth(
6975 vnStore->VNForFunc(TYP_BYREF, VNF_PtrToArrElem,
6976 vnStore->VNForHandle(ssize_t(elemType), GTF_ICON_CLASS_HDL),
6977 // The rest are dummy arguments.
6978 vnStore->VNForNull(), vnStore->VNForNull(),
6979 vnStore->VNForNull()));
6989 GenTreeLclVarCommon* lclVarTree;
6991 if (!tree->DefinesLocal(this, &lclVarTree, &isEntire))
6993 // For now, assume arbitrary side effects on the heap...
6994 // TODO-CQ: Why not be complete, and get this case right?
// Interlocked operations conservatively havoc the heap.
7000 case GT_LOCKADD: // Binop
7001 case GT_XADD: // Binop
7002 case GT_XCHG: // Binop
7003 case GT_CMPXCHG: // Specialop
7011 GenTreeCall* call = tree->AsCall();
7013 // Record that this loop contains a call
7014 AddContainsCallAllContainingLoops(mostNestedLoop);
7016 if (call->gtCallType == CT_HELPER)
7018 CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
7019 if (s_helperCallProperties.MutatesHeap(helpFunc))
7023 else if (s_helperCallProperties.MayRunCctor(helpFunc))
7025 // If the call is labeled as "Hoistable", then we've checked the
7026 // class that would be constructed, and it is not precise-init, so
7027 // the cctor will not be run by this call. Otherwise, it might be,
7028 // and might have arbitrary side effects.
7029 if ((tree->gtFlags & GTF_CALL_HOISTABLE) == 0)
7043 // All other gtOper node kinds, leave 'heapHavoc' unchanged (i.e. false)
7052 // Record that all loops containing this block have heap havoc effects.
7053 unsigned lnum = mostNestedLoop;
7054 while (lnum != BasicBlock::NOT_IN_LOOP)
7056 optLoopTable[lnum].lpLoopHasHeapHavoc = true;
7057 lnum = optLoopTable[lnum].lpParent;
7062 // Marks the containsCall information to "lnum" and any parent loops.
// Sets lpContainsCall on loop 'lnum' and on every enclosing loop, walking
// outward via the lpParent links.
7063 void Compiler::AddContainsCallAllContainingLoops(unsigned lnum)
7065 assert(0 <= lnum && lnum < optLoopCount);
7066 while (lnum != BasicBlock::NOT_IN_LOOP)
7068 optLoopTable[lnum].lpContainsCall = true;
7069 lnum = optLoopTable[lnum].lpParent;
7073 // Adds the variable liveness information for 'blk' to 'this' LoopDsc
// Unions the block's live-in/live-out sets into lpVarInOut, and its
// use/def sets into lpVarUseDef.
7074 void Compiler::LoopDsc::AddVariableLiveness(Compiler* comp, BasicBlock* blk)
7076 VarSetOps::UnionD(comp, this->lpVarInOut, blk->bbLiveIn);
7077 VarSetOps::UnionD(comp, this->lpVarInOut, blk->bbLiveOut);
7079 VarSetOps::UnionD(comp, this->lpVarUseDef, blk->bbVarUse);
7080 VarSetOps::UnionD(comp, this->lpVarUseDef, blk->bbVarDef);
7083 // Adds the variable liveness information for 'blk' to "lnum" and any parent loops.
// Walks outward from loop 'lnum' via lpParent, accumulating blk's liveness
// into each containing loop's descriptor.
7084 void Compiler::AddVariableLivenessAllContainingLoops(unsigned lnum, BasicBlock* blk)
7086 assert(0 <= lnum && lnum < optLoopCount);
7087 while (lnum != BasicBlock::NOT_IN_LOOP)
7089 optLoopTable[lnum].AddVariableLiveness(this, blk);
7090 lnum = optLoopTable[lnum].lpParent;
7094 // Adds "fldHnd" to the set of modified fields of "lnum" and any parent loops.
// Walks outward from loop 'lnum' via lpParent, recording the field handle
// as modified in each containing loop.
7095 void Compiler::AddModifiedFieldAllContainingLoops(unsigned lnum, CORINFO_FIELD_HANDLE fldHnd)
7097 assert(0 <= lnum && lnum < optLoopCount);
7098 while (lnum != BasicBlock::NOT_IN_LOOP)
7100 optLoopTable[lnum].AddModifiedField(this, fldHnd);
7101 lnum = optLoopTable[lnum].lpParent;
7105 // Adds "elemType" to the set of modified array element types of "lnum" and any parent loops.
// Walks outward from loop 'lnum' via lpParent, recording the element class
// handle as modified in each containing loop.
7106 void Compiler::AddModifiedElemTypeAllContainingLoops(unsigned lnum, CORINFO_CLASS_HANDLE elemClsHnd)
7108 assert(0 <= lnum && lnum < optLoopCount);
7109 while (lnum != BasicBlock::NOT_IN_LOOP)
7111 optLoopTable[lnum].AddModifiedElemType(this, elemClsHnd);
7112 lnum = optLoopTable[lnum].lpParent;
7116 /*****************************************************************************
7118 * Helper passed to Compiler::fgWalkAllTreesPre() to decrement the LclVar usage counts
7119 * The 'keepList' is either a single tree or a list of trees that are formed by
7120 * one or more GT_COMMA nodes. It is the kept side-effects as returned by the
7121 * gtExtractSideEffList method.
// optRemoveTreeVisitor: Tree-walk callback that decrements LclVar ref counts
// for nodes being removed from the graph. Any subtree reachable through the
// kept side-effect list ('keepList', passed via pCallbackData) is skipped so
// its ref counts are preserved.
7125 Compiler::fgWalkResult Compiler::optRemoveTreeVisitor(GenTreePtr* pTree, fgWalkData* data)
7127 GenTreePtr tree = *pTree;
7128 Compiler* comp = data->compiler;
7129 GenTreePtr keepList = (GenTreePtr)(data->pCallbackData);
7131 // We may have a non-NULL side effect list that is being kept
// Walk down the COMMA spine of the keep-list looking for 'tree'.
7135 GenTreePtr keptTree = keepList;
7136 while (keptTree->OperGet() == GT_COMMA)
7138 assert(keptTree->OperKind() & GTK_SMPOP);
7139 GenTreePtr op1 = keptTree->gtOp.gtOp1;
7140 GenTreePtr op2 = keptTree->gtGetOp2();
7142 // For the GT_COMMA case the op1 is part of the original CSE tree
7143 // that is being kept because it contains some side-effect
7147 // This tree and all of its sub trees are being kept.
7148 return WALK_SKIP_SUBTREES;
7151 // For the GT_COMMA case the op2 are the remaining side-effects of the original CSE tree
7152 // which can again be another GT_COMMA or the final side-effect part
// Non-COMMA tail of the keep-list: the last kept side-effect tree.
7156 if (tree == keptTree)
7158 // This tree and all of its sub trees are being kept.
7159 return WALK_SKIP_SUBTREES;
7163 // This node is being removed from the graph of GenTreePtr
7165 // Look for any local variable references
7167 if (tree->gtOper == GT_LCL_VAR && comp->lvaLocalVarRefCounted)
7172 /* This variable ref is going away, decrease its ref counts */
7174 lclNum = tree->gtLclVarCommon.gtLclNum;
7175 assert(lclNum < comp->lvaCount);
7176 varDsc = comp->lvaTable + lclNum;
7178 // make sure it's been initialized
7179 assert(comp->compCurBB != nullptr);
7180 assert(comp->compCurBB->bbWeight <= BB_MAX_WEIGHT);
7182 /* Decrement its lvRefCnt and lvRefCntWtd */
7184 // Use getBBWeight to determine the proper block weight.
7185 // This impacts the block weights when we have IBC data.
7186 varDsc->decRefCnts(comp->compCurBB->getBBWeight(comp), comp);
7189 return WALK_CONTINUE;
7192 /*****************************************************************************
7194 * Routine called to decrement the LclVar ref counts when removing a tree
7195 * during the remove RangeCheck phase.
7196 * This method will decrement the refcounts for any LclVars used below 'deadTree',
7197 * unless the node is found in the 'keepList' (which are saved side effects)
7198 * The keepList is communicated using the walkData.pCallbackData field
7199 * Also the compCurBB must be set to the current BasicBlock which contains
7200 * 'deadTree' as we need to fetch the block weight when decrementing the ref counts.
// optRemoveTree: Decrement LclVar ref counts for every local referenced under
// 'deadTree', except those reachable in 'keepList' (the saved side effects).
// compCurBB must be set by the caller so the visitor can fetch block weights.
7203 void Compiler::optRemoveTree(GenTreePtr deadTree, GenTreePtr keepList)
7205 // We communicate this value using the walkData.pCallbackData field
7207 fgWalkTreePre(&deadTree, optRemoveTreeVisitor, (void*)keepList);
7210 /*****************************************************************************
7212 * Given an array index node, mark it as not needing a range check.
// optRemoveRangeCheck: Remove the GT_ARR_BOUNDS_CHECK that is op1 of the
// GT_COMMA 'tree'. Side effects selected by 'sideEffFlags' are extracted and
// kept in place of the check; otherwise a NOP is substituted. Ref counts of
// locals in the discarded portion are decremented, the statement's costs are
// recomputed, and nodes are re-threaded if the statement list is threaded.
// 'forceRemove' skips the removability check (asserted below).
7215 void Compiler::optRemoveRangeCheck(
7216 GenTreePtr tree, GenTreePtr stmt, bool updateCSEcounts, unsigned sideEffFlags, bool forceRemove)
7232 noway_assert(!"can't remove range checks without REARRANGE_ADDS right now");
7235 noway_assert(stmt->gtOper == GT_STMT);
7236 noway_assert(tree->gtOper == GT_COMMA);
7237 noway_assert(tree->gtOp.gtOp1->gtOper == GT_ARR_BOUNDS_CHECK);
7238 noway_assert(forceRemove || optIsRangeCheckRemovable(tree->gtOp.gtOp1));
7240 GenTreeBoundsChk* bndsChk = tree->gtOp.gtOp1->AsBoundsChk();
7245 printf("Before optRemoveRangeCheck:\n");
// Pull out any side effects of the bounds check that must be preserved.
7250 GenTreePtr sideEffList = nullptr;
7253 gtExtractSideEffList(tree->gtOp.gtOp1, &sideEffList, sideEffFlags);
7256 // Decrement the ref counts for any LclVars that are being deleted
7258 optRemoveTree(tree->gtOp.gtOp1, sideEffList);
7260 // Just replace the bndsChk with a NOP as an operand to the GT_COMMA, if there are no side effects.
7261 tree->gtOp.gtOp1 = (sideEffList != nullptr) ? sideEffList : gtNewNothingNode();
7263 // TODO-CQ: We should also remove the GT_COMMA, but in any case we can no longer CSE the GT_COMMA.
7264 tree->gtFlags |= GTF_DONT_CSE;
7266 /* Recalculate the gtCostSz, etc... */
7267 gtSetStmtInfo(stmt);
7269 /* Re-thread the nodes if necessary */
7270 if (fgStmtListThreaded)
7278 printf("After optRemoveRangeCheck:\n");
7284 /*****************************************************************************
7285 * Return the scale in an array reference, given a pointer to the
7286 * multiplication node.
// optGetArrayRefScaleAndIndex: Given the multiplication (GT_MUL or GT_LSH)
// node of an array reference, return the scale and, via *pIndex, the index
// subtree. A left-shift's constant is converted to a multiplicative scale,
// and cascaded constant multiplies (e.g. *5*4 from a morphed *20) are folded
// into a single scale.
7289 ssize_t Compiler::optGetArrayRefScaleAndIndex(GenTreePtr mul, GenTreePtr* pIndex DEBUGARG(bool bRngChk))
7292 assert(mul->gtOper == GT_MUL || mul->gtOper == GT_LSH);
7293 assert(mul->gtOp.gtOp2->IsCnsIntOrI());
7295 ssize_t scale = mul->gtOp.gtOp2->gtIntConCommon.IconValue();
7297 if (mul->gtOper == GT_LSH)
// A shift amount becomes a power-of-two scale.
7299 scale = ((ssize_t)1) << scale;
7302 GenTreePtr index = mul->gtOp.gtOp1;
7304 if (index->gtOper == GT_MUL && index->gtOp.gtOp2->IsCnsIntOrI())
7306 // case of two cascading multiplications for constant int (e.g. * 20 morphed to * 5 * 4):
7307 // When index->gtOper is GT_MUL and index->gtOp.gtOp2->gtOper is GT_CNS_INT (i.e. * 5),
7308 // we can bump up the scale from 4 to 5*4, and then change index to index->gtOp.gtOp1.
7309 // Otherwise, we cannot optimize it. We will simply keep the original scale and index.
7310 scale *= index->gtOp.gtOp2->gtIntConCommon.IconValue();
7311 index = index->gtOp.gtOp1;
7314 assert(!bRngChk || index->gtOper != GT_COMMA);
7324 /*****************************************************************************
7325 * Find the last assignment to the local variable in the block. Return
7326 * RHS or NULL. If any local variable in the RHS has been killed in
7327 * intervening code, return NULL. If the variable being searched for is killed
7328 * in the intervening code, return NULL.
// optFindLocalInit: Search 'block' backwards for the last plain assignment to
// 'local' and return its RHS, or nullptr if none is found, the local is
// killed by an 'op=' / conditional assignment, or any local referenced by the
// RHS is killed (or indirectly referenced) in the intervening code.
// *pKilledInOut accumulates the tracked locals assigned after the init;
// *pLhsRhsKilledAfterInit reports whether the LHS or RHS was invalidated.
7332 GenTreePtr Compiler::optFindLocalInit(BasicBlock* block,
7334 VARSET_TP* pKilledInOut,
7335 bool* pLhsRhsKilledAfterInit)
7337 assert(pKilledInOut);
7338 assert(pLhsRhsKilledAfterInit);
7340 *pLhsRhsKilledAfterInit = false;
7342 unsigned LclNum = local->gtLclVarCommon.gtLclNum;
7344 GenTreePtr list = block->bbTreeList;
7345 if (list == nullptr)
// Walk the statements of the block in reverse.
7350 GenTreePtr rhs = nullptr;
7351 GenTreePtr stmt = list;
7354 stmt = stmt->gtPrev;
7355 if (stmt == nullptr)
7360 GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
7361 // If we encounter an assignment to a local variable,
7362 if ((tree->OperKind() & GTK_ASGOP) && tree->gtOp.gtOp1->gtOper == GT_LCL_VAR)
7364 // And the assigned variable equals the input local,
7365 if (tree->gtOp.gtOp1->gtLclVarCommon.gtLclNum == LclNum)
7367 // If the assignment is '=' and it is not a conditional, then return rhs.
7368 if (tree->gtOper == GT_ASG && !(tree->gtFlags & GTF_COLON_COND))
7370 rhs = tree->gtOp.gtOp2;
7372 // If the assignment is 'op=' or a conditional equal, then the search ends here,
7373 // as we found a kill to the input local.
7376 *pLhsRhsKilledAfterInit = true;
7377 assert(rhs == nullptr);
// Some other local is assigned: record it as killed if tracked.
7383 LclVarDsc* varDsc = optIsTrackedLocal(tree->gtOp.gtOp1);
7384 if (varDsc == nullptr)
7388 VarSetOps::AddElemD(this, *pKilledInOut, varDsc->lvVarIndex);
7391 } while (stmt != list);
7398 // If any local in the RHS is killed in intervening code, or RHS has an indirection, return NULL.
7399 varRefKinds rhsRefs = VR_NONE;
7400 VARSET_TP VARSET_INIT_NOCOPY(rhsLocals, VarSetOps::UninitVal());
7401 bool b = lvaLclVarRefs(rhs, nullptr, &rhsRefs, &rhsLocals);
7402 if (!b || !VarSetOps::IsEmptyIntersection(this, rhsLocals, *pKilledInOut) || (rhsRefs != VR_NONE))
7404 // If RHS has been indirectly referenced, consider it a write and a kill.
7405 *pLhsRhsKilledAfterInit = true;
7412 /*****************************************************************************
7414 * Return true if "op1" is guaranteed to be less than or equal to "op2".
// optIsNoMore: Return true if 'op1' + add1 is guaranteed to be <= 'op2' + add2.
// Handles the constant/constant case directly; otherwise strips a "+ constant"
// from either operand and requires both to be the same local variable (the
// caller ensures that local has a single definition).
7419 bool Compiler::optIsNoMore(GenTreePtr op1, GenTreePtr op2, int add1, int add2)
7421 if (op1->gtOper == GT_CNS_INT && op2->gtOper == GT_CNS_INT)
7423 add1 += op1->gtIntCon.gtIconVal;
7424 add2 += op2->gtIntCon.gtIconVal;
7428 /* Check for +/- constant on either operand */
7430 if (op1->gtOper == GT_ADD && op1->gtOp.gtOp2->gtOper == GT_CNS_INT)
7432 add1 += op1->gtOp.gtOp2->gtIntCon.gtIconVal;
7433 op1 = op1->gtOp.gtOp1;
7436 if (op2->gtOper == GT_ADD && op2->gtOp.gtOp2->gtOper == GT_CNS_INT)
7438 add2 += op2->gtOp.gtOp2->gtIntCon.gtIconVal;
7439 op2 = op2->gtOp.gtOp1;
7442 /* We only allow local variable references */
7444 if (op1->gtOper != GT_LCL_VAR)
7446 if (op2->gtOper != GT_LCL_VAR)
7448 if (op1->gtLclVarCommon.gtLclNum != op2->gtLclVarCommon.gtLclNum)
7451 /* NOTE: Caller ensures that this variable has only one def */
7453 // printf("limit [%d]:\n", add1); gtDispTree(op1);
7454 // printf("size [%d]:\n", add2); gtDispTree(op2);
// Same local on both sides: compare only the accumulated constant offsets.
7458 return (bool)(add1 <= add2);
7463 //------------------------------------------------------------------------------
7464 // optObtainLoopCloningOpts: Identify optimization candidates and update
7465 // the "context" for array optimizations.
7468 // context - data structure where all loop cloning info is kept. The
7469 // optInfo fields of the context are updated with the
7470 // identified optimization candidates.
// optObtainLoopCloningOpts: For every clonable, non-removed loop, identify
// optimization candidates and record them in 'context' (see header comment
// above for the parameter description).
7472 void Compiler::optObtainLoopCloningOpts(LoopCloneContext* context)
7474 for (unsigned i = 0; i < optLoopCount; i++)
7476 JITDUMP("Considering loop %d to clone for optimizations.\n", i);
7477 if (optIsLoopClonable(i))
7479 if (!(optLoopTable[i].lpFlags & LPFLG_REMOVED))
7481 optIdentifyLoopOptInfo(i, context);
7484 JITDUMP("------------------------------------------------------------\n");
7489 //------------------------------------------------------------------------
7490 // optIdentifyLoopOptInfo: Identify loop optimization candidates and also
7491 // check if the loop is suitable for the optimizations performed.
7494 // loopNum - the current loop index for which conditions are derived.
7495 // context - data structure where all loop cloning candidates will be
7499 // If the loop is not suitable for the optimizations, return false - context
7500 // should not contain any optimization candidate for the loop if false.
7501 // Else return true.
7504 // Check if the loop is well formed for this optimization and identify the
7505 // optimization candidates and update the "context" parameter with all the
7506 // contextual information necessary to perform the optimization later.
// optIdentifyLoopOptInfo: Verify the loop is well-formed for cloning (simple
// counted loop with +1/-1 stride, known limit kind, inverted loop test, and a
// non-address-exposed iteration variable), then walk the loop body collecting
// candidates into 'context' via optCanOptimizeByLoopCloningVisitor.
7508 bool Compiler::optIdentifyLoopOptInfo(unsigned loopNum, LoopCloneContext* context)
7510 noway_assert(loopNum < optLoopCount);
7512 LoopDsc* pLoop = &optLoopTable[loopNum];
7514 if (!(pLoop->lpFlags & LPFLG_ITER))
7516 JITDUMP("> No iter flag on loop %d.\n", loopNum);
7520 unsigned ivLclNum = pLoop->lpIterVar();
7521 if (lvaVarAddrExposed(ivLclNum))
7523 JITDUMP("> Rejected V%02u as iter var because is address-exposed.\n", ivLclNum);
7527 BasicBlock* head = pLoop->lpHead;
7528 BasicBlock* end = pLoop->lpBottom;
7529 BasicBlock* beg = head->bbNext;
7531 if (end->bbJumpKind != BBJ_COND)
7533 JITDUMP("> Couldn't find termination test.\n");
7537 if (end->bbJumpDest != beg)
7539 JITDUMP("> Branch at loop 'end' not looping to 'begin'.\n");
7543 // TODO-CQ: CLONE: Mark increasing or decreasing loops.
7544 if ((pLoop->lpIterOper() != GT_ASG_ADD && pLoop->lpIterOper() != GT_ADD) || (pLoop->lpIterConst() != 1))
7546 JITDUMP("> Loop iteration operator not matching\n");
7550 if ((pLoop->lpFlags & LPFLG_CONST_LIMIT) == 0 && (pLoop->lpFlags & LPFLG_VAR_LIMIT) == 0 &&
7551 (pLoop->lpFlags & LPFLG_ARRLEN_LIMIT) == 0)
7553 JITDUMP("> Loop limit is neither constant, variable or array length\n");
// The test direction must agree with the iteration direction.
7557 if (!(((pLoop->lpTestOper() == GT_LT || pLoop->lpTestOper() == GT_LE) &&
7558 (pLoop->lpIterOper() == GT_ADD || pLoop->lpIterOper() == GT_ASG_ADD)) ||
7559 ((pLoop->lpTestOper() == GT_GT || pLoop->lpTestOper() == GT_GE) &&
7560 (pLoop->lpIterOper() == GT_SUB || pLoop->lpIterOper() == GT_ASG_SUB))))
7562 JITDUMP("> Loop test (%s) doesn't agree with the direction (%s) of the pLoop->\n",
7563 GenTree::NodeName(pLoop->lpTestOper()), GenTree::NodeName(pLoop->lpIterOper()));
7567 if (!(pLoop->lpTestTree->OperKind() & GTK_RELOP) || !(pLoop->lpTestTree->gtFlags & GTF_RELOP_ZTT))
7569 JITDUMP("> Loop inversion NOT present, loop test [%06u] may not protect entry from head.\n",
7570 pLoop->lpTestTree->gtTreeID);
// Sanity: the iterator node must be the iteration variable itself.
7575 GenTreePtr op1 = pLoop->lpIterator();
7576 noway_assert((op1->gtOper == GT_LCL_VAR) && (op1->gtLclVarCommon.gtLclNum == ivLclNum));
7579 JITDUMP("Checking blocks BB%02d..BB%02d for optimization candidates\n", beg->bbNum,
7580 end->bbNext ? end->bbNext->bbNum : 0);
// Walk every statement of every block in the loop, collecting candidates.
7582 LoopCloneVisitorInfo info(context, loopNum, nullptr);
7583 for (BasicBlock* block = beg; block != end->bbNext; block = block->bbNext)
7586 for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
7589 fgWalkTreePre(&stmt->gtStmt.gtStmtExpr, optCanOptimizeByLoopCloningVisitor, &info, false, false);
7596 //---------------------------------------------------------------------------------------------------------------
7597 // optExtractArrIndex: Try to extract the array index from "tree".
7600 // tree the tree to be checked if it is the array [] operation.
7601 // result the extracted GT_INDEX information is updated in result.
7602 // lhsNum for the root level (function is recursive) callers should be BAD_VAR_NUM.
7605 // Returns true if array index can be extracted, else, return false. See assumption about
7606 // what will be extracted. The "result" variable's rank parameter is advanced for every
7607 // dimension of [] encountered.
7610 // Given a "tree" extract the GT_INDEX node in "result" as ArrIndex. In FlowGraph morph
7611 // we have converted a GT_INDEX tree into a scaled index base offset expression. We need
7612 // to reconstruct this to be able to know if this is an array access.
7615 // The method extracts only if the array base and indices are GT_LCL_VAR.
7617 // TODO-CQ: CLONE: After morph make sure this method extracts values before morph.
7619 // [000000001AF828D8] ---XG------- indir int
7620 // [000000001AF872C8] ------------ const long 16 Fseq[#FirstElem]
7621 // [000000001AF87340] ------------ + byref
7622 // [000000001AF87160] -------N---- const long 2
7623 // [000000001AF871D8] ------------ << long
7624 // [000000001AF870C0] ------------ cast long <- int
7625 // [000000001AF86F30] i----------- lclVar int V04 loc0
7626 // [000000001AF87250] ------------ + byref
7627 // [000000001AF86EB8] ------------ lclVar ref V01 arg1
7628 // [000000001AF87468] ---XG------- comma int
7629 // [000000001AF87020] ---X-------- arrBndsChk void
7630 // [000000001AF86FA8] ---X-------- arrLen int
7631 // [000000001AF827E8] ------------ lclVar ref V01 arg1
7632 // [000000001AF82860] ------------ lclVar int V04 loc0
7633 // [000000001AF829F0] -A-XG------- = int
7634 // [000000001AF82978] D------N---- lclVar int V06 tmp0
// optExtractArrIndex: Pattern-match a morphed single-dimension array access
// "COMMA(ARR_BOUNDS_CHECK(lcl, lcl), IND(lcl + (lcl << cns) + cns))" and, on
// success, record the array local, index local and bounds-check tree into
// 'result' (see the header comment above for the full shape). Returns false
// as soon as any sub-node fails to match; only GT_LCL_VAR bases/indices are
// accepted.
7636 bool Compiler::optExtractArrIndex(GenTreePtr tree, ArrIndex* result, unsigned lhsNum)
7638 if (tree->gtOper != GT_COMMA)
7642 GenTreePtr before = tree->gtGetOp1();
7643 if (before->gtOper != GT_ARR_BOUNDS_CHECK)
7647 GenTreeBoundsChk* arrBndsChk = before->AsBoundsChk();
7648 if (arrBndsChk->gtArrLen->gtGetOp1()->gtOper != GT_LCL_VAR)
7652 if (arrBndsChk->gtIndex->gtOper != GT_LCL_VAR)
// For inner dimensions, the array local must be the temp assigned by the
// enclosing dimension (lhsNum); at the root, lhsNum is BAD_VAR_NUM.
7656 unsigned arrLcl = arrBndsChk->gtArrLen->gtGetOp1()->gtLclVarCommon.gtLclNum;
7657 if (lhsNum != BAD_VAR_NUM && arrLcl != lhsNum)
7662 unsigned indLcl = arrBndsChk->gtIndex->gtLclVarCommon.gtLclNum;
// Match the "scaled index + base + offset" address under the indirection.
7664 GenTreePtr after = tree->gtGetOp2();
7666 if (after->gtOper != GT_IND)
7670 GenTreePtr sibo = after->gtGetOp1();
7671 if (sibo->gtOper != GT_ADD)
7675 GenTreePtr sib = sibo->gtGetOp1();
7676 GenTreePtr ofs = sibo->gtGetOp2();
7677 if (ofs->gtOper != GT_CNS_INT)
7681 if (sib->gtOper != GT_ADD)
7685 GenTreePtr si = sib->gtGetOp2();
7686 GenTreePtr base = sib->gtGetOp1();
7687 if (si->gtOper != GT_LSH)
7691 if (base->OperGet() != GT_LCL_VAR || base->gtLclVarCommon.gtLclNum != arrLcl)
7695 GenTreePtr scale = si->gtGetOp2();
7696 GenTreePtr index = si->gtGetOp1();
7697 if (scale->gtOper != GT_CNS_INT)
// On AMD64 the 32-bit index appears under a cast to long.
7701 #ifdef _TARGET_AMD64_
7702 if (index->gtOper != GT_CAST)
7706 GenTreePtr indexVar = index->gtGetOp1();
7708 GenTreePtr indexVar = index;
7710 if (indexVar->gtOper != GT_LCL_VAR || indexVar->gtLclVarCommon.gtLclNum != indLcl)
// Match succeeded: record the access (arrLcl only at the outermost dim).
7714 if (lhsNum == BAD_VAR_NUM)
7716 result->arrLcl = arrLcl;
7718 result->indLcls.Push(indLcl);
7719 result->bndsChks.Push(tree);
7720 result->useBlock = compCurBB;
7726 //---------------------------------------------------------------------------------------------------------------
7727 // optReconstructArrIndex: Reconstruct array index.
7730 // tree the tree to be checked if it is an array [][][] operation.
7731 // result the extracted GT_INDEX information.
7732 // lhsNum for the root level (function is recursive) callers should be BAD_VAR_NUM.
7735 // Returns true if array index can be extracted, else, return false. "rank" field in
7736 // "result" contains the array access depth. The "indLcls" fields contain the indices.
7739 // Recursively look for a list of array indices. In the example below, we encounter,
7740 // V03 = ((V05 = V00[V01]), (V05[V02])) which corresponds to access of V00[V01][V02]
7741 // The return value would then be:
7742 // ArrIndex result { arrLcl: V00, indLcls: [V01, V02], rank: 2 }
7744 // V00[V01][V02] would be morphed as:
7746 // [000000001B366848] ---XG------- indir int
7747 // [000000001B36BC50] ------------ V05 + (V02 << 2) + 16
7748 // [000000001B36C200] ---XG------- comma int
7749 // [000000001B36BDB8] ---X-------- arrBndsChk(V05, V02)
7750 // [000000001B36C278] -A-XG------- comma int
7751 // [000000001B366730] R--XG------- indir ref
7752 // [000000001B36C2F0] ------------ V00 + (V01 << 3) + 24
7753 // [000000001B36C818] ---XG------- comma ref
7754 // [000000001B36C458] ---X-------- arrBndsChk(V00, V01)
7755 // [000000001B36BB60] -A-XG------- = ref
7756 // [000000001B36BAE8] D------N---- lclVar ref V05 tmp2
7757 // [000000001B36A668] -A-XG------- = int
7758 // [000000001B36A5F0] D------N---- lclVar int V03 tmp0
7761 // The method extracts only if the array base and indices are GT_LCL_VAR.
// optReconstructArrIndex: Recursively reconstruct a (possibly jagged,
// multi-dimension) array access from morphed COMMA trees; see the header
// comment above for the matched shape. Each recursion level peels one
// "tmp = arr[idx]" assignment and verifies the temp is used as the base of
// the next dimension.
7763 bool Compiler::optReconstructArrIndex(GenTreePtr tree, ArrIndex* result, unsigned lhsNum)
7765 // If we can extract "tree" (which is a top level comma) return.
7766 if (optExtractArrIndex(tree, result, lhsNum))
7770 // We have a comma (check if array base expr is computed in "before"), descend further.
7771 else if (tree->OperGet() == GT_COMMA)
7773 GenTreePtr before = tree->gtGetOp1();
7774 // "before" should evaluate an array base for the "after" indexing.
7775 if (before->OperGet() != GT_ASG)
7779 GenTreePtr lhs = before->gtGetOp1();
7780 GenTreePtr rhs = before->gtGetOp2();
7782 // "rhs" should contain an GT_INDEX
7783 if (!lhs->IsLocal() || !optReconstructArrIndex(rhs, result, lhsNum))
// The temp assigned here must be the array base of the trailing access.
7787 unsigned lhsNum = lhs->gtLclVarCommon.gtLclNum;
7788 GenTreePtr after = tree->gtGetOp2();
7789 // Pass the "lhsNum", so we can verify if indeed it is used as the array base.
7790 return optExtractArrIndex(after, result, lhsNum);
// optCanOptimizeByLoopCloningVisitor: fgWalkTreePre callback that forwards to
// optCanOptimizeByLoopCloning with the LoopCloneVisitorInfo carried in
// pCallbackData.
7796 Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloningVisitor(GenTreePtr* pTree, Compiler::fgWalkData* data)
7798 return data->compiler->optCanOptimizeByLoopCloning(*pTree, (LoopCloneVisitorInfo*)data->pCallbackData);
7801 //-------------------------------------------------------------------------
7802 // optIsStackLocalInvariant: Is stack local invariant in loop.
7805 // loopNum The loop in which the variable is tested for invariance.
7806 // lclNum The local that is tested for invariance in the loop.
7809 // Returns true if the variable is loop invariant in loopNum.
// optIsStackLocalInvariant: A local is loop invariant in 'loopNum' if it is
// neither address-exposed nor assigned anywhere inside the loop.
7811 bool Compiler::optIsStackLocalInvariant(unsigned loopNum, unsigned lclNum)
7813 if (lvaVarAddrExposed(lclNum))
7817 if (optIsVarAssgLoop(loopNum, lclNum))
7824 //----------------------------------------------------------------------------------------------
7825 // optCanOptimizeByLoopCloning: Check if the tree can be optimized by loop cloning and if so,
7826 // identify as potential candidate and update the loop context.
7829 // tree The tree encountered during the tree walk.
7830 // info Supplies information about the current block or stmt in which the tree is.
7831 // Also supplies the "context" pointer for updating with loop cloning
7832 // candidates. Also supplies loopNum.
7835 // If array index can be reconstructed, check if the iter var of the loop matches the
7836 // array index var in some dim. Also ensure other index vars before the identified
7837 // dim are loop invariant.
7840 // Skip sub trees if the optimization candidate is identified or else continue walking
// optCanOptimizeByLoopCloning: If 'tree' is a reconstructible array access
// whose index in some dimension is the loop's iteration variable (and all
// prior-dimension indices are loop invariant), push a LcJaggedArrayOptInfo
// candidate onto the loop's context. Subtrees are skipped once a decision is
// made; GT_ARR_ELEM (true multi-dim arrays) is recognized but not yet
// implemented.
7842 Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTreePtr tree, LoopCloneVisitorInfo* info)
7844 ArrIndex arrIndex(getAllocator());
7846 // Check if array index can be optimized.
7847 if (optReconstructArrIndex(tree, &arrIndex, BAD_VAR_NUM))
7849 assert(tree->gtOper == GT_COMMA);
7853 JITDUMP("Found ArrIndex at tree ");
7855 printf(" which is equivalent to: ");
// The array base itself must be invariant in the loop.
7860 if (!optIsStackLocalInvariant(info->loopNum, arrIndex.arrLcl))
7862 return WALK_SKIP_SUBTREES;
7865 // Walk the dimensions and see if iterVar of the loop is used as index.
7866 for (unsigned dim = 0; dim < arrIndex.rank; ++dim)
7868 // Is index variable also used as the loop iter var.
7869 if (arrIndex.indLcls[dim] == optLoopTable[info->loopNum].lpIterVar())
7871 // Check the previous indices are all loop invariant.
7872 for (unsigned dim2 = 0; dim2 < dim; ++dim2)
7874 if (optIsVarAssgLoop(info->loopNum, arrIndex.indLcls[dim2]))
7876 JITDUMP("V%02d is assigned in loop\n", arrIndex.indLcls[dim2]);
7877 return WALK_SKIP_SUBTREES;
7883 JITDUMP("Loop %d can be cloned for ArrIndex ", info->loopNum);
7885 JITDUMP(" on dim %d\n", dim);
7888 // Update the loop context.
7889 info->context->EnsureLoopOptInfo(info->loopNum)
7890 ->Push(new (this, CMK_LoopOpt) LcJaggedArrayOptInfo(arrIndex, dim, info->stmt));
7894 JITDUMP("Induction V%02d is not used as index on dim %d\n", optLoopTable[info->loopNum].lpIterVar(),
7898 return WALK_SKIP_SUBTREES;
7900 else if (tree->gtOper == GT_ARR_ELEM)
7902 // TODO-CQ: CLONE: Implement.
7903 return WALK_SKIP_SUBTREES;
7905 return WALK_CONTINUE;
// Callback payload for optValidRangeCheckIndex: carries the Compiler instance plus
// a bValidIndex flag that the walk clears when the index expression is unsafe.
// (The bValidIndex member declaration is on a line not visible in this extract —
// see its uses in optValidRangeCheckIndex / optIsRangeCheckRemovable.)
7908 struct optRangeCheckDsc
7910 Compiler* pCompiler;
7914 Walk to make sure that only locals and constants are contained in the index
// Tree-walk callback: marks the bounds-check index invalid if any sub-expression
// could change between the range check and the array access — memory loads
// (GT_IND/GT_CLS_VAR/GT_FIELD/GT_LCL_FLD) or address-exposed locals.
7917 Compiler::fgWalkResult Compiler::optValidRangeCheckIndex(GenTreePtr* pTree, fgWalkData* data)
7919 GenTreePtr tree = *pTree;
7920 optRangeCheckDsc* pData = (optRangeCheckDsc*)data->pCallbackData;
// Values loaded from memory can be mutated by another thread: not a stable index.
7922 if (tree->gtOper == GT_IND || tree->gtOper == GT_CLS_VAR || tree->gtOper == GT_FIELD || tree->gtOper == GT_LCL_FLD)
7924 pData->bValidIndex = false;
// (Extract note: the early-abort return after clearing the flag is on a line
// not shown in this chunk.)
7928 if (tree->gtOper == GT_LCL_VAR)
// An address-exposed local can likewise be modified through a pointer.
7930 if (pData->pCompiler->lvaTable[tree->gtLclVarCommon.gtLclNum].lvAddrExposed)
7932 pData->bValidIndex = false;
7937 return WALK_CONTINUE;
7941 returns true if a range check can legally be removed (for the moment it checks
7942 that the array is a local array (non subject to racing conditions) and that the
7943 index is either a constant or a local
// Legality check for removing a GT_ARR_BOUNDS_CHECK: the length must be a
// constant, or the array must be a non-address-exposed local, and the index
// expression must survive optValidRangeCheckIndex. (Extract note: the
// return-false/return-true statements are on lines not shown in this chunk.)
7945 bool Compiler::optIsRangeCheckRemovable(GenTreePtr tree)
7947 noway_assert(tree->gtOper == GT_ARR_BOUNDS_CHECK);
7948 GenTreeBoundsChk* bndsChk = tree->AsBoundsChk();
7949 GenTreePtr pArray = bndsChk->GetArray();
7950 if (pArray == nullptr && !bndsChk->gtArrLen->IsCnsIntOrI())
7954 GenTreePtr pIndex = bndsChk->gtIndex;
7956 // The length must be a constant (the pArray == NULL case) or the array reference must be a local.
7957 // Otherwise we can be targeted by malicious race-conditions.
7958 if (pArray != nullptr)
7960 if (pArray->gtOper != GT_LCL_VAR)
7966 printf("Can't remove range check if the array isn't referenced with a local\n");
7974 noway_assert(pArray->gtType == TYP_REF);
7975 noway_assert(pArray->gtLclVarCommon.gtLclNum < lvaCount);
7977 if (lvaTable[pArray->gtLclVarCommon.gtLclNum].lvAddrExposed)
7979 // If the array address has been taken, don't do the optimization
7980 // (this restriction can be lowered a bit, but i don't think it's worth it)
7981 CLANG_FORMAT_COMMENT_ANCHOR;
7985 printf("Can't remove range check if the array has its address taken\n");
// Validate the index expression: only locals and constants allowed.
7994 optRangeCheckDsc Data;
7995 Data.pCompiler = this;
7996 Data.bValidIndex = true;
7998 fgWalkTreePre(&pIndex, optValidRangeCheckIndex, &Data);
8000 if (!Data.bValidIndex)
8005 printf("Can't remove range check with this index");
8016 /******************************************************************************
8018 * Replace x==null with (x|x)==0 if x is a GC-type.
8019 * This will stress code-gen and the emitter to make sure they support such trees.
// Stress-mode only (STRESS_OPT_BOOLS_GC): rewrites the condition of a BBJ_COND
// block so that a GC-typed comparand becomes (x|x) or (x&x) before the compare.
// (Extract note: the declarations of 'relop' and 'isBool' are on lines not
// shown in this chunk.)
8024 void Compiler::optOptimizeBoolsGcStress(BasicBlock* condBlock)
8026 if (!compStressCompile(STRESS_OPT_BOOLS_GC, 20))
8031 noway_assert(condBlock->bbJumpKind == BBJ_COND);
// The JTRUE is the expression of the last statement in the block.
8032 GenTreePtr condStmt = condBlock->bbTreeList->gtPrev->gtStmt.gtStmtExpr;
8034 noway_assert(condStmt->gtOper == GT_JTRUE);
8039 GenTreePtr comparand = optIsBoolCond(condStmt, &relop, &isBool);
8041 if (comparand == nullptr || !varTypeIsGC(comparand->TypeGet()))
// Bail if cloning the comparand could duplicate side effects.
8046 if (comparand->gtFlags & (GTF_ASG | GTF_CALL | GTF_ORDER_SIDEEFF))
8051 GenTreePtr comparandClone = gtCloneExpr(comparand);
8053 // Bump up the ref-counts of any variables in 'comparandClone'
8054 compCurBB = condBlock;
8055 fgWalkTreePre(&comparandClone, Compiler::lvaIncRefCntsCB, (void*)this, true);
// Replace op1 with (comparand OR/AND comparandClone); OR vs AND is chosen randomly
// under the stress mode to exercise both shapes.
8057 noway_assert(relop->gtOp.gtOp1 == comparand);
8058 genTreeOps oper = compStressCompile(STRESS_OPT_BOOLS_GC, 50) ? GT_OR : GT_AND;
8059 relop->gtOp.gtOp1 = gtNewOperNode(oper, TYP_I_IMPL, comparand, comparandClone);
8061 // Comparand type is already checked, and we have const int, there is no harm
8062 // morphing it into a TYP_I_IMPL.
8063 noway_assert(relop->gtOp.gtOp2->gtOper == GT_CNS_INT);
8064 relop->gtOp.gtOp2->gtType = TYP_I_IMPL;
8069 /******************************************************************************
8070 * Function used by folding of boolean conditionals
8071 * Given a GT_JTRUE node, checks that it is a boolean comparison of the form
8072 * "if (boolVal ==/!= 0/1)". This is translated into a GT_EQ node with "op1"
8073 * being a boolean lclVar and "op2" the const 0/1.
8074 * On success, the comparand (ie. boolVal) is returned. Else NULL.
8075 * compPtr returns the compare node (i.e. GT_EQ or GT_NE node)
8076 * boolPtr returns whether the comparand is a boolean value (must be 0 or 1).
8077 * When the returned boolPtr equals true, if the comparison was against a 1 (i.e. true)
8078 * value then we morph the tree by reversing the GT_EQ/GT_NE and change the 1 to 0.
8081 GenTree* Compiler::optIsBoolCond(GenTree* condBranch, GenTree** compPtr, bool* boolPtr)
8083 bool isBool = false;
8085 noway_assert(condBranch->gtOper == GT_JTRUE);
8086 GenTree* cond = condBranch->gtOp.gtOp1;
8088 /* The condition must be "!= 0" or "== 0" */
8090 if ((cond->gtOper != GT_EQ) && (cond->gtOper != GT_NE))
8095 /* Return the compare node to the caller */
8099 /* Get hold of the comparands */
8101 GenTree* opr1 = cond->gtOp.gtOp1;
8102 GenTree* opr2 = cond->gtOp.gtOp2;
// Only compares against an integral constant 0 or 1 are candidates.
8104 if (opr2->gtOper != GT_CNS_INT)
8109 if (!opr2->IsIntegralConst(0) && !opr2->IsIntegralConst(1))
8114 ssize_t ival2 = opr2->gtIntCon.gtIconVal;
8116 /* Is the value a boolean?
8117 * We can either have a boolean expression (marked GTF_BOOLEAN) or
8118 * a local variable that is marked as being boolean (lvIsBoolean) */
8120 if (opr1->gtFlags & GTF_BOOLEAN)
// A 0/1 constant comparand also counts as boolean.
8124 else if ((opr1->gtOper == GT_CNS_INT) && (opr1->IsIntegralConst(0) || opr1->IsIntegralConst(1)))
8128 else if (opr1->gtOper == GT_LCL_VAR)
8130 /* is it a boolean local variable */
8132 unsigned lclNum = opr1->gtLclVarCommon.gtLclNum;
8133 noway_assert(lclNum < lvaCount);
8135 if (lvaTable[lclNum].lvIsBoolean)
8141 /* Was our comparison against the constant 1 (i.e. true) */
8144 // If this is a boolean expression tree we can reverse the relop
8145 // and change the true to false.
// Normalize "x ==/!= 1" into the reversed "x !=/== 0" so callers only ever
// see compares against zero.
8148 gtReverseCond(cond);
8149 opr2->gtIntCon.gtIconVal = 0;
// Phase entry point: scans for pairs of adjacent BBJ_COND blocks whose JTRUEs are
// boolean compares against zero (via optIsBoolCond), folds the two conditions into
// one branch on (c1 AND/OR c2), removes the second block, and repairs preds, edge
// weights, the EH table, and the loop table. (Extract note: brace lines, early
// 'continue's, #ifdef DEBUG wrappers and the declarations of bool1/bool2/edge2/
// foldOp are on lines not shown in this chunk.)
8161 void Compiler::optOptimizeBools()
8166 printf("*************** In optOptimizeBools()\n");
8169 printf("Blocks/Trees before phase\n");
8170 fgDispBasicBlocks(true);
8180 for (BasicBlock* b1 = fgFirstBB; b1; b1 = b1->bbNext)
8182 /* We're only interested in conditional jumps here */
8184 if (b1->bbJumpKind != BBJ_COND)
8189 /* If there is no next block, we're done */
8191 BasicBlock* b2 = b1->bbNext;
8197 /* The next block must not be marked as BBF_DONT_REMOVE */
8198 if (b2->bbFlags & BBF_DONT_REMOVE)
8203 /* The next block also needs to be a condition */
8205 if (b2->bbJumpKind != BBJ_COND)
8208 optOptimizeBoolsGcStress(b1);
8213 bool sameTarget; // Do b1 and b2 have the same bbJumpDest?
8215 if (b1->bbJumpDest == b2->bbJumpDest)
8217 /* Given the following sequence of blocks :
8221 we will try to fold it to :
8222 B1: brtrue(t1|t2, BX)
8228 else if (b1->bbJumpDest == b2->bbNext) /*b1->bbJumpDest->bbNum == n1+2*/
8230 /* Given the following sequence of blocks :
8234 we will try to fold it to :
8235 B1: brtrue((!t1)&&t2, B3)
8246 /* The second block must contain a single statement */
8248 GenTreePtr s2 = b2->bbTreeList;
8249 if (s2->gtPrev != s2)
8254 noway_assert(s2->gtOper == GT_STMT);
8255 GenTreePtr t2 = s2->gtStmt.gtStmtExpr;
8256 noway_assert(t2->gtOper == GT_JTRUE);
8258 /* Find the condition for the first block */
8260 GenTreePtr s1 = b1->bbTreeList->gtPrev;
8262 noway_assert(s1->gtOper == GT_STMT);
8263 GenTreePtr t1 = s1->gtStmt.gtStmtExpr;
8264 noway_assert(t1->gtOper == GT_JTRUE);
// If b2 has other predecessors, merging it into b1 would change their behavior.
8266 if (b2->countOfInEdges() > 1)
8271 /* Find the branch conditions of b1 and b2 */
// Note: optIsBoolCond updates t1/t2 in place to point at the compare nodes.
8275 GenTreePtr c1 = optIsBoolCond(t1, &t1, &bool1);
8281 GenTreePtr c2 = optIsBoolCond(t2, &t2, &bool2);
// NOTE(review): '&&' binds tighter than '||', so these asserts parse as
// (gtOper == GT_EQ) || ((gtOper == GT_NE) && (gtOp1 == c)) — the gtOp1 check is
// skipped for GT_EQ. The intent is almost certainly
// ((gtOper == GT_EQ) || (gtOper == GT_NE)) && (gtOp1 == c); confirm and
// parenthesize in a code change.
8287 noway_assert(t1->gtOper == GT_EQ || t1->gtOper == GT_NE && t1->gtOp.gtOp1 == c1);
8288 noway_assert(t2->gtOper == GT_EQ || t2->gtOper == GT_NE && t2->gtOp.gtOp1 == c2);
8290 // Leave out floats where the bit-representation is more complicated
8291 // - there are two representations for 0.
8293 if (varTypeIsFloating(c1->TypeGet()) || varTypeIsFloating(c2->TypeGet()))
8298 // Make sure the types involved are of the same sizes
8299 if (genTypeSize(c1->TypeGet()) != genTypeSize(c2->TypeGet()))
8303 if (genTypeSize(t1->TypeGet()) != genTypeSize(t2->TypeGet()))
8307 #ifdef _TARGET_ARMARCH_
8308 // Skip the small operand which we cannot encode.
8309 if (varTypeIsSmall(c1->TypeGet()))
8312 /* The second condition must not contain side effects */
// c2 will execute unconditionally after folding, so it must be side-effect free.
8314 if (c2->gtFlags & GTF_GLOB_EFFECT)
8319 /* The second condition must not be too expensive */
8323 if (c2->gtCostEx > 12)
// GC-typed comparands are folded as native-int bit patterns.
8330 var_types foldType = c1->TypeGet();
8331 if (varTypeIsGC(foldType))
8333 foldType = TYP_I_IMPL;
8338 /* Both conditions must be the same */
8340 if (t1->gtOper != t2->gtOper)
8345 if (t1->gtOper == GT_EQ)
8347 /* t1:c1==0 t2:c2==0 ==> Branch to BX if either value is 0
8348 So we will branch to BX if (c1&c2)==0 */
8355 /* t1:c1!=0 t2:c2!=0 ==> Branch to BX if either value is non-0
8356 So we will branch to BX if (c1|c2)!=0 */
8364 /* The b1 condition must be the reverse of the b2 condition */
8366 if (t1->gtOper == t2->gtOper)
8371 if (t1->gtOper == GT_EQ)
8373 /* t1:c1==0 t2:c2!=0 ==> Branch to BX if both values are non-0
8374 So we will branch to BX if (c1&c2)!=0 */
8381 /* t1:c1!=0 t2:c2==0 ==> Branch to BX if both values are 0
8382 So we will branch to BX if (c1|c2)==0 */
8389 // Anding requires both values to be 0 or 1
8391 if ((foldOp == GT_AND) && (!bool1 || !bool2))
8397 // Now update the trees
8399 GenTreePtr cmpOp1 = gtNewOperNode(foldOp, foldType, c1, c2);
8402 /* When we 'OR'/'AND' two booleans, the result is boolean as well */
8403 cmpOp1->gtFlags |= GTF_BOOLEAN;
// Splice the folded condition into b1's compare; the zero constant keeps the
// compare shape, only its type may need to change.
8407 t1->gtOp.gtOp1 = cmpOp1;
8408 t1->gtOp.gtOp2->gtType = foldType; // Could have been varTypeIsGC()
8410 #if FEATURE_SET_FLAGS
8411 // For comparisons against zero we will have the GTF_SET_FLAGS set
8412 // and this can cause an assert to fire in fgMoveOpsLeft(GenTreePtr tree)
8413 // during the CSE phase.
8415 // So make sure to clear any GTF_SET_FLAGS bit on these operations
8416 // as they are no longer feeding directly into a comparison against zero
8418 // Make sure that the GTF_SET_FLAGS bit is cleared.
8419 // Fix 388436 ARM JitStress WP7
8420 c1->gtFlags &= ~GTF_SET_FLAGS;
8421 c2->gtFlags &= ~GTF_SET_FLAGS;
8423 // The new top level node that we just created does feed directly into
8424 // a comparison against zero, so set the GTF_SET_FLAGS bit so that
8425 // we generate an instruction that sets the flags, which allows us
8426 // to omit the cmp with zero instruction.
8428 // Request that the codegen for cmpOp1 sets the condition flags
8429 // when it generates the code for cmpOp1.
8431 cmpOp1->gtRequestSetFlags();
// Combine the flow edges of the two branches so profile weights are preserved.
8434 flowList* edge1 = fgGetPredForBlock(b1->bbJumpDest, b1);
8437 /* Modify the target of the conditional jump and update bbRefs and bbPreds */
8441 edge2 = fgGetPredForBlock(b2->bbJumpDest, b2);
8445 edge2 = fgGetPredForBlock(b2->bbNext, b2);
8447 fgRemoveRefPred(b1->bbJumpDest, b1);
8449 b1->bbJumpDest = b2->bbJumpDest;
8451 fgAddRefPred(b2->bbJumpDest, b1);
8454 noway_assert(edge1 != nullptr);
8455 noway_assert(edge2 != nullptr);
// Sum the edge weight bounds; on overflow fall back to the widest possible range.
8457 BasicBlock::weight_t edgeSumMin = edge1->flEdgeWeightMin + edge2->flEdgeWeightMin;
8458 BasicBlock::weight_t edgeSumMax = edge1->flEdgeWeightMax + edge2->flEdgeWeightMax;
8459 if ((edgeSumMax >= edge1->flEdgeWeightMax) && (edgeSumMax >= edge2->flEdgeWeightMax))
8461 edge1->flEdgeWeightMin = edgeSumMin;
8462 edge1->flEdgeWeightMax = edgeSumMax;
8466 edge1->flEdgeWeightMin = BB_ZERO_WEIGHT;
8467 edge1->flEdgeWeightMax = BB_MAX_WEIGHT;
8470 /* Get rid of the second block (which is a BBJ_COND) */
8472 noway_assert(b1->bbJumpKind == BBJ_COND);
8473 noway_assert(b2->bbJumpKind == BBJ_COND);
8474 noway_assert(b1->bbJumpDest == b2->bbJumpDest);
8475 noway_assert(b1->bbNext == b2);
8476 noway_assert(b2->bbNext);
8479 b2->bbFlags |= BBF_REMOVED;
8481 // If b2 was the last block of a try or handler, update the EH table.
8483 ehUpdateForDeletedBlock(b2);
8485 /* Update bbRefs and bbPreds */
8487 /* Replace pred 'b2' for 'b2->bbNext' with 'b1'
8488 * Remove pred 'b2' for 'b2->bbJumpDest' */
8490 fgReplacePred(b2->bbNext, b2, b1);
8492 fgRemoveRefPred(b2->bbJumpDest, b2);
8494 /* Update the block numbers and try again */
8506 // Update loop table
8507 fgUpdateLoopsAfterCompacting(b1, b2);
8512 printf("Folded %sboolean conditions of BB%02u and BB%02u to :\n", c2->OperIsLeaf() ? "" : "non-leaf ",
8513 b1->bbNum, b2->bbNum);
8522 fgDebugCheckBBlist();