1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
10 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
11 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
17 #pragma warning(disable : 4701)
20 /*****************************************************************************/
// Static counters shared across all Compiler instances:
// number of range checks removed vs. total range checks seen
// (presumably accumulated for JIT statistics — confirm against MEASURE_* usage).
24 unsigned Compiler::optRangeChkRmv = 0;
26 unsigned Compiler::optRangeChkAll = 0;
29 /*****************************************************************************/
// optInit: Reset the optimizer's per-method state to its initial values.
// Called once before optimizing each method.
31 void Compiler::optInit()
33 optLoopsMarked = false;
36 /* Initialize the # of tracked loops to 0 */
38 /* Keep track of the number of calls and indirect calls made by this method */
40 optIndirectCallCount = 0;
41 optNativeCallCount = 0;
// Assertion-propagation state: no assertions recorded yet, no dependency table.
42 optAssertionCount = 0;
43 optAssertionDep = nullptr;
// CSE bookkeeping; UINT_MAX appears to be a "CSE phase not started" sentinel — confirm.
45 optCSECandidateTotal = 0;
46 optCSEstart = UINT_MAX;
48 #endif // FEATURE_ANYCSE
51 DataFlow::DataFlow(Compiler* pCompiler) : m_pCompiler(pCompiler)
55 /*****************************************************************************
// optSetBlockWeights: Adjust basic-block weights based on reachability and
// dominance of return blocks. Blocks unreachable from the first block are
// marked run-rarely; blocks that don't dominate a return block get their
// weight halved (only when not using profile data).
// Precondition: dominators computed, not MinOpts/debug codegen.
59 void Compiler::optSetBlockWeights()
61 noway_assert(!opts.MinOpts() && !opts.compDbgCode);
62 assert(fgDomsComputed);
68 bool firstBBdomsRets = true;
72 for (block = fgFirstBB; (block != nullptr); block = block->bbNext)
74 /* Blocks that can't be reached via the first block are rarely executed */
75 if (!fgReachable(fgFirstBB, block))
77 block->bbSetRunRarely();
80 if (block->bbWeight != BB_ZERO_WEIGHT)
82 // Calculate our bbWeight:
84 // o BB_UNITY_WEIGHT if we dominate all BBJ_RETURN blocks
85 // o otherwise BB_UNITY_WEIGHT / 2
87 bool domsRets = true; // Assume that we will dominate
89 for (BasicBlockList* retBlocks = fgReturnBlocks; retBlocks != nullptr; retBlocks = retBlocks->next)
91 if (!fgDominate(block, retBlocks->block))
98 if (block == fgFirstBB)
100 firstBBdomsRets = domsRets;
103 // If we are not using profile weight then we lower the weight
104 // of blocks that do not dominate a return block
106 if (firstBBdomsRets && (fgIsUsingProfileWeights() == false) && (domsRets == false))
// Halve the weight; the assert guards against the weight dropping to zero.
111 block->modifyBBWeight(block->bbWeight / 2);
112 noway_assert(block->bbWeight);
118 if (changed && verbose)
120 printf("\nAfter optSetBlockWeights:\n");
125 /* Check that the flowgraph data (bbNum, bbRefs, bbPreds) is up-to-date */
126 fgDebugCheckBBlist();
130 /*****************************************************************************
132 * Marks the blocks between 'begBlk' and 'endBlk' as part of a loop.
// optMarkLoopBlocks: Scale up the bbWeight of each block in the loop
// [begBlk..endBlk]. Blocks that dominate a backedge get the full
// BB_LOOP_WEIGHT multiplier; blocks that merely reach one get half.
// Blocks with real profile weights (BBF_PROF_WEIGHT) are left unchanged.
// If excludeEndBlk is true, endBlk itself is not marked.
135 void Compiler::optMarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk, bool excludeEndBlk)
137 /* Calculate the 'loopWeight',
138 this is the amount to increase each block in the loop
139 Our heuristic is that loops are weighted eight times more
140 than straight line code.
141 Thus we increase each block by 7 times the weight of
142 the loop header block,
143 if the loops are all properly formed gives us:
144 (assuming that BB_LOOP_WEIGHT is 8)
146 1 -- non loop basic block
147 8 -- single loop nesting
148 64 -- double loop nesting
149 512 -- triple loop nesting
153 noway_assert(begBlk->bbNum <= endBlk->bbNum);
154 noway_assert(begBlk->isLoopHead());
155 noway_assert(fgReachable(begBlk, endBlk));
160 printf("\nMarking loop L%02u", begBlk->bbLoopNum);
164 noway_assert(!opts.MinOpts());
166 /* Build list of backedges for block begBlk */
167 flowList* backedgeList = nullptr;
169 for (flowList* pred = begBlk->bbPreds; pred != nullptr; pred = pred->flNext)
171 /* Is this a backedge? */
// Any predecessor numbered >= begBlk is treated as a backedge source.
172 if (pred->flBlock->bbNum >= begBlk->bbNum)
174 flowList* flow = new (this, CMK_FlowList) flowList();
176 #if MEASURE_BLOCK_SIZE
178 genFlowNodeSize += sizeof(flowList);
179 #endif // MEASURE_BLOCK_SIZE
// Prepend the new entry onto the backedge list.
181 flow->flNext = backedgeList;
182 flow->flBlock = pred->flBlock;
187 /* At least one backedge must have been found (the one from endBlk) */
188 noway_assert(backedgeList);
190 BasicBlock* curBlk = begBlk;
194 noway_assert(curBlk);
196 // For curBlk to be part of a loop that starts at begBlk
197 // curBlk must be reachable from begBlk and (since this is a loop)
198 // likewise begBlk must be reachable from curBlk.
201 if (fgReachable(curBlk, begBlk) && fgReachable(begBlk, curBlk))
203 /* If this block reaches any of the backedge blocks we set reachable */
204 /* If this block dominates any of the backedge blocks we set dominates */
205 bool reachable = false;
206 bool dominates = false;
208 for (flowList* tmp = backedgeList; tmp != nullptr; tmp = tmp->flNext)
210 BasicBlock* backedge = tmp->flBlock;
212 if (!curBlk->isRunRarely())
214 reachable |= fgReachable(curBlk, backedge);
215 dominates |= fgDominate(curBlk, backedge);
// Both conditions established — no need to examine further backedges.
217 if (dominates && reachable)
226 noway_assert(curBlk->bbWeight > BB_ZERO_WEIGHT);
230 if ((curBlk->bbFlags & BBF_PROF_WEIGHT) != 0)
232 // We have real profile weights, so we aren't going to change this blocks weight
233 weight = curBlk->bbWeight;
239 weight = curBlk->bbWeight * BB_LOOP_WEIGHT;
// Half the multiplier when the block reaches but doesn't dominate a backedge.
243 weight = curBlk->bbWeight * (BB_LOOP_WEIGHT / 2);
247 // The multiplication may have caused us to overflow
249 if (weight < curBlk->bbWeight)
251 // The multiplication caused us to overflow
252 weight = BB_MAX_WEIGHT;
255 // Set the new weight
257 curBlk->modifyBBWeight(weight);
262 printf("\n BB%02u(wt=%s)", curBlk->bbNum, refCntWtd2str(curBlk->getBBWeight(this)));
268 /* Stop if we've reached the last block in the loop */
270 if (curBlk == endBlk)
275 curBlk = curBlk->bbNext;
277 /* If we are excluding the endBlk then stop if we've reached endBlk */
279 if (excludeEndBlk && (curBlk == endBlk))
286 /*****************************************************************************
288 * Unmark the blocks between 'begBlk' and 'endBlk' as part of a loop.
// optUnmarkLoopBlocks: Undo optMarkLoopBlocks for the blocks in
// [begBlk..endBlk], dividing their weights back down by BB_LOOP_WEIGHT.
// Only performed when the loop has exactly one back edge (of kind
// BBJ_COND or BBJ_ALWAYS); otherwise the loop is still live and is left marked.
291 void Compiler::optUnmarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk)
293 /* A set of blocks that were previously marked as a loop are now
294 to be unmarked, since we have decided that for some reason this
295 loop no longer exists.
296 Basically we are just reseting the blocks bbWeight to their
300 noway_assert(begBlk->bbNum <= endBlk->bbNum);
301 noway_assert(begBlk->isLoopHead());
303 noway_assert(!opts.MinOpts());
306 unsigned backEdgeCount = 0;
308 for (flowList* pred = begBlk->bbPreds; pred != nullptr; pred = pred->flNext)
310 curBlk = pred->flBlock;
312 /* is this a backward edge? (from curBlk to begBlk) */
// Forward edges (pred numbered below begBlk) are not back edges; skip them.
314 if (begBlk->bbNum > curBlk->bbNum)
319 /* We only consider back-edges that are BBJ_COND or BBJ_ALWAYS for loops */
321 if ((curBlk->bbJumpKind != BBJ_COND) && (curBlk->bbJumpKind != BBJ_ALWAYS))
329 /* Only unmark the loop blocks if we have exactly one loop back edge */
330 if (backEdgeCount != 1)
335 if (backEdgeCount > 0)
337 printf("\nNot removing loop L%02u, due to an additional back edge", begBlk->bbLoopNum);
339 else if (backEdgeCount == 0)
341 printf("\nNot removing loop L%02u, due to no back edge", begBlk->bbLoopNum);
347 noway_assert(backEdgeCount == 1);
348 noway_assert(fgReachable(begBlk, endBlk));
353 printf("\nUnmarking loop L%02u", begBlk->bbLoopNum);
360 noway_assert(curBlk);
362 // For curBlk to be part of a loop that starts at begBlk
363 // curBlk must be reachable from begBlk and (since this is a loop)
364 // likewise begBlk must be reachable from curBlk.
366 if (!curBlk->isRunRarely() && fgReachable(curBlk, begBlk) && fgReachable(begBlk, curBlk))
368 unsigned weight = curBlk->bbWeight;
370 // Don't unmark blocks that are set to BB_MAX_WEIGHT
371 // Don't unmark blocks when we are using profile weights
373 if (!curBlk->isMaxBBWeight() && ((curBlk->bbFlags & BBF_PROF_WEIGHT) == 0))
375 if (!fgDominate(curBlk, endBlk))
381 /* Merging of blocks can disturb the Dominates
382 information (see RAID #46649) */
383 if (weight < BB_LOOP_WEIGHT)
389 // We can overflow here so check for it
390 if (weight < curBlk->bbWeight)
392 weight = BB_MAX_WEIGHT;
395 assert(weight >= BB_LOOP_WEIGHT);
// Reverse the marking multiplication applied by optMarkLoopBlocks.
397 curBlk->modifyBBWeight(weight / BB_LOOP_WEIGHT);
403 printf("\n BB%02u(wt=%s)", curBlk->bbNum, refCntWtd2str(curBlk->getBBWeight(this)));
407 /* Stop if we've reached the last block in the loop */
409 if (curBlk == endBlk)
414 curBlk = curBlk->bbNext;
416 /* Stop if we go past the last block in the loop, as it may have been deleted */
417 if (curBlk->bbNum > endBlk->bbNum)
424 /*****************************************************************************************************
426 * Function called to update the loop table and bbWeight before removing a block
// optUpdateLoopsBeforeRemoveBlock: Fix up the loop table (and possibly
// block weights) before 'block' is removed from the flowgraph.
// - If the block is a loop's entry or bottom, the loop is marked LPFLG_REMOVED.
// - If it is a loop's sole exit, the one-exit info is cleared.
// - If it jumps to a loop entry, checks whether the loop becomes unreachable.
// - If it is a loop head, the head is updated to the previous block.
// skipUnmarkLoop suppresses the final optUnmarkLoopBlocks call.
429 void Compiler::optUpdateLoopsBeforeRemoveBlock(BasicBlock* block, bool skipUnmarkLoop)
436 noway_assert(!opts.MinOpts());
438 bool removeLoop = false;
440 /* If an unreachable block was part of a loop entry or bottom then the loop is unreachable */
441 /* Special case: the block was the head of a loop - or pointing to a loop entry */
443 for (unsigned loopNum = 0; loopNum < optLoopCount; loopNum++)
445 /* Some loops may have been already removed by
446 * loop unrolling or conditional folding */
448 if (optLoopTable[loopNum].lpFlags & LPFLG_REMOVED)
453 if (block == optLoopTable[loopNum].lpEntry || block == optLoopTable[loopNum].lpBottom)
455 optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED;
462 printf("\nUpdateLoopsBeforeRemoveBlock Before: ");
463 optPrintLoopInfo(loopNum);
467 /* If the loop is still in the table
468 * any block in the loop must be reachable !!! */
470 noway_assert(optLoopTable[loopNum].lpEntry != block);
471 noway_assert(optLoopTable[loopNum].lpBottom != block);
473 if (optLoopTable[loopNum].lpExit == block)
475 optLoopTable[loopNum].lpExit = nullptr;
476 optLoopTable[loopNum].lpFlags &= ~LPFLG_ONE_EXIT;
480 /* If this points to the actual entry in the loop
481 * then the whole loop may become unreachable */
// Examine every successor of the removed block for an edge into the loop entry.
483 switch (block->bbJumpKind)
486 BasicBlock** jumpTab;
490 if (block->bbNext == optLoopTable[loopNum].lpEntry)
495 if (block->bbJumpKind == BBJ_NONE)
503 noway_assert(block->bbJumpDest);
504 if (block->bbJumpDest == optLoopTable[loopNum].lpEntry)
511 jumpCnt = block->bbJumpSwt->bbsCount;
512 jumpTab = block->bbJumpSwt->bbsDstTab;
516 noway_assert(*jumpTab);
517 if ((*jumpTab) == optLoopTable[loopNum].lpEntry)
521 } while (++jumpTab, --jumpCnt);
530 /* Check if the entry has other predecessors outside the loop
531 * TODO: Replace this when predecessors are available */
// Linear scan over all blocks, since pred lists aren't available here.
533 BasicBlock* auxBlock;
534 for (auxBlock = fgFirstBB; auxBlock; auxBlock = auxBlock->bbNext)
536 /* Ignore blocks in the loop */
538 if (auxBlock->bbNum > optLoopTable[loopNum].lpHead->bbNum &&
539 auxBlock->bbNum <= optLoopTable[loopNum].lpBottom->bbNum)
544 switch (auxBlock->bbJumpKind)
547 BasicBlock** jumpTab;
551 if (auxBlock->bbNext == optLoopTable[loopNum].lpEntry)
556 if (auxBlock->bbJumpKind == BBJ_NONE)
564 noway_assert(auxBlock->bbJumpDest);
565 if (auxBlock->bbJumpDest == optLoopTable[loopNum].lpEntry)
572 jumpCnt = auxBlock->bbJumpSwt->bbsCount;
573 jumpTab = auxBlock->bbJumpSwt->bbsDstTab;
577 noway_assert(*jumpTab);
578 if ((*jumpTab) == optLoopTable[loopNum].lpEntry)
582 } while (++jumpTab, --jumpCnt);
// No other way into the loop entry — the whole loop is now unreachable.
592 optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED;
595 else if (optLoopTable[loopNum].lpHead == block)
597 /* The loop has a new head - Just update the loop table */
598 optLoopTable[loopNum].lpHead = block->bbPrev;
604 printf("\nUpdateLoopsBeforeRemoveBlock After: ");
605 optPrintLoopInfo(loopNum);
// If the removed block was the back edge of a marked loop, unmark the loop's
// blocks (requires valid dominators, per the epoch/size check).
610 if ((skipUnmarkLoop == false) && ((block->bbJumpKind == BBJ_ALWAYS) || (block->bbJumpKind == BBJ_COND)) &&
611 (block->bbJumpDest->isLoopHead()) && (block->bbJumpDest->bbNum <= block->bbNum) && fgDomsComputed &&
612 (fgCurBBEpochSize == fgDomBBcount + 1) && fgReachable(block->bbJumpDest, block))
614 optUnmarkLoopBlocks(block->bbJumpDest, block);
620 /*****************************************************************************
622 * Given the beginBlock of the loop, return the index of this loop
// optFindLoopNumberFromBeginBlock: Return the loop-table index of the loop
// whose first block (the block after its head) is 'begBlk'.
// Asserts if no such loop exists.
626 unsigned Compiler::optFindLoopNumberFromBeginBlock(BasicBlock* begBlk)
630 for (lnum = 0; lnum < optLoopCount; lnum++)
632 if (optLoopTable[lnum].lpHead->bbNext == begBlk)
639 noway_assert(!"Loop number not found.");
644 /*****************************************************************************
646 * Print loop info in an uniform way.
// optPrintLoopInfo (explicit-fields overload): Print a uniform one-line
// description of a loop: index, first/top/bottom/head/entry blocks, exit
// count, exit block, and parent loop (if any). Debug-output helper.
649 void Compiler::optPrintLoopInfo(unsigned loopInd,
654 BasicBlock* lpBottom,
655 unsigned char lpExitCnt,
659 noway_assert(lpHead);
662 // NOTE: we take "loopInd" as an argument instead of using the one
663 // stored in begBlk->bbLoopNum because sometimes begBlk->bbLoopNum
664 // has not be set correctly. For example, in optRecordLoop().
665 // However, in most of the cases, loops should have been recorded.
666 // Therefore the correct way is to call the Compiler::optPrintLoopInfo(unsigned lnum)
667 // version of this method.
669 printf("L%02u, from BB%02u", loopInd, lpFirst->bbNum);
// Only mention TOP separately when it differs from FIRST.
670 if (lpTop != lpFirst)
672 printf(" (loop top is BB%02u)", lpTop->bbNum);
675 printf(" to BB%02u (Head=BB%02u, Entry=BB%02u, ExitCnt=%d", lpBottom->bbNum, lpHead->bbNum, lpEntry->bbNum,
680 printf(" at BB%02u", lpExit->bbNum);
683 if (parentLoop != BasicBlock::NOT_IN_LOOP)
685 printf(", parent loop = L%02u", parentLoop);
690 /*****************************************************************************
692 * Print loop information given the index of the loop in the loop table.
// optPrintLoopInfo (index overload): Look up the loop descriptor in the loop
// table and forward all of its fields to the explicit-fields overload.
695 void Compiler::optPrintLoopInfo(unsigned lnum)
697 noway_assert(lnum < optLoopCount);
699 LoopDsc* ldsc = &optLoopTable[lnum]; // lnum is the INDEX to the loop table.
701 optPrintLoopInfo(lnum, ldsc->lpHead, ldsc->lpFirst, ldsc->lpTop, ldsc->lpEntry, ldsc->lpBottom, ldsc->lpExitCnt,
702 ldsc->lpExit, ldsc->lpParent);
707 //------------------------------------------------------------------------
708 // optPopulateInitInfo: Populate loop init info in the loop table.
711 // init - the tree that is supposed to initialize the loop iterator.
712 // iterVar - loop iteration variable.
715 // "false" if the loop table could not be populated with the loop iterVar init info.
718 // The 'init' tree is checked if its lhs is a local and rhs is either
719 // a const or a local.
// optPopulateInitInfo: Record the loop iterator's initialization in the loop
// table. Accepts only "iterVar = <int const>" (LPFLG_CONST_INIT) or
// "iterVar = <local>" (LPFLG_VAR_INIT); any other shape fails.
// Returns false when the init tree doesn't match one of those forms.
721 bool Compiler::optPopulateInitInfo(unsigned loopInd, GenTreePtr init, unsigned iterVar)
723 // Operator should be =
724 if (init->gtOper != GT_ASG)
729 GenTreePtr lhs = init->gtOp.gtOp1;
730 GenTreePtr rhs = init->gtOp.gtOp2;
731 // LHS has to be local and should equal iterVar.
732 if (lhs->gtOper != GT_LCL_VAR || lhs->gtLclVarCommon.gtLclNum != iterVar)
737 // RHS can be constant or local var.
738 // TODO-CQ: CLONE: Add arr length for descending loops.
739 if (rhs->gtOper == GT_CNS_INT && rhs->TypeGet() == TYP_INT)
741 optLoopTable[loopInd].lpFlags |= LPFLG_CONST_INIT;
742 optLoopTable[loopInd].lpConstInit = (int)rhs->gtIntCon.gtIconVal;
744 else if (rhs->gtOper == GT_LCL_VAR)
746 optLoopTable[loopInd].lpFlags |= LPFLG_VAR_INIT;
747 optLoopTable[loopInd].lpVarInit = rhs->gtLclVarCommon.gtLclNum;
756 //----------------------------------------------------------------------------------
757 // optCheckIterInLoopTest: Check if iter var is used in loop test.
760 // test "jtrue" tree or an asg of the loop iter termination condition
761 // from/to blocks (beg, end) which are part of the loop.
762 // iterVar loop iteration variable.
763 // loopInd loop index.
766 // The test tree is parsed to check if "iterVar" matches the lhs of the condition
767 // and the rhs limit is extracted from the "test" tree. The limit information is
768 // added to the loop table.
771 // "false" if the loop table could not be populated with the loop test info or
772 // if the test condition doesn't involve iterVar.
// optCheckIterInLoopTest: Verify the loop's termination condition uses the
// iteration variable, and classify the limit operand (constant, loop-invariant
// local, or array length) into the loop-table flags. The relop is stored as
// lpTestTree on success.
// Returns false if the iterator isn't one side of the compare, or the
// iterator operand isn't TYP_INT, or the limit doesn't match a known form.
774 bool Compiler::optCheckIterInLoopTest(
775 unsigned loopInd, GenTreePtr test, BasicBlock* from, BasicBlock* to, unsigned iterVar)
777 // Obtain the relop from the "test" tree.
778 // The test is either a JTRUE (condition is op1) or an ASG into a temp
// (condition is op2) — see optIsLoopTestEvalIntoTemp.
779 if (test->gtOper == GT_JTRUE)
781 relop = test->gtGetOp1();
785 assert(test->gtOper == GT_ASG);
786 relop = test->gtGetOp2();
789 noway_assert(relop->OperKind() & GTK_RELOP);
791 GenTreePtr opr1 = relop->gtOp.gtOp1;
792 GenTreePtr opr2 = relop->gtOp.gtOp2;
797 // Make sure op1 or op2 is the iterVar.
798 if (opr1->gtOper == GT_LCL_VAR && opr1->gtLclVarCommon.gtLclNum == iterVar)
803 else if (opr2->gtOper == GT_LCL_VAR && opr2->gtLclVarCommon.gtLclNum == iterVar)
// Only 32-bit int iterators are handled.
813 if (iterOp->gtType != TYP_INT)
818 // Mark the iterator node.
819 iterOp->gtFlags |= GTF_VAR_ITERATOR;
821 // Check what type of limit we have - constant, variable or arr-len.
822 if (limitOp->gtOper == GT_CNS_INT)
824 optLoopTable[loopInd].lpFlags |= LPFLG_CONST_LIMIT;
// A local limit only counts when it is not reassigned anywhere in [from, to].
826 else if (limitOp->gtOper == GT_LCL_VAR && !optIsVarAssigned(from, to, nullptr, limitOp->gtLclVarCommon.gtLclNum))
828 optLoopTable[loopInd].lpFlags |= LPFLG_VAR_LIMIT;
830 else if (limitOp->gtOper == GT_ARR_LENGTH)
832 optLoopTable[loopInd].lpFlags |= LPFLG_ARRLEN_LIMIT;
838 // Save the type of the comparison between the iterator and the limit.
839 optLoopTable[loopInd].lpTestTree = relop;
843 //----------------------------------------------------------------------------------
844 // optIsLoopIncrTree: Check if loop is a tree of form v += 1 or v = v + 1
847 // incr The incr tree to be checked. Whether incr tree is
848 // oper-equal(+=, -=...) type nodes or v=v+1 type ASG nodes.
851 // The test tree is parsed to check if "iterVar" matches the lhs of the condition
852 // and the rhs limit is extracted from the "test" tree. The limit information is
853 // added to the loop table.
856 // iterVar local num if the iterVar is found, otherwise BAD_VAR_NUM.
// optIsLoopIncrTree: Pattern-match an increment tree of the form "v += c" /
// "v = v op c" and return the local number of v, or BAD_VAR_NUM on failure.
// The increment amount must be a TYP_INT constant.
858 unsigned Compiler::optIsLoopIncrTree(GenTreePtr incr)
861 genTreeOps updateOper;
// IsLclVarUpdateTree extracts the updated local, the rhs value, and the oper.
862 unsigned iterVar = incr->IsLclVarUpdateTree(&incrVal, &updateOper);
863 if (iterVar != BAD_VAR_NUM)
865 // We have v = v op y type asg node.
878 // Increment should be by a const int.
879 // TODO-CQ: CLONE: allow variable increments.
880 if ((incrVal->gtOper != GT_CNS_INT) || (incrVal->TypeGet() != TYP_INT))
889 //----------------------------------------------------------------------------------
890 // optComputeIterInfo: Check tree is loop increment of a lcl that is loop-invariant.
893 // from, to - are blocks (beg, end) which are part of the loop.
894 // incr - tree that increments the loop iterator. v+=1 or v=v+1.
895 // pIterVar - see return value.
898 // Returns true if iterVar "v" can be returned in "pIterVar", otherwise returns
902 // Check if the "incr" tree is a "v=v+1 or v+=1" type tree and make sure it is not
903 // assigned in the loop.
// optComputeIterInfo: Confirm "incr" is a v=v+1/v+=1-style increment and that
// the iteration variable is not assigned anywhere else in the loop body
// [from, to]. On success, returns true with the variable in *pIterVar.
905 bool Compiler::optComputeIterInfo(GenTreePtr incr, BasicBlock* from, BasicBlock* to, unsigned* pIterVar)
908 unsigned iterVar = optIsLoopIncrTree(incr);
909 if (iterVar == BAD_VAR_NUM)
// The incr tree itself is excluded from the assignment search.
913 if (optIsVarAssigned(from, to, incr, iterVar))
915 JITDUMP("iterVar is assigned in loop\n");
923 //----------------------------------------------------------------------------------
924 // optIsLoopTestEvalIntoTemp:
925 // Pattern match if the test tree is computed into a tmp
926 // and the "tmp" is used as jump condition for loop termination.
929 // testStmt - is the JTRUE statement that is of the form: jmpTrue (Vtmp != 0)
930 // where Vtmp contains the actual loop test result.
931 // newStmt - contains the statement that is the actual test stmt involving
932 // the loop iterator.
935 // Returns true if a new test tree can be obtained.
938 // Scan if the current stmt is a jtrue with (Vtmp != 0) as condition
939 // Then returns the rhs for def of Vtmp as the "test" node.
942 // This method just retrieves what it thinks is the "test" node,
943 // the callers are expected to verify that "iterVar" is used in the test.
// optIsLoopTestEvalIntoTemp: Recognize the pattern where the loop condition
// is computed into a temp which is then tested:  tmp = <relop>; jmpTrue(tmp != 0).
// On a match, *newTest receives the statement defining the temp so callers can
// treat the relop as the real loop test. The callers must still verify the
// iterator is used in that relop.
945 bool Compiler::optIsLoopTestEvalIntoTemp(GenTreePtr testStmt, GenTreePtr* newTest)
947 GenTreePtr test = testStmt->gtStmt.gtStmtExpr;
949 if (test->gtOper != GT_JTRUE)
954 GenTreePtr relop = test->gtGetOp1();
955 noway_assert(relop->OperIsCompare());
957 GenTreePtr opr1 = relop->gtOp.gtOp1;
958 GenTreePtr opr2 = relop->gtOp.gtOp2;
960 // Make sure we have jtrue (vtmp != 0)
961 if ((relop->OperGet() == GT_NE) && (opr1->OperGet() == GT_LCL_VAR) && (opr2->OperGet() == GT_CNS_INT) &&
962 opr2->IsIntegralConst(0))
964 // Get the previous statement to get the def (rhs) of Vtmp to see
965 // if the "test" is evaluated into Vtmp.
966 GenTreePtr prevStmt = testStmt->gtPrev;
967 if (prevStmt == nullptr)
972 GenTreePtr tree = prevStmt->gtStmt.gtStmtExpr;
973 if (tree->OperGet() == GT_ASG)
975 GenTreePtr lhs = tree->gtOp.gtOp1;
976 GenTreePtr rhs = tree->gtOp.gtOp2;
978 // Return as the new test node.
// Require the assigned local to be the same temp that the JTRUE reads,
// and the rhs to actually be a comparison.
979 if (lhs->gtOper == GT_LCL_VAR && lhs->AsLclVarCommon()->GetLclNum() == opr1->AsLclVarCommon()->GetLclNum())
981 if (rhs->OperIsCompare())
992 //----------------------------------------------------------------------------------
993 // optExtractInitTestIncr:
994 // Extract the "init", "test" and "incr" nodes of the loop.
997 // head - Loop head block
998 // bottom - Loop bottom block
999 // top - Loop top block
1000 // ppInit - The init stmt of the loop if found.
1001 // ppTest - The test stmt of the loop if found.
1002 // ppIncr - The incr stmt of the loop if found.
1005 // The results are put in "ppInit", "ppTest" and "ppIncr" if the method
1006 // returns true. Returns false if the information can't be extracted.
1009 // Check if the "test" stmt is last stmt in the loop "bottom". If found good,
1010 // "test" stmt is found. Try to find the "incr" stmt. Check previous stmt of
1011 // "test" to get the "incr" stmt. If it is not found it could be a loop of the
1014 // +-------<-----------------<-----------+
1017 // BBinit(head) -> BBcond(top) -> BBLoopBody(bottom) ---^
1019 // Check if the "incr" tree is present in the loop "top" node as the last stmt.
1020 // Also check if the "test" tree is assigned to a tmp node and the tmp is used
1021 // in the jtrue condition.
1024 // This method just retrieves what it thinks is the "test" node,
1025 // the callers are expected to verify that "iterVar" is used in the test.
// optExtractInitTestIncr: Locate the loop's "init", "test" and "incr"
// statements. The test is expected to be the last statement of 'bottom';
// the incr either precedes it or is the last statement of 'top' (the
// rotated-loop shape); the init is the last statement of 'head' (skipping a
// duplicated GTF_STMT_CMPADD loop condition if present).
// Returns false when any of the three cannot be found; on success the raw
// expression trees are returned through ppInit/ppTest/ppIncr.
1027 bool Compiler::optExtractInitTestIncr(
1028 BasicBlock* head, BasicBlock* bottom, BasicBlock* top, GenTreePtr* ppInit, GenTreePtr* ppTest, GenTreePtr* ppIncr)
1030 assert(ppInit != nullptr);
1031 assert(ppTest != nullptr);
1032 assert(ppIncr != nullptr);
1034 // Check if last two statements in the loop body are the increment of the iterator
1035 // and the loop termination test.
1036 noway_assert(bottom->bbTreeList != nullptr);
// bbTreeList->gtPrev is the last statement of the block (circular list).
1037 GenTreePtr test = bottom->bbTreeList->gtPrev;
1038 noway_assert(test != nullptr && test->gtNext == nullptr);
// The real test may be evaluated into a temp; if so, use the defining stmt.
1041 if (optIsLoopTestEvalIntoTemp(test, &newTest))
1046 // Check if we have the incr tree before the test tree, if we don't,
1047 // check if incr is part of the loop "top".
1048 GenTreePtr incr = test->gtPrev;
1049 if (incr == nullptr || optIsLoopIncrTree(incr->gtStmt.gtStmtExpr) == BAD_VAR_NUM)
1051 if (top == nullptr || top->bbTreeList == nullptr || top->bbTreeList->gtPrev == nullptr)
1056 // If the prev stmt to loop test is not incr, then check if we have loop test evaluated into a tmp.
1057 GenTreePtr topLast = top->bbTreeList->gtPrev;
1058 if (optIsLoopIncrTree(topLast->gtStmt.gtStmtExpr) != BAD_VAR_NUM)
1068 assert(test != incr);
1070 // Find the last statement in the loop pre-header which we expect to be the initialization of
1071 // the loop iterator.
1072 GenTreePtr phdr = head->bbTreeList;
1073 if (phdr == nullptr)
1078 GenTreePtr init = phdr->gtPrev;
1079 noway_assert(init != nullptr && (init->gtNext == nullptr));
1081 // If it is a duplicated loop condition, skip it.
1082 if (init->gtFlags & GTF_STMT_CMPADD)
1084 // Must be a duplicated loop condition.
1085 noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
1086 init = init->gtPrev;
1087 noway_assert(init != nullptr);
1090 noway_assert(init->gtOper == GT_STMT);
1091 noway_assert(test->gtOper == GT_STMT);
1092 noway_assert(incr->gtOper == GT_STMT);
// Unwrap the GT_STMT nodes and hand back the underlying expressions.
1094 *ppInit = init->gtStmt.gtStmtExpr;
1095 *ppTest = test->gtStmt.gtStmtExpr;
1096 *ppIncr = incr->gtStmt.gtStmtExpr;
1101 /*****************************************************************************
1103 * Record the loop in the loop table.
// optRecordLoop: Add a newly discovered natural loop (head/first/top/entry/
// bottom/exit/exitCnt) to the loop table, keeping the table ordered so that
// containing loops appear before the loops they contain. Also attempts to
// recognize a for-like iterator (init/test/incr) and, when successful, sets
// LPFLG_ITER plus the const/var init and limit flags.
// Silently gives up (loopOverflowThisMethod) when MAX_LOOP_NUM is reached.
1106 void Compiler::optRecordLoop(BasicBlock* head,
1112 unsigned char exitCnt)
1114 // Record this loop in the table, if there's room.
1116 assert(optLoopCount <= MAX_LOOP_NUM);
1117 if (optLoopCount == MAX_LOOP_NUM)
1120 loopOverflowThisMethod = true;
1125 // Assumed preconditions on the loop we're adding.
1126 assert(first->bbNum <= top->bbNum);
1127 assert(top->bbNum <= entry->bbNum);
1128 assert(entry->bbNum <= bottom->bbNum);
1129 assert(head->bbNum < top->bbNum || head->bbNum > bottom->bbNum);
1131 // If the new loop contains any existing ones, add it in the right place.
1132 unsigned char loopInd = optLoopCount;
1133 for (unsigned char prevPlus1 = optLoopCount; prevPlus1 > 0; prevPlus1--)
1135 unsigned char prev = prevPlus1 - 1;
1136 if (optLoopTable[prev].lpContainedBy(first, bottom))
1141 // Move up any loops if necessary.
1142 for (unsigned j = optLoopCount; j > loopInd; j--)
1144 optLoopTable[j] = optLoopTable[j - 1];
// Sanity: every later loop must be either disjoint from, or contained in, the new one.
1148 for (unsigned i = loopInd + 1; i < optLoopCount; i++)
1150 // The loop is well-formed.
1151 assert(optLoopTable[i].lpWellFormed());
1152 // Check for disjoint.
1153 if (optLoopTable[i].lpDisjoint(first, bottom))
1157 // Otherwise, assert complete containment (of optLoopTable[i] in new loop).
1158 assert(optLoopTable[i].lpContainedBy(first, bottom));
// Populate the new descriptor's block pointers and clear derived state.
1162 optLoopTable[loopInd].lpHead = head;
1163 optLoopTable[loopInd].lpFirst = first;
1164 optLoopTable[loopInd].lpTop = top;
1165 optLoopTable[loopInd].lpBottom = bottom;
1166 optLoopTable[loopInd].lpEntry = entry;
1167 optLoopTable[loopInd].lpExit = exit;
1168 optLoopTable[loopInd].lpExitCnt = exitCnt;
1170 optLoopTable[loopInd].lpParent = BasicBlock::NOT_IN_LOOP;
1171 optLoopTable[loopInd].lpChild = BasicBlock::NOT_IN_LOOP;
1172 optLoopTable[loopInd].lpSibling = BasicBlock::NOT_IN_LOOP;
1174 optLoopTable[loopInd].lpFlags = 0;
1176 // We haven't yet recorded any side effects.
1177 optLoopTable[loopInd].lpLoopHasHeapHavoc = false;
1178 optLoopTable[loopInd].lpFieldsModified = nullptr;
1179 optLoopTable[loopInd].lpArrayElemTypesModified = nullptr;
1181 // If DO-WHILE loop mark it as such.
1182 if (head->bbNext == entry)
1184 optLoopTable[loopInd].lpFlags |= LPFLG_DO_WHILE;
1187 // If single exit loop mark it as such.
1191 optLoopTable[loopInd].lpFlags |= LPFLG_ONE_EXIT;
1195 // Try to find loops that have an iterator (i.e. for-like loops) "for (init; test; incr){ ... }"
1196 // We have the following restrictions:
1197 // 1. The loop condition must be a simple one i.e. only one JTRUE node
1198 // 2. There must be a loop iterator (a local var) that is
1199 // incremented (decremented or lsh, rsh, mul) with a constant value
1200 // 3. The iterator is incremented exactly once
1201 // 4. The loop condition must use the iterator.
1203 if (bottom->bbJumpKind == BBJ_COND)
1208 if (!optExtractInitTestIncr(head, bottom, top, &init, &test, &incr))
1213 unsigned iterVar = BAD_VAR_NUM;
1214 if (!optComputeIterInfo(incr, head->bbNext, bottom, &iterVar))
1219 // Make sure the "iterVar" initialization is never skipped,
1220 // i.e. HEAD dominates the ENTRY.
1221 if (!fgDominate(head, entry))
1226 if (!optPopulateInitInfo(loopInd, init, iterVar))
1231 // Check that the iterator is used in the loop condition.
1232 if (!optCheckIterInLoopTest(loopInd, test, head->bbNext, bottom, iterVar))
1237 // We know the loop has an iterator at this point ->flag it as LPFLG_ITER
1238 // Record the iterator, the pointer to the test node
1239 // and the initial value of the iterator (constant or local var)
1240 optLoopTable[loopInd].lpFlags |= LPFLG_ITER;
1243 optLoopTable[loopInd].lpIterTree = incr;
1246 // Save the initial value of the iterator - can be lclVar or constant
1247 // Flag the loop accordingly.
1253 simpleTestLoopCount++;
1256 // Check if a constant iteration loop.
1257 if ((optLoopTable[loopInd].lpFlags & LPFLG_CONST_INIT) && (optLoopTable[loopInd].lpFlags & LPFLG_CONST_LIMIT))
1259 // This is a constant loop.
1260 optLoopTable[loopInd].lpFlags |= LPFLG_CONST;
1262 constIterLoopCount++;
1269 printf("\nConstant loop initializer:\n");
1272 printf("\nConstant loop body:\n");
// Debug dump of the loop body, stopping at the incr statement.
1274 BasicBlock* block = head;
1277 block = block->bbNext;
1278 for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
1280 if (stmt->gtStmt.gtStmtExpr == incr)
1285 gtDispTree(stmt->gtStmt.gtStmtExpr);
1287 } while (block != bottom);
1293 DBEXEC(verbose, optPrintLoopRecording(loopInd));
1298 //------------------------------------------------------------------------
1299 // optPrintLoopRecording: Print a recording of the loop.
1302 // loopInd - loop index.
// optPrintLoopRecording: Debug helper that prints the loop just added to the
// loop table — its block structure, and (for iterator loops) the iterator
// variable, its init value, the test operator, and the limit.
1304 void Compiler::optPrintLoopRecording(unsigned loopInd)
1306 printf("Recorded loop %s", (loopInd != optLoopCount ? "(extended) " : ""));
1307 optPrintLoopInfo(optLoopCount, // Not necessarily the loop index, but the number of loops that have been added.
1308 optLoopTable[loopInd].lpHead, optLoopTable[loopInd].lpFirst, optLoopTable[loopInd].lpTop,
1309 optLoopTable[loopInd].lpEntry, optLoopTable[loopInd].lpBottom, optLoopTable[loopInd].lpExitCnt,
1310 optLoopTable[loopInd].lpExit);
1312 // If an iterator loop print the iterator and the initialization.
1313 if (optLoopTable[loopInd].lpFlags & LPFLG_ITER)
1315 printf(" [over V%02u", optLoopTable[loopInd].lpIterVar());
1317 printf(GenTree::NodeName(optLoopTable[loopInd].lpIterOper()));
1319 printf("%d )", optLoopTable[loopInd].lpIterConst());
1321 if (optLoopTable[loopInd].lpFlags & LPFLG_CONST_INIT)
1323 printf(" from %d", optLoopTable[loopInd].lpConstInit);
1325 if (optLoopTable[loopInd].lpFlags & LPFLG_VAR_INIT)
1327 printf(" from V%02u", optLoopTable[loopInd].lpVarInit);
1330 // If a simple test condition print operator and the limits */
1331 printf(GenTree::NodeName(optLoopTable[loopInd].lpTestOper()));
1333 if (optLoopTable[loopInd].lpFlags & LPFLG_CONST_LIMIT)
1335 printf("%d ", optLoopTable[loopInd].lpConstLimit());
1338 if (optLoopTable[loopInd].lpFlags & LPFLG_VAR_LIMIT)
1340 printf("V%02u ", optLoopTable[loopInd].lpVarLimit());
// optCheckPreds: Debug-only consistency check of predecessor lists — every
// pred recorded on a block must itself be in the block list, and its jump
// kind must be consistent with an edge to that block.
1349 void Compiler::optCheckPreds()
1352 BasicBlock* blockPred;
1355 for (block = fgFirstBB; block; block = block->bbNext)
1357 for (pred = block->bbPreds; pred; pred = pred->flNext)
1359 // make sure this pred is part of the BB list
1360 for (blockPred = fgFirstBB; blockPred; blockPred = blockPred->bbNext)
1362 if (blockPred == pred->flBlock)
1367 noway_assert(blockPred);
// Validate the edge direction implied by the predecessor's jump kind.
1368 switch (blockPred->bbJumpKind)
1371 if (blockPred->bbJumpDest == block)
1377 noway_assert(blockPred->bbNext == block);
1379 case BBJ_EHFILTERRET:
1381 case BBJ_EHCATCHRET:
1382 noway_assert(blockPred->bbJumpDest == block);
1393 /*****************************************************************************
1394 * Find the natural loops, using dominators. Note that the test for
1395 * a loop is slightly different from the standard one, because we have
1396 * not done a depth first reordering of the basic blocks.
// Recognized loops are recorded in the loop table via optRecordLoop. After
// all candidates are processed, parent/child nesting is computed, each block
// is labeled with its innermost loop (bbNatLoopNum), and loop nests are
// canonicalized so that every loop has a unique "top" block.
1399 void Compiler::optFindNaturalLoops()
1404 printf("*************** In optFindNaturalLoops()\n");
1410 flowList* predEntry;
// Dominator information must be available; the loop tests below rely on it.
1412 noway_assert(fgDomsComputed);
1416 hasMethodLoops = false;
1417 loopsThisMethod = 0;
1418 loopOverflowThisMethod = false;
1421 /* We will use the following terminology:
1422 * HEAD - the basic block that flows into the loop ENTRY block (Currently MUST be lexically before entry).
1423 Not part of the looping of the loop.
1424 * FIRST - the lexically first basic block (in bbNext order) within this loop. (May be part of a nested loop,
1425 * but not the outer loop. ???)
1426 * TOP - the target of the backward edge from BOTTOM. In most cases FIRST and TOP are the same.
1427 * BOTTOM - the lexically last block in the loop (i.e. the block from which we jump to the top)
1428 * EXIT - the loop exit or the block right after the bottom
1429 * ENTRY - the entry in the loop (not necessarily the TOP), but there must be only one entry
1431 * We (currently) require the body of a loop to be a contiguous (in bbNext order) sequence of basic blocks.
// Number of loop exits discovered for the current candidate.
1465 unsigned char exitCount;
// Consider every block (except the last) as a potential loop HEAD.
1467 for (head = fgFirstBB; head->bbNext; head = head->bbNext)
1473 // Blocks that are rarely run have a zero bbWeight and should
1474 // never be optimized here
1476 if (top->bbWeight == BB_ZERO_WEIGHT)
// Scan the predecessors of "top" looking for a backward edge.
1481 for (pred = top->bbPreds; pred; pred = pred->flNext)
1483 /* Is this a loop candidate? - We look for "back edges", i.e. an edge from BOTTOM
1484 * to TOP (note that this is an abuse of notation since this is not necessarily a back edge
1485 * as the definition says, but merely an indication that we have a loop there).
1486 * Thus, we have to be very careful and after entry discovery check that it is indeed
1487 * the only place we enter the loop (especially for non-reducible flow graphs).
1490 bottom = pred->flBlock;
1493 if (top->bbNum <= bottom->bbNum) // is this a backward edge? (from BOTTOM to TOP)
1495 if ((bottom->bbJumpKind == BBJ_EHFINALLYRET) || (bottom->bbJumpKind == BBJ_EHFILTERRET) ||
1496 (bottom->bbJumpKind == BBJ_EHCATCHRET) || (bottom->bbJumpKind == BBJ_CALLFINALLY) ||
1497 (bottom->bbJumpKind == BBJ_SWITCH))
1499 /* BBJ_EHFINALLYRET, BBJ_EHFILTERRET, BBJ_EHCATCHRET, and BBJ_CALLFINALLY can never form a loop.
1500 * BBJ_SWITCH that has a backward jump appears only for labeled break. */
1504 BasicBlock* loopBlock;
1506 /* The presence of a "back edge" is an indication that a loop might be present here
1509 * 1. A collection of STRONGLY CONNECTED nodes i.e. there is a path from any
1510 * node in the loop to any other node in the loop (wholly within the loop)
1511 * 2. The loop has a unique ENTRY, i.e. there is only one way to reach a node
1512 * in the loop from outside the loop, and that is through the ENTRY
1515 /* Let's find the loop ENTRY */
1517 if (head->bbJumpKind == BBJ_ALWAYS)
1519 if (head->bbJumpDest->bbNum <= bottom->bbNum && head->bbJumpDest->bbNum >= top->bbNum)
1521 /* OK - we enter somewhere within the loop */
1522 entry = head->bbJumpDest;
1524 /* some useful asserts
1525 * Cannot enter at the top - should have been caught by redundant jumps */
1527 assert((entry != top) || (head->bbFlags & BBF_KEEP_BBJ_ALWAYS));
1531 /* special case - don't consider now */
1532 // assert (!"Loop entered in weird way!");
1536 // Can we fall through into the loop?
1537 else if (head->bbJumpKind == BBJ_NONE || head->bbJumpKind == BBJ_COND)
1539 /* The ENTRY is at the TOP (a do-while loop) */
1544 goto NO_LOOP; // head does not flow into the loop bail for now
1547 // Now we find the "first" block -- the earliest block reachable within the loop.
1548 // This is usually the same as "top", but can differ in rare cases where "top" is
1549 // the entry block of a nested loop, and that nested loop branches backwards to a
1550 // a block before "top". We find this by searching for such backwards branches
1551 // in the loop known so far.
1552 BasicBlock* first = top;
1553 BasicBlock* newFirst;
1554 bool blocksToSearch = true;
1555 BasicBlock* validatedAfter = bottom->bbNext;
// Iterate until no new, earlier "first" candidate is discovered.
1556 while (blocksToSearch)
1558 blocksToSearch = false;
1560 blocksToSearch = false;
1561 for (loopBlock = first; loopBlock != validatedAfter; loopBlock = loopBlock->bbNext)
1563 unsigned nSucc = loopBlock->NumSucc();
1564 for (unsigned j = 0; j < nSucc; j++)
1566 BasicBlock* succ = loopBlock->GetSucc(j);
// Track the lexically-earliest successor before the current "first".
1567 if ((newFirst == nullptr && succ->bbNum < first->bbNum) ||
1568 (newFirst != nullptr && succ->bbNum < newFirst->bbNum))
1574 if (newFirst != nullptr)
1576 validatedAfter = first;
1578 blocksToSearch = true;
1582 // Is "head" still before "first"? If not, we don't have a valid loop...
1583 if (head->bbNum >= first->bbNum)
1586 "Extending loop [BB%02u..BB%02u] 'first' to BB%02u captures head BB%02u. Rejecting loop.\n",
1587 top->bbNum, bottom->bbNum, first->bbNum, head->bbNum);
1591 /* Make sure ENTRY dominates all blocks in the loop
1592 * This is necessary to ensure condition 2. above
1593 * At the same time check if the loop has a single exit
1594 * point - those loops are easier to optimize */
1596 for (loopBlock = top; loopBlock != bottom->bbNext; loopBlock = loopBlock->bbNext)
1598 if (!fgDominate(entry, loopBlock))
1603 if (loopBlock == bottom)
1605 if (bottom->bbJumpKind != BBJ_ALWAYS)
1607 /* there is an exit at the bottom */
// The back edge itself must target "top".
1609 noway_assert(bottom->bbJumpDest == top);
1616 BasicBlock* exitPoint;
// Classify each loop block's jump to count/locate loop exits.
1618 switch (loopBlock->bbJumpKind)
1621 case BBJ_CALLFINALLY:
1623 case BBJ_EHCATCHRET:
1624 assert(loopBlock->bbJumpDest);
1625 exitPoint = loopBlock->bbJumpDest;
// A target outside [top..bottom] is an exit from the loop.
1627 if (exitPoint->bbNum < top->bbNum || exitPoint->bbNum > bottom->bbNum)
1629 /* exit from a block other than BOTTOM */
1638 case BBJ_EHFINALLYRET:
1639 case BBJ_EHFILTERRET:
1640 /* The "try" associated with this "finally" must be in the
1641 * same loop, so the finally block will return control inside the loop */
1646 /* those are exits from the loop */
// For switches, examine every entry of the jump target table.
1654 jumpCnt = loopBlock->bbJumpSwt->bbsCount;
1655 BasicBlock** jumpTab;
1656 jumpTab = loopBlock->bbJumpSwt->bbsDstTab;
1660 noway_assert(*jumpTab);
1661 exitPoint = *jumpTab;
1663 if (exitPoint->bbNum < top->bbNum || exitPoint->bbNum > bottom->bbNum)
1668 } while (++jumpTab, --jumpCnt);
1672 noway_assert(!"Unexpected bbJumpKind");
1677 /* Make sure we can iterate the loop (i.e. there is a way back to ENTRY)
1678 * This is to ensure condition 1. above which prevents marking fake loops
1680 * Below is an example:
1688 * The example above is not a loop since we bail after the first iteration
1690 * The condition we have to check for is
1691 * 1. ENTRY must have at least one predecessor inside the loop. Since we know that that block is
1692 * reachable, it can only be reached through ENTRY, therefore we have a way back to ENTRY
1694 * 2. If we have a GOTO (BBJ_ALWAYS) outside of the loop and that block dominates the
1695 * loop bottom then we cannot iterate
1697 * NOTE that this doesn't entirely satisfy condition 1. since "break" statements are not
1698 * part of the loop nodes (as per definition they are loop exits executed only once),
1699 * but we have no choice but to include them because we consider all blocks within TOP-BOTTOM */
1701 for (loopBlock = top; loopBlock != bottom; loopBlock = loopBlock->bbNext)
1703 switch (loopBlock->bbJumpKind)
1708 if (fgDominate(loopBlock, bottom))
1717 bool canIterateLoop = false;
// ENTRY needs at least one in-loop predecessor to close the cycle.
1719 for (predEntry = entry->bbPreds; predEntry; predEntry = predEntry->flNext)
1721 if (predEntry->flBlock->bbNum >= top->bbNum && predEntry->flBlock->bbNum <= bottom->bbNum)
1723 canIterateLoop = true;
1726 else if (predEntry->flBlock != head)
1728 // The entry block has multiple predecessors outside the loop; the 'head'
1729 // block isn't the only one. We only support a single 'head', so bail.
1734 if (!canIterateLoop)
1739 /* Double check - make sure that all loop blocks except ENTRY
1740 * have no predecessors outside the loop - this ensures only one loop entry and prevents
1741 * us from considering non-loops due to incorrectly assuming that we had a back edge
1744 * Loops of the form "while (a || b)" will be treated as 2 nested loops (with the same header)
1747 for (loopBlock = top; loopBlock != bottom->bbNext; loopBlock = loopBlock->bbNext)
1749 if (loopBlock == entry)
1754 for (predTop = loopBlock->bbPreds; predTop != nullptr; predTop = predTop->flNext)
1756 if (predTop->flBlock->bbNum < top->bbNum || predTop->flBlock->bbNum > bottom->bbNum)
1758 // noway_assert(!"Found loop with multiple entries");
1764 // Disqualify loops where the first block of the loop is less nested in EH than
1765 // the bottom block. That is, we don't want to handle loops where the back edge
1766 // goes from within an EH region to a first block that is outside that same EH
1767 // region. Note that we *do* handle loops where the first block is the *first*
1768 // block of a more nested EH region (since it is legal to branch to the first
1769 // block of an immediately more nested EH region). So, for example, disqualify
1776 // BB10 BBJ_COND => BB02
1780 // Here, BB10 is more nested than BB02.
1782 if (bottom->hasTryIndex() && !bbInTryRegions(bottom->getTryIndex(), first))
1784 JITDUMP("Loop 'first' BB%02u is in an outer EH region compared to loop 'bottom' BB%02u. Rejecting "
1786 first->bbNum, bottom->bbNum);
1790 #if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
1791 // Disqualify loops where the first block of the loop is a finally target.
1792 // The main problem is when multiple loops share a 'first' block that is a finally
1793 // target and we canonicalize the loops by adding a new loop head. In that case, we
1794 // need to update the blocks so the finally target bit is moved to the newly created
1795 // block, and removed from the old 'first' block. This is 'hard', so at this point
1796 // in the RyuJIT codebase (when we don't expect to keep the "old" ARM32 code generator
1797 // long-term), it's easier to disallow the loop than to update the flow graph to
1798 // support this case.
1800 if ((first->bbFlags & BBF_FINALLY_TARGET) != 0)
1802 JITDUMP("Loop 'first' BB%02u is a finally target. Rejecting loop.\n", first->bbNum);
1805 #endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
1807 /* At this point we have a loop - record it in the loop table
1808 * If we found only one exit, record it in the table too
1809 * (otherwise an exit = 0 in the loop table means multiple exits) */
1816 optRecordLoop(head, first, top, entry, bottom, exit, exitCount);
1819 if (!hasMethodLoops)
1821 /* mark the method as containing natural loops */
1823 hasMethodLoops = true;
1826 /* increment total number of loops found */
1830 /* keep track of the number of exits */
1831 loopExitCountTable.record(static_cast<unsigned>(exitCount));
1832 #endif // COUNT_LOOPS
1835 /* current predecessor not good for a loop - continue with another one, if any */
// COUNT_LOOPS statistics: per-method loop counts and overflow tracking.
1841 loopCountTable.record(loopsThisMethod);
1842 if (maxLoopsPerMethod < loopsThisMethod)
1844 maxLoopsPerMethod = loopsThisMethod;
1846 if (loopOverflowThisMethod)
1848 totalLoopOverflows++;
1850 #endif // COUNT_LOOPS
1852 // Now the loop indices are stable. We can figure out parent/child relationships
1853 // (using table indices to name loops), and label blocks.
1854 for (unsigned char loopInd = 1; loopInd < optLoopCount; loopInd++)
// Walk candidate parents from loopInd downwards; the first containing loop
// becomes the parent, and the child is linked onto its sibling list.
1856 for (unsigned char possibleParent = loopInd; possibleParent > 0;)
1859 if (optLoopTable[possibleParent].lpContains(optLoopTable[loopInd]))
1861 optLoopTable[loopInd].lpParent = possibleParent;
1862 optLoopTable[loopInd].lpSibling = optLoopTable[possibleParent].lpChild;
1863 optLoopTable[possibleParent].lpChild = loopInd;
1869 // Now label the blocks with the innermost loop to which they belong. Since parents
1870 // precede children in the table, doing the labeling for each loop in order will achieve
1871 // this -- the innermost loop labeling will be done last.
1872 for (unsigned char loopInd = 0; loopInd < optLoopCount; loopInd++)
1874 BasicBlock* first = optLoopTable[loopInd].lpFirst;
1875 BasicBlock* bottom = optLoopTable[loopInd].lpBottom;
1876 for (BasicBlock* blk = first; blk != nullptr; blk = blk->bbNext)
1878 blk->bbNatLoopNum = loopInd;
1883 assert(blk->bbNext != nullptr); // We should never reach nullptr.
1887 // Make sure that loops are canonical: that every loop has a unique "top", by creating an empty "nop"
1888 // one, if necessary, for loops containing others that share a "top."
1890 for (unsigned char loopInd = 0; loopInd < optLoopCount; loopInd++)
1892 // Traverse the outermost loops as entries into the loop nest; so skip non-outermost.
1893 if (optLoopTable[loopInd].lpParent != BasicBlock::NOT_IN_LOOP)
1899 if (optCanonicalizeLoopNest(loopInd))
// Canonicalization changed the flow graph; recompute derived info.
1906 fgUpdateChangedFlowGraph();
1910 if (verbose && optLoopCount > 0)
1912 printf("\nFinal natural loop table:\n");
1913 for (unsigned loopInd = 0; loopInd < optLoopCount; loopInd++)
1915 optPrintLoopInfo(loopInd);
// Redirect the jump target(s) of "blk" according to "redirectMap": any branch
// target that appears as a key in the map is replaced by the mapped block.
// Kinds with no jump destination are left untouched; BBJ_SWITCH updates every
// entry of its target table and invalidates the cached switch descriptor if
// anything changed. Note: this does NOT update predecessor lists.
1922 void Compiler::optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap)
1924 BasicBlock* newJumpDest = nullptr;
1925 switch (blk->bbJumpKind)
1930 case BBJ_EHFILTERRET:
1931 case BBJ_EHFINALLYRET:
1932 case BBJ_EHCATCHRET:
1933 // These have no jump destination to update.
1938 case BBJ_CALLFINALLY:
1940 // All of these have a single jump destination to update.
1941 if (redirectMap->Lookup(blk->bbJumpDest, &newJumpDest))
1943 blk->bbJumpDest = newJumpDest;
1949 bool redirected = false;
1950 for (unsigned i = 0; i < blk->bbJumpSwt->bbsCount; i++)
1952 if (redirectMap->Lookup(blk->bbJumpSwt->bbsDstTab[i], &newJumpDest))
1954 blk->bbJumpSwt->bbsDstTab[i] = newJumpDest;
1958 // If any redirections happened, invalidate the switch table map for the switch.
1961 GetSwitchDescMap()->Remove(blk);
1971 // TODO-Cleanup: This should be a static member of the BasicBlock class.
// Copy the jump destination(s) of "from" onto "to". Both blocks must already
// have the same bbJumpKind (asserted below). For BBJ_SWITCH, a fresh BBswtDesc
// and target table are allocated so "to" does not alias "from"'s table.
1972 void Compiler::optCopyBlkDest(BasicBlock* from, BasicBlock* to)
1974 assert(from->bbJumpKind == to->bbJumpKind); // Precondition.
1976 // copy the jump destination(s) from "from" to "to".
1977 switch (to->bbJumpKind)
1981 case BBJ_CALLFINALLY:
1983 // All of these have a single jump destination to update.
1984 to->bbJumpDest = from->bbJumpDest;
// Deep-copy the switch descriptor and its destination table.
1989 to->bbJumpSwt = new (this, CMK_BasicBlock) BBswtDesc();
1990 to->bbJumpSwt->bbsCount = from->bbJumpSwt->bbsCount;
1991 to->bbJumpSwt->bbsDstTab = new (this, CMK_BasicBlock) BasicBlock*[from->bbJumpSwt->bbsCount];
1993 for (unsigned i = 0; i < from->bbJumpSwt->bbsCount; i++)
1995 to->bbJumpSwt->bbsDstTab[i] = from->bbJumpSwt->bbsDstTab[i];
2005 // Canonicalize the loop nest rooted at parent loop 'loopInd'.
2006 // Returns 'true' if the flow graph is modified.
// Canonicalizes 'loopInd' itself, then recurses over its children via the
// lpChild / lpSibling links.
2007 bool Compiler::optCanonicalizeLoopNest(unsigned char loopInd)
2009 bool modified = false;
2011 // Is the top of the current loop not in any nested loop?
2012 if (optLoopTable[loopInd].lpTop->bbNatLoopNum != loopInd)
2014 if (optCanonicalizeLoop(loopInd))
// Recurse into each child loop of 'loopInd'.
2020 for (unsigned char child = optLoopTable[loopInd].lpChild; child != BasicBlock::NOT_IN_LOOP;
2021 child = optLoopTable[child].lpSibling)
2023 if (optCanonicalizeLoopNest(child))
// Ensure loop "loopInd" has a "top" block that belongs to it uniquely
// (t->bbNatLoopNum == loopInd). If the top is shared with a nested loop,
// insert a new empty block "newT" immediately before the loop's first block,
// redirect the loop's bottom (and any non-loop predecessors of the old top)
// to it, and update the loop table (top/first, and entry/head where needed).
// Returns true when the flow graph was modified (see optCanonicalizeLoopNest).
2032 bool Compiler::optCanonicalizeLoop(unsigned char loopInd)
2034 // Is the top uniquely part of the current loop?
2035 BasicBlock* t = optLoopTable[loopInd].lpTop;
2037 if (t->bbNatLoopNum == loopInd)
2042 JITDUMP("in optCanonicalizeLoop: L%02u has top BB%02u (bottom BB%02u) with natural loop number L%02u: need to "
2044 loopInd, t->bbNum, optLoopTable[loopInd].lpBottom->bbNum, t->bbNatLoopNum);
2046 // Otherwise, the top of this loop is also part of a nested loop.
2048 // Insert a new unique top for this loop. We must be careful to put this new
2049 // block in the correct EH region. Note that f->bbPrev might be in a different
2050 // EH region. For example:
2058 // In this case, first->bbPrev is BB07, which is in a different 'try' region.
2059 // On the other hand, the first block of multiple loops might be the first
2060 // block of a 'try' region that is completely contained in the multiple loops.
2065 // BB10 BBJ_ALWAYS => BB08
2067 // BB12 BBJ_ALWAYS => BB08
2069 // Here, we have two loops, both with BB08 as the "first" block. Block BB08
2070 // is a single-block "try" region. Neither loop "bottom" block is in the same
2071 // "try" region as BB08. This is legal because you can jump to the first block
2072 // of a try region. With EH normalization, no two "try" regions will share
2073 // this block. In this case, we need to insert a new block for the outer loop
2074 // in the same EH region as the branch from the "bottom":
2079 // BB10 BBJ_ALWAYS => BB08
2081 // BB12 BBJ_ALWAYS => BB30
2083 // Another possibility is that the "first" block of the loop nest can be the first block
2084 // of a "try" region that also has other predecessors than those in the loop, or even in
2085 // the "try" region (since blocks can target the first block of a "try" region). For example:
2089 // BB10 BBJ_ALWAYS => BB08
2091 // BB12 BBJ_ALWAYS => BB08
2094 // BB20 BBJ_ALWAYS => BB08
2096 // BB25 BBJ_ALWAYS => BB08
2098 // Here, BB08 has 4 flow graph predecessors: BB10, BB12, BB20, BB25. These are all potential loop
2099 // bottoms, for four possible nested loops. However, we require all the loop bottoms to be in the
2100 // same EH region. For loops BB08..BB10 and BB08..BB12, we need to add a new "top" block within
2101 // the try region, immediately before BB08. The bottom of the loop BB08..BB10 loop will target the
2102 // old BB08, and the bottom of the BB08..BB12 loop will target the new loop header. The other branches
2103 // (BB20, BB25) must target the new loop header, both for correctness, and to avoid the illegal
2104 // situation of branching to a non-first block of a 'try' region.
2106 // We can also have a loop nest where the "first" block is outside of a "try" region
2107 // and the back edges are inside a "try" region, for example:
2111 // BB09 try { BBJ_COND => BB02
2113 // BB15 BBJ_COND => BB02
2115 // BB21 } // end of "try"
2117 // In this case, both loop back edges were formed by "leave" instructions that were
2118 // imported into branches that were later made conditional. In this case, we don't
2119 // want to copy the EH region of the back edge, since that would create a block
2120 // outside of and disjoint with the "try" region of the back edge. However, to
2121 // simplify things, we disqualify this type of loop, so we should never see this here.
2123 BasicBlock* h = optLoopTable[loopInd].lpHead;
2124 BasicBlock* f = optLoopTable[loopInd].lpFirst;
2125 BasicBlock* b = optLoopTable[loopInd].lpBottom;
2127 // The loop must be entirely contained within a single handler region.
2128 assert(BasicBlock::sameHndRegion(f, b));
2130 // If the bottom block is in the same "try" region, then we extend the EH
2131 // region. Otherwise, we add the new block outside the "try" region.
2132 bool extendRegion = BasicBlock::sameTryRegion(f, b);
2133 BasicBlock* newT = fgNewBBbefore(BBJ_NONE, f, extendRegion);
2136 // We need to set the EH region manually. Set it to be the same
2137 // as the bottom block.
2138 newT->copyEHRegion(b);
// The new top is reachable exactly where the old top was.
2141 BlockSetOps::Assign(this, newT->bbReach, t->bbReach);
2143 // Redirect the "bottom" of the current loop to "newT".
2144 BlockToBlockMap* blockMap = new (getAllocatorLoopHoist()) BlockToBlockMap(getAllocatorLoopHoist());
2145 blockMap->Set(t, newT);
2146 optRedirectBlock(b, blockMap);
2148 // Redirect non-loop preds of "t" to also go to "newT". Inner loops that also branch to "t" should continue
2149 // to do so. However, there may be other predecessors from outside the loop nest that need to be updated
2150 // to point to "newT". This normally wouldn't happen, since they too would be part of the loop nest. However,
2151 // they might have been prevented from participating in the loop nest due to different EH nesting, or some
2154 // Note that optRedirectBlock doesn't update the predecessors list. So, if the same 't' block is processed
2155 // multiple times while canonicalizing multiple loop nests, we'll attempt to redirect a predecessor multiple times.
2156 // This is ok, because after the first redirection, the topPredBlock branch target will no longer match the source
2157 // edge of the blockMap, so nothing will happen.
2158 for (flowList* topPred = t->bbPreds; topPred != nullptr; topPred = topPred->flNext)
2160 BasicBlock* topPredBlock = topPred->flBlock;
2162 // Skip if topPredBlock is in the loop.
2163 // Note that this uses block number to detect membership in the loop. We are adding blocks during
2164 // canonicalization, and those block numbers will be new, and larger than previous blocks. However, we work
2165 // outside-in, so we shouldn't encounter the new blocks at the loop boundaries, or in the predecessor lists.
2166 if (t->bbNum <= topPredBlock->bbNum && topPredBlock->bbNum <= b->bbNum)
2168 JITDUMP("in optCanonicalizeLoop: 'top' predecessor BB%02u is in the range of L%02u (BB%02u..BB%02u); not "
2169 "redirecting its bottom edge\n",
2170 topPredBlock->bbNum, loopInd, t->bbNum, b->bbNum);
2174 JITDUMP("in optCanonicalizeLoop: redirect top predecessor BB%02u to BB%02u\n", topPredBlock->bbNum,
2176 optRedirectBlock(topPredBlock, blockMap);
2179 assert(newT->bbNext == f);
// Give newT an explicit jump to the old top and a single GT_NOP statement
// so it is a well-formed, non-empty block.
2182 newT->bbJumpKind = BBJ_ALWAYS;
2183 newT->bbJumpDest = t;
2184 newT->bbTreeList = nullptr;
2185 fgInsertStmtAtEnd(newT, fgNewStmtFromTree(gtNewOperNode(GT_NOP, TYP_VOID, nullptr)));
2188 // If it had been a do-while loop (top == entry), update entry, as well.
2189 BasicBlock* origE = optLoopTable[loopInd].lpEntry;
2190 if (optLoopTable[loopInd].lpTop == origE)
2192 optLoopTable[loopInd].lpEntry = newT;
2194 optLoopTable[loopInd].lpTop = newT;
2195 optLoopTable[loopInd].lpFirst = newT;
2197 newT->bbNatLoopNum = loopInd;
2199 JITDUMP("in optCanonicalizeLoop: made new block BB%02u [%p] the new unique top of loop %d.\n", newT->bbNum,
2200 dspPtr(newT), loopInd);
2202 // Make sure the head block still goes to the entry...
2203 if (h->bbJumpKind == BBJ_NONE && h->bbNext != optLoopTable[loopInd].lpEntry)
2205 h->bbJumpKind = BBJ_ALWAYS;
2206 h->bbJumpDest = optLoopTable[loopInd].lpEntry;
2208 else if (h->bbJumpKind == BBJ_COND && h->bbNext == newT && newT != optLoopTable[loopInd].lpEntry)
// Insert an intermediate always-jump block so the conditional head still
// reaches the entry; it becomes the new loop head.
2210 BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, h, /*extendRegion*/ true);
2211 optLoopTable[loopInd].lpHead = h2;
2212 h2->bbJumpDest = optLoopTable[loopInd].lpEntry;
2213 h2->bbTreeList = nullptr;
2214 fgInsertStmtAtEnd(h2, fgNewStmtFromTree(gtNewOperNode(GT_NOP, TYP_VOID, nullptr)));
2217 // If any loops nested in "loopInd" have the same head and entry as "loopInd",
2218 // it must be the case that they were do-while's (since "h" fell through to the entry).
2219 // The new node "newT" becomes the head of such loops.
2220 for (unsigned char childLoop = optLoopTable[loopInd].lpChild; childLoop != BasicBlock::NOT_IN_LOOP;
2221 childLoop = optLoopTable[childLoop].lpSibling)
2223 if (optLoopTable[childLoop].lpEntry == origE && optLoopTable[childLoop].lpHead == h &&
2224 newT->bbJumpKind == BBJ_NONE && newT->bbNext == origE)
2226 optUpdateLoopHead(childLoop, h, newT);
// Returns whether loop "l1" (transitively) contains loop "l2": walks up l2's
// lpParent chain until it reaches l1 or runs off the top (NOT_IN_LOOP).
// "l1" must be a real loop index (asserted); NOT_IN_LOOP is only valid for l2.
2232 bool Compiler::optLoopContains(unsigned l1, unsigned l2)
2234 assert(l1 != BasicBlock::NOT_IN_LOOP);
// l2 has no parent left to examine, so l1 cannot contain it.
2239 else if (l2 == BasicBlock::NOT_IN_LOOP)
// Recurse upward through l2's enclosing loop.
2245 return optLoopContains(l1, optLoopTable[l2].lpParent);
// Change the lpHead of loop "loopInd" from "from" to "to", and recursively do
// the same for any child loops whose head was also "from".
2249 void Compiler::optUpdateLoopHead(unsigned loopInd, BasicBlock* from, BasicBlock* to)
2251 assert(optLoopTable[loopInd].lpHead == from);
2252 optLoopTable[loopInd].lpHead = to;
2253 for (unsigned char childLoop = optLoopTable[loopInd].lpChild; childLoop != BasicBlock::NOT_IN_LOOP;
2254 childLoop = optLoopTable[childLoop].lpSibling)
2256 if (optLoopTable[childLoop].lpHead == from)
2258 optUpdateLoopHead(childLoop, from, to);
2263 /*****************************************************************************
2264 * If the "i += const" will cause an overflow exception for the small types.
// Returns true if the iterator value at loop exit exceeds the maximum of the
// small integral type "incrType" (i.e. the increment would wrap the small
// type). 32-bit types return false here; their overflow is handled by the
// caller's own checks.
2267 bool jitIterSmallOverflow(int iterAtExit, var_types incrType)
2274 type_MAX = SCHAR_MAX;
2277 type_MAX = UCHAR_MAX;
2280 type_MAX = SHRT_MAX;
2283 type_MAX = USHRT_MAX;
2286 case TYP_UINT: // Detected by checking for 32bit ....
2288 return false; // ... overflow same as done for TYP_INT
2294 if (iterAtExit > type_MAX)
2304 /*****************************************************************************
2305 * If the "i -= const" will cause an underflow exception for the small types
// Returns true if the iterator value at loop exit drops below the minimum of
// the small integral type "decrType". 32-bit types return false here; their
// underflow is handled by the caller's own checks.
2308 bool jitIterSmallUnderflow(int iterAtExit, var_types decrType)
2315 type_MIN = SCHAR_MIN;
2318 type_MIN = SHRT_MIN;
2327 case TYP_UINT: // Detected by checking for 32bit ....
2329 return false; // ... underflow same as done for TYP_INT
2335 if (iterAtExit < type_MIN)
2345 /*****************************************************************************
2347 * Helper for unroll loops - Computes the number of repetitions
2348 * in a constant loop. If it cannot prove the number is constant returns false
// On success the trip count is stored in *iterCount. The computation widens
// init/limit to __int64 so signed comparisons work uniformly, narrows them
// per the iterator's (small) type, then case-splits on the loop test
// operator. It bails (returns false) for infinite loops (iterInc == 0),
// wrap-around-based counts, and small-type/32-bit overflow or underflow of
// the iterator at exit.
2351 bool Compiler::optComputeLoopRep(int constInit,
2354 genTreeOps iterOper,
2355 var_types iterOperType,
2356 genTreeOps testOper,
2359 unsigned* iterCount)
2361 noway_assert(genActualType(iterOperType) == TYP_INT);
2364 __int64 constLimitX;
2369 // Using this, we can just do a signed comparison with other 32 bit values.
2372 constLimitX = (unsigned int)constLimit;
2376 constLimitX = (signed int)constLimit;
2379 switch (iterOperType)
2381 // For small types, the iteration operator will narrow these values if big
2383 #define INIT_ITER_BY_TYPE(type) \
2384 constInitX = (type)constInit; \
2385 iterInc = (type)iterInc;
2388 INIT_ITER_BY_TYPE(signed char);
2391 INIT_ITER_BY_TYPE(unsigned char);
2394 INIT_ITER_BY_TYPE(signed short);
2397 INIT_ITER_BY_TYPE(unsigned short);
2400 // For the big types, 32 bit arithmetic is performed
2406 constInitX = (unsigned int)constInit;
2410 constInitX = (signed int)constInit;
2415 noway_assert(!"Bad type");
2419 /* If iterInc is zero we have an infinite loop */
2425 /* Set iterSign to +1 for positive iterInc and -1 for negative iterInc */
2426 iterSign = (iterInc > 0) ? +1 : -1;
2428 /* Initialize loopCount to zero */
2431 // If dupCond is true then the loop head contains a test which skips
2432 // this loop, if the constInit does not pass the loop test
2433 // Such a loop can execute zero times.
2434 // If dupCond is false then we have a true do-while loop which we
2435 // always execute the loop once before performing the loop test
2439 constInitX += iterInc;
2442 // bail if count is based on wrap-around math
2445 if (constLimitX < constInitX)
2450 else if (constLimitX > constInitX)
2455 /* Compute the number of repetitions */
2459 __int64 iterAtExitX;
// Case split on the loop's test operator (==, !=, <, <=, >, >=).
2462 /* something like "for (i=init; i == lim; i++)" doesn't make any sense */
2466 /* "for (i=init; i != lim; i+=const)" - this is tricky since it may
2467 * have a constant number of iterations or loop forever -
2468 * we have to compute (lim-init) mod iterInc to see if it is zero.
2469 * If mod iterInc is not zero then the limit test will miss and a wrap will occur
2470 * which is probably not what the end user wanted, but it is legal.
2475 /* Stepping by one, i.e. Mod with 1 is always zero */
2478 if (((constLimitX - constInitX) % iterInc) != 0)
2486 noway_assert(iterInc < 0);
2487 /* Stepping by -1, i.e. Mod with 1 is always zero */
2490 if (((constInitX - constLimitX) % (-iterInc)) != 0)
2506 if (constInitX != constLimitX)
2508 loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1;
2511 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2515 iterAtExitX = (unsigned)iterAtExitX;
2518 // Check if iteration incr will cause overflow for small types
2519 if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
2524 // iterator with 32bit overflow. Bad for TYP_(U)INT
2525 if (iterAtExitX < constLimitX)
2530 *iterCount = loopCount;
2546 noway_assert(!"Unknown operator for loop iterator");
// GT_LT-style test: count iterations while init < limit.
2560 if (constInitX < constLimitX)
2562 loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1;
2565 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2569 iterAtExitX = (unsigned)iterAtExitX;
2572 // Check if iteration incr will cause overflow for small types
2573 if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
2578 // iterator with 32bit overflow. Bad for TYP_(U)INT
2579 if (iterAtExitX < constLimitX)
2584 *iterCount = loopCount;
2600 noway_assert(!"Unknown operator for loop iterator");
// <=-style test: count iterations while init <= limit.
2614 if (constInitX <= constLimitX)
2616 loopCount += (unsigned)((constLimitX - constInitX) / iterInc) + 1;
2619 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2623 iterAtExitX = (unsigned)iterAtExitX;
2626 // Check if iteration incr will cause overflow for small types
2627 if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
2632 // iterator with 32bit overflow. Bad for TYP_(U)INT
2633 if (iterAtExitX <= constLimitX)
2638 *iterCount = loopCount;
2654 noway_assert(!"Unknown operator for loop iterator");
// >-style test (decrementing loops): count iterations while init > limit.
2668 if (constInitX > constLimitX)
2670 loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1;
2673 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2677 iterAtExitX = (unsigned)iterAtExitX;
2680 // Check if small types will underflow
2681 if (jitIterSmallUnderflow((int)iterAtExitX, iterOperType))
2686 // iterator with 32bit underflow. Bad for TYP_INT and unsigneds
2687 if (iterAtExitX > constLimitX)
2692 *iterCount = loopCount;
2708 noway_assert(!"Unknown operator for loop iterator");
// >=-style test (decrementing loops): count iterations while init >= limit.
2722 if (constInitX >= constLimitX)
2724 loopCount += (unsigned)((constLimitX - constInitX) / iterInc) + 1;
2727 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2731 iterAtExitX = (unsigned)iterAtExitX;
2734 // Check if small types will underflow
2735 if (jitIterSmallUnderflow((int)iterAtExitX, iterOperType))
2740 // iterator with 32bit underflow. Bad for TYP_INT and unsigneds
2741 if (iterAtExitX >= constLimitX)
2746 *iterCount = loopCount;
2762 noway_assert(!"Unknown operator for loop iterator");
2767 noway_assert(!"Unknown operator for loop condition");
2773 /*****************************************************************************
2775 * Look for loop unrolling candidates and unroll them
2779 #pragma warning(push)
2780 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
2782 void Compiler::optUnrollLoops()
2784 if (compCodeOpt() == SMALL_CODE)
2789 if (optLoopCount == 0)
2795 if (JitConfig.JitNoUnroll())
2801 if (optCanCloneLoops())
2809 printf("*************** In optUnrollLoops()\n");
2812 /* Look for loop unrolling candidates */
2814 /* Double loop so that after unrolling an inner loop we set change to true
2815 * and we then go back over all of the loop candidates and try to unroll
2816 * the next outer loop, until we don't unroll any loops,
2817 * then change will be false and we are done.
2821 bool change = false;
2823 for (unsigned lnum = 0; lnum < optLoopCount; lnum++)
2837 int lbeg; // initial value for iterator
2838 int llim; // limit value for iterator
2839 unsigned lvar; // iterator lclVar #
2840 int iterInc; // value to increment the iterator
2841 genTreeOps iterOper; // type of iterator increment (i.e. ASG_ADD, ASG_SUB, etc.)
2842 var_types iterOperType; // type result of the oper (for overflow instrs)
2843 genTreeOps testOper; // type of loop test (i.e. GT_LE, GT_GE, etc.)
2844 bool unsTest; // Is the comparison u/int
2846 unsigned totalIter; // total number of iterations in the constant loop
2847 unsigned loopCostSz; // Cost is size of one iteration
2848 unsigned loopFlags; // actual lpFlags
2849 unsigned requiredFlags; // required lpFlags
2851 GenTree* loopList; // new stmt list of the unrolled loop
2854 static const int ITER_LIMIT[COUNT_OPT_CODE + 1] = {
2861 noway_assert(ITER_LIMIT[SMALL_CODE] == 0);
2862 noway_assert(ITER_LIMIT[COUNT_OPT_CODE] == 0);
2864 unsigned iterLimit = (unsigned)ITER_LIMIT[compCodeOpt()];
2867 if (compStressCompile(STRESS_UNROLL_LOOPS, 50))
2873 static const int UNROLL_LIMIT_SZ[COUNT_OPT_CODE + 1] = {
2880 noway_assert(UNROLL_LIMIT_SZ[SMALL_CODE] == 0);
2881 noway_assert(UNROLL_LIMIT_SZ[COUNT_OPT_CODE] == 0);
2883 int unrollLimitSz = (unsigned)UNROLL_LIMIT_SZ[compCodeOpt()];
2886 if (compStressCompile(STRESS_UNROLL_LOOPS, 50))
2888 unrollLimitSz *= 10;
2892 loopFlags = optLoopTable[lnum].lpFlags;
2893 requiredFlags = LPFLG_DO_WHILE | LPFLG_ONE_EXIT | LPFLG_CONST;
2895 /* Ignore the loop if we don't have a do-while with a single exit
2896 that has a constant number of iterations */
2898 if ((loopFlags & requiredFlags) != requiredFlags)
2903 /* ignore if removed or marked as not unrollable */
2905 if (optLoopTable[lnum].lpFlags & (LPFLG_DONT_UNROLL | LPFLG_REMOVED))
2910 head = optLoopTable[lnum].lpHead;
2912 bottom = optLoopTable[lnum].lpBottom;
2913 noway_assert(bottom);
2915 /* The single exit must be at the bottom of the loop */
2916 noway_assert(optLoopTable[lnum].lpExit);
2917 if (optLoopTable[lnum].lpExit != bottom)
2922 /* Unrolling loops with jumps in them is not worth the headache
2923 * Later we might consider unrolling loops after un-switching */
2928 block = block->bbNext;
2929 noway_assert(block);
2931 if (block->bbJumpKind != BBJ_NONE)
2933 if (block != bottom)
2938 } while (block != bottom);
2940 /* Get the loop data:
2944 - iterator increment
2945 - increment operation type (i.e. ASG_ADD, ASG_SUB, etc...)
2946 - loop test type (i.e. GT_GE, GT_LT, etc...)
2949 lbeg = optLoopTable[lnum].lpConstInit;
2950 llim = optLoopTable[lnum].lpConstLimit();
2951 testOper = optLoopTable[lnum].lpTestOper();
2953 lvar = optLoopTable[lnum].lpIterVar();
2954 iterInc = optLoopTable[lnum].lpIterConst();
2955 iterOper = optLoopTable[lnum].lpIterOper();
2957 iterOperType = optLoopTable[lnum].lpIterOperType();
2958 unsTest = (optLoopTable[lnum].lpTestTree->gtFlags & GTF_UNSIGNED) != 0;
2960 if (lvaTable[lvar].lvAddrExposed)
2961 { // If the loop iteration variable is address-exposed then bail
2964 if (lvaTable[lvar].lvIsStructField)
2965 { // If the loop iteration variable is a promoted field from a struct then
2970 /* Locate the pre-header and initialization and increment/test statements */
2972 phdr = head->bbTreeList;
2974 loop = bottom->bbTreeList;
2977 init = head->lastStmt();
2978 noway_assert(init && (init->gtNext == nullptr));
2979 test = bottom->lastStmt();
2980 noway_assert(test && (test->gtNext == nullptr));
2981 incr = test->gtPrev;
2984 if (init->gtFlags & GTF_STMT_CMPADD)
2986 /* Must be a duplicated loop condition */
2987 noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
2990 init = init->gtPrev;
2998 /* Find the number of iterations - the function returns false if not a constant number */
3000 if (!optComputeLoopRep(lbeg, llim, iterInc, iterOper, iterOperType, testOper, unsTest, dupCond, &totalIter))
3005 /* Forget it if there are too many repetitions or not a constant loop */
3007 if (totalIter > iterLimit)
3012 noway_assert(init->gtOper == GT_STMT);
3013 init = init->gtStmt.gtStmtExpr;
3014 noway_assert(test->gtOper == GT_STMT);
3015 test = test->gtStmt.gtStmtExpr;
3016 noway_assert(incr->gtOper == GT_STMT);
3017 incr = incr->gtStmt.gtStmtExpr;
3019 // Don't unroll loops we don't understand.
3020 if (incr->gtOper == GT_ASG)
3025 /* Make sure everything looks ok */
3026 if ((init->gtOper != GT_ASG) || (init->gtOp.gtOp1->gtOper != GT_LCL_VAR) ||
3027 (init->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) || (init->gtOp.gtOp2->gtOper != GT_CNS_INT) ||
3028 (init->gtOp.gtOp2->gtIntCon.gtIconVal != lbeg) ||
3030 !((incr->gtOper == GT_ASG_ADD) || (incr->gtOper == GT_ASG_SUB)) ||
3031 (incr->gtOp.gtOp1->gtOper != GT_LCL_VAR) || (incr->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) ||
3032 (incr->gtOp.gtOp2->gtOper != GT_CNS_INT) || (incr->gtOp.gtOp2->gtIntCon.gtIconVal != iterInc) ||
3034 (test->gtOper != GT_JTRUE))
3036 noway_assert(!"Bad precondition in Compiler::optUnrollLoops()");
3040 /* heuristic - Estimated cost in code size of the unrolled loop */
3048 block = block->bbNext;
3050 /* Visit all the statements in the block */
3052 for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
3054 /* Get the expression and stop if end reached */
3056 GenTreePtr expr = stmt->gtStmtExpr;
3062 /* Calculate gtCostSz */
3063 gtSetStmtInfo(stmt);
3065 /* Update loopCostSz */
3066 loopCostSz += stmt->gtCostSz;
3068 } while (block != bottom);
3070 /* Compute the estimated increase in code size for the unrolled loop */
3072 unsigned int fixedLoopCostSz;
3073 fixedLoopCostSz = 8;
3076 unrollCostSz = (loopCostSz * totalIter) - (loopCostSz + fixedLoopCostSz);
3078 /* Don't unroll if too much code duplication would result. */
3080 if (unrollCostSz > unrollLimitSz)
3082 /* prevent this loop from being revisited */
3083 optLoopTable[lnum].lpFlags |= LPFLG_DONT_UNROLL;
3087 /* Looks like a good idea to unroll this loop, let's do it! */
3088 CLANG_FORMAT_COMMENT_ANCHOR;
3093 printf("\nUnrolling loop BB%02u", head->bbNext->bbNum);
3094 if (head->bbNext->bbNum != bottom->bbNum)
3096 printf("..BB%02u", bottom->bbNum);
3098 printf(" over V%02u from %u to %u", lvar, lbeg, llim);
3099 printf(" unrollCostSz = %d\n", unrollCostSz);
3104 /* Create the unrolled loop statement list */
3106 loopList = loopLast = nullptr;
3108 for (lval = lbeg; totalIter; totalIter--)
3117 block = block->bbNext;
3118 noway_assert(block);
3120 /* Visit all the statements in the block */
3122 for (stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
3124 /* Stop if we've reached the end of the loop */
3126 if (stmt->gtStmtExpr == incr)
3131 /* Clone/substitute the expression */
3133 expr = gtCloneExpr(stmt, 0, lvar, lval);
3135 // cloneExpr doesn't handle everything
3139 optLoopTable[lnum].lpFlags |= LPFLG_DONT_UNROLL;
3143 /* Append the expression to our list */
3147 loopLast->gtNext = expr;
3154 expr->gtPrev = loopLast;
3157 } while (block != bottom);
3159 /* update the new value for the unrolled iterator */
3173 noway_assert(!"Unrolling not implemented for this loop iterator");
3177 noway_assert(!"Unknown operator for constant loop iterator");
3182 /* Finish the linked list */
3186 loopList->gtPrev = loopLast;
3187 loopLast->gtNext = nullptr;
3190 /* Replace the body with the unrolled one */
3196 block = block->bbNext;
3197 noway_assert(block);
3198 block->bbTreeList = nullptr;
3199 block->bbJumpKind = BBJ_NONE;
3200 block->bbFlags &= ~BBF_NEEDS_GCPOLL;
3201 } while (block != bottom);
3203 bottom->bbJumpKind = BBJ_NONE;
3204 bottom->bbTreeList = loopList;
3205 bottom->bbFlags &= ~BBF_NEEDS_GCPOLL;
3206 bottom->modifyBBWeight(bottom->bbWeight / BB_LOOP_WEIGHT);
3210 fgMorphStmts(bottom, &dummy, &dummy, &dummy);
3212 /* Update bbRefs and bbPreds */
3213 /* Here head->bbNext is bottom !!! - Replace it */
3215 fgRemoveRefPred(head->bbNext, bottom);
3217 /* Now change the initialization statement in the HEAD to "lvar = lval;"
3218 * (the last value of the iterator in the loop)
3219 * and drop the jump condition since the unrolled loop will always execute */
3221 init->gtOp.gtOp2->gtIntCon.gtIconVal = lval;
3223 /* if the HEAD is a BBJ_COND drop the condition (and make HEAD a BBJ_NONE block) */
3225 if (head->bbJumpKind == BBJ_COND)
3227 phdr = head->bbTreeList;
3229 test = phdr->gtPrev;
3231 noway_assert(test && (test->gtNext == nullptr));
3232 noway_assert(test->gtOper == GT_STMT);
3233 noway_assert(test->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
3235 init = test->gtPrev;
3236 noway_assert(init && (init->gtNext == test));
3237 noway_assert(init->gtOper == GT_STMT);
3239 init->gtNext = nullptr;
3240 phdr->gtPrev = init;
3241 head->bbJumpKind = BBJ_NONE;
3242 head->bbFlags &= ~BBF_NEEDS_GCPOLL;
3244 /* Update bbRefs and bbPreds */
3246 fgRemoveRefPred(head->bbJumpDest, head);
3250 /* the loop must execute */
3251 noway_assert(head->bbJumpKind == BBJ_NONE);
3257 printf("Whole unrolled loop:\n");
3259 GenTreePtr s = loopList;
3263 noway_assert(s->gtOper == GT_STMT);
3274 /* Remember that something has changed */
3278 /* Make sure to update loop table */
3280 /* Use the LPFLG_REMOVED flag and update the bbLoopMask accordingly
3281 * (also make head and bottom NULL - to hit an assert or GPF) */
3283 optLoopTable[lnum].lpFlags |= LPFLG_REMOVED;
3284 optLoopTable[lnum].lpHead = optLoopTable[lnum].lpBottom = nullptr;
3296 fgDebugCheckBBlist();
3300 #pragma warning(pop)
3303 /*****************************************************************************
3305 * Return non-zero if there is a code path from 'topBB' to 'botBB' that will
3306 * not execute a method call.
3309 bool Compiler::optReachWithoutCall(BasicBlock* topBB, BasicBlock* botBB)
3311 // TODO-Cleanup: Currently BBF_GC_SAFE_POINT is not set for helper calls,
3312 // as some helper calls are neither interruptible nor hijackable.
3313 // When we can determine this, then we can set BBF_GC_SAFE_POINT for
3314 // those helpers too.
3316 noway_assert(topBB->bbNum <= botBB->bbNum);
3318 // We can always check topBB and botBB for any gc safe points and early out
3320 if ((topBB->bbFlags | botBB->bbFlags) & BBF_GC_SAFE_POINT)
3325 // Otherwise we will need to rely upon the dominator sets
3327 if (!fgDomsComputed)
3329 // return a conservative answer of true when we don't have the dominator sets
3333 BasicBlock* curBB = topBB;
3336 noway_assert(curBB);
3338 // If we added a loop pre-header block then we will
3339 // have a bbNum greater than fgLastBB, and we won't have
3340 // any dominator information about this block, so skip it.
3342 if (curBB->bbNum <= fgLastBB->bbNum)
3344 noway_assert(curBB->bbNum <= botBB->bbNum);
3346 // Does this block contain a gc safe point?
3348 if (curBB->bbFlags & BBF_GC_SAFE_POINT)
3350 // Will this block always execute on the way to botBB ?
3352 // Since we are checking every block in [topBB .. botBB] and we are using
3353 // a lexical definition of a loop.
3354 // (all that we know is that is that botBB is a back-edge to topBB)
3355 // Thus while walking blocks in this range we may encounter some blocks
3356 // that are not really part of the loop, and so we need to perform
3357 // some additional checks:
3359 // We will check that the current 'curBB' is reachable from 'topBB'
3360 // and that it dominates the block containing the back-edge 'botBB'
3361 // When both of these are true then we know that the gcsafe point in 'curBB'
3362 // will be encountered in the loop and we can return false
3364 if (fgDominate(curBB, botBB) && fgReachable(topBB, curBB))
3371 // If we've reached the destination block, then we're done
3380 curBB = curBB->bbNext;
3383 // If we didn't find any blocks that contained a gc safe point and
3384 // also met the fgDominate and fgReachable criteria then we must return true
3389 /*****************************************************************************
3391 * Find the loop termination test at the bottom of the loop
3394 static GenTreePtr optFindLoopTermTest(BasicBlock* bottom)
3396 GenTreePtr testt = bottom->bbTreeList;
3398 assert(testt && testt->gtOper == GT_STMT);
3400 GenTreePtr result = testt->gtPrev;
3403 while (testt->gtNext)
3405 testt = testt->gtNext;
3408 assert(testt == result);
3414 /*****************************************************************************
3415 * Optimize "jmp C; do{} C:while(cond);" loops to "if (cond){ do{}while(cond); }"
3418 void Compiler::fgOptWhileLoop(BasicBlock* block)
3420 noway_assert(!opts.MinOpts() && !opts.compDbgCode);
3421 noway_assert(compCodeOpt() != SMALL_CODE);
3424 Optimize while loops into do { } while loop
3425 Our loop hoisting logic requires do { } while loops.
3426 Specifically, we're looking for the following case:
3437 If we find this, and the condition is simple enough, we change
3438 the loop to the following:
3443 // else fall-through
3454 /* Does the BB end with an unconditional jump? */
3456 if (block->bbJumpKind != BBJ_ALWAYS || (block->bbFlags & BBF_KEEP_BBJ_ALWAYS))
3457 { // It can't be one of the ones we use for our exception magic
3461 // It has to be a forward jump
3462 // TODO-CQ: Check if we can also optimize the backwards jump as well.
3464 if (fgIsForwardBranch(block) == false)
3469 // Get hold of the jump target
3470 BasicBlock* bTest = block->bbJumpDest;
3472 // Does the block consist of 'jtrue(cond) block' ?
3473 if (bTest->bbJumpKind != BBJ_COND)
3478 // bTest must be a backwards jump to block->bbNext
3479 if (bTest->bbJumpDest != block->bbNext)
3484 // Since test is a BBJ_COND it will have a bbNext
3485 noway_assert(bTest->bbNext);
3487 // 'block' must be in the same try region as the condition, since we're going to insert
3488 // a duplicated condition in 'block', and the condition might include exception throwing code.
3489 if (!BasicBlock::sameTryRegion(block, bTest))
3494 // We're going to change 'block' to branch to bTest->bbNext, so that also better be in the
3495 // same try region (or no try region) to avoid generating illegal flow.
3496 BasicBlock* bTestNext = bTest->bbNext;
3497 if (bTestNext->hasTryIndex() && !BasicBlock::sameTryRegion(block, bTestNext))
3502 GenTreePtr condStmt = optFindLoopTermTest(bTest);
3504 // bTest must only contain only a jtrue with no other stmts, we will only clone
3505 // the conditional, so any other statements will not get cloned
3506 // TODO-CQ: consider cloning the whole bTest block as inserting it after block.
3508 if (bTest->bbTreeList != condStmt)
3513 /* Get to the condition node from the statement tree */
3515 noway_assert(condStmt->gtOper == GT_STMT);
3517 GenTreePtr condTree = condStmt->gtStmt.gtStmtExpr;
3518 noway_assert(condTree->gtOper == GT_JTRUE);
3520 condTree = condTree->gtOp.gtOp1;
3522 // The condTree has to be a RelOp comparison
3523 // TODO-CQ: Check if we can also optimize the backwards jump as well.
3525 if (condTree->OperIsCompare() == false)
3530 /* We call gtPrepareCost to measure the cost of duplicating this tree */
3532 gtPrepareCost(condTree);
3533 unsigned estDupCostSz = condTree->gtCostSz;
3535 double loopIterations = (double)BB_LOOP_WEIGHT;
3537 bool allProfileWeightsAreValid = false;
3538 BasicBlock::weight_t weightBlock = block->bbWeight;
3539 BasicBlock::weight_t weightTest = bTest->bbWeight;
3540 BasicBlock::weight_t weightNext = block->bbNext->bbWeight;
3542 // If we have profile data then we calculate the number of time
3543 // the loop will iterate into loopIterations
3544 if (fgIsUsingProfileWeights())
3546 // Only rely upon the profile weight when all three of these blocks
3547 // have good profile weights
3548 if ((block->bbFlags & BBF_PROF_WEIGHT) && (bTest->bbFlags & BBF_PROF_WEIGHT) &&
3549 (block->bbNext->bbFlags & BBF_PROF_WEIGHT))
3551 allProfileWeightsAreValid = true;
3553 // If this while loop never iterates then don't bother transforming
3554 if (weightNext == 0)
3559 // with (weighNext > 0) we should also have (weightTest >= weightBlock)
3560 // if the profile weights are all valid.
3562 // weightNext is the number of time this loop iterates
3563 // weightBlock is the number of times that we enter the while loop
3564 // loopIterations is the average number of times that this loop iterates
3566 if (weightTest >= weightBlock)
3568 loopIterations = (double)block->bbNext->bbWeight / (double)block->bbWeight;
3573 unsigned maxDupCostSz = 32;
3575 // optFastCodeOrBlendedLoop(bTest->bbWeight) does not work here as we have not
3576 // set loop weights yet
3577 if ((compCodeOpt() == FAST_CODE) || compStressCompile(STRESS_DO_WHILE_LOOPS, 30))
3582 // If this loop iterates a lot then raise the maxDupCost
3583 if (loopIterations >= 12.0)
3587 if (loopIterations >= 96.0)
3592 // If the loop condition has a shared static helper, we really want this loop converted
3593 // as not converting the loop will disable loop hoisting, meaning the shared helper will
3594 // be executed on every loop iteration.
3595 int countOfHelpers = 0;
3596 fgWalkTreePre(&condTree, CountSharedStaticHelper, &countOfHelpers);
3598 if (countOfHelpers > 0 && compCodeOpt() != SMALL_CODE)
3600 maxDupCostSz += 24 * min(countOfHelpers, (int)(loopIterations + 1.5));
3603 // If the compare has too high cost then we don't want to dup
3605 bool costIsTooHigh = (estDupCostSz > maxDupCostSz);
3610 printf("\nDuplication of loop condition [%06u] is %s, because the cost of duplication (%i) is %s than %i,"
3611 "\n loopIterations = %7.3f, countOfHelpers = %d, validProfileWeights = %s\n",
3612 condTree->gtTreeID, costIsTooHigh ? "not done" : "performed", estDupCostSz,
3613 costIsTooHigh ? "greater" : "less or equal", maxDupCostSz, loopIterations, countOfHelpers,
3614 allProfileWeightsAreValid ? "true" : "false");
3623 /* Looks good - duplicate the condition test */
3625 condTree->gtFlags |= GTF_RELOP_ZTT;
3627 condTree = gtCloneExpr(condTree);
3628 gtReverseCond(condTree);
3630 // Make sure clone expr copied the flag
3631 assert(condTree->gtFlags & GTF_RELOP_ZTT);
3633 condTree = gtNewOperNode(GT_JTRUE, TYP_VOID, condTree);
3635 /* Create a statement entry out of the condition and
3636 append the condition test at the end of 'block' */
3638 GenTreePtr copyOfCondStmt = fgInsertStmtAtEnd(block, condTree);
3640 copyOfCondStmt->gtFlags |= GTF_STMT_CMPADD;
3642 if (opts.compDbgInfo)
3644 copyOfCondStmt->gtStmt.gtStmtILoffsx = condStmt->gtStmt.gtStmtILoffsx;
3647 // Flag the block that received the copy as potentially having an array/vtable
3648 // reference if the block copied from did; this is a conservative guess.
3649 if (auto copyFlags = bTest->bbFlags & (BBF_HAS_VTABREF | BBF_HAS_IDX_LEN))
3651 block->bbFlags |= copyFlags;
3654 // If we have profile data for all blocks and we know that we are cloning the
3655 // bTest block into block and thus changing the control flow from block so
3656 // that it no longer goes directly to bTest anymore, we have to adjust the
3657 // weight of bTest by subtracting out the weight of block.
3659 if (allProfileWeightsAreValid)
3662 // Some additional sanity checks before adjusting the weight of bTest
3664 if ((weightNext > 0) && (weightTest >= weightBlock) && (weightTest != BB_MAX_WEIGHT))
3666 // Get the two edge that flow out of bTest
3667 flowList* edgeToNext = fgGetPredForBlock(bTest->bbNext, bTest);
3668 flowList* edgeToJump = fgGetPredForBlock(bTest->bbJumpDest, bTest);
3670 // Calculate the new weight for block bTest
3672 BasicBlock::weight_t newWeightTest =
3673 (weightTest > weightBlock) ? (weightTest - weightBlock) : BB_ZERO_WEIGHT;
3674 bTest->bbWeight = newWeightTest;
3676 if (newWeightTest == BB_ZERO_WEIGHT)
3678 bTest->bbFlags |= BBF_RUN_RARELY;
3679 // All out edge weights are set to zero
3680 edgeToNext->flEdgeWeightMin = BB_ZERO_WEIGHT;
3681 edgeToNext->flEdgeWeightMax = BB_ZERO_WEIGHT;
3682 edgeToJump->flEdgeWeightMin = BB_ZERO_WEIGHT;
3683 edgeToJump->flEdgeWeightMax = BB_ZERO_WEIGHT;
3687 // Update the our edge weights
3688 edgeToNext->flEdgeWeightMin = BB_ZERO_WEIGHT;
3689 edgeToNext->flEdgeWeightMax = min(edgeToNext->flEdgeWeightMax, newWeightTest);
3690 edgeToJump->flEdgeWeightMin = BB_ZERO_WEIGHT;
3691 edgeToJump->flEdgeWeightMax = min(edgeToJump->flEdgeWeightMax, newWeightTest);
3696 /* Change the block to end with a conditional jump */
3698 block->bbJumpKind = BBJ_COND;
3699 block->bbJumpDest = bTest->bbNext;
3701 /* Mark the jump dest block as being a jump target */
3702 block->bbJumpDest->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
3704 /* Update bbRefs and bbPreds for 'block->bbNext' 'bTest' and 'bTest->bbNext' */
3706 fgAddRefPred(block->bbNext, block);
3708 fgRemoveRefPred(bTest, block);
3709 fgAddRefPred(bTest->bbNext, block);
3714 printf("\nDuplicating loop condition in BB%02u for loop (BB%02u - BB%02u)", block->bbNum, block->bbNext->bbNum,
3716 printf("\nEstimated code size expansion is %d\n ", estDupCostSz);
3718 gtDispTree(copyOfCondStmt);
3724 /*****************************************************************************
3726 * Optimize the BasicBlock layout of the method
3729 void Compiler::optOptimizeLayout()
3731 noway_assert(!opts.MinOpts() && !opts.compDbgCode);
3736 printf("*************** In optOptimizeLayout()\n");
3740 /* Check that the flowgraph data (bbNum, bbRefs, bbPreds) is up-to-date */
3741 fgDebugCheckBBlist();
3744 noway_assert(fgModified == false);
3746 for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
3748 /* Make sure the appropriate fields are initialized */
3750 if (block->bbWeight == BB_ZERO_WEIGHT)
3752 /* Zero weighted block can't have a LOOP_HEAD flag */
3753 noway_assert(block->isLoopHead() == false);
3757 assert(block->bbLoopNum == 0);
3759 if (compCodeOpt() != SMALL_CODE)
3761 /* Optimize "while(cond){}" loops to "cond; do{}while(cond);" */
3763 fgOptWhileLoop(block);
3769 // Recompute the edge weight if we have modified the flow graph in fgOptWhileLoop
3770 fgComputeEdgeWeights();
3773 fgUpdateFlowGraph(true);
3775 fgUpdateFlowGraph();
3778 /*****************************************************************************
3780 * Perform loop inversion, find and classify natural loops
3783 void Compiler::optOptimizeLoops()
3785 noway_assert(!opts.MinOpts() && !opts.compDbgCode);
3790 printf("*************** In optOptimizeLoops()\n");
3794 optSetBlockWeights();
3796 /* Were there any loops in the flow graph? */
3800 /* now that we have dominator information we can find loops */
3802 optFindNaturalLoops();
3804 unsigned loopNum = 0;
3806 /* Iterate over the flow graph, marking all loops */
3808 /* We will use the following terminology:
3809 * top - the first basic block in the loop (i.e. the head of the backward edge)
3810 * bottom - the last block in the loop (i.e. the block from which we jump to the top)
3811 * lastBottom - used when we have multiple back-edges to the same top
3818 for (top = fgFirstBB; top; top = top->bbNext)
3820 BasicBlock* foundBottom = nullptr;
3822 for (pred = top->bbPreds; pred; pred = pred->flNext)
3824 /* Is this a loop candidate? - We look for "back edges" */
3826 BasicBlock* bottom = pred->flBlock;
3828 /* is this a backward edge? (from BOTTOM to TOP) */
3830 if (top->bbNum > bottom->bbNum)
3835 /* 'top' also must have the BBF_LOOP_HEAD flag set */
3837 if (top->isLoopHead() == false)
3842 /* We only consider back-edges that are BBJ_COND or BBJ_ALWAYS for loops */
3844 if ((bottom->bbJumpKind != BBJ_COND) && (bottom->bbJumpKind != BBJ_ALWAYS))
3849 /* the top block must be able to reach the bottom block */
3850 if (!fgReachable(top, bottom))
3855 /* Found a new loop, record the longest backedge in foundBottom */
3857 if ((foundBottom == nullptr) || (bottom->bbNum > foundBottom->bbNum))
3859 foundBottom = bottom;
3867 /* Mark the loop header as such */
3868 assert(FitsIn<unsigned char>(loopNum));
3869 top->bbLoopNum = (unsigned char)loopNum;
3872 /* Mark all blocks between 'top' and 'bottom' */
3874 optMarkLoopBlocks(top, foundBottom, false);
3877 // We track at most 255 loops
3881 totalUnnatLoopOverflows++;
3888 totalUnnatLoopCount += loopNum;
3896 printf("\nFound a total of %d loops.", loopNum);
3897 printf("\nAfter loop weight marking:\n");
3898 fgDispBasicBlocks();
3903 optLoopsMarked = true;
3907 //------------------------------------------------------------------------
3908 // optDeriveLoopCloningConditions: Derive loop cloning conditions.
3911 // loopNum - the current loop index for which conditions are derived.
3912 // context - data structure where all loop cloning info is kept.
3915 // "false" if conditions cannot be obtained. "true" otherwise.
3916 // The cloning conditions are updated in the "conditions"[loopNum] field
3917 // of the "context" parameter.
3920 // Inspect the loop cloning optimization candidates and populate the conditions necessary
3921 // for each optimization candidate. Checks if the loop stride is "> 0" if the loop
3922 // condition is "less than". If the initializer is "var" init then adds condition
3923 // "var >= 0", and if the loop is var limit then, "var >= 0" and "var <= a.len"
3924 // are added to "context". These conditions are checked in the pre-header block
3925 // and the cloning choice is made.
3928 // Callers should assume AND operation is used i.e., if all conditions are
3929 // true, then take the fast path.
3931 bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext* context)
3933 JITDUMP("------------------------------------------------------------\n");
3934 JITDUMP("Deriving cloning conditions for L%02u\n", loopNum);
3936 LoopDsc* loop = &optLoopTable[loopNum];
3937 ExpandArrayStack<LcOptInfo*>* optInfos = context->GetLoopOptInfo(loopNum);
3939 if (loop->lpTestOper() == GT_LT)
3941 // Stride conditions
3942 if (loop->lpIterConst() <= 0)
3944 JITDUMP("> Stride %d is invalid\n", loop->lpIterConst());
3949 if (loop->lpFlags & LPFLG_CONST_INIT)
3951 // Only allowing const init at this time.
3952 if (loop->lpConstInit < 0)
3954 JITDUMP("> Init %d is invalid\n", loop->lpConstInit);
3958 else if (loop->lpFlags & LPFLG_VAR_INIT)
3961 LC_Condition geZero(GT_GE, LC_Expr(LC_Ident(loop->lpVarInit, LC_Ident::Var)),
3962 LC_Expr(LC_Ident(0, LC_Ident::Const)));
3963 context->EnsureConditions(loopNum)->Push(geZero);
3967 JITDUMP("> Not variable init\n");
3973 if (loop->lpFlags & LPFLG_CONST_LIMIT)
3975 int limit = loop->lpConstLimit();
3978 JITDUMP("> limit %d is invalid\n", limit);
3981 ident = LC_Ident(limit, LC_Ident::Const);
3983 else if (loop->lpFlags & LPFLG_VAR_LIMIT)
3985 unsigned limitLcl = loop->lpVarLimit();
3986 ident = LC_Ident(limitLcl, LC_Ident::Var);
3988 LC_Condition geZero(GT_GE, LC_Expr(ident), LC_Expr(LC_Ident(0, LC_Ident::Const)));
3990 context->EnsureConditions(loopNum)->Push(geZero);
3992 else if (loop->lpFlags & LPFLG_ARRLEN_LIMIT)
3994 ArrIndex* index = new (getAllocator()) ArrIndex(getAllocator());
3995 if (!loop->lpArrLenLimit(this, index))
3997 JITDUMP("> ArrLen not matching");
4000 ident = LC_Ident(LC_Array(LC_Array::Jagged, index, LC_Array::ArrLen));
4002 // Ensure that this array must be dereference-able, before executing the actual condition.
4003 LC_Array array(LC_Array::Jagged, index, LC_Array::None);
4004 context->EnsureDerefs(loopNum)->Push(array);
4008 JITDUMP("> Undetected limit\n");
4012 for (unsigned i = 0; i < optInfos->Size(); ++i)
4014 LcOptInfo* optInfo = optInfos->GetRef(i);
4015 switch (optInfo->GetOptType())
4017 case LcOptInfo::LcJaggedArray:
4020 LcJaggedArrayOptInfo* arrIndexInfo = optInfo->AsLcJaggedArrayOptInfo();
4021 LC_Array arrLen(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::ArrLen);
4022 LC_Ident arrLenIdent = LC_Ident(arrLen);
4024 LC_Condition cond(GT_LE, LC_Expr(ident), LC_Expr(arrLenIdent));
4025 context->EnsureConditions(loopNum)->Push(cond);
4027 // Ensure that this array must be dereference-able, before executing the actual condition.
4028 LC_Array array(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::None);
4029 context->EnsureDerefs(loopNum)->Push(array);
4032 case LcOptInfo::LcMdArray:
4034 // limit <= mdArrLen
4035 LcMdArrayOptInfo* mdArrInfo = optInfo->AsLcMdArrayOptInfo();
4036 LC_Condition cond(GT_LE, LC_Expr(ident),
4037 LC_Expr(LC_Ident(LC_Array(LC_Array::MdArray,
4038 mdArrInfo->GetArrIndexForDim(getAllocator()),
4039 mdArrInfo->dim, LC_Array::None))));
4040 context->EnsureConditions(loopNum)->Push(cond);
4045 JITDUMP("Unknown opt\n");
4049 JITDUMP("Conditions: (");
4050 DBEXEC(verbose, context->PrintConditions(loopNum));
4057 //------------------------------------------------------------------------------------
4058 // optComputeDerefConditions: Derive loop cloning conditions for dereferencing arrays.
4061 // loopNum - the current loop index for which conditions are derived.
4062 // context - data structure where all loop cloning info is kept.
4065 // "false" if conditions cannot be obtained. "true" otherwise.
4066 // The deref conditions are updated in the "derefConditions"[loopNum] field
4067 // of the "context" parameter.
4069 // Definition of Deref Conditions:
4070 // To be able to check for the loop cloning condition that (limitVar <= a.len)
4071 // we should first be able to dereference "a". i.e., "a" is non-null.
4077 // for (k in 0..n) // Inner most loop is being cloned. Cloning needs to check if
4078 // // (n <= a[i][j].len) and other safer conditions to take the fast path
4081 // Now, we want to deref a[i][j] to invoke length operator on it to perform the cloning fast path check.
4082 // This involves deref of (a), (a[i]), (a[i][j]), therefore, the following should first
4083 // be true to do the deref.
4085 // (a != null) && (i < a.len) && (a[i] != null) && (j < a[i].len) && (a[i][j] != null) --> (1)
4087 // Note the short circuiting AND. Implication: these conditions should be performed in separate
4088 // blocks each of which will branch to slow path if the condition evaluates to false.
4090 // Now, imagine a situation where we have
4091 // a[x][y][k] = 20 and a[i][j][k] = 0
4092 // also in the inner most loop where x, y are parameters, then our conditions will have
4096 // in addition to the above conditions (1) to get rid of bounds check on index 'k'
4098 // But these conditions can be checked together with conditions
4099 // (i < a.len) without a need for a separate block. In summary, the conditions will be:
4102 // ((i < a.len) & (x < a.len)) && <-- Note the bitwise AND here.
4103 // (a[i] != null & a[x] != null) && <-- Note the bitwise AND here.
4104 // (j < a[i].len & y < a[x].len) && <-- Note the bitwise AND here.
4105 // (a[i][j] != null & a[x][y] != null) <-- Note the bitwise AND here.
4107 // This naturally yields a tree style pattern, where the nodes of the tree are
4108 // the array and indices respectively.
4124 // Notice that the variables in the same levels can have their conditions combined in the
4125 // same block with a bitwise AND. Whereas, the conditions in consecutive levels will be
4126 // combined with a short-circuiting AND (i.e., different basic blocks).
4129 // Construct a tree of array indices and the array which will generate the optimal
4130 // conditions for loop cloning.
4132 // a[i][j][k], b[i] and a[i][y][k] are the occurrences in the loop. Then, the tree should be:
4147 // In this method, we will construct such a tree by descending depth first into the array
4148 // index operation and forming a tree structure as we encounter the array or the index variables.
4150 // This tree structure will then be used to generate conditions like below:
4151 // (a != null) & (b != null) && // from the first level of the tree.
4153 // (i < a.len) & (i < b.len) && // from the second level of the tree. Levels can be combined.
4154 // (a[i] != null) & (b[i] != null) && // from the second level of the tree.
4156 // (j < a[i].len) & (y < a[i].len) && // from the third level.
4157 // (a[i][j] != null) & (a[i][y] != null) && // from the third level.
4162 bool Compiler::optComputeDerefConditions(unsigned loopNum, LoopCloneContext* context)
4164 ExpandArrayStack<LC_Deref*> nodes(getAllocator());
4167 // Get the dereference-able arrays.
4168 ExpandArrayStack<LC_Array>* deref = context->EnsureDerefs(loopNum);
4170 // For each array in the dereference list, construct a tree,
4171 // where the nodes are array and index variables and an edge 'u-v'
4172 // exists if a node 'v' indexes node 'u' directly as in u[v] or an edge
4173 // 'u-v-w' transitively if u[v][w] occurs.
4174 for (unsigned i = 0; i < deref->Size(); ++i)
4176 LC_Array& array = (*deref)[i];
4178 // First populate the array base variable.
4179 LC_Deref* node = LC_Deref::Find(&nodes, array.arrIndex->arrLcl);
4180 if (node == nullptr)
4182 node = new (getAllocator()) LC_Deref(array, 0 /*level*/);
4186 // For each dimension (level) for the array, populate the tree with the variable
4187 // from that dimension.
4188 unsigned rank = (unsigned)array.GetDimRank();
4189 for (unsigned i = 0; i < rank; ++i)
4191 node->EnsureChildren(getAllocator());
4192 LC_Deref* tmp = node->Find(array.arrIndex->indLcls[i]);
4195 tmp = new (getAllocator()) LC_Deref(array, node->level + 1);
4196 node->children->Push(tmp);
4199 // Descend one level down.
4203 // Keep the maxRank of all array dereferences.
4204 maxRank = max((int)rank, maxRank);
4210 for (unsigned i = 0; i < nodes.Size(); ++i)
4227 // First level will always yield the null-check, since it is made of the array base variables.
4228 // All other levels (dimensions) will yield two conditions ex: (i < a.length && a[i] != null)
4229 // So add 1 after rank * 2.
4230 unsigned condBlocks = (unsigned)maxRank * 2 + 1;
4232 // Heuristic to not create too many blocks;
4238 // Derive conditions into an 'array of level x array of conditions' i.e., levelCond[levels][conds]
4239 ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* levelCond = context->EnsureBlockConditions(loopNum, condBlocks);
4240 for (unsigned i = 0; i < nodes.Size(); ++i)
4242 nodes[i]->DeriveLevelConditions(levelCond);
4245 DBEXEC(verbose, context->PrintBlockConditions(loopNum));
4250 //----------------------------------------------------------------------------
4251 // optDebugLogLoopCloning: Insert a call to jithelper that prints a message.
4254 // block - the block in which the helper call needs to be inserted.
4255 // insertBefore - the tree before which the helper call will be inserted.
4257 void Compiler::optDebugLogLoopCloning(BasicBlock* block, GenTreePtr insertBefore)
// Insert a call to the CORINFO_HELP_DEBUG_LOG_LOOP_CLONING jit helper into 'block',
// just before 'insertBefore', then re-morph the new statement.
// No-op when the JitDebugLogLoopCloning config switch is zero.
// NOTE(review): the early-return body of the guard below appears elided from this view.
4259 if (JitConfig.JitDebugLogLoopCloning() == 0)
4263 GenTreePtr logCall = gtNewHelperCallNode(CORINFO_HELP_DEBUG_LOG_LOOP_CLONING, TYP_VOID);
4264 GenTreePtr stmt = fgNewStmtFromTree(logCall);
4265 fgInsertStmtBefore(block, insertBefore, stmt);
// Morph the freshly inserted statement so it is in a legal state for downstream phases.
4266 fgMorphBlockStmt(block, stmt DEBUGARG("Debug log loop cloning"));
4270 //------------------------------------------------------------------------
4271 // optPerformStaticOptimizations: Perform the optimizations for the optimization
4272 // candidates gathered during the cloning phase.
4275 // loopNum - the current loop index for which the optimizations are performed.
4276 // context - data structure where all loop cloning info is kept.
4277 // dynamicPath - If true, the optimization is performed in the fast path among the
4278 // cloned loops. If false, it means this is the only path (i.e.,
4279 // there is no slow path.)
4282 // Perform the optimizations on the fast path i.e., the path in which the
4283 // optimization candidates were collected at the time of identifying them.
4284 // The candidates store all the information necessary (the tree/stmt/block
4285 // they are from) to perform the optimization.
4288 // The unoptimized path is either already cloned when this method is called or
4289 // there is no unoptimized path (got eliminated statically.) So this method
4290 // performs the optimizations assuming that the path in which the candidates
4291 // were collected is the fast path in which the optimizations will be performed.
4293 void Compiler::optPerformStaticOptimizations(unsigned loopNum, LoopCloneContext* context DEBUGARG(bool dynamicPath))
// Walk the optimization candidates recorded for loop 'loopNum' and apply each one
// on the (fast) path where it was collected. Currently only jagged-array candidates
// are implemented; MD-array candidates are a TODO.
4295 ExpandArrayStack<LcOptInfo*>* optInfos = context->GetLoopOptInfo(loopNum);
4296 for (unsigned i = 0; i < optInfos->Size(); ++i)
4298 LcOptInfo* optInfo = optInfos->GetRef(i);
4299 switch (optInfo->GetOptType())
4301 case LcOptInfo::LcJaggedArray:
4303 LcJaggedArrayOptInfo* arrIndexInfo = optInfo->AsLcJaggedArrayOptInfo();
// optRemoveRangeCheck and the helpers below require compCurBB to be the block
// containing the tree being modified.
4304 compCurBB = arrIndexInfo->arrIndex.useBlock;
4305 optRemoveRangeCheck(arrIndexInfo->arrIndex.bndsChks[arrIndexInfo->dim], arrIndexInfo->stmt, true,
// In debug, when we are on the dynamically chosen fast path, log the cloning.
4307 DBEXEC(dynamicPath, optDebugLogLoopCloning(arrIndexInfo->arrIndex.useBlock, arrIndexInfo->stmt));
4310 case LcOptInfo::LcMdArray:
4311 // TODO-CQ: CLONE: Implement.
4319 //----------------------------------------------------------------------------
4320 // optCanCloneLoops: Use the environment flag to determine whether loop
4321 // cloning is allowed to be performed.
4324 // Returns true in debug builds if COMPlus_JitCloneLoops flag is set.
4325 // Disabled for retail for now.
4327 bool Compiler::optCanCloneLoops()
4329 // Enabled for retail builds now.
4330 unsigned cloneLoopsFlag = 1;
4332 cloneLoopsFlag = JitConfig.JitCloneLoops();
4334 return (cloneLoopsFlag != 0);
4337 //----------------------------------------------------------------------------
4338 // optIsLoopClonable: Determine whether this loop can be cloned.
4341 // loopInd loop index which needs to be checked if it can be cloned.
4344 // Returns true if the loop can be cloned. If it returns false
4345 // prints a message in debug as why the loop can't be cloned.
4347 bool Compiler::optIsLoopClonable(unsigned loopInd)
// Reject loops we cannot (yet) clone: loops containing a try-begin, loops whose
// entry (or the block following bottom) is a handler start, loops whose head and
// entry are in different EH regions, and loops whose extra BBJ_RETURN blocks
// would push the method past the 4-epilog limit. Returns true if clonable
// (JITDUMPs the reason otherwise).
4349 // First, for now, make sure the loop doesn't have any embedded exception handling -- I don't want to tackle
4350 // inserting new EH regions in the exception table yet.
4351 BasicBlock* stopAt = optLoopTable[loopInd].lpBottom->bbNext;
4352 unsigned loopRetCount = 0;
4353 for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != stopAt; blk = blk->bbNext)
// Count return blocks inside the loop (the increment appears elided from this view).
4355 if (blk->bbJumpKind == BBJ_RETURN)
4359 if (bbIsTryBeg(blk))
4361 JITDUMP("Loop cloning: rejecting loop %d in %s, because it has a try begin.\n", loopInd, info.compFullName);
4366 // Is the entry block a handler or filter start? If so, then if we cloned, we could create a jump
4367 // into the middle of a handler (to go to the cloned copy.) Reject.
4368 if (bbIsHandlerBeg(optLoopTable[loopInd].lpEntry))
4370 JITDUMP("Loop cloning: rejecting loop because entry block is a handler start.\n");
4374 // If the head and entry are in different EH regions, reject.
4375 if (!BasicBlock::sameEHRegion(optLoopTable[loopInd].lpHead, optLoopTable[loopInd].lpEntry))
4377 JITDUMP("Loop cloning: rejecting loop because head and entry blocks are in different EH regions.\n");
4381 // Is the first block after the last block of the loop a handler or filter start?
4382 // Usually, we create a dummy block after the orginal loop, to skip over the loop clone
4383 // and go to where the original loop did. That raises problems when we don't actually go to
4384 // that block; this is one of those cases. This could be fixed fairly easily; for example,
4385 // we could add a dummy nop block after the (cloned) loop bottom, in the same handler scope as the
4386 // loop. This is just a corner to cut to get this working faster.
4387 BasicBlock* bbAfterLoop = optLoopTable[loopInd].lpBottom->bbNext;
4388 if (bbAfterLoop != nullptr && bbIsHandlerBeg(bbAfterLoop))
4390 JITDUMP("Loop cloning: rejecting loop because next block after bottom is a handler start.\n");
4394 // We've previously made a decision whether to have separate return epilogs, or branch to one.
4395 // There's a GCInfo limitation in the x86 case, so that there can be no more than 4 separate epilogs.
4396 // (I thought this was x86-specific, but it's not if-d. On other architectures, the decision should be made as a
4397 // heuristic tradeoff; perhaps we're just choosing to live with 4 as the limit.)
4398 if (fgReturnCount + loopRetCount > 4)
4400 JITDUMP("Loop cloning: rejecting loop because it has %d returns; if added to previously-existing %d returns, "
4401 "would exceed the limit of 4.\n",
4402 loopRetCount, fgReturnCount);
4406 // Otherwise, we're going to add those return blocks.
// Commit: cloning will duplicate the loop's return blocks, so bump the method total now.
4407 fgReturnCount += loopRetCount;
4412 /*****************************************************************************
4414 * Identify loop cloning opportunities, derive loop cloning conditions,
4415 * perform loop cloning, use the derived conditions to choose which
4418 void Compiler::optCloneLoops()
// Driver for loop cloning: gather candidates per loop, derive cloning/deref
// conditions, statically evaluate them (cancelling or optimizing in place when
// the outcome is known), then clone the remaining loops guarded by runtime checks.
4420 JITDUMP("\n*************** In optCloneLoops()\n");
4421 if (optLoopCount == 0 || !optCanCloneLoops())
4429 printf("Blocks/Trees at start of phase\n");
4430 fgDispBasicBlocks(true);
4434 LoopCloneContext context(optLoopCount, getAllocator());
4436 // Obtain array optimization candidates in the context.
4437 optObtainLoopCloningOpts(&context);
4439 // For each loop, derive cloning conditions for the optimization candidates.
4440 for (unsigned i = 0; i < optLoopCount; ++i)
4442 ExpandArrayStack<LcOptInfo*>* optInfos = context.GetLoopOptInfo(i);
4443 if (optInfos == nullptr)
4448 if (!optDeriveLoopCloningConditions(i, &context) || !optComputeDerefConditions(i, &context))
4450 JITDUMP("> Conditions could not be obtained\n");
4451 context.CancelLoopOptInfo(i);
4455 bool allTrue = false;
4456 bool anyFalse = false;
// Statically evaluate the derived conditions; if any is provably false the
// candidates are cancelled, and if all are provably true no clone is needed.
4457 context.EvaluateConditions(i, &allTrue, &anyFalse DEBUGARG(verbose));
4460 context.CancelLoopOptInfo(i);
4464 // Perform static optimizations on the fast path since we always
4465 // have to take the cloned path.
4466 optPerformStaticOptimizations(i, &context DEBUGARG(false));
4468 // No need to clone.
4469 context.CancelLoopOptInfo(i);
4475 // The code in this #if has been useful in debugging loop cloning issues, by
4476 // enabling selective enablement of the loop cloning optimization according to
// a method-hash range taken from the environment (loopclonehashlo/hi).
4479 unsigned methHash = info.compMethodHash();
4480 char* lostr = getenv("loopclonehashlo");
4481 unsigned methHashLo = 0;
4484 sscanf_s(lostr, "%x", &methHashLo);
4485 // methHashLo = (unsigned(atoi(lostr)) << 2); // So we don't have to use negative numbers.
4487 char* histr = getenv("loopclonehashhi");
4488 unsigned methHashHi = UINT32_MAX;
4491 sscanf_s(histr, "%x", &methHashHi);
4492 // methHashHi = (unsigned(atoi(histr)) << 2); // So we don't have to use negative numbers.
4494 if (methHash < methHashLo || methHash > methHashHi)
// Finally, clone every loop that still has (runtime-dependent) candidates.
4499 for (unsigned i = 0; i < optLoopCount; ++i)
4501 if (context.GetLoopOptInfo(i) != nullptr)
4504 context.OptimizeConditions(i DEBUGARG(verbose));
4505 context.OptimizeBlockConditions(i DEBUGARG(verbose));
4506 optCloneLoop(i, &context);
4513 printf("\nAfter loop cloning:\n");
4514 fgDispBasicBlocks(/*dumpTrees*/ true);
4519 void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context)
4521 assert(loopInd < optLoopCount);
4523 JITDUMP("\nCloning loop %d: [h: %d, f: %d, t: %d, e: %d, b: %d].\n", loopInd, optLoopTable[loopInd].lpHead->bbNum,
4524 optLoopTable[loopInd].lpFirst->bbNum, optLoopTable[loopInd].lpTop->bbNum,
4525 optLoopTable[loopInd].lpEntry->bbNum, optLoopTable[loopInd].lpBottom->bbNum);
4527 // Determine the depth of the loop, so we can properly weight blocks added (outside the cloned loop blocks).
4528 unsigned depth = optLoopDepth(loopInd);
4529 unsigned ambientWeight = 1;
4530 for (unsigned j = 0; j < depth; j++)
4532 unsigned lastWeight = ambientWeight;
4533 ambientWeight *= BB_LOOP_WEIGHT;
4534 // If the multiplication overflowed, stick at max.
4535 // (Strictly speaking, a multiplication could overflow and still have a result
4536 // that is >= lastWeight...but if so, the original weight must be pretty large,
4537 // and it got bigger, so that's OK.)
4538 if (ambientWeight < lastWeight)
4540 ambientWeight = BB_MAX_WEIGHT;
4545 // If we're in a non-natural loop, the ambient weight might be higher than we computed above.
4546 // Be safe by taking the max with the head block's weight.
4547 ambientWeight = max(ambientWeight, optLoopTable[loopInd].lpHead->bbWeight);
4549 // This is the containing loop, if any -- to label any blocks we create that are outside
4550 // the loop being cloned.
4551 unsigned char ambientLoop = optLoopTable[loopInd].lpParent;
4553 // First, make sure that the loop has a unique header block, creating an empty one if necessary.
4554 optEnsureUniqueHead(loopInd, ambientWeight);
4556 // We're going to make
4568 // H2--> E (Optional; if E == T == F, let H fall through to F/T/E)
4580 BasicBlock* h = optLoopTable[loopInd].lpHead;
4581 if (h->bbJumpKind != BBJ_NONE && h->bbJumpKind != BBJ_ALWAYS)
4583 // Make a new block to be the unique entry to the loop.
4584 assert(h->bbJumpKind == BBJ_COND && h->bbNext == optLoopTable[loopInd].lpEntry);
4585 BasicBlock* newH = fgNewBBafter(BBJ_NONE, h,
4586 /*extendRegion*/ true);
4587 newH->bbWeight = (newH->isRunRarely() ? 0 : ambientWeight);
4588 BlockSetOps::Assign(this, newH->bbReach, h->bbReach);
4589 // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
4590 newH->bbNatLoopNum = ambientLoop;
4592 optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h);
4595 // First, make X2 after B, if necessary. (Not necessary if b is a BBJ_ALWAYS.)
4596 // "newPred" will be the predecessor of the blocks of the cloned loop.
4597 BasicBlock* b = optLoopTable[loopInd].lpBottom;
4598 BasicBlock* newPred = b;
4599 if (b->bbJumpKind != BBJ_ALWAYS)
4601 BasicBlock* x = b->bbNext;
4604 BasicBlock* x2 = fgNewBBafter(BBJ_ALWAYS, b, /*extendRegion*/ true);
4605 x2->bbWeight = (x2->isRunRarely() ? 0 : ambientWeight);
4607 // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
4608 x2->bbNatLoopNum = ambientLoop;
4611 BlockSetOps::Assign(this, x2->bbReach, h->bbReach);
4616 // Now we'll make "h2", after "h" to go to "e" -- unless the loop is a do-while,
4617 // so that "h" already falls through to "e" (e == t == f).
4618 BasicBlock* h2 = nullptr;
4619 if (optLoopTable[loopInd].lpHead->bbNext != optLoopTable[loopInd].lpEntry)
4621 BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, optLoopTable[loopInd].lpHead,
4622 /*extendRegion*/ true);
4623 h2->bbWeight = (h2->isRunRarely() ? 0 : ambientWeight);
4625 // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
4626 h2->bbNatLoopNum = ambientLoop;
4628 h2->bbJumpDest = optLoopTable[loopInd].lpEntry;
4629 optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h2);
4632 // Now we'll clone the blocks of the loop body.
4633 BasicBlock* newFirst = nullptr;
4634 BasicBlock* newBot = nullptr;
4636 BlockToBlockMap* blockMap = new (getAllocator()) BlockToBlockMap(getAllocator());
4637 for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != optLoopTable[loopInd].lpBottom->bbNext;
4640 BasicBlock* newBlk = fgNewBBafter(blk->bbJumpKind, newPred,
4641 /*extendRegion*/ true);
4643 BasicBlock::CloneBlockState(this, newBlk, blk);
4644 // TODO-Cleanup: The above clones the bbNatLoopNum, which is incorrect. Eventually, we should probably insert
4645 // the cloned loop in the loop table. For now, however, we'll just make these blocks be part of the surrounding
4646 // loop, if one exists -- the parent of the loop we're cloning.
4647 newBlk->bbNatLoopNum = optLoopTable[loopInd].lpParent;
4649 if (newFirst == nullptr)
4653 newBot = newBlk; // Continually overwrite to make sure we get the last one.
4655 blockMap->Set(blk, newBlk);
4658 // Perform the static optimizations on the fast path.
4659 optPerformStaticOptimizations(loopInd, context DEBUGARG(true));
4661 // Now go through the new blocks, remapping their jump targets within the loop.
4662 for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != optLoopTable[loopInd].lpBottom->bbNext;
4666 BasicBlock* newblk = nullptr;
4667 bool b = blockMap->Lookup(blk, &newblk);
4668 assert(b && newblk != nullptr);
4670 assert(blk->bbJumpKind == newblk->bbJumpKind);
4672 // First copy the jump destination(s) from "blk".
4673 optCopyBlkDest(blk, newblk);
4675 // Now redirect the new block according to "blockMap".
4676 optRedirectBlock(newblk, blockMap);
4679 assert((h->bbJumpKind == BBJ_NONE && (h->bbNext == h2 || h->bbNext == optLoopTable[loopInd].lpEntry)) ||
4680 (h->bbJumpKind == BBJ_ALWAYS));
4682 // If all the conditions are true, go to E2.
4683 BasicBlock* e2 = nullptr;
4684 bool foundIt = blockMap->Lookup(optLoopTable[loopInd].lpEntry, &e2);
4686 h->bbJumpKind = BBJ_COND;
4688 // We will create the following structure
4690 // cond0 (in h) -?> cond1
4691 // slow --> e2 (slow) always
4698 // We should always have block conditions, at the minimum, the array should be deref-able
4699 assert(context->HasBlockConditions(loopInd));
4701 // Create a unique header for the slow path.
4702 BasicBlock* slowHead = fgNewBBafter(BBJ_ALWAYS, h, true);
4703 slowHead->bbWeight = (h->isRunRarely() ? 0 : ambientWeight);
4704 slowHead->bbNatLoopNum = ambientLoop;
4705 slowHead->bbJumpDest = e2;
4707 BasicBlock* condLast = optInsertLoopChoiceConditions(context, loopInd, h, slowHead);
4708 condLast->bbJumpDest = slowHead;
4710 // If h2 is present it is already the head or replace 'h' by 'condLast'.
4713 optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, condLast);
4715 assert(foundIt && e2 != nullptr);
4717 fgUpdateChangedFlowGraph();
4720 //--------------------------------------------------------------------------------------------------
4721 // optInsertLoopChoiceConditions - Insert the loop conditions for a loop between loop head and entry
4724 // context loop cloning context variable
4725 // loopNum the loop index
4726 // head loop head for "loopNum"
4727 // slowHead the slow path loop head
4733 // Create the following structure.
4735 // Note below that the cond0 is inverted in head i.e., if true jump to cond1. This is because
4736 // condn cannot jtrue to loop head h2. It has to be from a direct pred block.
4738 // cond0 (in h) -?> cond1
4739 // slowHead --> e2 (slowHead) always
4740 // !cond1 -?> slowHead
4741 // !cond2 -?> slowHead
4743 // !condn -?> slowHead
4746 // Insert condition 0 in 'h' and create other condition blocks and insert conditions in them.
4748 BasicBlock* Compiler::optInsertLoopChoiceConditions(LoopCloneContext* context,
4751 BasicBlock* slowHead)
// Insert one block per level of block conditions between 'head' and the loop
// entry, wiring each failed condition to 'slowHead'. The condition placed in
// 'head' itself is inverted (see the block comment above). Returns the last
// condition block created (the elided return statement is not in this view).
4753 JITDUMP("Inserting loop cloning conditions\n");
4754 assert(context->HasBlockConditions(loopNum));
4756 BasicBlock* curCond = head;
4757 ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* levelCond = context->GetBlockConditions(loopNum);
4758 for (unsigned i = 0; i < levelCond->Size(); ++i)
4760 bool isHeaderBlock = (curCond == head);
4762 // Flip the condition if header block.
4763 context->CondToStmtInBlock(this, *((*levelCond)[i]), curCond, isHeaderBlock);
4765 // Create each condition block ensuring wiring between them.
4766 BasicBlock* tmp = fgNewBBafter(BBJ_COND, isHeaderBlock ? slowHead : curCond, true);
4767 curCond->bbJumpDest = isHeaderBlock ? tmp : slowHead;
4770 curCond->inheritWeight(head);
4771 curCond->bbNatLoopNum = head->bbNatLoopNum;
4772 JITDUMP("Created new block %02d for new level\n", curCond->bbNum);
4775 // Finally insert cloning conditions after all deref conditions have been inserted.
4776 context->CondToStmtInBlock(this, *(context->GetConditions(loopNum)), curCond, false);
4780 void Compiler::optEnsureUniqueHead(unsigned loopInd, unsigned ambientWeight)
// Ensure loop 'loopInd' has a unique header block: if the current head already
// dominates the entry it is unique; otherwise create an empty block 'h2' just
// before the top, in the same EH region, and redirect all non-loop predecessors
// of the entry to it.
4782 BasicBlock* h = optLoopTable[loopInd].lpHead;
4783 BasicBlock* t = optLoopTable[loopInd].lpTop;
4784 BasicBlock* e = optLoopTable[loopInd].lpEntry;
4785 BasicBlock* b = optLoopTable[loopInd].lpBottom;
4787 // If "h" dominates the entry block, then it is the unique header.
4788 if (fgDominate(h, e))
4793 // Otherwise, create a new empty header block, make it the pred of the entry block,
4794 // and redirect the preds of the entry block to go to this.
4796 BasicBlock* beforeTop = t->bbPrev;
4797 // Make sure that the new block is in the same region as the loop.
4798 // (We will only create loops that are entirely within a region.)
4799 BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, beforeTop, true);
4800 // This is in the containing loop.
4801 h2->bbNatLoopNum = optLoopTable[loopInd].lpParent;
4802 h2->bbWeight = (h2->isRunRarely() ? 0 : ambientWeight);
4804 // We don't care where it was put; splice it between beforeTop and top.
4805 if (beforeTop->bbNext != h2)
4807 h2->bbPrev->setNext(h2->bbNext); // Splice h2 out.
4808 beforeTop->setNext(h2); // Splice h2 in, between beforeTop and t.
4812 if (h2->bbNext != e)
4814 h2->bbJumpKind = BBJ_ALWAYS;
4817 BlockSetOps::Assign(this, h2->bbReach, e->bbReach);
4819 // Redirect paths from preds of "e" to go to "h2" instead of "e".
4820 BlockToBlockMap* blockMap = new (getAllocator()) BlockToBlockMap(getAllocator());
4821 blockMap->Set(e, h2);
4823 for (flowList* predEntry = e->bbPreds; predEntry; predEntry = predEntry->flNext)
4825 BasicBlock* predBlock = predEntry->flBlock;
4827 // Skip if predBlock is in the loop.
// NOTE(review): in-loop test is by bbNum range [top, bottom] -- assumes block
// numbers are ordered within the loop body.
4828 if (t->bbNum <= predBlock->bbNum && predBlock->bbNum <= b->bbNum)
4832 optRedirectBlock(predBlock, blockMap);
4835 optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h2);
4838 /*****************************************************************************
4840 * Determine the kind of interference for the call.
4843 /* static */ inline Compiler::callInterf Compiler::optCallInterf(GenTreePtr call)
// Classify the interference of 'call': non-helper calls kill everything;
// specific field/array store helpers kill ref, scalar, or all indirections;
// all other helpers kill nothing.
4845 assert(call->gtOper == GT_CALL);
4847 // if not a helper, kills everything
4848 if (call->gtCall.gtCallType != CT_HELPER)
4853 // setfield and array address store kill all indirections
4854 switch (eeGetHelperNum(call->gtCall.gtCallMethHnd))
4856 case CORINFO_HELP_ASSIGN_REF: // Not strictly needed as we don't make a GT_CALL with this
4857 case CORINFO_HELP_CHECKED_ASSIGN_REF: // Not strictly needed as we don't make a GT_CALL with this
4858 case CORINFO_HELP_ASSIGN_BYREF: // Not strictly needed as we don't make a GT_CALL with this
4859 case CORINFO_HELP_SETFIELDOBJ:
4860 case CORINFO_HELP_ARRADDR_ST:
4862 return CALLINT_REF_INDIRS;
4864 case CORINFO_HELP_SETFIELDFLOAT:
4865 case CORINFO_HELP_SETFIELDDOUBLE:
4866 case CORINFO_HELP_SETFIELD8:
4867 case CORINFO_HELP_SETFIELD16:
4868 case CORINFO_HELP_SETFIELD32:
4869 case CORINFO_HELP_SETFIELD64:
4871 return CALLINT_SCL_INDIRS;
4873 case CORINFO_HELP_ASSIGN_STRUCT: // Not strictly needed as we don't use this in Jit32
4874 case CORINFO_HELP_MEMSET: // Not strictly needed as we don't make a GT_CALL with this
4875 case CORINFO_HELP_MEMCPY: // Not strictly needed as we don't make a GT_CALL with this
4876 case CORINFO_HELP_SETFIELDSTRUCT:
4878 return CALLINT_ALL_INDIRS;
4884 // other helpers kill nothing
4885 return CALLINT_NONE;
4888 /*****************************************************************************
4890 * See if the given tree can be computed in the given precision (which must
4891 * be smaller than the type of the tree for this to make sense). If 'doit'
4892 * is false, we merely check to see whether narrowing is possible; if we
4893 * get called with 'doit' being true, we actually perform the narrowing.
4896 bool Compiler::optNarrowTree(GenTreePtr tree, var_types srct, var_types dstt, ValueNumPair vnpNarrow, bool doit)
// Determine whether 'tree' (of integral type 'srct') can be computed in the
// smaller integral type 'dstt'; when 'doit' is true, actually perform the
// narrowing in place (retyping nodes, shrinking constants, bashing casts) and
// install 'vnpNarrow' as the narrowed value-number pair. Returns true on
// success. Assignments and unhandled shapes always fail (with doit == false).
// NOTE(review): many branch bodies and case labels are elided from this view;
// the comments below describe only what the visible lines establish.
4902 noway_assert(genActualType(tree->gtType) == genActualType(srct));
4904 /* Assume we're only handling integer types */
4905 noway_assert(varTypeIsIntegral(srct));
4906 noway_assert(varTypeIsIntegral(dstt));
4908 unsigned srcSize = genTypeSize(srct);
4909 unsigned dstSize = genTypeSize(dstt);
4911 /* dstt must be smaller than srct to narrow */
4912 if (dstSize >= srcSize)
4917 /* Figure out what kind of a node we have */
4918 oper = tree->OperGet();
4919 kind = tree->OperKind();
4921 if (kind & GTK_ASGOP)
4923 noway_assert(doit == false);
4927 ValueNumPair NoVNPair = ValueNumPair();
4929 if (kind & GTK_LEAF)
4933 /* Constants can usually be narrowed by changing their value */
4934 CLANG_FORMAT_COMMENT_ANCHOR;
4936 #ifndef _TARGET_64BIT_
4941 lval = tree->gtIntConCommon.LngValue();
// Narrow a long constant to int only if its value survives the mask unchanged.
4970 if ((lval & lmask) != lval)
4975 tree->ChangeOperConst(GT_CNS_INT);
4976 tree->gtType = TYP_INT;
4977 tree->gtIntCon.gtIconVal = (int)lval;
4978 if (vnStore != nullptr)
4980 fgValueNumberTreeConst(tree);
4990 ival = tree->gtIntCon.gtIconVal;
5009 #ifdef _TARGET_64BIT_
5016 #endif // _TARGET_64BIT_
5021 if ((ival & imask) != ival)
5026 #ifdef _TARGET_64BIT_
5029 tree->gtType = TYP_INT;
5030 tree->gtIntCon.gtIconVal = (int)ival;
5031 if (vnStore != nullptr)
5033 fgValueNumberTreeConst(tree);
5036 #endif // _TARGET_64BIT_
5040 /* Operands that are in memory can usually be narrowed
5041 simply by changing their gtType */
5044 /* We only allow narrowing long -> int for a GT_LCL_VAR */
5045 if (dstSize == sizeof(int))
5058 noway_assert(doit == false);
5062 if (kind & (GTK_BINOP | GTK_UNOP))
5065 op1 = tree->gtOp.gtOp1;
5067 op2 = tree->gtOp.gtOp2;
5069 switch (tree->gtOper)
5072 noway_assert(genActualType(tree->gtType) == genActualType(op2->gtType));
5074 // Is op2 a small constant than can be narrowed into dstt?
5075 // if so the result of the GT_AND will also fit into 'dstt' and can be narrowed
5076 if ((op2->gtOper == GT_CNS_INT) && optNarrowTree(op2, srct, dstt, NoVNPair, false))
5078 // We will change the type of the tree and narrow op2
5082 tree->gtType = genActualType(dstt);
5083 tree->SetVNs(vnpNarrow);
5085 optNarrowTree(op2, srct, dstt, NoVNPair, true);
5086 // We may also need to cast away the upper bits of op1
5089 assert(tree->gtType == TYP_INT);
5090 op1 = gtNewCastNode(TYP_INT, op1, TYP_INT);
5092 op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
5094 tree->gtOp.gtOp1 = op1;
5105 if (tree->gtOverflow() || varTypeIsSmall(dstt))
5107 noway_assert(doit == false);
// Both operands must themselves be narrowable (and not active CSE candidates)
// before the arithmetic node can be retyped.
5115 noway_assert(genActualType(tree->gtType) == genActualType(op1->gtType));
5116 noway_assert(genActualType(tree->gtType) == genActualType(op2->gtType));
5118 if (gtIsActiveCSE_Candidate(op1) || gtIsActiveCSE_Candidate(op2) ||
5119 !optNarrowTree(op1, srct, dstt, NoVNPair, doit) || !optNarrowTree(op2, srct, dstt, NoVNPair, doit))
5121 noway_assert(doit == false);
5125 /* Simply change the type of the tree */
5129 if (tree->gtOper == GT_MUL && (tree->gtFlags & GTF_MUL_64RSLT))
5131 tree->gtFlags &= ~GTF_MUL_64RSLT;
5134 tree->gtType = genActualType(dstt);
5135 tree->SetVNs(vnpNarrow);
5143 /* Simply change the type of the tree */
5145 if (doit && (dstSize <= genTypeSize(tree->gtType)))
5147 tree->gtType = genSignedType(dstt);
5148 tree->SetVNs(vnpNarrow);
5150 /* Make sure we don't mess up the variable type */
5151 if ((oper == GT_LCL_VAR) || (oper == GT_LCL_FLD))
5153 tree->gtFlags |= GTF_VAR_CAST;
5166 /* These can always be narrowed since they only represent 0 or 1 */
5171 var_types cast = tree->CastToType();
5172 var_types oprt = op1->TypeGet();
5173 unsigned oprSize = genTypeSize(oprt);
5180 if (varTypeIsIntegralOrI(dstt) != varTypeIsIntegralOrI(oprt))
5185 if (tree->gtOverflow())
5190 /* Is this a cast from the type we're narrowing to or a smaller one? */
5192 if (oprSize <= dstSize)
5194 /* Bash the target type of the cast */
5198 dstt = genSignedType(dstt);
5200 if (oprSize == dstSize)
5202 // Same size: change the CAST into a NOP
5203 tree->ChangeOper(GT_NOP);
5204 tree->gtType = dstt;
5205 tree->gtOp.gtOp2 = nullptr;
5206 tree->gtVNPair = op1->gtVNPair; // Set to op1's ValueNumber
5210 // oprSize is smaller
5211 assert(oprSize < dstSize);
5213 // Change the CastToType in the GT_CAST node
5214 tree->CastToType() = dstt;
5216 // The result type of a GT_CAST is never a small type.
5217 // Use genActualType to widen dstt when it is a small types.
5218 tree->gtType = genActualType(dstt);
5219 tree->SetVNs(vnpNarrow);
5229 if (!gtIsActiveCSE_Candidate(op2) && optNarrowTree(op2, srct, dstt, vnpNarrow, doit))
5231 /* Simply change the type of the tree */
5235 tree->gtType = genActualType(dstt);
5236 tree->SetVNs(vnpNarrow);
5243 noway_assert(doit == false);
5251 /*****************************************************************************
5253 * The following logic figures out whether the given variable is assigned
5254 * somewhere in a list of basic blocks (or in an entire loop).
5257 Compiler::fgWalkResult Compiler::optIsVarAssgCB(GenTreePtr* pTree, fgWalkData* data)
// Tree-walk callback: records, in the isVarAssgDsc pointed to by pCallbackData,
// which locals are assigned (ivaMaskVal), which indirection kinds are written
// (ivaMaskInd), and the interference of any call seen (ivaMaskCall). The walk
// can abort early when the tracked variable ivaVar is assigned.
5259 GenTreePtr tree = *pTree;
5261 if (tree->OperKind() & GTK_ASGOP)
5263 GenTreePtr dest = tree->gtOp.gtOp1;
5264 genTreeOps destOper = dest->OperGet();
5266 isVarAssgDsc* desc = (isVarAssgDsc*)data->pCallbackData;
5267 assert(desc && desc->ivaSelf == desc);
5269 if (destOper == GT_LCL_VAR)
5271 unsigned tvar = dest->gtLclVarCommon.gtLclNum;
// Only the first lclMAX_ALLSET_TRACKED locals fit in the mask; beyond that,
// mark the mask as incomplete.
5272 if (tvar < lclMAX_ALLSET_TRACKED)
5274 AllVarSetOps::AddElemD(data->compiler, desc->ivaMaskVal, tvar);
5278 desc->ivaMaskIncomplete = true;
// 'ivaSkip' lets the caller exempt one particular assignment from triggering
// the "variable is assigned" result (the abort return is elided in this view).
5281 if (tvar == desc->ivaVar)
5283 if (tree != desc->ivaSkip)
5289 else if (destOper == GT_LCL_FLD)
5291 /* We can't track every field of every var. Moreover, indirections
5292 may access different parts of the var as different (but
5293 overlapping) fields. So just treat them as indirect accesses */
5295 // unsigned lclNum = dest->gtLclFld.gtLclNum;
5296 // noway_assert(lvaTable[lclNum].lvAddrTaken);
5298 varRefKinds refs = varTypeIsGC(tree->TypeGet()) ? VR_IND_REF : VR_IND_SCL;
5299 desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | refs);
5301 else if (destOper == GT_CLS_VAR)
5303 desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | VR_GLB_VAR);
5305 else if (destOper == GT_IND)
5307 /* Set the proper indirection bits */
5309 varRefKinds refs = varTypeIsGC(tree->TypeGet()) ? VR_IND_REF : VR_IND_SCL;
5310 desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | refs);
5313 else if (tree->gtOper == GT_CALL)
5315 isVarAssgDsc* desc = (isVarAssgDsc*)data->pCallbackData;
5316 assert(desc && desc->ivaSelf == desc);
5318 desc->ivaMaskCall = optCallInterf(tree);
5321 return WALK_CONTINUE;
5324 /*****************************************************************************/
5326 bool Compiler::optIsVarAssigned(BasicBlock* beg, BasicBlock* end, GenTreePtr skip, unsigned var)
// Walk every statement from 'beg' (through 'end') with optIsVarAssgCB, looking
// for an assignment to local 'var'; the single assignment 'skip' is exempted.
// The walk aborting (non-zero return from fgWalkTreePre) signals 'var' was
// assigned.
5331 desc.ivaSkip = skip;
5333 desc.ivaSelf = &desc;
5336 desc.ivaMaskCall = CALLINT_NONE;
5337 AllVarSetOps::AssignNoCopy(this, desc.ivaMaskVal, AllVarSetOps::MakeEmpty(this));
5343 for (GenTreeStmt* stmt = beg->firstStmt(); stmt; stmt = stmt->gtNextStmt)
5345 noway_assert(stmt->gtOper == GT_STMT);
5346 if (fgWalkTreePre(&stmt->gtStmtExpr, optIsVarAssgCB, &desc))
5368 /*****************************************************************************/
5369 int Compiler::optIsSetAssgLoop(unsigned lnum, ALLVARSET_VALARG_TP vars, varRefKinds inds)
// Test whether loop 'lnum' assigns any variable in 'vars' or any indirection
// kind in 'inds'. The per-loop assignment summary (lpAsgVars/lpAsgInds/
// lpAsgCall) is computed lazily on first query and cached via LPFLG_ASGVARS_YES.
5373 /* Get hold of the loop descriptor */
5375 noway_assert(lnum < optLoopCount);
5376 loop = optLoopTable + lnum;
5378 /* Do we already know what variables are assigned within this loop? */
5380 if (!(loop->lpFlags & LPFLG_ASGVARS_YES))
5387 /* Prepare the descriptor used by the tree walker call-back */
// ivaVar == -1: track no particular variable, just accumulate the masks.
5389 desc.ivaVar = (unsigned)-1;
5390 desc.ivaSkip = nullptr;
5392 desc.ivaSelf = &desc;
5394 AllVarSetOps::AssignNoCopy(this, desc.ivaMaskVal, AllVarSetOps::MakeEmpty(this));
5395 desc.ivaMaskInd = VR_NONE;
5396 desc.ivaMaskCall = CALLINT_NONE;
5397 desc.ivaMaskIncomplete = false;
5399 /* Now walk all the statements of the loop */
5401 beg = loop->lpHead->bbNext;
5402 end = loop->lpBottom;
5404 for (/**/; /**/; beg = beg->bbNext)
5408 for (GenTreeStmt* stmt = beg->FirstNonPhiDef(); stmt; stmt = stmt->gtNextStmt)
5410 noway_assert(stmt->gtOper == GT_STMT);
5411 fgWalkTreePre(&stmt->gtStmtExpr, optIsVarAssgCB, &desc);
// If not every assigned local fit in the tracked mask, remember that the
// cached summary is incomplete.
5413 if (desc.ivaMaskIncomplete)
5415 loop->lpFlags |= LPFLG_ASGVARS_INC;
5425 AllVarSetOps::Assign(this, loop->lpAsgVars, desc.ivaMaskVal);
5426 loop->lpAsgInds = desc.ivaMaskInd;
5427 loop->lpAsgCall = desc.ivaMaskCall;
5429 /* Now we know what variables are assigned in the loop */
5431 loop->lpFlags |= LPFLG_ASGVARS_YES;
5434 /* Now we can finally test the caller's mask against the loop's */
5435 if (!AllVarSetOps::IsEmptyIntersection(this, loop->lpAsgVars, vars) || (loop->lpAsgInds & inds))
// Calls in the loop may interfere with the queried indirections even when no
// tracked variable overlaps; classify by the cached call-interference kind.
5440 switch (loop->lpAsgCall)
5444 /* Can't hoist if the call might have side effect on an indirection. */
5446 if (loop->lpAsgInds != VR_NONE)
5453 case CALLINT_REF_INDIRS:
5455 /* Can't hoist if the call might have side effect on an ref indirection. */
5457 if (loop->lpAsgInds & VR_IND_REF)
5464 case CALLINT_SCL_INDIRS:
5466 /* Can't hoist if the call might have side effect on an non-ref indirection. */
5468 if (loop->lpAsgInds & VR_IND_SCL)
5475 case CALLINT_ALL_INDIRS:
5477 /* Can't hoist if the call might have side effect on any indirection. */
5479 if (loop->lpAsgInds & (VR_IND_REF | VR_IND_SCL))
5488 /* Other helpers kill nothing */
5493 noway_assert(!"Unexpected lpAsgCall value");
// optPerformHoistExpr: clone the loop-invariant expression "origExpr", mark the clone
// as a CSE candidate, and append it as a new statement at the end of the pre-header
// block of loop "lnum" (creating the pre-header first via fgCreateLoopPreHeader).
// The original expression in the loop body is left in place; CSE is expected to
// commonize the two copies later.
5499 void Compiler::optPerformHoistExpr(GenTreePtr origExpr, unsigned lnum)
5504 printf("\nHoisting a copy of ");
5505 printTreeID(origExpr);
5506 printf(" into PreHeader for loop L%02u <BB%02u..BB%02u>:\n", lnum, optLoopTable[lnum].lpFirst->bbNum,
5507 optLoopTable[lnum].lpBottom->bbNum);
5508 gtDispTree(origExpr);
5513 // This loop has to be in a form that is approved for hoisting.
5514 assert(optLoopTable[lnum].lpFlags & LPFLG_HOISTABLE);
5516 // Create a copy of the expression and mark it for CSE's.
5517 GenTreePtr hoistExpr = gtCloneExpr(origExpr, GTF_MAKE_CSE);
5519 // At this point we should have a cloned expression, marked with the GTF_MAKE_CSE flag
5520 assert(hoistExpr != origExpr);
5521 assert(hoistExpr->gtFlags & GTF_MAKE_CSE);
5523 GenTreePtr hoist = hoistExpr;
5524 // The value of the expression isn't used (unless it's an assignment).
5525 if (hoistExpr->OperGet() != GT_ASG)
// Wrap the clone so its value is explicitly discarded in the pre-header.
5527 hoist = gtUnusedValNode(hoistExpr);
5530 /* Put the statement in the preheader */
5532 fgCreateLoopPreHeader(lnum);
// After fgCreateLoopPreHeader, lpHead is the (fall-through, BBJ_NONE) pre-header.
5534 BasicBlock* preHead = optLoopTable[lnum].lpHead;
5535 assert(preHead->bbJumpKind == BBJ_NONE);
5537 // fgMorphTree and lvaRecursiveIncRefCounts requires that compCurBB be the block that contains
5538 // (or in this case, will contain) the expression.
5539 compCurBB = preHead;
5541 // Increment the ref counts of any local vars appearing in "hoist".
5542 // Note that we need to do this before fgMorphTree() as fgMorph() could constant
5543 // fold away some of the lcl vars referenced by "hoist".
5544 lvaRecursiveIncRefCounts(hoist);
5546 hoist = fgMorphTree(hoist);
5548 GenTreePtr hoistStmt = gtNewStmt(hoist);
5549 hoistStmt->gtFlags |= GTF_STMT_CMPADD;
5551 /* simply append the statement at the end of the preHead's list */
5553 GenTreePtr treeList = preHead->bbTreeList;
5557 /* append after last statement */
// bbTreeList is circular in gtPrev: the head's gtPrev points at the last statement.
5559 GenTreePtr last = treeList->gtPrev;
5560 assert(last->gtNext == nullptr);
5562 last->gtNext = hoistStmt;
5563 hoistStmt->gtPrev = last;
5564 treeList->gtPrev = hoistStmt;
5568 /* Empty pre-header - store the single statement in the block */
5570 preHead->bbTreeList = hoistStmt;
// Single-element list: the statement is its own "last" via the circular gtPrev link.
5571 hoistStmt->gtPrev = hoistStmt;
5574 hoistStmt->gtNext = nullptr;
5579 printf("This hoisted copy placed in PreHeader (BB%02u):\n", preHead->bbNum);
5584 if (fgStmtListThreaded)
// Keep the per-statement linear node order/costs consistent with the threaded list.
5586 gtSetStmtInfo(hoistStmt);
5587 fgSetStmtSeq(hoistStmt);
// Test-data bookkeeping (JIT self-checking builds): validate any TL_LoopHoist
// annotations attached to "origExpr" against what we actually did.
5591 if (m_nodeTestData != nullptr)
5594 // What is the depth of the loop "lnum"?
5596 unsigned lnumIter = lnum;
5597 while (optLoopTable[lnumIter].lpParent != BasicBlock::NOT_IN_LOOP)
5600 lnumIter = optLoopTable[lnumIter].lpParent;
5603 NodeToTestDataMap* testData = GetNodeTestData();
5605 TestLabelAndNum tlAndN;
5606 if (testData->Lookup(origExpr, &tlAndN) && tlAndN.m_tl == TL_LoopHoist)
// m_num == -1 encodes "do not hoist"; report the violation.
5608 if (tlAndN.m_num == -1)
5611 printTreeID(origExpr);
5612 printf(" was declared 'do not hoist', but is being hoisted.\n");
5615 else if (tlAndN.m_num != depth)
5618 printTreeID(origExpr);
5619 printf(" was declared as hoistable from loop at nesting depth %d; actually hoisted from loop at depth "
5621 tlAndN.m_num, depth);
5626 // We've correctly hoisted this, so remove the annotation. Later, we'll check for any remaining "must
5627 // hoist" annotations.
5628 testData->Remove(origExpr);
5629 // Now we insert an annotation to make sure that "hoistExpr" is actually CSE'd.
5630 tlAndN.m_tl = TL_CSE_Def;
5631 tlAndN.m_num = m_loopHoistCSEClass++;
5632 testData->Set(hoistExpr, tlAndN);
5638 #if LOOP_HOIST_STATS
// Count each loop at most once in m_loopsWithHoistedExpressions.
5639 if (!m_curLoopHasHoistedExpression)
5641 m_loopsWithHoistedExpressions++;
5642 m_curLoopHasHoistedExpression = true;
5644 m_totalHoistedExpressions++;
5645 #endif // LOOP_HOIST_STATS
// optHoistLoopCode: phase entry point for loop-invariant code hoisting.
// Walks every outermost loop in the loop table (outer-to-inner so that
// expressions are hoisted out of the largest loop in which they are
// invariant), delegating to optHoistLoopNest. Afterwards, in checked
// builds, verifies the flow graph and any remaining "must hoist"
// test-data annotations.
5648 void Compiler::optHoistLoopCode()
5650 // If we don't have any loops in the method then take an early out now.
5651 if (optLoopCount == 0)
// JitNoHoist is a config knob that disables this phase (value checked below in elided code).
5657 unsigned jitNoHoist = JitConfig.JitNoHoist();
5665 // The code in this #if has been useful in debugging loop cloning issues, by
5666 // enabling selective enablement of the loop cloning optimization according to
// Debug-only method-hash range filter: only methods whose hash falls in
// [loophoisthashlo, loophoisthashhi] (env vars, hex) are processed.
5669 unsigned methHash = info.compMethodHash();
5670 char* lostr = getenv("loophoisthashlo");
5671 unsigned methHashLo = 0;
5674 sscanf_s(lostr, "%x", &methHashLo);
5675 // methHashLo = (unsigned(atoi(lostr)) << 2); // So we don't have to use negative numbers.
5677 char* histr = getenv("loophoisthashhi");
5678 unsigned methHashHi = UINT32_MAX;
5681 sscanf_s(histr, "%x", &methHashHi);
5682 // methHashHi = (unsigned(atoi(histr)) << 2); // So we don't have to use negative numbers.
5684 if (methHash < methHashLo || methHash > methHashHi)
5686 printf("Doing loop hoisting in %s (0x%x).\n", info.compFullName, methHash);
5688 #endif // 0 -- debugging loop cloning issues
5693 printf("\n*************** In optHoistLoopCode()\n");
5694 printf("Blocks/Trees before phase\n");
5695 fgDispBasicBlocks(true);
5700 // Consider all the loop nests, in outer-to-inner order (thus hoisting expressions outside the largest loop in which
5701 // they are invariant.)
5702 LoopHoistContext hoistCtxt(this);
5703 for (unsigned lnum = 0; lnum < optLoopCount; lnum++)
// Skip loops that earlier phases removed from the table.
5705 if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED)
// Only start recursion at outermost loops; nested ones are reached via lpChild.
5710 if (optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP)
5712 optHoistLoopNest(lnum, &hoistCtxt);
5721 printf("Blocks/Trees after optHoistLoopCode() modified flowgraph\n");
5722 fgDispBasicBlocks(true);
5726 // Make sure that the predecessor lists are accurate
5727 fgDebugCheckBBlist();
5732 // Test Data stuff..
5733 // If we have no test data, early out.
5734 if (m_nodeTestData == nullptr)
// Any TL_LoopHoist annotation still present with m_num >= 0 means a tree that was
// declared "must hoist" was not hoisted — report it.
5738 NodeToTestDataMap* testData = GetNodeTestData();
5739 for (NodeToTestDataMap::KeyIterator ki = testData->Begin(); !ki.Equal(testData->End()); ++ki)
5741 TestLabelAndNum tlAndN;
5742 GenTreePtr node = ki.Get();
5743 bool b = testData->Lookup(node, &tlAndN);
5745 if (tlAndN.m_tl != TL_LoopHoist)
5749 // Otherwise, it is a loop hoist annotation.
5750 assert(tlAndN.m_num < 100); // >= 100 indicates nested static field address, should already have been moved.
5751 if (tlAndN.m_num >= 0)
5755 printf(" was declared 'must hoist', but has not been hoisted.\n");
// optHoistLoopNest: hoist invariant expressions out of loop "lnum", then recurse
// into each of its child loops. While recursing, the set of value numbers hoisted
// in "lnum" is temporarily added to hoistCtxt->m_hoistedInParentLoops so children
// don't re-hoist the same expressions; it is removed again afterwards.
5762 void Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt)
5764 // Do this loop, then recursively do all nested loops.
5765 CLANG_FORMAT_COMMENT_ANCHOR;
5767 #if LOOP_HOIST_STATS
// Reset per-loop stats flag; optPerformHoistExpr sets it on the first hoist.
5769 m_curLoopHasHoistedExpression = false;
5770 m_loopsConsidered++;
5771 #endif // LOOP_HOIST_STATS
5773 optHoistThisLoop(lnum, hoistCtxt);
// Take ownership of the "hoisted in current loop" VN set accumulated above.
5775 VNSet* hoistedInCurLoop = hoistCtxt->ExtractHoistedInCurLoop();
5777 if (optLoopTable[lnum].lpChild != BasicBlock::NOT_IN_LOOP)
5779 // Add the ones hoisted in "lnum" to "hoistedInParents" for any nested loops.
5780 // TODO-Cleanup: we should have a set abstraction for loops.
5781 if (hoistedInCurLoop != nullptr)
5783 for (VNSet::KeyIterator keys = hoistedInCurLoop->Begin(); !keys.Equal(hoistedInCurLoop->End()); ++keys)
// Each VN must not already be present — asserted so the Remove below is safe.
5787 assert(!hoistCtxt->m_hoistedInParentLoops.Lookup(keys.Get(), &b));
5789 hoistCtxt->m_hoistedInParentLoops.Set(keys.Get(), true);
// Recurse over the sibling-linked list of child loops.
5793 for (unsigned child = optLoopTable[lnum].lpChild; child != BasicBlock::NOT_IN_LOOP;
5794 child = optLoopTable[child].lpSibling)
5796 optHoistLoopNest(child, hoistCtxt);
5800 // TODO-Cleanup: we should have a set abstraction for loops.
5801 if (hoistedInCurLoop != nullptr)
5803 for (VNSet::KeyIterator keys = hoistedInCurLoop->Begin(); !keys.Equal(hoistedInCurLoop->End()); ++keys)
5805 // Note that we asserted when we added these that they hadn't been members, so removing is appropriate.
5806 hoistCtxt->m_hoistedInParentLoops.Remove(keys.Get());
// optHoistThisLoop: attempt hoisting for the single loop "lnum" (not its children).
// Screens the loop for hoistability (do-while shape, head dominates entry, same try
// region, not inside a catch), computes the register-pressure statistics used by
// the profitability heuristic (integer/FP/long var counts), then walks the
// definitely-executed blocks of the loop calling optHoistLoopExprsForBlock.
5812 void Compiler::optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt)
5814 LoopDsc* pLoopDsc = &optLoopTable[lnum];
5816 /* If loop was removed continue */
5818 if (pLoopDsc->lpFlags & LPFLG_REMOVED)
5823 /* Get the head and tail of the loop */
5825 BasicBlock* head = pLoopDsc->lpHead;
5826 BasicBlock* tail = pLoopDsc->lpBottom;
5827 BasicBlock* lbeg = pLoopDsc->lpEntry;
5830 // We must have a do-while loop
5831 if ((pLoopDsc->lpFlags & LPFLG_DO_WHILE) == 0)
5836 // The loop-head must dominate the loop-entry.
5837 // TODO-CQ: Couldn't we make this true if it's not?
5838 if (!fgDominate(head, lbeg))
5843 // if lbeg is the start of a new try block then we won't be able to hoist
5844 if (!BasicBlock::sameTryRegion(head, lbeg))
5849 // We don't bother hoisting when inside of a catch block
5850 if ((lbeg->bbCatchTyp != BBCT_NONE) && (lbeg->bbCatchTyp != BBCT_FINALLY))
// All screens passed: mark the loop as an approved hoisting target.
5855 pLoopDsc->lpFlags |= LPFLG_HOISTABLE;
5857 unsigned begn = lbeg->bbNum;
5858 unsigned endn = tail->bbNum;
5860 // Ensure the per-loop sets/tables are empty.
5861 hoistCtxt->m_curLoopVnInvariantCache.RemoveAll();
5866 printf("optHoistLoopCode for loop L%02u <BB%02u..BB%02u>:\n", lnum, begn, endn);
5867 printf(" Loop body %s a call\n", pLoopDsc->lpContainsCall ? "contains" : "does not contain");
// loopVars = variables both live across the loop boundary and used/defined inside it.
5871 VARSET_TP VARSET_INIT_NOCOPY(loopVars, VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, pLoopDsc->lpVarUseDef));
5873 pLoopDsc->lpVarInOutCount = VarSetOps::Count(this, pLoopDsc->lpVarInOut);
5874 pLoopDsc->lpLoopVarCount = VarSetOps::Count(this, loopVars);
5875 pLoopDsc->lpHoistedExprCount = 0;
5877 #ifndef _TARGET_64BIT_
5878 unsigned longVarsCount = VarSetOps::Count(this, lvaLongVars);
5880 if (longVarsCount > 0)
5882 // Since 64-bit variables take up two registers on 32-bit targets, we increase
5883 // the Counts such that each TYP_LONG variable counts twice.
5885 VARSET_TP VARSET_INIT_NOCOPY(loopLongVars, VarSetOps::Intersection(this, loopVars, lvaLongVars));
5886 VARSET_TP VARSET_INIT_NOCOPY(inOutLongVars, VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, lvaLongVars));
5891 printf("\n LONGVARS(%d)=", VarSetOps::Count(this, lvaLongVars));
5892 lvaDispVarSet(lvaLongVars);
5895 pLoopDsc->lpLoopVarCount += VarSetOps::Count(this, loopLongVars);
5896 pLoopDsc->lpVarInOutCount += VarSetOps::Count(this, inOutLongVars);
5898 #endif // !_TARGET_64BIT_
5903 printf("\n USEDEF (%d)=", VarSetOps::Count(this, pLoopDsc->lpVarUseDef));
5904 lvaDispVarSet(pLoopDsc->lpVarUseDef);
5906 printf("\n INOUT (%d)=", pLoopDsc->lpVarInOutCount);
5907 lvaDispVarSet(pLoopDsc->lpVarInOut);
5909 printf("\n LOOPVARS(%d)=", pLoopDsc->lpLoopVarCount);
5910 lvaDispVarSet(loopVars);
// Split the counts into FP and non-FP so each register file is budgeted separately.
5915 unsigned floatVarsCount = VarSetOps::Count(this, lvaFloatVars);
5917 if (floatVarsCount > 0)
5919 VARSET_TP VARSET_INIT_NOCOPY(loopFPVars, VarSetOps::Intersection(this, loopVars, lvaFloatVars));
5920 VARSET_TP VARSET_INIT_NOCOPY(inOutFPVars, VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, lvaFloatVars));
5922 pLoopDsc->lpLoopVarFPCount = VarSetOps::Count(this, loopFPVars);
5923 pLoopDsc->lpVarInOutFPCount = VarSetOps::Count(this, inOutFPVars);
5924 pLoopDsc->lpHoistedFPExprCount = 0;
// The integer counts exclude the FP vars counted above.
5926 pLoopDsc->lpLoopVarCount -= pLoopDsc->lpLoopVarFPCount;
5927 pLoopDsc->lpVarInOutCount -= pLoopDsc->lpVarInOutFPCount;
5932 printf(" INOUT-FP(%d)=", pLoopDsc->lpVarInOutFPCount);
5933 lvaDispVarSet(inOutFPVars);
5935 printf("\n LOOPV-FP(%d)=", pLoopDsc->lpLoopVarFPCount);
5936 lvaDispVarSet(loopFPVars);
5940 else // (floatVarsCount == 0)
5942 pLoopDsc->lpLoopVarFPCount = 0;
5943 pLoopDsc->lpVarInOutFPCount = 0;
5944 pLoopDsc->lpHoistedFPExprCount = 0;
5947 // Find the set of definitely-executed blocks.
5948 // Ideally, the definitely-executed blocks are the ones that post-dominate the entry block.
5949 // Until we have post-dominators, we'll special-case for single-exit blocks.
5950 ExpandArrayStack<BasicBlock*> defExec(getAllocatorLoopHoist());
5951 if (pLoopDsc->lpFlags & LPFLG_ONE_EXIT)
5953 assert(pLoopDsc->lpExit != nullptr);
5954 BasicBlock* cur = pLoopDsc->lpExit;
5955 // Push dominators, until we reach "entry" or exit the loop.
5956 while (cur != nullptr && pLoopDsc->lpContains(cur) && cur != pLoopDsc->lpEntry)
5961 // If we didn't reach the entry block, give up and *just* push the entry block.
5962 if (cur != pLoopDsc->lpEntry)
5966 defExec.Push(pLoopDsc->lpEntry);
5968 else // More than one exit
5970 // We'll assume that only the entry block is definitely executed.
5971 // We could in the future do better.
5972 defExec.Push(pLoopDsc->lpEntry);
5975 while (defExec.Size() > 0)
5977 // Consider in reverse order: dominator before dominatee.
5978 BasicBlock* blk = defExec.Pop();
5979 optHoistLoopExprsForBlock(blk, lnum, hoistCtxt);
5983 // Hoist any expressions in "blk" that are invariant in loop "lnum" outside of "blk" and into a PreHead for loop "lnum".
// "blk" must be a definitely-executed block of the loop; blocks whose weight is
// below BB_UNITY_WEIGHT/10 are considered too cold to be worth hoisting from.
5984 void Compiler::optHoistLoopExprsForBlock(BasicBlock* blk, unsigned lnum, LoopHoistContext* hoistCtxt)
5986 LoopDsc* pLoopDsc = &optLoopTable[lnum];
// True only while processing the entry block and before the first potential
// memory side effect; optHoistLoopExprsForTree clears it through the pointer.
5987 bool firstBlockAndBeforeSideEffect = (blk == pLoopDsc->lpEntry);
5988 unsigned blkWeight = blk->getBBWeight(this);
5993 printf(" optHoistLoopExprsForBlock BB%02u (weight=%6s) of loop L%02u <BB%02u..BB%02u>, firstBlock is %s\n",
5994 blk->bbNum, refCntWtd2str(blkWeight), lnum, pLoopDsc->lpFirst->bbNum, pLoopDsc->lpBottom->bbNum,
5995 firstBlockAndBeforeSideEffect ? "true" : "false");
5996 if (blkWeight < (BB_UNITY_WEIGHT / 10))
5998 printf(" block weight is too small to perform hoisting.\n");
6003 if (blkWeight < (BB_UNITY_WEIGHT / 10))
6005 // Block weight is too small to perform hoisting.
// Walk each statement's tree; the recursive walk hoists hoistable subtrees,
// and if the whole statement tree is hoistable we try the top-level tree too.
6009 for (GenTreeStmt* stmt = blk->FirstNonPhiDef(); stmt; stmt = stmt->gtNextStmt)
6011 GenTreePtr stmtTree = stmt->gtStmtExpr;
6013 (void)optHoistLoopExprsForTree(stmtTree, lnum, hoistCtxt, &firstBlockAndBeforeSideEffect, &hoistable);
6016 // we will try to hoist the top-level stmtTree
6017 optHoistCandidate(stmtTree, lnum, hoistCtxt);
// optIsProfitableToHoistableTree: register-pressure heuristic deciding whether
// hoisting "tree" out of loop "lnum" is likely to pay off. Estimates the
// callee-saved (plus, when the loop has no calls, callee-trash) registers
// available for the tree's register file (FP vs integer), subtracts what
// earlier hoists in this loop already consumed, and then requires a higher
// gtCostEx the scarcer registers are.
6022 bool Compiler::optIsProfitableToHoistableTree(GenTreePtr tree, unsigned lnum)
6024 LoopDsc* pLoopDsc = &optLoopTable[lnum];
6026 bool loopContainsCall = pLoopDsc->lpContainsCall;
6029 int hoistedExprCount;
// Select the stats for the register file this tree's type lives in.
6033 if (varTypeIsFloating(tree->TypeGet()))
6035 hoistedExprCount = pLoopDsc->lpHoistedFPExprCount;
6036 loopVarCount = pLoopDsc->lpLoopVarFPCount;
6037 varInOutCount = pLoopDsc->lpVarInOutFPCount;
6039 availRegCount = CNT_CALLEE_SAVED_FLOAT;
6040 if (!loopContainsCall)
6042 availRegCount += CNT_CALLEE_TRASH_FLOAT - 1;
6045 // For ARM each double takes two FP registers
6046 // For now on ARM we won't track singles/doubles
6047 // and instead just assume that we always have doubles.
6054 hoistedExprCount = pLoopDsc->lpHoistedExprCount;
6055 loopVarCount = pLoopDsc->lpLoopVarCount;
6056 varInOutCount = pLoopDsc->lpVarInOutCount;
// One callee-saved integer register is reserved (not available for hoisting).
6058 availRegCount = CNT_CALLEE_SAVED - 1;
6059 if (!loopContainsCall)
6061 availRegCount += CNT_CALLEE_TRASH - 1;
6063 #ifndef _TARGET_64BIT_
6064 // For our 32-bit targets Long types take two registers.
6065 if (varTypeIsLong(tree->TypeGet()))
6067 availRegCount = (availRegCount + 1) / 2;
6072 // decrement the availRegCount by the count of expression that we have already hoisted.
6073 availRegCount -= hoistedExprCount;
6075 // the variables that are read/written inside the loop should
6076 // always be a subset of the InOut variables for the loop
6077 assert(loopVarCount <= varInOutCount);
6079 // When loopVarCount >= availRegCount we believe that all of the
6080 // available registers will get used to hold LclVars inside the loop.
6081 // This pessimistically assumes that each loopVar has a conflicting
6082 // lifetime with every other loopVar.
6083 // For this case we will hoist the expression only if is profitable
6084 // to place it in a stack home location (gtCostEx >= 2*IND_COST_EX)
6085 // as we believe it will be placed in the stack or one of the other
6086 // loopVars will be spilled into the stack
6088 if (loopVarCount >= availRegCount)
6090 // Don't hoist expressions that are not heavy: tree->gtCostEx < (2*IND_COST_EX)
6091 if (tree->gtCostEx < (2 * IND_COST_EX))
6097 // When varInOutCount < availRegCount we are know that there are
6098 // some available register(s) when we enter the loop body.
6099 // When varInOutCount == availRegCount there often will be a register
6100 // available when we enter the loop body, since a loop often defines a
6101 // LclVar on exit or there is often at least one LclVar that is worth
6102 // spilling to the stack to make way for this hoisted expression.
6103 // So we are willing hoist an expression with gtCostEx == MIN_CSE_COST
6105 if (varInOutCount > availRegCount)
6107 // Don't hoist expressions that barely meet CSE cost requirements: tree->gtCostEx == MIN_CSE_COST
6108 if (tree->gtCostEx <= MIN_CSE_COST + 1)
6118 // This function returns true if 'tree' is a loop invariant expression.
6119 // It also sets '*pHoistable' to true if 'tree' can be hoisted into a loop PreHeader block
// Recursive post-order walk: a tree is invariant only if all children are; a tree is
// hoistable if, in addition, it is a valid CSE candidate, a pure/hoistable helper call
// (if a call at all), its value number is invariant in the loop, and it cannot raise an
// exception after the first possible global side effect. Non-hoistable trees hoist any
// hoistable children instead (via optHoistCandidate). The walk also maintains
// *pFirstBlockAndBeforeSideEffect, clearing it at the first heap-mutating construct.
6121 bool Compiler::optHoistLoopExprsForTree(
6122 GenTreePtr tree, unsigned lnum, LoopHoistContext* hoistCtxt, bool* pFirstBlockAndBeforeSideEffect, bool* pHoistable)
6124 // First do the children.
6125 // We must keep track of whether each child node was hoistable or not
6127 unsigned nChildren = tree->NumChildren();
6128 bool childrenHoistable[GenTree::MAX_CHILDREN];
6130 // Initialize the array elements for childrenHoistable[] to false
6131 for (unsigned i = 0; i < nChildren; i++)
6133 childrenHoistable[i] = false;
6136 bool treeIsInvariant = true;
6137 for (unsigned childNum = 0; childNum < nChildren; childNum++)
6139 if (!optHoistLoopExprsForTree(tree->GetChild(childNum), lnum, hoistCtxt, pFirstBlockAndBeforeSideEffect,
6140 &childrenHoistable[childNum]))
6142 treeIsInvariant = false;
6146 // If all the children of "tree" are hoistable, then "tree" itself can be hoisted
6148 bool treeIsHoistable = treeIsInvariant;
6150 // But we must see if anything else prevents "tree" from being hoisted.
6152 if (treeIsInvariant)
6154 // Tree must be a suitable CSE candidate for us to be able to hoist it.
6155 treeIsHoistable = optIsCSEcandidate(tree);
6157 // If it's a call, it must be a helper call, and be pure.
6158 // Further, if it may run a cctor, it must be labeled as "Hoistable"
6159 // (meaning it won't run a cctor because the class is not precise-init).
6160 if (treeIsHoistable && tree->OperGet() == GT_CALL)
6162 GenTreeCall* call = tree->AsCall();
6163 if (call->gtCallType != CT_HELPER)
6165 treeIsHoistable = false;
6169 CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
6170 if (!s_helperCallProperties.IsPure(helpFunc))
6172 treeIsHoistable = false;
6174 else if (s_helperCallProperties.MayRunCctor(helpFunc) && (call->gtFlags & GTF_CALL_HOISTABLE) == 0)
6176 treeIsHoistable = false;
6181 if (treeIsHoistable)
6183 if (!(*pFirstBlockAndBeforeSideEffect))
6185 // For now, we give up on an expression that might raise an exception if it is after the
6186 // first possible global side effect (and we assume we're after that if we're not in the first block).
6187 // TODO-CQ: this is when we might do loop cloning.
6189 if ((tree->gtFlags & GTF_EXCEPT) != 0)
6191 treeIsHoistable = false;
6194 // Currently we must give up on reads from static variables (even if we are in the first block).
6196 if (tree->OperGet() == GT_CLS_VAR)
6198 // TODO-CQ: test that fails if we hoist GT_CLS_VAR: JIT\Directed\Languages\ComponentPascal\pi_r.exe
6200 treeIsHoistable = false;
6204 // Is the value of the whole tree loop invariant?
// Note: uses the liberal value number of the tree for the invariance query.
6206 optVNIsLoopInvariant(tree->gtVNPair.GetLiberal(), lnum, &hoistCtxt->m_curLoopVnInvariantCache);
6208 // Is the value of the whole tree loop invariant?
6209 if (!treeIsInvariant)
6211 treeIsHoistable = false;
6215 // Check if we need to set '*pFirstBlockAndBeforeSideEffect' to false.
6216 // If we encounter a tree with a call in it
6217 // or if we see an assignment to global we set it to false.
6219 // If we are already set to false then we can skip these checks
6221 if (*pFirstBlockAndBeforeSideEffect)
6223 // For this purpose, we only care about memory side effects. We assume that expressions will
6224 // be hoisted so that they are evaluated in the same order as they would have been in the loop,
6225 // and therefore throw exceptions in the same order. (So we don't use GTF_GLOBALLY_VISIBLE_SIDE_EFFECTS
6226 // here, since that includes exceptions.)
6229 // If it's a call, it must be a helper call that does not mutate the heap.
6230 // Further, if it may run a cctor, it must be labeled as "Hoistable"
6231 // (meaning it won't run a cctor because the class is not precise-init).
6232 GenTreeCall* call = tree->AsCall();
6233 if (call->gtCallType != CT_HELPER)
6235 *pFirstBlockAndBeforeSideEffect = false;
6239 CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
6240 if (s_helperCallProperties.MutatesHeap(helpFunc))
6242 *pFirstBlockAndBeforeSideEffect = false;
6244 else if (s_helperCallProperties.MayRunCctor(helpFunc) && (call->gtFlags & GTF_CALL_HOISTABLE) == 0)
6246 *pFirstBlockAndBeforeSideEffect = false;
6250 else if (tree->OperIsAssignment())
6252 // If the LHS of the assignment has a global reference, then assume it's a global side effect.
6253 GenTreePtr lhs = tree->gtOp.gtOp1;
6254 if (lhs->gtFlags & GTF_GLOB_REF)
6256 *pFirstBlockAndBeforeSideEffect = false;
6259 else if (tree->OperIsCopyBlkOp())
6261 GenTreePtr args = tree->gtOp.gtOp1;
6262 assert(args->OperGet() == GT_LIST);
6263 if (args->gtOp.gtOp1->gtFlags & GTF_GLOB_REF)
6265 *pFirstBlockAndBeforeSideEffect = false;
6270 // If this 'tree' is hoistable then we return and the caller will
6271 // decide to hoist it as part of larger hoistable expression.
6273 if (!treeIsHoistable)
6275 // We are not hoistable so we will now hoist any hoistable children.
6277 for (unsigned childNum = 0; childNum < nChildren; childNum++)
6279 if (childrenHoistable[childNum])
6281 // We can't hoist the LHS of an assignment, isn't a real use.
6282 if (childNum == 0 && (tree->OperIsAssignment()))
6287 GenTreePtr child = tree->GetChild(childNum);
6289 // We try to hoist this 'child' tree
6290 optHoistCandidate(child, lnum, hoistCtxt);
6295 *pHoistable = treeIsHoistable;
6296 return treeIsInvariant;
// optHoistCandidate: final gatekeeper before actually hoisting "tree" out of loop
// "lnum". Rejects trees that are invalid at the loop head, unprofitable, or whose
// value number was already hoisted in this loop or an enclosing one; otherwise
// performs the hoist (optPerformHoistExpr), updates the per-loop hoisted-expression
// counters, and records the tree's VN in the current loop's hoisted set.
6299 void Compiler::optHoistCandidate(GenTreePtr tree, unsigned lnum, LoopHoistContext* hoistCtxt)
6301 if (lnum == BasicBlock::NOT_IN_LOOP)
6303 // The hoisted expression isn't valid at any loop head so don't hoist this expression.
6307 // The outer loop also must be suitable for hoisting...
6308 if ((optLoopTable[lnum].lpFlags & LPFLG_HOISTABLE) == 0)
6313 // If the hoisted expression isn't valid at this loop head then break
6314 if (!optTreeIsValidAtLoopHead(tree, lnum))
6319 // It must pass the hoistable profitablity tests for this loop level
6320 if (!optIsProfitableToHoistableTree(tree, lnum))
// Deduplicate by liberal value number against hoists done in enclosing loops...
6326 if (hoistCtxt->m_hoistedInParentLoops.Lookup(tree->gtVNPair.GetLiberal(), &b))
6328 // already hoisted in a parent loop, so don't hoist this expression.
// ...and against hoists already performed in this loop.
6332 if (hoistCtxt->GetHoistedInCurLoop(this)->Lookup(tree->gtVNPair.GetLiberal(), &b))
6334 // already hoisted this expression in the current loop, so don't hoist this expression.
6338 // Expression can be hoisted
6339 optPerformHoistExpr(tree, lnum);
6341 // Increment lpHoistedExprCount or lpHoistedFPExprCount
6342 if (!varTypeIsFloating(tree->TypeGet()))
6344 optLoopTable[lnum].lpHoistedExprCount++;
6345 #ifndef _TARGET_64BIT_
6346 // For our 32-bit targets Long types take two registers.
6347 if (varTypeIsLong(tree->TypeGet()))
6349 optLoopTable[lnum].lpHoistedExprCount++;
6353 else // Floating point expr hoisted
6355 optLoopTable[lnum].lpHoistedFPExprCount++;
6358 // Record the hoisted expression in hoistCtxt
6359 hoistCtxt->GetHoistedInCurLoop(this)->Set(tree->gtVNPair.GetLiberal(), true);
// optVNIsLoopInvariant: returns true if the value number "vn" is invariant in loop
// "lnum". Constants are always invariant; phi definitions are invariant iff defined
// outside the loop; function VNs are invariant iff all their arguments are. Results
// are memoized in "loopVnInvariantCache" (which optHoistThisLoop clears per loop).
6362 bool Compiler::optVNIsLoopInvariant(ValueNum vn, unsigned lnum, VNToBoolMap* loopVnInvariantCache)
6364 // If it is not a VN, is not loop-invariant.
6365 if (vn == ValueNumStore::NoVN)
6370 // We'll always short-circuit constants.
6371 if (vnStore->IsVNConstant(vn) || vn == vnStore->VNForVoid())
6376 // If we've done this query previously, don't repeat.
6377 bool previousRes = false;
6378 if (loopVnInvariantCache->Lookup(vn, &previousRes))
6385 if (vnStore->GetVNFunc(vn, &funcApp))
6387 if (funcApp.m_func == VNF_PhiDef)
6389 // First, make sure it's a "proper" phi -- the definition is a Phi application.
6390 VNFuncApp phiDefValFuncApp;
6391 if (!vnStore->GetVNFunc(funcApp.m_args[2], &phiDefValFuncApp) || phiDefValFuncApp.m_func != VNF_Phi)
6393 // It's not *really* a definition, rather a pass-through of some other VN.
6394 // (This could occur, say if both sides of an if-then-else diamond made the
6395 // same assignment to a variable.)
6396 res = optVNIsLoopInvariant(funcApp.m_args[2], lnum, loopVnInvariantCache);
6400 // Is the definition within the loop? If so, is not loop-invariant.
6401 unsigned lclNum = funcApp.m_args[0];
6402 unsigned ssaNum = funcApp.m_args[1];
6403 LclSsaVarDsc* ssaDef = lvaTable[lclNum].GetPerSsaData(ssaNum);
6404 res = !optLoopContains(lnum, ssaDef->m_defLoc.m_blk->bbNatLoopNum);
6407 else if (funcApp.m_func == VNF_PhiHeapDef)
// Heap phi: arg 0 encodes the defining basic block as a constant pointer.
6409 BasicBlock* defnBlk = reinterpret_cast<BasicBlock*>(vnStore->ConstantValue<ssize_t>(funcApp.m_args[0]));
6410 res = !optLoopContains(lnum, defnBlk->bbNatLoopNum);
// Ordinary VN function: invariant only if every argument VN is invariant.
6414 for (unsigned i = 0; i < funcApp.m_arity; i++)
6416 // TODO-CQ: We need to either make sure that *all* VN functions
6417 // always take VN args, or else have a list of arg positions to exempt, as implicitly
6419 if (!optVNIsLoopInvariant(funcApp.m_args[i], lnum, loopVnInvariantCache))
6429 // Non-function "new, unique" VN's may be annotated with the loop nest where
6430 // their definition occurs.
6431 BasicBlock::loopNumber vnLoopNum = vnStore->LoopOfVN(vn);
// MAX_LOOP_NUM means "no loop annotation recorded" for this VN.
6433 if (vnLoopNum == MAX_LOOP_NUM)
6439 res = !optLoopContains(lnum, vnLoopNum);
// Memoize the answer before returning.
6443 loopVnInvariantCache->Set(vn, res);
// optTreeIsValidAtLoopHead: returns true if "tree" could legally be evaluated at
// the head of loop "lnum": every local it reads must be SSA-tracked with its SSA
// definition outside the loop, constants are always valid, and interior nodes are
// valid iff all their children are.
6447 bool Compiler::optTreeIsValidAtLoopHead(GenTreePtr tree, unsigned lnum)
6449 if (tree->OperIsLocal())
6451 GenTreeLclVarCommon* lclVar = tree->AsLclVarCommon();
6452 unsigned lclNum = lclVar->gtLclNum;
6454 // The lvlVar must be have an Ssa tracked lifetime
6455 if (fgExcludeFromSsa(lclNum))
6460 // If the loop does not contains the SSA def we can hoist it.
6461 if (!optLoopTable[lnum].lpContains(lvaTable[lclNum].GetPerSsaData(lclVar->GetSsaNum())->m_defLoc.m_blk))
6466 else if (tree->OperIsConst())
6470 else // If every one of the children nodes are valid at this Loop's Head.
6472 unsigned nChildren = tree->NumChildren();
6473 for (unsigned childNum = 0; childNum < nChildren; childNum++)
6475 if (!optTreeIsValidAtLoopHead(tree->GetChild(childNum), lnum))
6485 /*****************************************************************************
6487 * Creates a pre-header block for the given loop - a preheader is a BBJ_NONE
6488 * header. The pre-header will replace the current lpHead in the loop table.
6489 * The loop has to be a do-while loop. Thus, all blocks dominated by lpHead
6490 * will also be dominated by the loop-top, lpHead->bbNext.
// In addition to inserting the block, this rewires all non-loop predecessors of
// the loop top to target the pre-header, updates SSA phi predecessor blocks,
// extends the EH region, estimates the pre-header's weight from profile data
// when available, and marks sibling loops that shared the old head as also
// having this pre-header.
6494 void Compiler::fgCreateLoopPreHeader(unsigned lnum)
6496 LoopDsc* pLoopDsc = &optLoopTable[lnum];
6498 /* This loop has to be a "do-while" loop */
6500 assert(pLoopDsc->lpFlags & LPFLG_DO_WHILE);
6502 /* Have we already created a loop-preheader block? */
6504 if (pLoopDsc->lpFlags & LPFLG_HAS_PREHEAD)
6509 BasicBlock* head = pLoopDsc->lpHead;
6510 BasicBlock* top = pLoopDsc->lpTop;
6511 BasicBlock* entry = pLoopDsc->lpEntry;
6513 // if 'entry' and 'head' are in different try regions then we won't be able to hoist
6514 if (!BasicBlock::sameTryRegion(head, entry))
6519 // Ensure that lpHead always dominates lpEntry
6521 noway_assert(fgDominate(head, entry));
6523 /* Get hold of the first block of the loop body */
6525 assert(top == entry);
6527 /* Allocate a new basic block */
6529 BasicBlock* preHead = bbNewBasicBlock(BBJ_NONE);
6530 preHead->bbFlags |= BBF_INTERNAL | BBF_LOOP_PREHEADER;
6532 // Must set IL code offset
6533 preHead->bbCodeOffs = top->bbCodeOffs;
6535 // Set the default value of the preHead weight in case we don't have
6536 // valid profile data and since this blocks weight is just an estimate
6537 // we clear any BBF_PROF_WEIGHT flag that we may have picked up from head.
6539 preHead->inheritWeight(head);
6540 preHead->bbFlags &= ~BBF_PROF_WEIGHT;
6545 printf("\nCreated PreHeader (BB%02u) for loop L%02u (BB%02u - BB%02u), with weight = %s\n", preHead->bbNum,
6546 lnum, top->bbNum, pLoopDsc->lpBottom->bbNum, refCntWtd2str(preHead->getBBWeight(this)));
6550 // The preheader block is part of the containing loop (if any).
6551 preHead->bbNatLoopNum = pLoopDsc->lpParent;
// With profile data and a conditional head, refine the pre-header's weight from
// how often the loop is actually entered (vs. skipped).
6553 if (fgIsUsingProfileWeights() && (head->bbJumpKind == BBJ_COND))
6555 if ((head->bbWeight == 0) || (head->bbNext->bbWeight == 0))
6557 preHead->bbWeight = 0;
6558 preHead->bbFlags |= BBF_RUN_RARELY;
6562 bool allValidProfileWeights = ((head->bbFlags & BBF_PROF_WEIGHT) != 0) &&
6563 ((head->bbJumpDest->bbFlags & BBF_PROF_WEIGHT) != 0) &&
6564 ((head->bbNext->bbFlags & BBF_PROF_WEIGHT) != 0);
6566 if (allValidProfileWeights)
6568 double loopEnteredCount;
6569 double loopSkippedCount;
6571 if (fgHaveValidEdgeWeights)
6573 flowList* edgeToNext = fgGetPredForBlock(head->bbNext, head);
6574 flowList* edgeToJump = fgGetPredForBlock(head->bbJumpDest, head);
6575 noway_assert(edgeToNext != nullptr);
6576 noway_assert(edgeToJump != nullptr);
// Use the midpoint of each edge-weight range as the estimate.
6579 ((double)edgeToNext->flEdgeWeightMin + (double)edgeToNext->flEdgeWeightMax) / 2.0;
6581 ((double)edgeToJump->flEdgeWeightMin + (double)edgeToJump->flEdgeWeightMax) / 2.0;
6585 loopEnteredCount = (double)head->bbNext->bbWeight;
6586 loopSkippedCount = (double)head->bbJumpDest->bbWeight;
6589 double loopTakenRatio = loopEnteredCount / (loopEnteredCount + loopSkippedCount);
6591 // Calculate a good approximation of the preHead's block weight
6592 unsigned preHeadWeight = (unsigned)(((double)head->bbWeight * loopTakenRatio) + 0.5);
6593 preHead->setBBWeight(max(preHeadWeight, 1));
6594 noway_assert(!preHead->isRunRarely());
6599 // Link in the preHead block.
6600 fgInsertBBbefore(top, preHead);
6602 // Ideally we would re-run SSA and VN if we optimized by doing loop hoisting.
6603 // However, that is too expensive at this point. Instead, we update the phi
6604 // node block references, if we created pre-header block due to hoisting.
6605 // This is sufficient because any definition participating in SSA that flowed
6606 // into the phi via the loop header block will now flow through the preheader
6607 // block from the header block.
6609 for (GenTreePtr stmt = top->bbTreeList; stmt; stmt = stmt->gtNext)
6611 GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
// Phi definitions appear as assignments whose RHS is a GT_PHI at the top of the block.
6612 if (tree->OperGet() != GT_ASG)
6616 GenTreePtr op2 = tree->gtGetOp2();
6617 if (op2->OperGet() != GT_PHI)
6621 GenTreeArgList* args = op2->gtGetOp1()->AsArgList();
6622 while (args != nullptr)
6624 GenTreePhiArg* phiArg = args->Current()->AsPhiArg();
6625 if (phiArg->gtPredBB == head)
6627 phiArg->gtPredBB = preHead;
6629 args = args->Rest();
6633 // The handler can't begin at the top of the loop. If it did, it would be incorrect
6634 // to set the handler index on the pre header without updating the exception table.
6635 noway_assert(!top->hasHndIndex() || fgFirstBlockOfHandler(top) != top);
6637 // Update the EH table to make the hoisted block part of the loop's EH block.
6638 fgExtendEHRegionBefore(top);
6640 // TODO-CQ: set dominators for this block, to allow loop optimizations requiring them
6641 // (e.g: hoisting expression in a loop with the same 'head' as this one)
6643 /* Update the loop entry */
6645 pLoopDsc->lpHead = preHead;
6646 pLoopDsc->lpFlags |= LPFLG_HAS_PREHEAD;
6648 /* The new block becomes the 'head' of the loop - update bbRefs and bbPreds
6649 All predecessors of 'beg', (which is the entry in the loop)
6650 now have to jump to 'preHead', unless they are dominated by 'head' */
6652 preHead->bbRefs = 0;
6653 fgAddRefPred(preHead, head);
6654 bool checkNestedLoops = false;
// Retarget each predecessor of 'top' that lies outside the loop to 'preHead'.
6656 for (flowList* pred = top->bbPreds; pred; pred = pred->flNext)
6658 BasicBlock* predBlock = pred->flBlock;
6660 if (fgDominate(top, predBlock))
6662 // note: if 'top' dominates predBlock, 'head' dominates predBlock too
6663 // (we know that 'head' dominates 'top'), but using 'top' instead of
6664 // 'head' in the test allows us to not enter here if 'predBlock == head'
// A back edge from a block other than this loop's bottom suggests another
// (nested/sibling) loop sharing the same entry; handled after the switch.
6666 if (predBlock != pLoopDsc->lpBottom)
6668 noway_assert(predBlock != head);
6669 checkNestedLoops = true;
6674 switch (predBlock->bbJumpKind)
6677 noway_assert(predBlock == head);
6681 if (predBlock == head)
6683 noway_assert(predBlock->bbJumpDest != top);
6689 case BBJ_EHCATCHRET:
6690 noway_assert(predBlock->bbJumpDest == top);
6691 predBlock->bbJumpDest = preHead;
6692 preHead->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
6694 if (predBlock == head)
6696 // This is essentially the same case of predBlock being a BBJ_NONE. We may not be
6697 // able to make this a BBJ_NONE if it's an internal block (for example, a leave).
6698 // Just break, pred will be removed after switch.
6702 fgRemoveRefPred(top, predBlock);
6703 fgAddRefPred(preHead, predBlock);
// Switch predecessor: rewrite every jump-table slot that targeted 'top'.
6709 jumpCnt = predBlock->bbJumpSwt->bbsCount;
6710 BasicBlock** jumpTab;
6711 jumpTab = predBlock->bbJumpSwt->bbsDstTab;
6716 if ((*jumpTab) == top)
6718 (*jumpTab) = preHead;
6720 fgRemoveRefPred(top, predBlock);
6721 fgAddRefPred(preHead, predBlock);
6722 preHead->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
6724 } while (++jumpTab, --jumpCnt);
6727 noway_assert(!"Unexpected bbJumpKind");
// Finally: 'top' is now reached (fall-through) from 'preHead' instead of 'head'.
6732 noway_assert(!fgGetPredForBlock(top, preHead));
6733 fgRemoveRefPred(top, head);
6734 fgAddRefPred(top, preHead);
6737 If we found at least one back-edge in the flowgraph pointing to the top/entry of the loop
6738 (other than the back-edge of the loop we are considering) then we likely have nested
6739 do-while loops with the same entry block and inserting the preheader block changes the head
6740 of all the nested loops. Now we will update this piece of information in the loop table, and
6741 mark all nested loops as having a preheader (the preheader block can be shared among all nested
6742 do-while loops with the same entry block).
6744 if (checkNestedLoops)
6746 for (unsigned l = 0; l < optLoopCount; l++)
6748 if (optLoopTable[l].lpHead == head)
6750 noway_assert(l != lnum); // pLoopDsc->lpHead was already changed from 'head' to 'preHead'
6751 noway_assert(optLoopTable[l].lpEntry == top);
6752 optUpdateLoopHead(l, optLoopTable[l].lpHead, preHead);
6753 optLoopTable[l].lpFlags |= LPFLG_HAS_PREHEAD;
6757 printf("Same PreHeader (BB%02u) can be used for loop L%02u (BB%02u - BB%02u)\n\n", preHead->bbNum,
6758 l, top->bbNum, optLoopTable[l].lpBottom->bbNum);
6766 bool Compiler::optBlockIsLoopEntry(BasicBlock* blk, unsigned* pLnum)
6768 unsigned lnum = blk->bbNatLoopNum;
6769 while (lnum != BasicBlock::NOT_IN_LOOP)
6771 if (optLoopTable[lnum].lpEntry == blk)
6776 lnum = optLoopTable[lnum].lpParent;
6781 void Compiler::optComputeLoopSideEffects()
6784 for (lnum = 0; lnum < optLoopCount; lnum++)
6786 VarSetOps::AssignNoCopy(this, optLoopTable[lnum].lpVarInOut, VarSetOps::MakeEmpty(this));
6787 VarSetOps::AssignNoCopy(this, optLoopTable[lnum].lpVarUseDef, VarSetOps::MakeEmpty(this));
6788 optLoopTable[lnum].lpContainsCall = false;
6791 for (lnum = 0; lnum < optLoopCount; lnum++)
6793 if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED)
6798 if (optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP)
6799 { // Is outermost...
6800 optComputeLoopNestSideEffects(lnum);
6804 VarSetOps::AssignNoCopy(this, lvaFloatVars, VarSetOps::MakeEmpty(this));
6805 #ifndef _TARGET_64BIT_
6806 VarSetOps::AssignNoCopy(this, lvaLongVars, VarSetOps::MakeEmpty(this));
6809 for (unsigned i = 0; i < lvaCount; i++)
6811 LclVarDsc* varDsc = &lvaTable[i];
6812 if (varDsc->lvTracked)
6814 if (varTypeIsFloating(varDsc->lvType))
6816 VarSetOps::AddElemD(this, lvaFloatVars, varDsc->lvVarIndex);
6818 #ifndef _TARGET_64BIT_
6819 else if (varTypeIsLong(varDsc->lvType))
6821 VarSetOps::AddElemD(this, lvaLongVars, varDsc->lvVarIndex);
6828 void Compiler::optComputeLoopNestSideEffects(unsigned lnum)
6830 assert(optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP); // Requires: lnum is outermost.
6831 BasicBlock* botNext = optLoopTable[lnum].lpBottom->bbNext;
6832 for (BasicBlock* bbInLoop = optLoopTable[lnum].lpFirst; bbInLoop != botNext; bbInLoop = bbInLoop->bbNext)
6834 optComputeLoopSideEffectsOfBlock(bbInLoop);
6838 void Compiler::optComputeLoopSideEffectsOfBlock(BasicBlock* blk)
6840 unsigned mostNestedLoop = blk->bbNatLoopNum;
6841 assert(mostNestedLoop != BasicBlock::NOT_IN_LOOP);
6843 AddVariableLivenessAllContainingLoops(mostNestedLoop, blk);
6845 bool heapHavoc = false; // True ==> there's a call or a memory store that has arbitrary heap effects.
6847 // Now iterate over the remaining statements, and their trees.
6848 for (GenTreePtr stmts = blk->FirstNonPhiDef(); (stmts != nullptr); stmts = stmts->gtNext)
6850 for (GenTreePtr tree = stmts->gtStmt.gtStmtList; (tree != nullptr); tree = tree->gtNext)
6852 genTreeOps oper = tree->OperGet();
6854 // Even after we set heapHavoc we still may want to know if a loop contains calls
6857 if (oper == GT_CALL)
6859 // Record that this loop contains a call
6860 AddContainsCallAllContainingLoops(mostNestedLoop);
6863 // If we just set lpContainsCall or it was previously set
6864 if (optLoopTable[mostNestedLoop].lpContainsCall)
6866 // We can early exit after both heapHavoc and lpContainsCall are both set to true.
6870 // We are just looking for GT_CALL nodes after heapHavoc was set.
6874 // otherwise heapHavoc is not set
6877 // This body is a distillation of the heap-side effect code of value numbering.
6878 // We also do a very limited analysis if byref PtrTo values, to cover some cases
6879 // that the compiler creates.
6881 if (GenTree::OperIsAssignment(oper))
6883 GenTreePtr lhs = tree->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true);
6885 if (lhs->OperGet() == GT_IND)
6887 GenTreePtr arg = lhs->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true);
6888 FieldSeqNode* fldSeqArrElem = nullptr;
6890 if ((tree->gtFlags & GTF_IND_VOLATILE) != 0)
6898 if (arg->TypeGet() == TYP_BYREF && arg->OperGet() == GT_LCL_VAR)
6900 // If it's a local byref for which we recorded a value number, use that...
6901 GenTreeLclVar* argLcl = arg->AsLclVar();
6902 if (!fgExcludeFromSsa(argLcl->GetLclNum()))
6905 lvaTable[argLcl->GetLclNum()].GetPerSsaData(argLcl->GetSsaNum())->m_vnPair.GetLiberal();
6907 if (argVN != ValueNumStore::NoVN && vnStore->GetVNFunc(argVN, &funcApp) &&
6908 funcApp.m_func == VNF_PtrToArrElem)
6910 assert(vnStore->IsVNHandle(funcApp.m_args[0]));
6911 CORINFO_CLASS_HANDLE elemType =
6912 CORINFO_CLASS_HANDLE(vnStore->ConstantValue<size_t>(funcApp.m_args[0]));
6913 AddModifiedElemTypeAllContainingLoops(mostNestedLoop, elemType);
6914 // Don't set heapHavoc below.
6921 // Is the LHS an array index expression?
6922 else if (lhs->ParseArrayElemForm(this, &arrInfo, &fldSeqArrElem))
6924 // We actually ignore "fldSeq" -- any modification to an S[], at any
6925 // field of "S", will lose all information about the array type.
6926 CORINFO_CLASS_HANDLE elemTypeEq = EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType);
6927 AddModifiedElemTypeAllContainingLoops(mostNestedLoop, elemTypeEq);
6931 // We are only interested in IsFieldAddr()'s fldSeq out parameter.
6933 GenTreePtr obj = nullptr; // unused
6934 GenTreePtr staticOffset = nullptr; // unused
6935 FieldSeqNode* fldSeq = nullptr;
6937 if (arg->IsFieldAddr(this, &obj, &staticOffset, &fldSeq) &&
6938 (fldSeq != FieldSeqStore::NotAField()))
6940 // Get the first (object) field from field seq. Heap[field] will yield the "field map".
6941 assert(fldSeq != nullptr);
6942 if (fldSeq->IsFirstElemFieldSeq())
6944 fldSeq = fldSeq->m_next;
6945 assert(fldSeq != nullptr);
6948 AddModifiedFieldAllContainingLoops(mostNestedLoop, fldSeq->m_fieldHnd);
6956 else if (lhs->OperIsBlk())
6958 GenTreeLclVarCommon* lclVarTree;
6960 if (!tree->DefinesLocal(this, &lclVarTree, &isEntire))
6962 // For now, assume arbitrary side effects on the heap...
6966 else if (lhs->OperGet() == GT_CLS_VAR)
6968 AddModifiedFieldAllContainingLoops(mostNestedLoop, lhs->gtClsVar.gtClsVarHnd);
6970 // Otherwise, must be local lhs form. I should assert that.
6971 else if (lhs->OperGet() == GT_LCL_VAR)
6973 GenTreeLclVar* lhsLcl = lhs->AsLclVar();
6974 GenTreePtr rhs = tree->gtOp.gtOp2;
6975 ValueNum rhsVN = rhs->gtVNPair.GetLiberal();
6976 // If we gave the RHS a value number, propagate it.
6977 if (rhsVN != ValueNumStore::NoVN)
6979 rhsVN = vnStore->VNNormVal(rhsVN);
6980 if (!fgExcludeFromSsa(lhsLcl->GetLclNum()))
6982 lvaTable[lhsLcl->GetLclNum()]
6983 .GetPerSsaData(lhsLcl->GetSsaNum())
6984 ->m_vnPair.SetLiberal(rhsVN);
6989 else // not GenTree::OperIsAssignment(oper)
6994 tree->gtVNPair = tree->gtOp.gtOp2->gtVNPair;
6998 // Is it an addr of a array index expression?
7000 GenTreePtr addrArg = tree->gtOp.gtOp1;
7001 if (addrArg->OperGet() == GT_IND)
7003 // Is the LHS an array index expression?
7004 if (addrArg->gtFlags & GTF_IND_ARR_INDEX)
7007 bool b = GetArrayInfoMap()->Lookup(addrArg, &arrInfo);
7009 CORINFO_CLASS_HANDLE elemType =
7010 EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType);
7011 tree->gtVNPair.SetBoth(
7012 vnStore->VNForFunc(TYP_BYREF, VNF_PtrToArrElem,
7013 vnStore->VNForHandle(ssize_t(elemType), GTF_ICON_CLASS_HDL),
7014 // The rest are dummy arguments.
7015 vnStore->VNForNull(), vnStore->VNForNull(),
7016 vnStore->VNForNull()));
7022 case GT_LOCKADD: // Binop
7023 case GT_XADD: // Binop
7024 case GT_XCHG: // Binop
7025 case GT_CMPXCHG: // Specialop
7033 GenTreeCall* call = tree->AsCall();
7035 // Record that this loop contains a call
7036 AddContainsCallAllContainingLoops(mostNestedLoop);
7038 if (call->gtCallType == CT_HELPER)
7040 CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
7041 if (s_helperCallProperties.MutatesHeap(helpFunc))
7045 else if (s_helperCallProperties.MayRunCctor(helpFunc))
7047 // If the call is labeled as "Hoistable", then we've checked the
7048 // class that would be constructed, and it is not precise-init, so
7049 // the cctor will not be run by this call. Otherwise, it might be,
7050 // and might have arbitrary side effects.
7051 if ((tree->gtFlags & GTF_CALL_HOISTABLE) == 0)
7065 // All other gtOper node kinds, leave 'heapHavoc' unchanged (i.e. false)
7074 // Record that all loops containing this block have heap havoc effects.
7075 unsigned lnum = mostNestedLoop;
7076 while (lnum != BasicBlock::NOT_IN_LOOP)
7078 optLoopTable[lnum].lpLoopHasHeapHavoc = true;
7079 lnum = optLoopTable[lnum].lpParent;
7084 // Marks the containsCall information to "lnum" and any parent loops.
7085 void Compiler::AddContainsCallAllContainingLoops(unsigned lnum)
7087 assert(0 <= lnum && lnum < optLoopCount);
7088 while (lnum != BasicBlock::NOT_IN_LOOP)
7090 optLoopTable[lnum].lpContainsCall = true;
7091 lnum = optLoopTable[lnum].lpParent;
7095 // Adds the variable liveness information for 'blk' to 'this' LoopDsc
7096 void Compiler::LoopDsc::AddVariableLiveness(Compiler* comp, BasicBlock* blk)
7098 VarSetOps::UnionD(comp, this->lpVarInOut, blk->bbLiveIn);
7099 VarSetOps::UnionD(comp, this->lpVarInOut, blk->bbLiveOut);
7101 VarSetOps::UnionD(comp, this->lpVarUseDef, blk->bbVarUse);
7102 VarSetOps::UnionD(comp, this->lpVarUseDef, blk->bbVarDef);
7105 // Adds the variable liveness information for 'blk' to "lnum" and any parent loops.
7106 void Compiler::AddVariableLivenessAllContainingLoops(unsigned lnum, BasicBlock* blk)
7108 assert(0 <= lnum && lnum < optLoopCount);
7109 while (lnum != BasicBlock::NOT_IN_LOOP)
7111 optLoopTable[lnum].AddVariableLiveness(this, blk);
7112 lnum = optLoopTable[lnum].lpParent;
7116 // Adds "fldHnd" to the set of modified fields of "lnum" and any parent loops.
7117 void Compiler::AddModifiedFieldAllContainingLoops(unsigned lnum, CORINFO_FIELD_HANDLE fldHnd)
7119 assert(0 <= lnum && lnum < optLoopCount);
7120 while (lnum != BasicBlock::NOT_IN_LOOP)
7122 optLoopTable[lnum].AddModifiedField(this, fldHnd);
7123 lnum = optLoopTable[lnum].lpParent;
7127 // Adds "elemType" to the set of modified array element types of "lnum" and any parent loops.
7128 void Compiler::AddModifiedElemTypeAllContainingLoops(unsigned lnum, CORINFO_CLASS_HANDLE elemClsHnd)
7130 assert(0 <= lnum && lnum < optLoopCount);
7131 while (lnum != BasicBlock::NOT_IN_LOOP)
7133 optLoopTable[lnum].AddModifiedElemType(this, elemClsHnd);
7134 lnum = optLoopTable[lnum].lpParent;
7138 /*****************************************************************************
7140 * Helper passed to Compiler::fgWalkAllTreesPre() to decrement the LclVar usage counts
 * The 'keepList' is either a single tree or a list of trees that are formed by
7142 * one or more GT_COMMA nodes. It is the kept side-effects as returned by the
7143 * gtExtractSideEffList method.
7147 Compiler::fgWalkResult Compiler::optRemoveTreeVisitor(GenTreePtr* pTree, fgWalkData* data)
7149 GenTreePtr tree = *pTree;
7150 Compiler* comp = data->compiler;
7151 GenTreePtr keepList = (GenTreePtr)(data->pCallbackData);
7153 // We may have a non-NULL side effect list that is being kept
7157 GenTreePtr keptTree = keepList;
7158 while (keptTree->OperGet() == GT_COMMA)
7160 assert(keptTree->OperKind() & GTK_SMPOP);
7161 GenTreePtr op1 = keptTree->gtOp.gtOp1;
7162 GenTreePtr op2 = keptTree->gtGetOp2();
7164 // For the GT_COMMA case the op1 is part of the orginal CSE tree
7165 // that is being kept because it contains some side-effect
7169 // This tree and all of its sub trees are being kept.
7170 return WALK_SKIP_SUBTREES;
7173 // For the GT_COMMA case the op2 are the remaining side-effects of the orginal CSE tree
7174 // which can again be another GT_COMMA or the final side-effect part
7178 if (tree == keptTree)
7180 // This tree and all of its sub trees are being kept.
7181 return WALK_SKIP_SUBTREES;
7185 // This node is being removed from the graph of GenTreePtr
7187 // Look for any local variable references
7189 if (tree->gtOper == GT_LCL_VAR && comp->lvaLocalVarRefCounted)
7194 /* This variable ref is going away, decrease its ref counts */
7196 lclNum = tree->gtLclVarCommon.gtLclNum;
7197 assert(lclNum < comp->lvaCount);
7198 varDsc = comp->lvaTable + lclNum;
7200 // make sure it's been initialized
7201 assert(comp->compCurBB != nullptr);
7202 assert(comp->compCurBB->bbWeight <= BB_MAX_WEIGHT);
7204 /* Decrement its lvRefCnt and lvRefCntWtd */
7206 // Use getBBWeight to determine the proper block weight.
7207 // This impacts the block weights when we have IBC data.
7208 varDsc->decRefCnts(comp->compCurBB->getBBWeight(comp), comp);
7211 return WALK_CONTINUE;
7214 /*****************************************************************************
7216 * Routine called to decrement the LclVar ref counts when removing a tree
7217 * during the remove RangeCheck phase.
7218 * This method will decrement the refcounts for any LclVars used below 'deadTree',
7219 * unless the node is found in the 'keepList' (which are saved side effects)
7220 * The keepList is communicated using the walkData.pCallbackData field
7221 * Also the compCurBB must be set to the current BasicBlock which contains
7222 * 'deadTree' as we need to fetch the block weight when decrementing the ref counts.
7225 void Compiler::optRemoveTree(GenTreePtr deadTree, GenTreePtr keepList)
7227 // We communicate this value using the walkData.pCallbackData field
7229 fgWalkTreePre(&deadTree, optRemoveTreeVisitor, (void*)keepList);
7232 /*****************************************************************************
7234 * Given an array index node, mark it as not needing a range check.
7237 void Compiler::optRemoveRangeCheck(
7238 GenTreePtr tree, GenTreePtr stmt, bool updateCSEcounts, unsigned sideEffFlags, bool forceRemove)
7254 noway_assert(!"can't remove range checks without REARRANGE_ADDS right now");
7257 noway_assert(stmt->gtOper == GT_STMT);
7258 noway_assert(tree->gtOper == GT_COMMA);
7259 noway_assert(tree->gtOp.gtOp1->gtOper == GT_ARR_BOUNDS_CHECK);
7260 noway_assert(forceRemove || optIsRangeCheckRemovable(tree->gtOp.gtOp1));
7262 GenTreeBoundsChk* bndsChk = tree->gtOp.gtOp1->AsBoundsChk();
7267 printf("Before optRemoveRangeCheck:\n");
7272 GenTreePtr sideEffList = nullptr;
7275 gtExtractSideEffList(tree->gtOp.gtOp1, &sideEffList, sideEffFlags);
7278 // Decrement the ref counts for any LclVars that are being deleted
7280 optRemoveTree(tree->gtOp.gtOp1, sideEffList);
7282 // Just replace the bndsChk with a NOP as an operand to the GT_COMMA, if there are no side effects.
7283 tree->gtOp.gtOp1 = (sideEffList != nullptr) ? sideEffList : gtNewNothingNode();
7285 // TODO-CQ: We should also remove the GT_COMMA, but in any case we can no longer CSE the GT_COMMA.
7286 tree->gtFlags |= GTF_DONT_CSE;
7288 /* Recalculate the gtCostSz, etc... */
7289 gtSetStmtInfo(stmt);
7291 /* Re-thread the nodes if necessary */
7292 if (fgStmtListThreaded)
7300 printf("After optRemoveRangeCheck:\n");
7306 /*****************************************************************************
7307 * Return the scale in an array reference, given a pointer to the
7308 * multiplication node.
7311 ssize_t Compiler::optGetArrayRefScaleAndIndex(GenTreePtr mul, GenTreePtr* pIndex DEBUGARG(bool bRngChk))
7314 assert(mul->gtOper == GT_MUL || mul->gtOper == GT_LSH);
7315 assert(mul->gtOp.gtOp2->IsCnsIntOrI());
7317 ssize_t scale = mul->gtOp.gtOp2->gtIntConCommon.IconValue();
7319 if (mul->gtOper == GT_LSH)
7321 scale = ((ssize_t)1) << scale;
7324 GenTreePtr index = mul->gtOp.gtOp1;
7326 if (index->gtOper == GT_MUL && index->gtOp.gtOp2->IsCnsIntOrI())
7328 // case of two cascading multiplications for constant int (e.g. * 20 morphed to * 5 * 4):
7329 // When index->gtOper is GT_MUL and index->gtOp.gtOp2->gtOper is GT_CNS_INT (i.e. * 5),
7330 // we can bump up the scale from 4 to 5*4, and then change index to index->gtOp.gtOp1.
7331 // Otherwise, we cannot optimize it. We will simply keep the original scale and index.
7332 scale *= index->gtOp.gtOp2->gtIntConCommon.IconValue();
7333 index = index->gtOp.gtOp1;
7336 assert(!bRngChk || index->gtOper != GT_COMMA);
7346 /*****************************************************************************
7347 * Find the last assignment to of the local variable in the block. Return
7348 * RHS or NULL. If any local variable in the RHS has been killed in
7349 * intervening code, return NULL. If the variable being searched for is killed
7350 * in the intervening code, return NULL.
7354 GenTreePtr Compiler::optFindLocalInit(BasicBlock* block,
7356 VARSET_TP* pKilledInOut,
7357 bool* pLhsRhsKilledAfterInit)
7359 assert(pKilledInOut);
7360 assert(pLhsRhsKilledAfterInit);
7362 *pLhsRhsKilledAfterInit = false;
7364 unsigned LclNum = local->gtLclVarCommon.gtLclNum;
7366 GenTreePtr list = block->bbTreeList;
7367 if (list == nullptr)
7372 GenTreePtr rhs = nullptr;
7373 GenTreePtr stmt = list;
7376 stmt = stmt->gtPrev;
7377 if (stmt == nullptr)
7382 GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
7383 // If we encounter an assignment to a local variable,
7384 if ((tree->OperKind() & GTK_ASGOP) && tree->gtOp.gtOp1->gtOper == GT_LCL_VAR)
7386 // And the assigned variable equals the input local,
7387 if (tree->gtOp.gtOp1->gtLclVarCommon.gtLclNum == LclNum)
7389 // If the assignment is '=' and it is not a conditional, then return rhs.
7390 if (tree->gtOper == GT_ASG && !(tree->gtFlags & GTF_COLON_COND))
7392 rhs = tree->gtOp.gtOp2;
7394 // If the assignment is 'op=' or a conditional equal, then the search ends here,
7395 // as we found a kill to the input local.
7398 *pLhsRhsKilledAfterInit = true;
7399 assert(rhs == nullptr);
7405 LclVarDsc* varDsc = optIsTrackedLocal(tree->gtOp.gtOp1);
7406 if (varDsc == nullptr)
7410 VarSetOps::AddElemD(this, *pKilledInOut, varDsc->lvVarIndex);
7413 } while (stmt != list);
7420 // If any local in the RHS is killed in intervening code, or RHS has an indirection, return NULL.
7421 varRefKinds rhsRefs = VR_NONE;
7422 VARSET_TP VARSET_INIT_NOCOPY(rhsLocals, VarSetOps::UninitVal());
7423 bool b = lvaLclVarRefs(rhs, nullptr, &rhsRefs, &rhsLocals);
7424 if (!b || !VarSetOps::IsEmptyIntersection(this, rhsLocals, *pKilledInOut) || (rhsRefs != VR_NONE))
7426 // If RHS has been indirectly referenced, consider it a write and a kill.
7427 *pLhsRhsKilledAfterInit = true;
7434 /*****************************************************************************
 * Return true if "op1" is guaranteed to be less than or equal to "op2".
7441 bool Compiler::optIsNoMore(GenTreePtr op1, GenTreePtr op2, int add1, int add2)
7443 if (op1->gtOper == GT_CNS_INT && op2->gtOper == GT_CNS_INT)
7445 add1 += op1->gtIntCon.gtIconVal;
7446 add2 += op2->gtIntCon.gtIconVal;
7450 /* Check for +/- constant on either operand */
7452 if (op1->gtOper == GT_ADD && op1->gtOp.gtOp2->gtOper == GT_CNS_INT)
7454 add1 += op1->gtOp.gtOp2->gtIntCon.gtIconVal;
7455 op1 = op1->gtOp.gtOp1;
7458 if (op2->gtOper == GT_ADD && op2->gtOp.gtOp2->gtOper == GT_CNS_INT)
7460 add2 += op2->gtOp.gtOp2->gtIntCon.gtIconVal;
7461 op2 = op2->gtOp.gtOp1;
7464 /* We only allow local variable references */
7466 if (op1->gtOper != GT_LCL_VAR)
7468 if (op2->gtOper != GT_LCL_VAR)
7470 if (op1->gtLclVarCommon.gtLclNum != op2->gtLclVarCommon.gtLclNum)
7473 /* NOTE: Caller ensures that this variable has only one def */
7475 // printf("limit [%d]:\n", add1); gtDispTree(op1);
7476 // printf("size [%d]:\n", add2); gtDispTree(op2);
7480 return (bool)(add1 <= add2);
7485 //------------------------------------------------------------------------------
7486 // optObtainLoopCloningOpts: Identify optimization candidates and update
7487 // the "context" for array optimizations.
7490 // context - data structure where all loop cloning info is kept. The
7491 // optInfo fields of the context are updated with the
7492 // identified optimization candidates.
7494 void Compiler::optObtainLoopCloningOpts(LoopCloneContext* context)
7496 for (unsigned i = 0; i < optLoopCount; i++)
7498 JITDUMP("Considering loop %d to clone for optimizations.\n", i);
7499 if (optIsLoopClonable(i))
7501 if (!(optLoopTable[i].lpFlags & LPFLG_REMOVED))
7503 optIdentifyLoopOptInfo(i, context);
7506 JITDUMP("------------------------------------------------------------\n");
7511 //------------------------------------------------------------------------
// optIdentifyLoopOptInfo: Identify loop optimization candidates and also
7513 // check if the loop is suitable for the optimizations performed.
7516 // loopNum - the current loop index for which conditions are derived.
7517 // context - data structure where all loop cloning candidates will be
7521 // If the loop is not suitable for the optimizations, return false - context
7522 // should not contain any optimization candidate for the loop if false.
7523 // Else return true.
7526 // Check if the loop is well formed for this optimization and identify the
7527 // optimization candidates and update the "context" parameter with all the
7528 // contextual information necessary to perform the optimization later.
7530 bool Compiler::optIdentifyLoopOptInfo(unsigned loopNum, LoopCloneContext* context)
7532 noway_assert(loopNum < optLoopCount);
7534 LoopDsc* pLoop = &optLoopTable[loopNum];
7536 if (!(pLoop->lpFlags & LPFLG_ITER))
7538 JITDUMP("> No iter flag on loop %d.\n", loopNum);
7542 unsigned ivLclNum = pLoop->lpIterVar();
7543 if (lvaVarAddrExposed(ivLclNum))
7545 JITDUMP("> Rejected V%02u as iter var because is address-exposed.\n", ivLclNum);
7549 BasicBlock* head = pLoop->lpHead;
7550 BasicBlock* end = pLoop->lpBottom;
7551 BasicBlock* beg = head->bbNext;
7553 if (end->bbJumpKind != BBJ_COND)
7555 JITDUMP("> Couldn't find termination test.\n");
7559 if (end->bbJumpDest != beg)
7561 JITDUMP("> Branch at loop 'end' not looping to 'begin'.\n");
7565 // TODO-CQ: CLONE: Mark increasing or decreasing loops.
7566 if ((pLoop->lpIterOper() != GT_ASG_ADD && pLoop->lpIterOper() != GT_ADD) || (pLoop->lpIterConst() != 1))
7568 JITDUMP("> Loop iteration operator not matching\n");
7572 if ((pLoop->lpFlags & LPFLG_CONST_LIMIT) == 0 && (pLoop->lpFlags & LPFLG_VAR_LIMIT) == 0 &&
7573 (pLoop->lpFlags & LPFLG_ARRLEN_LIMIT) == 0)
7575 JITDUMP("> Loop limit is neither constant, variable or array length\n");
7579 if (!(((pLoop->lpTestOper() == GT_LT || pLoop->lpTestOper() == GT_LE) &&
7580 (pLoop->lpIterOper() == GT_ADD || pLoop->lpIterOper() == GT_ASG_ADD)) ||
7581 ((pLoop->lpTestOper() == GT_GT || pLoop->lpTestOper() == GT_GE) &&
7582 (pLoop->lpIterOper() == GT_SUB || pLoop->lpIterOper() == GT_ASG_SUB))))
7584 JITDUMP("> Loop test (%s) doesn't agree with the direction (%s) of the pLoop->\n",
7585 GenTree::NodeName(pLoop->lpTestOper()), GenTree::NodeName(pLoop->lpIterOper()));
7589 if (!(pLoop->lpTestTree->OperKind() & GTK_RELOP) || !(pLoop->lpTestTree->gtFlags & GTF_RELOP_ZTT))
7591 JITDUMP("> Loop inversion NOT present, loop test [%06u] may not protect entry from head.\n",
7592 pLoop->lpTestTree->gtTreeID);
7597 GenTreePtr op1 = pLoop->lpIterator();
7598 noway_assert((op1->gtOper == GT_LCL_VAR) && (op1->gtLclVarCommon.gtLclNum == ivLclNum));
7601 JITDUMP("Checking blocks BB%02d..BB%02d for optimization candidates\n", beg->bbNum,
7602 end->bbNext ? end->bbNext->bbNum : 0);
7604 LoopCloneVisitorInfo info(context, loopNum, nullptr);
7605 for (BasicBlock* block = beg; block != end->bbNext; block = block->bbNext)
7608 for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
7611 fgWalkTreePre(&stmt->gtStmt.gtStmtExpr, optCanOptimizeByLoopCloningVisitor, &info, false, false);
7618 //---------------------------------------------------------------------------------------------------------------
7619 // optExtractArrIndex: Try to extract the array index from "tree".
7622 // tree the tree to be checked if it is the array [] operation.
7623 // result the extracted GT_INDEX information is updated in result.
7624 // lhsNum for the root level (function is recursive) callers should be BAD_VAR_NUM.
7627 // Returns true if array index can be extracted, else, return false. See assumption about
7628 // what will be extracted. The "result" variable's rank parameter is advanced for every
7629 // dimension of [] encountered.
7632 // Given a "tree" extract the GT_INDEX node in "result" as ArrIndex. In FlowGraph morph
7633 // we have converted a GT_INDEX tree into a scaled index base offset expression. We need
7634 // to reconstruct this to be able to know if this is an array access.
7637 // The method extracts only if the array base and indices are GT_LCL_VAR.
7639 // TODO-CQ: CLONE: After morph make sure this method extracts values before morph.
7641 // [000000001AF828D8] ---XG------- indir int
7642 // [000000001AF872C8] ------------ const long 16 Fseq[#FirstElem]
7643 // [000000001AF87340] ------------ + byref
7644 // [000000001AF87160] -------N---- const long 2
7645 // [000000001AF871D8] ------------ << long
7646 // [000000001AF870C0] ------------ cast long <- int
7647 // [000000001AF86F30] i----------- lclVar int V04 loc0
7648 // [000000001AF87250] ------------ + byref
7649 // [000000001AF86EB8] ------------ lclVar ref V01 arg1
7650 // [000000001AF87468] ---XG------- comma int
7651 // [000000001AF87020] ---X-------- arrBndsChk void
7652 // [000000001AF86FA8] ---X-------- arrLen int
7653 // [000000001AF827E8] ------------ lclVar ref V01 arg1
7654 // [000000001AF82860] ------------ lclVar int V04 loc0
7655 // [000000001AF829F0] -A-XG------- = int
7656 // [000000001AF82978] D------N---- lclVar int V06 tmp0
7658 bool Compiler::optExtractArrIndex(GenTreePtr tree, ArrIndex* result, unsigned lhsNum)
7660 if (tree->gtOper != GT_COMMA)
7664 GenTreePtr before = tree->gtGetOp1();
7665 if (before->gtOper != GT_ARR_BOUNDS_CHECK)
7669 GenTreeBoundsChk* arrBndsChk = before->AsBoundsChk();
7670 if (arrBndsChk->gtArrLen->gtGetOp1()->gtOper != GT_LCL_VAR)
7674 if (arrBndsChk->gtIndex->gtOper != GT_LCL_VAR)
7678 unsigned arrLcl = arrBndsChk->gtArrLen->gtGetOp1()->gtLclVarCommon.gtLclNum;
7679 if (lhsNum != BAD_VAR_NUM && arrLcl != lhsNum)
7684 unsigned indLcl = arrBndsChk->gtIndex->gtLclVarCommon.gtLclNum;
7686 GenTreePtr after = tree->gtGetOp2();
7688 if (after->gtOper != GT_IND)
7692 // It used to be the case that arrBndsChks for struct types would fail the previous check because
7693 // after->gtOper was an address (for a block op). In order to avoid asmDiffs we will for now
7694 // return false if the type of 'after' is a struct type. (This was causing us to clone loops
7695 // that we were not previously cloning.)
7696 // TODO-1stClassStructs: Remove this check to enable optimization of array bounds checks for struct
7698 if (varTypeIsStruct(after))
7703 GenTreePtr sibo = after->gtGetOp1();
7704 if (sibo->gtOper != GT_ADD)
7708 GenTreePtr sib = sibo->gtGetOp1();
7709 GenTreePtr ofs = sibo->gtGetOp2();
7710 if (ofs->gtOper != GT_CNS_INT)
7714 if (sib->gtOper != GT_ADD)
7718 GenTreePtr si = sib->gtGetOp2();
7719 GenTreePtr base = sib->gtGetOp1();
7720 if (si->gtOper != GT_LSH)
7724 if (base->OperGet() != GT_LCL_VAR || base->gtLclVarCommon.gtLclNum != arrLcl)
7728 GenTreePtr scale = si->gtGetOp2();
7729 GenTreePtr index = si->gtGetOp1();
7730 if (scale->gtOper != GT_CNS_INT)
7734 #ifdef _TARGET_AMD64_
7735 if (index->gtOper != GT_CAST)
7739 GenTreePtr indexVar = index->gtGetOp1();
7741 GenTreePtr indexVar = index;
7743 if (indexVar->gtOper != GT_LCL_VAR || indexVar->gtLclVarCommon.gtLclNum != indLcl)
7747 if (lhsNum == BAD_VAR_NUM)
7749 result->arrLcl = arrLcl;
7751 result->indLcls.Push(indLcl);
7752 result->bndsChks.Push(tree);
7753 result->useBlock = compCurBB;
7759 //---------------------------------------------------------------------------------------------------------------
7760 // optReconstructArrIndex: Reconstruct array index.
7763 // tree the tree to be checked if it is an array [][][] operation.
7764 // result the extracted GT_INDEX information.
7765 // lhsNum for the root level (function is recursive) callers should be BAD_VAR_NUM.
7768 // Returns true if array index can be extracted, else, return false. "rank" field in
7769 // "result" contains the array access depth. The "indLcls" fields contain the indices.
7772 // Recursively look for a list of array indices. In the example below, we encounter,
7773 // V03 = ((V05 = V00[V01]), (V05[V02])) which corresponds to access of V00[V01][V02]
7774 // The return value would then be:
7775 // ArrIndex result { arrLcl: V00, indLcls: [V01, V02], rank: 2 }
7777 // V00[V01][V02] would be morphed as:
7779 // [000000001B366848] ---XG------- indir int
7780 // [000000001B36BC50] ------------ V05 + (V02 << 2) + 16
7781 // [000000001B36C200] ---XG------- comma int
7782 // [000000001B36BDB8] ---X-------- arrBndsChk(V05, V02)
7783 // [000000001B36C278] -A-XG------- comma int
7784 // [000000001B366730] R--XG------- indir ref
7785 // [000000001B36C2F0] ------------ V00 + (V01 << 3) + 24
7786 // [000000001B36C818] ---XG------- comma ref
7787 // [000000001B36C458] ---X-------- arrBndsChk(V00, V01)
7788 // [000000001B36BB60] -A-XG------- = ref
7789 // [000000001B36BAE8] D------N---- lclVar ref V05 tmp2
7790 // [000000001B36A668] -A-XG------- = int
7791 // [000000001B36A5F0] D------N---- lclVar int V03 tmp0
7794 // The method extracts only if the array base and indices are GT_LCL_VAR.
7796 bool Compiler::optReconstructArrIndex(GenTreePtr tree, ArrIndex* result, unsigned lhsNum)
7798 // If we can extract "tree" (which is a top level comma) return.
7799 if (optExtractArrIndex(tree, result, lhsNum))
7803 // We have a comma (check if array base expr is computed in "before"), descend further.
7804 else if (tree->OperGet() == GT_COMMA)
7806 GenTreePtr before = tree->gtGetOp1();
7807 // "before" should evaluate an array base for the "after" indexing.
7808 if (before->OperGet() != GT_ASG)
7812 GenTreePtr lhs = before->gtGetOp1();
7813 GenTreePtr rhs = before->gtGetOp2();
7815 // "rhs" should contain an GT_INDEX
7816 if (!lhs->IsLocal() || !optReconstructArrIndex(rhs, result, lhsNum))
7820 unsigned lhsNum = lhs->gtLclVarCommon.gtLclNum;
7821 GenTreePtr after = tree->gtGetOp2();
7822 // Pass the "lhsNum", so we can verify if indeed it is used as the array base.
7823 return optExtractArrIndex(after, result, lhsNum);
7829 Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloningVisitor(GenTreePtr* pTree, Compiler::fgWalkData* data)
7831 return data->compiler->optCanOptimizeByLoopCloning(*pTree, (LoopCloneVisitorInfo*)data->pCallbackData);
7834 //-------------------------------------------------------------------------
7835 // optIsStackLocalInvariant: Is stack local invariant in loop.
7838 // loopNum The loop in which the variable is tested for invariance.
7839 // lclNum The local that is tested for invariance in the loop.
7842 // Returns true if the variable is loop invariant in loopNum.
7844 bool Compiler::optIsStackLocalInvariant(unsigned loopNum, unsigned lclNum)
7846 if (lvaVarAddrExposed(lclNum))
7850 if (optIsVarAssgLoop(loopNum, lclNum))
7857 //----------------------------------------------------------------------------------------------
7858 //  optCanOptimizeByLoopCloning: Check if the tree can be optimized by loop cloning and if so,
7859 //      identify as potential candidate and update the loop context.
7862 //      tree        The tree encountered during the tree walk.
7863 //      info        Supplies information about the current block or stmt in which the tree is.
7864 //                  Also supplies the "context" pointer for updating with loop cloning
7865 //                  candidates. Also supplies loopNum.
7868 //      If array index can be reconstructed, check if the iter var of the loop matches the
7869 //      array index var in some dim. Also ensure other index vars before the identified
7870 //      dim are loop invariant.
7873 //      Skip sub trees if the optimization candidate is identified or else continue walking
7875 Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTreePtr tree, LoopCloneVisitorInfo* info)
7877 ArrIndex arrIndex(getAllocator());
7879 // Check if array index can be optimized.
7880 if (optReconstructArrIndex(tree, &arrIndex, BAD_VAR_NUM))
// A successfully reconstructed jagged-array index is rooted at a GT_COMMA.
7882 assert(tree->gtOper == GT_COMMA);
7886 JITDUMP("Found ArrIndex at tree ");
7888 printf(" which is equivalent to: ");
// The array base local must be invariant in the loop, or cloning is unsafe.
7893 if (!optIsStackLocalInvariant(info->loopNum, arrIndex.arrLcl))
7895 return WALK_SKIP_SUBTREES;
7898 // Walk the dimensions and see if iterVar of the loop is used as index.
7899 for (unsigned dim = 0; dim < arrIndex.rank; ++dim)
7901 // Is index variable also used as the loop iter var.
7902 if (arrIndex.indLcls[dim] == optLoopTable[info->loopNum].lpIterVar())
7904 // Check the previous indices are all loop invariant.
7905 for (unsigned dim2 = 0; dim2 < dim; ++dim2)
7907 if (optIsVarAssgLoop(info->loopNum, arrIndex.indLcls[dim2]))
7909 JITDUMP("V%02d is assigned in loop\n", arrIndex.indLcls[dim2]);
7910 return WALK_SKIP_SUBTREES;
7916 JITDUMP("Loop %d can be cloned for ArrIndex ", info->loopNum);
7918 JITDUMP(" on dim %d\n", dim);
7921 // Update the loop context.
// Record this (arrIndex, dim, stmt) triple as a cloning candidate for the loop.
7922 info->context->EnsureLoopOptInfo(info->loopNum)
7923 ->Push(new (this, CMK_LoopOpt) LcJaggedArrayOptInfo(arrIndex, dim, info->stmt));
7927 JITDUMP("Induction V%02d is not used as index on dim %d\n", optLoopTable[info->loopNum].lpIterVar(),
// Decision for this subtree is final; no need to walk deeper.
7931 return WALK_SKIP_SUBTREES;
7933 else if (tree->gtOper == GT_ARR_ELEM)
7935 // TODO-CQ: CLONE: Implement.
7936 return WALK_SKIP_SUBTREES;
7938 return WALK_CONTINUE;
// optRangeCheckDsc: callback payload for optValidRangeCheckIndex. Carries the
// Compiler instance (needed for the lvaTable lookup in the walker) and
// accumulates the verdict (the walker clears pData->bValidIndex on failure).
7941 struct optRangeCheckDsc
7943 Compiler* pCompiler;
7947 Walk to make sure that only locals and constants are contained in the index
7950 Compiler::fgWalkResult Compiler::optValidRangeCheckIndex(GenTreePtr* pTree, fgWalkData* data)
7952 GenTreePtr tree = *pTree;
7953 optRangeCheckDsc* pData = (optRangeCheckDsc*)data->pCallbackData;
// Any indirection or non-local storage in the index could change between the
// bounds check and the access, so reject those node kinds outright.
7955 if (tree->gtOper == GT_IND || tree->gtOper == GT_CLS_VAR || tree->gtOper == GT_FIELD || tree->gtOper == GT_LCL_FLD)
7957 pData->bValidIndex = false;
7961 if (tree->gtOper == GT_LCL_VAR)
// A local whose address was taken can also be mutated through aliases.
7963 if (pData->pCompiler->lvaTable[tree->gtLclVarCommon.gtLclNum].lvAddrExposed)
7965 pData->bValidIndex = false;
7970 return WALK_CONTINUE;
7974 returns true if a range check can legally be removed (for the moment it checks
7975 that the array is a local array (non subject to racing conditions) and that the
7976 index is either a constant or a local
7978 bool Compiler::optIsRangeCheckRemovable(GenTreePtr tree)
7980 noway_assert(tree->gtOper == GT_ARR_BOUNDS_CHECK);
7981 GenTreeBoundsChk* bndsChk = tree->AsBoundsChk();
7982 GenTreePtr pArray = bndsChk->GetArray();
// With no array node we must at least have a constant length to reason about.
7983 if (pArray == nullptr && !bndsChk->gtArrLen->IsCnsIntOrI())
7987 GenTreePtr pIndex = bndsChk->gtIndex;
7989 // The length must be a constant (the pArray == NULL case) or the array reference must be a local.
7990 // Otherwise we can be targeted by malicious race-conditions.
7991 if (pArray != nullptr)
7993 if (pArray->gtOper != GT_LCL_VAR)
7999 printf("Can't remove range check if the array isn't referenced with a local\n");
8007 noway_assert(pArray->gtType == TYP_REF);
8008 noway_assert(pArray->gtLclVarCommon.gtLclNum < lvaCount);
8010 if (lvaTable[pArray->gtLclVarCommon.gtLclNum].lvAddrExposed)
8012 // If the array address has been taken, don't do the optimization
8013 // (this restriction can be lowered a bit, but i don't think it's worth it)
8014 CLANG_FORMAT_COMMENT_ANCHOR;
8018 printf("Can't remove range check if the array has its address taken\n");
// Verify the index expression contains only locals (not address-exposed) and
// constants, using the optValidRangeCheckIndex walker above.
8027 optRangeCheckDsc Data;
8028 Data.pCompiler = this;
8029 Data.bValidIndex = true;
8031 fgWalkTreePre(&pIndex, optValidRangeCheckIndex, &Data);
8033 if (!Data.bValidIndex)
8038 printf("Can't remove range check with this index");
8049 /******************************************************************************
8051 * Replace x==null with (x|x)==0 if x is a GC-type.
8052 * This will stress code-gen and the emitter to make sure they support such trees.
8057 void Compiler::optOptimizeBoolsGcStress(BasicBlock* condBlock)
// Only transform under the STRESS_OPT_BOOLS_GC stress mode (20% weighting).
8059 if (!compStressCompile(STRESS_OPT_BOOLS_GC, 20))
8064 noway_assert(condBlock->bbJumpKind == BBJ_COND);
// The conditional jump is the last statement of the block.
8065 GenTreePtr condStmt = condBlock->bbTreeList->gtPrev->gtStmt.gtStmtExpr;
8067 noway_assert(condStmt->gtOper == GT_JTRUE);
8072 GenTreePtr comparand = optIsBoolCond(condStmt, &relop, &isBool);
// Only GC-typed comparands are interesting for this stress transformation.
8074 if (comparand == nullptr || !varTypeIsGC(comparand->TypeGet()))
// Cloning a tree with assignments/calls/ordering side effects would duplicate them.
8079 if (comparand->gtFlags & (GTF_ASG | GTF_CALL | GTF_ORDER_SIDEEFF))
8084 GenTreePtr comparandClone = gtCloneExpr(comparand);
8086 // Bump up the ref-counts of any variables in 'comparandClone'
8087 compCurBB = condBlock;
8088 fgWalkTreePre(&comparandClone, Compiler::lvaIncRefCntsCB, (void*)this, true);
// Rewrite "x == 0" as "(x|x) == 0" (or "(x&x) == 0", chosen by a second stress roll).
8090 noway_assert(relop->gtOp.gtOp1 == comparand);
8091 genTreeOps oper = compStressCompile(STRESS_OPT_BOOLS_GC, 50) ? GT_OR : GT_AND;
8092 relop->gtOp.gtOp1 = gtNewOperNode(oper, TYP_I_IMPL, comparand, comparandClone);
8094 // Comparand type is already checked, and we have const int, there is no harm
8095 // morphing it into a TYP_I_IMPL.
8096 noway_assert(relop->gtOp.gtOp2->gtOper == GT_CNS_INT);
8097 relop->gtOp.gtOp2->gtType = TYP_I_IMPL;
8102 /******************************************************************************
8103 * Function used by folding of boolean conditionals
8104 * Given a GT_JTRUE node, checks that it is a boolean comparison of the form
8105 * "if (boolVal ==/!=  0/1)". This is translated into a GT_EQ node with "op1"
8106 * being a boolean lclVar and "op2" the const 0/1.
8107 * On success, the comparand (ie. boolVal) is returned.   Else NULL.
8108 * compPtr returns the compare node (i.e. GT_EQ or GT_NE node)
8109 * boolPtr returns whether the comparand is a boolean value (must be 0 or 1).
8110 * When return boolPtr equal to true, if the comparison was against a 1 (i.e true)
8111 * value then we morph the tree by reversing the GT_EQ/GT_NE and change the 1 to 0.
8114 GenTree* Compiler::optIsBoolCond(GenTree* condBranch, GenTree** compPtr, bool* boolPtr)
8116 bool isBool = false;
8118 noway_assert(condBranch->gtOper == GT_JTRUE);
8119 GenTree* cond = condBranch->gtOp.gtOp1;
8121 /* The condition must be "!= 0" or "== 0" */
8123 if ((cond->gtOper != GT_EQ) && (cond->gtOper != GT_NE))
8128 /* Return the compare node to the caller */
8132 /* Get hold of the comparands */
8134 GenTree* opr1 = cond->gtOp.gtOp1;
8135 GenTree* opr2 = cond->gtOp.gtOp2;
// The right-hand side must be the integer constant 0 or 1.
8137 if (opr2->gtOper != GT_CNS_INT)
8142 if (!opr2->IsIntegralConst(0) && !opr2->IsIntegralConst(1))
8147 ssize_t ival2 = opr2->gtIntCon.gtIconVal;
8149 /* Is the value a boolean?
8150 * We can either have a boolean expression (marked GTF_BOOLEAN) or
8151 * a local variable that is marked as being boolean (lvIsBoolean) */
8153 if (opr1->gtFlags & GTF_BOOLEAN)
8157 else if ((opr1->gtOper == GT_CNS_INT) && (opr1->IsIntegralConst(0) || opr1->IsIntegralConst(1)))
8161 else if (opr1->gtOper == GT_LCL_VAR)
8163 /* is it a boolean local variable */
8165 unsigned lclNum = opr1->gtLclVarCommon.gtLclNum;
8166 noway_assert(lclNum < lvaCount);
8168 if (lvaTable[lclNum].lvIsBoolean)
8174 /* Was our comparison against the constant 1 (i.e. true) */
8177 // If this is a boolean expression tree we can reverse the relop
8178 // and change the true to false.
// Normalize "boolVal ==/!= 1" into the equivalent "boolVal !=/== 0" in place.
8181 gtReverseCond(cond);
8182 opr2->gtIntCon.gtIconVal = 0;
8194 void Compiler::optOptimizeBools()
8199 printf("*************** In optOptimizeBools()\n");
8202 printf("Blocks/Trees before phase\n");
8203 fgDispBasicBlocks(true);
// Scan adjacent pairs of BBJ_COND blocks, looking for two boolean tests
// that can be folded into a single AND/OR test in the first block.
8213 for (BasicBlock* b1 = fgFirstBB; b1; b1 = b1->bbNext)
8215 /* We're only interested in conditional jumps here */
8217 if (b1->bbJumpKind != BBJ_COND)
8222 /* If there is no next block, we're done */
8224 BasicBlock* b2 = b1->bbNext;
8230 /* The next block must not be marked as BBF_DONT_REMOVE */
8231 if (b2->bbFlags & BBF_DONT_REMOVE)
8236 /* The next block also needs to be a condition */
8238 if (b2->bbJumpKind != BBJ_COND)
// Not a foldable pair, but still an opportunity for the GC stress transform.
8241 optOptimizeBoolsGcStress(b1);
8246 bool sameTarget; // Do b1 and b2 have the same bbJumpDest?
8248 if (b1->bbJumpDest == b2->bbJumpDest)
8250 /* Given the following sequence of blocks :
8254 we will try to fold it to :
8255 B1: brtrue(t1|t2, BX)
8261 else if (b1->bbJumpDest == b2->bbNext) /*b1->bbJumpDest->bbNum == n1+2*/
8263 /* Given the following sequence of blocks :
8267 we will try to fold it to :
8268 B1: brtrue((!t1)&&t2, B3)
8279 /* The second block must contain a single statement */
8281 GenTreePtr s2 = b2->bbTreeList;
8282 if (s2->gtPrev != s2)
8287 noway_assert(s2->gtOper == GT_STMT);
8288 GenTreePtr t2 = s2->gtStmt.gtStmtExpr;
8289 noway_assert(t2->gtOper == GT_JTRUE);
8291 /* Find the condition for the first block */
8293 GenTreePtr s1 = b1->bbTreeList->gtPrev;
8295 noway_assert(s1->gtOper == GT_STMT);
8296 GenTreePtr t1 = s1->gtStmt.gtStmtExpr;
8297 noway_assert(t1->gtOper == GT_JTRUE);
// b2 must be reachable only through b1; otherwise removing b2 would break
// the other predecessors' paths.
8299 if (b2->countOfInEdges() > 1)
8304 /* Find the branch conditions of b1 and b2 */
8308 GenTreePtr c1 = optIsBoolCond(t1, &t1, &bool1);
8314 GenTreePtr c2 = optIsBoolCond(t2, &t2, &bool2);
// NOTE(review): '&&' binds tighter than '||', so these asserts check
// "gtOp1 == cN" only on the GT_NE arm; the intent was likely
// ((EQ || NE) && gtOp1 == cN). Harmless (the assert is merely weaker than
// intended), but worth fixing with explicit parentheses.
8320 noway_assert(t1->gtOper == GT_EQ || t1->gtOper == GT_NE && t1->gtOp.gtOp1 == c1);
8321 noway_assert(t2->gtOper == GT_EQ || t2->gtOper == GT_NE && t2->gtOp.gtOp1 == c2);
8323 // Leave out floats where the bit-representation is more complicated
8324 // - there are two representations for 0.
8326 if (varTypeIsFloating(c1->TypeGet()) || varTypeIsFloating(c2->TypeGet()))
8331 // Make sure the types involved are of the same sizes
8332 if (genTypeSize(c1->TypeGet()) != genTypeSize(c2->TypeGet()))
8336 if (genTypeSize(t1->TypeGet()) != genTypeSize(t2->TypeGet()))
8340 #ifdef _TARGET_ARMARCH_
8341 // Skip the small operand which we cannot encode.
8342 if (varTypeIsSmall(c1->TypeGet()))
8345 /* The second condition must not contain side effects */
8347 if (c2->gtFlags & GTF_GLOB_EFFECT)
8352 /* The second condition must not be too expensive */
// c2 will now be evaluated unconditionally, so cap its execution cost.
8356 if (c2->gtCostEx > 12)
8363 var_types foldType = c1->TypeGet();
8364 if (varTypeIsGC(foldType))
// Fold GC-typed values as native ints; only the zero/non-zero test matters.
8366 foldType = TYP_I_IMPL;
8371 /* Both conditions must be the same */
8373 if (t1->gtOper != t2->gtOper)
8378 if (t1->gtOper == GT_EQ)
8380 /* t1:c1==0 t2:c2==0 ==> Branch to BX if either value is 0
8381 So we will branch to BX if (c1&c2)==0 */
8388 /* t1:c1!=0 t2:c2!=0 ==> Branch to BX if either value is non-0
8389 So we will branch to BX if (c1|c2)!=0 */
8397 /* The b1 condition must be the reverse of the b2 condition */
8399 if (t1->gtOper == t2->gtOper)
8404 if (t1->gtOper == GT_EQ)
8406 /* t1:c1==0 t2:c2!=0 ==> Branch to BX if both values are non-0
8407 So we will branch to BX if (c1&c2)!=0 */
8414 /* t1:c1!=0 t2:c2==0 ==> Branch to BX if both values are 0
8415 So we will branch to BX if (c1|c2)==0 */
8422 // Anding requires both values to be 0 or 1
8424 if ((foldOp == GT_AND) && (!bool1 || !bool2))
8430 // Now update the trees
8432 GenTreePtr cmpOp1 = gtNewOperNode(foldOp, foldType, c1, c2);
8435 /* When we 'OR'/'AND' two booleans, the result is boolean as well */
8436 cmpOp1->gtFlags |= GTF_BOOLEAN;
8440 t1->gtOp.gtOp1 = cmpOp1;
8441 t1->gtOp.gtOp2->gtType = foldType; // Could have been varTypeIsGC()
8443 #if FEATURE_SET_FLAGS
8444 // For comparisons against zero we will have the GTF_SET_FLAGS set
8445 // and this can cause an assert to fire in fgMoveOpsLeft(GenTreePtr tree)
8446 // during the CSE phase.
8448 // So make sure to clear any GTF_SET_FLAGS bit on these operations
8449 // as they are no longer feeding directly into a comparisons against zero
8451 // Make sure that the GTF_SET_FLAGS bit is cleared.
8452 // Fix 388436 ARM JitStress WP7
8453 c1->gtFlags &= ~GTF_SET_FLAGS;
8454 c2->gtFlags &= ~GTF_SET_FLAGS;
8456 // The new top level node that we just created does feed directly into
8457 // a comparison against zero, so set the GTF_SET_FLAGS bit so that
8458 // we generate an instruction that sets the flags, which allows us
8459 // to omit the cmp with zero instruction.
8461 // Request that the codegen for cmpOp1 sets the condition flags
8462 // when it generates the code for cmpOp1.
8464 cmpOp1->gtRequestSetFlags();
8467 flowList* edge1 = fgGetPredForBlock(b1->bbJumpDest, b1);
8470 /* Modify the target of the conditional jump and update bbRefs and bbPreds */
8474 edge2 = fgGetPredForBlock(b2->bbJumpDest, b2);
8478 edge2 = fgGetPredForBlock(b2->bbNext, b2);
8480 fgRemoveRefPred(b1->bbJumpDest, b1);
8482 b1->bbJumpDest = b2->bbJumpDest;
8484 fgAddRefPred(b2->bbJumpDest, b1);
8487 noway_assert(edge1 != nullptr);
8488 noway_assert(edge2 != nullptr);
// Combine the two edges' weight ranges; guard against overflow of the sums.
8490 BasicBlock::weight_t edgeSumMin = edge1->flEdgeWeightMin + edge2->flEdgeWeightMin;
8491 BasicBlock::weight_t edgeSumMax = edge1->flEdgeWeightMax + edge2->flEdgeWeightMax;
8492 if ((edgeSumMax >= edge1->flEdgeWeightMax) && (edgeSumMax >= edge2->flEdgeWeightMax))
8494 edge1->flEdgeWeightMin = edgeSumMin;
8495 edge1->flEdgeWeightMax = edgeSumMax;
// Overflow detected: fall back to the widest possible weight range.
8499 edge1->flEdgeWeightMin = BB_ZERO_WEIGHT;
8500 edge1->flEdgeWeightMax = BB_MAX_WEIGHT;
8503 /* Get rid of the second block (which is a BBJ_COND) */
8505 noway_assert(b1->bbJumpKind == BBJ_COND);
8506 noway_assert(b2->bbJumpKind == BBJ_COND);
8507 noway_assert(b1->bbJumpDest == b2->bbJumpDest);
8508 noway_assert(b1->bbNext == b2);
8509 noway_assert(b2->bbNext);
8512 b2->bbFlags |= BBF_REMOVED;
8514 // If b2 was the last block of a try or handler, update the EH table.
8516 ehUpdateForDeletedBlock(b2);
8518 /* Update bbRefs and bbPreds */
8520 /* Replace pred 'b2' for 'b2->bbNext' with 'b1'
8521 * Remove pred 'b2' for 'b2->bbJumpDest' */
8523 fgReplacePred(b2->bbNext, b2, b1);
8525 fgRemoveRefPred(b2->bbJumpDest, b2);
8527 /* Update the block numbers and try again */
8539 // Update loop table
8540 fgUpdateLoopsAfterCompacting(b1, b2);
8545 printf("Folded %sboolean conditions of BB%02u and BB%02u to :\n", c2->OperIsLeaf() ? "" : "non-leaf ",
8546 b1->bbNum, b2->bbNum);
8555 fgDebugCheckBBlist();