1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
10 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
11 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
17 #pragma warning(disable : 4701)
20 /*****************************************************************************/
// Optimizer statistics: range checks removed vs. all range checks seen.
// NOTE(review): only the zero-initializers are visible in this excerpt; the
// declarations (and any #if guard around them) live elsewhere — confirm.
24 unsigned Compiler::optRangeChkRmv = 0;
26 unsigned Compiler::optRangeChkAll = 0;
29 /*****************************************************************************/
// optInit: Reset per-method optimizer state before compiling a new method.
// Clears loop marking, call-site counters, assertion-propagation state, and
// CSE bookkeeping so nothing leaks between method compilations.
31 void Compiler::optInit()
33 optLoopsMarked = false;
36 /* Initialize the # of tracked loops to 0 */
38 /* Keep track of the number of calls and indirect calls made by this method */
40 optIndirectCallCount = 0;
41 optNativeCallCount = 0;
// Assertion propagation starts with no assertions recorded.
42 optAssertionCount = 0;
43 optAssertionDep = nullptr;
// CSE bookkeeping; UINT_MAX means "no CSE range started yet".
45 optCSECandidateTotal = 0;
46 optCSEstart = UINT_MAX;
// NOTE(review): the matching #if FEATURE_ANYCSE is on lines elided from this view.
48 #endif // FEATURE_ANYCSE
// DataFlow constructor: captures the owning Compiler instance (no other work
// is visible in this excerpt).
51 DataFlow::DataFlow(Compiler* pCompiler) : m_pCompiler(pCompiler)
55 /*****************************************************************************
// optSetBlockWeights: Adjust basic-block weights using reachability/dominator
// info: unreachable blocks become run-rarely, and (when not using profile
// data) blocks that do not dominate a return block get their weight halved.
// Requires dominators to have been computed and full opts to be enabled.
59 void Compiler::optSetBlockWeights()
61 noway_assert(!opts.MinOpts() && !opts.compDbgCode);
62 assert(fgDomsComputed);
68 bool firstBBdomsRets = true;
72 for (block = fgFirstBB; (block != nullptr); block = block->bbNext)
74 /* Blocks that can't be reached via the first block are rarely executed */
75 if (!fgReachable(fgFirstBB, block))
77 block->bbSetRunRarely();
80 if (block->bbWeight != BB_ZERO_WEIGHT)
82 // Calculate our bbWeight:
84 // o BB_UNITY_WEIGHT if we dominate all BBJ_RETURN blocks
85 // o otherwise BB_UNITY_WEIGHT / 2
87 bool domsRets = true; // Assume that we will dominate
89 for (BasicBlockList* retBlocks = fgReturnBlocks; retBlocks != nullptr; retBlocks = retBlocks->next)
91 if (!fgDominate(block, retBlocks->block))
// Remember whether the method entry block dominates all returns; used below
// to decide if halving non-dominating blocks is meaningful at all.
98 if (block == fgFirstBB)
100 firstBBdomsRets = domsRets;
103 // If we are not using profile weight then we lower the weight
104 // of blocks that do not dominate a return block
106 if (firstBBdomsRets && (fgIsUsingProfileWeights() == false) && (domsRets == false))
111 block->modifyBBWeight(block->bbWeight / 2);
// Halving must never produce a zero weight here.
112 noway_assert(block->bbWeight);
// Debug-only dump and flowgraph consistency check (under #ifdef DEBUG,
// elided from this view).
118 if (changed && verbose)
120 printf("\nAfter optSetBlockWeights:\n");
125 /* Check that the flowgraph data (bbNum, bbRefs, bbPreds) is up-to-date */
126 fgDebugCheckBBlist();
130 /*****************************************************************************
132 * Marks the blocks between 'begBlk' and 'endBlk' as part of a loop.
// optMarkLoopBlocks: Scale up the bbWeight of every block in the lexical range
// [begBlk, endBlk] that is truly part of the loop (mutually reachable with the
// loop head). Blocks with real profile weights are left untouched. If
// excludeEndBlk is true, endBlk itself is not re-weighted.
135 void Compiler::optMarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk, bool excludeEndBlk)
137 /* Calculate the 'loopWeight',
138 this is the amount to increase each block in the loop
139 Our heuristic is that loops are weighted eight times more
140 than straight line code.
141 Thus we increase each block by 7 times the weight of
142 the loop header block,
143 if the loops are all properly formed gives us:
144 (assuming that BB_LOOP_WEIGHT is 8)
146 1 -- non loop basic block
147 8 -- single loop nesting
148 64 -- double loop nesting
149 512 -- triple loop nesting
// Preconditions: begBlk precedes endBlk lexically, is a loop head, and can
// reach endBlk.
153 noway_assert(begBlk->bbNum <= endBlk->bbNum);
154 noway_assert(begBlk->isLoopHead());
155 noway_assert(fgReachable(begBlk, endBlk));
160 printf("\nMarking loop L%02u", begBlk->bbLoopNum);
164 noway_assert(!opts.MinOpts());
166 /* Build list of backedges for block begBlk */
167 flowList* backedgeList = nullptr;
// A pred whose bbNum >= begBlk's is lexically at-or-after the head, i.e. a
// back edge into the loop head.
169 for (flowList* pred = begBlk->bbPreds; pred != nullptr; pred = pred->flNext)
171 /* Is this a backedge? */
172 if (pred->flBlock->bbNum >= begBlk->bbNum)
174 flowList* flow = new (this, CMK_FlowList) flowList();
176 #if MEASURE_BLOCK_SIZE
178 genFlowNodeSize += sizeof(flowList);
179 #endif // MEASURE_BLOCK_SIZE
// Prepend onto the local backedge list.
181 flow->flNext = backedgeList;
182 flow->flBlock = pred->flBlock;
187 /* At least one backedge must have been found (the one from endBlk) */
188 noway_assert(backedgeList);
190 BasicBlock* curBlk = begBlk;
// Walk the lexical range; loop-exit/continue handling is on lines elided
// from this view.
194 noway_assert(curBlk);
196 // For curBlk to be part of a loop that starts at begBlk
197 // curBlk must be reachable from begBlk and (since this is a loop)
198 // likewise begBlk must be reachable from curBlk.
201 if (fgReachable(curBlk, begBlk) && fgReachable(begBlk, curBlk))
203 /* If this block reaches any of the backedge blocks we set reachable */
204 /* If this block dominates any of the backedge blocks we set dominates */
205 bool reachable = false;
206 bool dominates = false;
208 for (flowList* tmp = backedgeList; tmp != nullptr; tmp = tmp->flNext)
210 BasicBlock* backedge = tmp->flBlock;
// Run-rarely blocks keep their (zero-ish) weight; skip the analysis.
212 if (!curBlk->isRunRarely())
214 reachable |= fgReachable(curBlk, backedge);
215 dominates |= fgDominate(curBlk, backedge);
// Once both are established there is nothing more to learn from the
// remaining backedges.
217 if (dominates && reachable)
226 noway_assert(curBlk->bbWeight > BB_ZERO_WEIGHT);
230 if ((curBlk->bbFlags & BBF_PROF_WEIGHT) != 0)
232 // We have real profile weights, so we aren't going to change this blocks weight
233 weight = curBlk->bbWeight;
// Full weight boost when curBlk dominates a backedge; half boost otherwise
// (the branch structure selecting between these is elided from this view).
239 weight = curBlk->bbWeight * BB_LOOP_WEIGHT;
243 weight = curBlk->bbWeight * (BB_LOOP_WEIGHT / 2);
247 // The multiplication may have caused us to overflow
249 if (weight < curBlk->bbWeight)
251 // The multiplication caused us to overflow
252 weight = BB_MAX_WEIGHT;
255 // Set the new weight
257 curBlk->modifyBBWeight(weight);
262 printf("\n BB%02u(wt=%s)", curBlk->bbNum, refCntWtd2str(curBlk->getBBWeight(this)));
268 /* Stop if we've reached the last block in the loop */
270 if (curBlk == endBlk)
275 curBlk = curBlk->bbNext;
277 /* If we are excluding the endBlk then stop if we've reached endBlk */
279 if (excludeEndBlk && (curBlk == endBlk))
286 /*****************************************************************************
288 * Unmark the blocks between 'begBlk' and 'endBlk' as part of a loop.
// optUnmarkLoopBlocks: Undo optMarkLoopBlocks for the range [begBlk, endBlk]:
// divide each loop block's weight back down by BB_LOOP_WEIGHT. Only done when
// the loop has exactly one back edge (otherwise another loop still claims the
// blocks); blocks at BB_MAX_WEIGHT or carrying profile weights are untouched.
291 void Compiler::optUnmarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk)
293 /* A set of blocks that were previously marked as a loop are now
294 to be unmarked, since we have decided that for some reason this
295 loop no longer exists.
296 Basically we are just reseting the blocks bbWeight to their
300 noway_assert(begBlk->bbNum <= endBlk->bbNum);
301 noway_assert(begBlk->isLoopHead());
303 noway_assert(!opts.MinOpts());
// Count qualifying back edges into begBlk; bail unless there is exactly one.
306 unsigned backEdgeCount = 0;
308 for (flowList* pred = begBlk->bbPreds; pred != nullptr; pred = pred->flNext)
310 curBlk = pred->flBlock;
312 /* is this a backward edge? (from curBlk to begBlk) */
314 if (begBlk->bbNum > curBlk->bbNum)
319 /* We only consider back-edges that are BBJ_COND or BBJ_ALWAYS for loops */
321 if ((curBlk->bbJumpKind != BBJ_COND) && (curBlk->bbJumpKind != BBJ_ALWAYS))
329 /* Only unmark the loop blocks if we have exactly one loop back edge */
330 if (backEdgeCount != 1)
// Debug-only explanation of why we bailed (under verbose, elided guard).
335 if (backEdgeCount > 0)
337 printf("\nNot removing loop L%02u, due to an additional back edge", begBlk->bbLoopNum);
339 else if (backEdgeCount == 0)
341 printf("\nNot removing loop L%02u, due to no back edge", begBlk->bbLoopNum);
347 noway_assert(backEdgeCount == 1);
348 noway_assert(fgReachable(begBlk, endBlk));
353 printf("\nUnmarking loop L%02u", begBlk->bbLoopNum);
// Walk the lexical block range (loop construct partially elided from view).
360 noway_assert(curBlk);
362 // For curBlk to be part of a loop that starts at begBlk
363 // curBlk must be reachable from begBlk and (since this is a loop)
364 // likewise begBlk must be reachable from curBlk.
366 if (!curBlk->isRunRarely() && fgReachable(curBlk, begBlk) && fgReachable(begBlk, curBlk))
368 unsigned weight = curBlk->bbWeight;
370 // Don't unmark blocks that are set to BB_MAX_WEIGHT
371 // Don't unmark blocks when we are using profile weights
373 if (!curBlk->isMaxBBWeight() && ((curBlk->bbFlags & BBF_PROF_WEIGHT) == 0))
375 if (!fgDominate(curBlk, endBlk))
381 /* Merging of blocks can disturb the Dominates
382 information (see RAID #46649) */
383 if (weight < BB_LOOP_WEIGHT)
389 // We can overflow here so check for it
390 if (weight < curBlk->bbWeight)
392 weight = BB_MAX_WEIGHT;
// At this point weight must be large enough that the division below cannot
// zero it out.
395 assert(weight >= BB_LOOP_WEIGHT);
397 curBlk->modifyBBWeight(weight / BB_LOOP_WEIGHT);
403 printf("\n BB%02u(wt=%s)", curBlk->bbNum, refCntWtd2str(curBlk->getBBWeight(this)));
407 /* Stop if we've reached the last block in the loop */
409 if (curBlk == endBlk)
414 curBlk = curBlk->bbNext;
416 /* Stop if we go past the last block in the loop, as it may have been deleted */
417 if (curBlk->bbNum > endBlk->bbNum)
424 /*****************************************************************************************************
426 * Function called to update the loop table and bbWeight before removing a block
// optUpdateLoopsBeforeRemoveBlock: Fix up the loop table (and possibly block
// weights) before 'block' is removed from the flowgraph. A loop whose entry or
// bottom is the removed block is marked LPFLG_REMOVED; a removed exit clears
// LPFLG_ONE_EXIT; a removed head is replaced by bbPrev. If the block is the
// sole back edge of a marked loop, the loop blocks are un-weighted (unless
// skipUnmarkLoop is set).
429 void Compiler::optUpdateLoopsBeforeRemoveBlock(BasicBlock* block, bool skipUnmarkLoop)
436 noway_assert(!opts.MinOpts());
438 bool removeLoop = false;
440 /* If an unreachable block was part of a loop entry or bottom then the loop is unreachable */
441 /* Special case: the block was the head of a loop - or pointing to a loop entry */
443 for (unsigned loopNum = 0; loopNum < optLoopCount; loopNum++)
445 /* Some loops may have been already removed by
446 * loop unrolling or conditional folding */
448 if (optLoopTable[loopNum].lpFlags & LPFLG_REMOVED)
// Removing the entry or bottom block dissolves the loop entirely.
453 if (block == optLoopTable[loopNum].lpEntry || block == optLoopTable[loopNum].lpBottom)
455 optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED;
462 printf("\nUpdateLoopsBeforeRemoveBlock Before: ");
463 optPrintLoopInfo(loopNum);
467 /* If the loop is still in the table
468 * any block in the loop must be reachable !!! */
470 noway_assert(optLoopTable[loopNum].lpEntry != block);
471 noway_assert(optLoopTable[loopNum].lpBottom != block);
// Losing the recorded unique exit: keep the loop but forget the exit info.
473 if (optLoopTable[loopNum].lpExit == block)
475 optLoopTable[loopNum].lpExit = nullptr;
476 optLoopTable[loopNum].lpFlags &= ~LPFLG_ONE_EXIT;
480 /* If this points to the actual entry in the loop
481 * then the whole loop may become unreachable */
// Does the removed block jump (fall through / branch / switch) to the entry?
// The case labels for the switch are on lines elided from this view.
483 switch (block->bbJumpKind)
486 BasicBlock** jumpTab;
490 if (block->bbNext == optLoopTable[loopNum].lpEntry)
495 if (block->bbJumpKind == BBJ_NONE)
503 noway_assert(block->bbJumpDest);
504 if (block->bbJumpDest == optLoopTable[loopNum].lpEntry)
511 jumpCnt = block->bbJumpSwt->bbsCount;
512 jumpTab = block->bbJumpSwt->bbsDstTab;
516 noway_assert(*jumpTab);
517 if ((*jumpTab) == optLoopTable[loopNum].lpEntry)
521 } while (++jumpTab, --jumpCnt);
530 /* Check if the entry has other predecessors outside the loop
531 * TODO: Replace this when predecessors are available */
// Scan every block for another edge into the loop entry from outside the
// loop's lexical range; if one exists the loop is still reachable.
533 BasicBlock* auxBlock;
534 for (auxBlock = fgFirstBB; auxBlock; auxBlock = auxBlock->bbNext)
536 /* Ignore blocks in the loop */
538 if (auxBlock->bbNum > optLoopTable[loopNum].lpHead->bbNum &&
539 auxBlock->bbNum <= optLoopTable[loopNum].lpBottom->bbNum)
544 switch (auxBlock->bbJumpKind)
547 BasicBlock** jumpTab;
551 if (auxBlock->bbNext == optLoopTable[loopNum].lpEntry)
556 if (auxBlock->bbJumpKind == BBJ_NONE)
564 noway_assert(auxBlock->bbJumpDest);
565 if (auxBlock->bbJumpDest == optLoopTable[loopNum].lpEntry)
572 jumpCnt = auxBlock->bbJumpSwt->bbsCount;
573 jumpTab = auxBlock->bbJumpSwt->bbsDstTab;
577 noway_assert(*jumpTab);
578 if ((*jumpTab) == optLoopTable[loopNum].lpEntry)
582 } while (++jumpTab, --jumpCnt);
// No outside predecessor found: the loop becomes unreachable.
592 optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED;
595 else if (optLoopTable[loopNum].lpHead == block)
597 /* The loop has a new head - Just update the loop table */
598 optLoopTable[loopNum].lpHead = block->bbPrev;
604 printf("\nUpdateLoopsBeforeRemoveBlock After: ");
605 optPrintLoopInfo(loopNum);
// If 'block' is the unique back edge of a marked loop head (and dominator
// info is current), also undo the loop's block-weight marking.
610 if ((skipUnmarkLoop == false) && ((block->bbJumpKind == BBJ_ALWAYS) || (block->bbJumpKind == BBJ_COND)) &&
611 (block->bbJumpDest->isLoopHead()) && (block->bbJumpDest->bbNum <= block->bbNum) && fgDomsComputed &&
612 (fgCurBBEpochSize == fgDomBBcount + 1) && fgReachable(block->bbJumpDest, block))
614 optUnmarkLoopBlocks(block->bbJumpDest, block);
620 /*****************************************************************************
622 * Given the beginBlock of the loop, return the index of this loop
// optFindLoopNumberFromBeginBlock: Return the loop-table index of the loop
// whose head's successor is 'begBlk'. Asserts if no such loop exists.
626 unsigned Compiler::optFindLoopNumberFromBeginBlock(BasicBlock* begBlk)
630 for (lnum = 0; lnum < optLoopCount; lnum++)
// The begin block is the lexical successor of the recorded loop head.
632 if (optLoopTable[lnum].lpHead->bbNext == begBlk)
639 noway_assert(!"Loop number not found.");
644 /*****************************************************************************
646 * Print loop info in an uniform way.
// optPrintLoopInfo (detailed overload): Debug dump of one loop's blocks
// (first/top/bottom/head/entry), exit count/exit block, and parent index.
// Callers that already have a loop-table entry should prefer the
// optPrintLoopInfo(unsigned) overload, which forwards here.
649 void Compiler::optPrintLoopInfo(unsigned loopInd,
654 BasicBlock* lpBottom,
655 unsigned char lpExitCnt,
659 noway_assert(lpHead);
662 // NOTE: we take "loopInd" as an argument instead of using the one
663 // stored in begBlk->bbLoopNum because sometimes begBlk->bbLoopNum
664 // has not be set correctly. For example, in optRecordLoop().
665 // However, in most of the cases, loops should have been recorded.
666 // Therefore the correct way is to call the Compiler::optPrintLoopInfo(unsigned lnum)
667 // version of this method.
669 printf("L%02u, from BB%02u", loopInd, lpFirst->bbNum);
// Only mention TOP separately when it differs from FIRST.
670 if (lpTop != lpFirst)
672 printf(" (loop top is BB%02u)", lpTop->bbNum);
675 printf(" to BB%02u (Head=BB%02u, Entry=BB%02u, ExitCnt=%d", lpBottom->bbNum, lpHead->bbNum, lpEntry->bbNum,
680 printf(" at BB%02u", lpExit->bbNum);
683 if (parentLoop != BasicBlock::NOT_IN_LOOP)
685 printf(", parent loop = L%02u", parentLoop);
690 /*****************************************************************************
692 * Print loop information given the index of the loop in the loop table.
// optPrintLoopInfo: Debug-print the loop at table index 'lnum' by expanding
// its LoopDsc fields into the detailed overload above.
695 void Compiler::optPrintLoopInfo(unsigned lnum)
697 noway_assert(lnum < optLoopCount);
699 LoopDsc* ldsc = &optLoopTable[lnum]; // lnum is the INDEX to the loop table.
701 optPrintLoopInfo(lnum, ldsc->lpHead, ldsc->lpFirst, ldsc->lpTop, ldsc->lpEntry, ldsc->lpBottom, ldsc->lpExitCnt,
702 ldsc->lpExit, ldsc->lpParent);
707 //------------------------------------------------------------------------
708 // optPopulateInitInfo: Populate loop init info in the loop table.
711 // init - the tree that is supposed to initialize the loop iterator.
712 // iterVar - loop iteration variable.
715 // "false" if the loop table could not be populated with the loop iterVar init info.
718 // The 'init' tree is checked if its lhs is a local and rhs is either
719 // a const or a local.
// optPopulateInitInfo: Record the loop iterator's initialization in the loop
// table. Accepts only "iterVar = <int const>" (LPFLG_CONST_INIT) or
// "iterVar = <lclVar>" (LPFLG_VAR_INIT); returns false for any other shape
// (return statements are on lines elided from this view).
721 bool Compiler::optPopulateInitInfo(unsigned loopInd, GenTreePtr init, unsigned iterVar)
723 // Operator should be =
724 if (init->gtOper != GT_ASG)
729 GenTreePtr lhs = init->gtOp.gtOp1;
730 GenTreePtr rhs = init->gtOp.gtOp2;
731 // LHS has to be local and should equal iterVar.
732 if (lhs->gtOper != GT_LCL_VAR || lhs->gtLclVarCommon.gtLclNum != iterVar)
737 // RHS can be constant or local var.
738 // TODO-CQ: CLONE: Add arr length for descending loops.
739 if (rhs->gtOper == GT_CNS_INT && rhs->TypeGet() == TYP_INT)
741 optLoopTable[loopInd].lpFlags |= LPFLG_CONST_INIT;
742 optLoopTable[loopInd].lpConstInit = (int)rhs->gtIntCon.gtIconVal;
744 else if (rhs->gtOper == GT_LCL_VAR)
746 optLoopTable[loopInd].lpFlags |= LPFLG_VAR_INIT;
747 optLoopTable[loopInd].lpVarInit = rhs->gtLclVarCommon.gtLclNum;
756 //----------------------------------------------------------------------------------
757 // optCheckIterInLoopTest: Check if iter var is used in loop test.
760 // test "jtrue" tree or an asg of the loop iter termination condition
761 // from/to blocks (beg, end) which are part of the loop.
762 // iterVar loop iteration variable.
763 // loopInd loop index.
766 // The test tree is parsed to check if "iterVar" matches the lhs of the condition
767 // and the rhs limit is extracted from the "test" tree. The limit information is
768 // added to the loop table.
771 // "false" if the loop table could not be populated with the loop test info or
772 // if the test condition doesn't involve iterVar.
// optCheckIterInLoopTest: Verify that the loop's termination test uses
// 'iterVar' and classify the other operand (the limit) as constant, loop-
// invariant local, or array length, setting the corresponding LPFLG_*_LIMIT
// flag and storing the relop in lpTestTree. Returns false when the test does
// not involve iterVar or the limit is unsupported (early returns elided).
774 bool Compiler::optCheckIterInLoopTest(
775 unsigned loopInd, GenTreePtr test, BasicBlock* from, BasicBlock* to, unsigned iterVar)
777 // Obtain the relop from the "test" tree.
779 if (test->gtOper == GT_JTRUE)
781 relop = test->gtGetOp1();
// Otherwise the caller passed the assignment form (tmp = <relop>).
785 assert(test->gtOper == GT_ASG);
786 relop = test->gtGetOp2();
789 noway_assert(relop->OperKind() & GTK_RELOP);
791 GenTreePtr opr1 = relop->gtOp.gtOp1;
792 GenTreePtr opr2 = relop->gtOp.gtOp2;
797 // Make sure op1 or op2 is the iterVar.
798 if (opr1->gtOper == GT_LCL_VAR && opr1->gtLclVarCommon.gtLclNum == iterVar)
803 else if (opr2->gtOper == GT_LCL_VAR && opr2->gtLclVarCommon.gtLclNum == iterVar)
// Only TYP_INT iterators are supported.
813 if (iterOp->gtType != TYP_INT)
818 // Mark the iterator node.
819 iterOp->gtFlags |= GTF_VAR_ITERATOR;
821 // Check what type of limit we have - constant, variable or arr-len.
822 if (limitOp->gtOper == GT_CNS_INT)
824 optLoopTable[loopInd].lpFlags |= LPFLG_CONST_LIMIT;
// A local-var limit qualifies only if it is not assigned anywhere in [from, to].
826 else if (limitOp->gtOper == GT_LCL_VAR && !optIsVarAssigned(from, to, nullptr, limitOp->gtLclVarCommon.gtLclNum))
828 optLoopTable[loopInd].lpFlags |= LPFLG_VAR_LIMIT;
830 else if (limitOp->gtOper == GT_ARR_LENGTH)
832 optLoopTable[loopInd].lpFlags |= LPFLG_ARRLEN_LIMIT;
838 // Save the type of the comparison between the iterator and the limit.
839 optLoopTable[loopInd].lpTestTree = relop;
843 //----------------------------------------------------------------------------------
844 // optIsLoopIncrTree: Check if loop is a tree of form v += 1 or v = v + 1
847 // incr The incr tree to be checked. Whether incr tree is
848 // oper-equal(+=, -=...) type nodes or v=v+1 type ASG nodes.
851 // The test tree is parsed to check if "iterVar" matches the lhs of the condition
852 // and the rhs limit is extracted from the "test" tree. The limit information is
853 // added to the loop table.
856 // iterVar local num if the iterVar is found, otherwise BAD_VAR_NUM.
// optIsLoopIncrTree: Check whether 'incr' has the shape of a loop-iterator
// update — "v += c" / "v = v op c" with a TYP_INT constant — and return the
// iterator's local number, or BAD_VAR_NUM if the tree does not match
// (rejection paths are on lines elided from this view).
858 unsigned Compiler::optIsLoopIncrTree(GenTreePtr incr)
861 genTreeOps updateOper;
// IsLclVarUpdateTree recognizes both the op-assign and v = v op y forms.
862 unsigned iterVar = incr->IsLclVarUpdateTree(&incrVal, &updateOper);
863 if (iterVar != BAD_VAR_NUM)
865 // We have v = v op y type asg node.
878 // Increment should be by a const int.
879 // TODO-CQ: CLONE: allow variable increments.
880 if ((incrVal->gtOper != GT_CNS_INT) || (incrVal->TypeGet() != TYP_INT))
889 //----------------------------------------------------------------------------------
890 // optComputeIterInfo: Check tree is loop increment of a lcl that is loop-invariant.
893 // from, to - are blocks (beg, end) which are part of the loop.
894 // incr - tree that increments the loop iterator. v+=1 or v=v+1.
895 // pIterVar - see return value.
898 // Returns true if iterVar "v" can be returned in "pIterVar", otherwise returns
902 // Check if the "incr" tree is a "v=v+1 or v+=1" type tree and make sure it is not
903 // assigned in the loop.
// optComputeIterInfo: Confirm 'incr' is a loop-iterator update (v+=1 / v=v+1)
// whose variable is not otherwise assigned inside [from, to]; on success the
// iterator's local number is returned through pIterVar (tail elided).
905 bool Compiler::optComputeIterInfo(GenTreePtr incr, BasicBlock* from, BasicBlock* to, unsigned* pIterVar)
908 unsigned iterVar = optIsLoopIncrTree(incr);
909 if (iterVar == BAD_VAR_NUM)
// Any other assignment to the iterator in the loop disqualifies it
// (the incr tree itself is excluded from the scan).
913 if (optIsVarAssigned(from, to, incr, iterVar))
915 JITDUMP("iterVar is assigned in loop\n");
923 //----------------------------------------------------------------------------------
924 // optIsLoopTestEvalIntoTemp:
925 // Pattern match if the test tree is computed into a tmp
926 // and the "tmp" is used as jump condition for loop termination.
929 // testStmt - is the JTRUE statement that is of the form: jmpTrue (Vtmp != 0)
930 // where Vtmp contains the actual loop test result.
931 // newStmt - contains the statement that is the actual test stmt involving
932 // the loop iterator.
935 // Returns true if a new test tree can be obtained.
938 // Scan if the current stmt is a jtrue with (Vtmp != 0) as condition
939 // Then returns the rhs for def of Vtmp as the "test" node.
942 // This method just retrieves what it thinks is the "test" node,
943 // the callers are expected to verify that "iterVar" is used in the test.
// optIsLoopTestEvalIntoTemp: Pattern-match "tmp = <relop>; JTRUE(tmp != 0)"
// and return the relop assignment statement through newTest, so callers can
// treat it as the real loop test. Returns false when the pattern does not
// match. Callers must still verify the iterator appears in the relop.
945 bool Compiler::optIsLoopTestEvalIntoTemp(GenTreePtr testStmt, GenTreePtr* newTest)
947 GenTreePtr test = testStmt->gtStmt.gtStmtExpr;
949 if (test->gtOper != GT_JTRUE)
954 GenTreePtr relop = test->gtGetOp1();
955 noway_assert(relop->OperIsCompare());
957 GenTreePtr opr1 = relop->gtOp.gtOp1;
958 GenTreePtr opr2 = relop->gtOp.gtOp2;
960 // Make sure we have jtrue (vtmp != 0)
961 if ((relop->OperGet() == GT_NE) && (opr1->OperGet() == GT_LCL_VAR) && (opr2->OperGet() == GT_CNS_INT) &&
962 opr2->IsIntegralConst(0))
964 // Get the previous statement to get the def (rhs) of Vtmp to see
965 // if the "test" is evaluated into Vtmp.
966 GenTreePtr prevStmt = testStmt->gtPrev;
967 if (prevStmt == nullptr)
972 GenTreePtr tree = prevStmt->gtStmt.gtStmtExpr;
973 if (tree->OperGet() == GT_ASG)
975 GenTreePtr lhs = tree->gtOp.gtOp1;
976 GenTreePtr rhs = tree->gtOp.gtOp2;
978 // Return as the new test node.
// The temp defined here must be the same local the JTRUE compares.
979 if (lhs->gtOper == GT_LCL_VAR && lhs->AsLclVarCommon()->GetLclNum() == opr1->AsLclVarCommon()->GetLclNum())
981 if (rhs->OperIsCompare())
992 //----------------------------------------------------------------------------------
993 // optExtractInitTestIncr:
994 // Extract the "init", "test" and "incr" nodes of the loop.
997 // head - Loop head block
998 // bottom - Loop bottom block
999 // top - Loop top block
1000 // ppInit - The init stmt of the loop if found.
1001 // ppTest - The test stmt of the loop if found.
1002 // ppIncr - The incr stmt of the loop if found.
1005 // The results are put in "ppInit", "ppTest" and "ppIncr" if the method
1006 // returns true. Returns false if the information can't be extracted.
1009 // Check if the "test" stmt is last stmt in the loop "bottom". If found good,
1010 // "test" stmt is found. Try to find the "incr" stmt. Check previous stmt of
1011 // "test" to get the "incr" stmt. If it is not found it could be a loop of the
1014 // +-------<-----------------<-----------+
1017 // BBinit(head) -> BBcond(top) -> BBLoopBody(bottom) ---^
1019 // Check if the "incr" tree is present in the loop "top" node as the last stmt.
1020 // Also check if the "test" tree is assigned to a tmp node and the tmp is used
1021 // in the jtrue condition.
1024 // This method just retrieves what it thinks is the "test" node,
1025 // the callers are expected to verify that "iterVar" is used in the test.
// optExtractInitTestIncr: Locate the loop's "init", "test", and "incr"
// statements. The test is expected as the last statement of 'bottom'; the
// incr either precedes it in 'bottom' or is the last statement of 'top'
// (while-shaped loops); the init is the last statement of 'head'. Returns
// false when any of the three cannot be found (failure paths elided).
1027 bool Compiler::optExtractInitTestIncr(
1028 BasicBlock* head, BasicBlock* bottom, BasicBlock* top, GenTreePtr* ppInit, GenTreePtr* ppTest, GenTreePtr* ppIncr)
1030 assert(ppInit != nullptr);
1031 assert(ppTest != nullptr);
1032 assert(ppIncr != nullptr);
1034 // Check if last two statements in the loop body are the increment of the iterator
1035 // and the loop termination test.
1036 noway_assert(bottom->bbTreeList != nullptr);
// bbTreeList->gtPrev is the last statement of the block (circular list head).
1037 GenTreePtr test = bottom->bbTreeList->gtPrev;
1038 noway_assert(test != nullptr && test->gtNext == nullptr);
// If the test was computed into a temp (tmp = relop; JTRUE(tmp != 0)),
// substitute the defining statement as the test.
1041 if (optIsLoopTestEvalIntoTemp(test, &newTest))
1046 // Check if we have the incr tree before the test tree, if we don't,
1047 // check if incr is part of the loop "top".
1048 GenTreePtr incr = test->gtPrev;
1049 if (incr == nullptr || optIsLoopIncrTree(incr->gtStmt.gtStmtExpr) == BAD_VAR_NUM)
1051 if (top == nullptr || top->bbTreeList == nullptr || top->bbTreeList->gtPrev == nullptr)
1056 // If the prev stmt to loop test is not incr, then check if we have loop test evaluated into a tmp.
1057 GenTreePtr topLast = top->bbTreeList->gtPrev;
1058 if (optIsLoopIncrTree(topLast->gtStmt.gtStmtExpr) != BAD_VAR_NUM)
// The test and incr must be distinct statements.
1068 assert(test != incr);
1070 // Find the last statement in the loop pre-header which we expect to be the initialization of
1071 // the loop iterator.
1072 GenTreePtr phdr = head->bbTreeList;
1073 if (phdr == nullptr)
1078 GenTreePtr init = phdr->gtPrev;
1079 noway_assert(init != nullptr && (init->gtNext == nullptr));
1081 // If it is a duplicated loop condition, skip it.
1082 if (init->gtFlags & GTF_STMT_CMPADD)
1084 // Must be a duplicated loop condition.
1085 noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
1086 init = init->gtPrev;
1087 noway_assert(init != nullptr);
1090 noway_assert(init->gtOper == GT_STMT);
1091 noway_assert(test->gtOper == GT_STMT);
1092 noway_assert(incr->gtOper == GT_STMT);
// Hand back the underlying expressions, not the GT_STMT wrappers.
1094 *ppInit = init->gtStmt.gtStmtExpr;
1095 *ppTest = test->gtStmt.gtStmtExpr;
1096 *ppIncr = incr->gtStmt.gtStmtExpr;
1101 /*****************************************************************************
1103 * Record the loop in the loop table.
// optRecordLoop: Add a loop (head/first/top/entry/bottom/exit/exitCnt) to the
// loop table, keeping the table ordered so that containing loops appear after
// the loops they contain. Also attempts to recognize a "for"-like iterator
// (init/test/incr) and record LPFLG_ITER / LPFLG_CONST details. Silently gives
// up (sets loopOverflowThisMethod) when the table is full.
1106 void Compiler::optRecordLoop(BasicBlock* head,
1112 unsigned char exitCnt)
1114 // Record this loop in the table, if there's room.
1116 assert(optLoopCount <= MAX_LOOP_NUM);
1117 if (optLoopCount == MAX_LOOP_NUM)
1120 loopOverflowThisMethod = true;
1125 // Assumed preconditions on the loop we're adding.
1126 assert(first->bbNum <= top->bbNum);
1127 assert(top->bbNum <= entry->bbNum);
1128 assert(entry->bbNum <= bottom->bbNum);
1129 assert(head->bbNum < top->bbNum || head->bbNum > bottom->bbNum);
1131 // If the new loop contains any existing ones, add it in the right place.
1132 unsigned char loopInd = optLoopCount;
1133 for (unsigned char prevPlus1 = optLoopCount; prevPlus1 > 0; prevPlus1--)
1135 unsigned char prev = prevPlus1 - 1;
1136 if (optLoopTable[prev].lpContainedBy(first, bottom))
1141 // Move up any loops if necessary.
1142 for (unsigned j = optLoopCount; j > loopInd; j--)
1144 optLoopTable[j] = optLoopTable[j - 1];
// (DEBUG) every loop after the insertion point must be either disjoint from
// or contained in the new loop.
1148 for (unsigned i = loopInd + 1; i < optLoopCount; i++)
1150 // The loop is well-formed.
1151 assert(optLoopTable[i].lpWellFormed());
1152 // Check for disjoint.
1153 if (optLoopTable[i].lpDisjoint(first, bottom))
1157 // Otherwise, assert complete containment (of optLoopTable[i] in new loop).
1158 assert(optLoopTable[i].lpContainedBy(first, bottom));
// Populate the new entry; parent/child/sibling links start unset.
1162 optLoopTable[loopInd].lpHead = head;
1163 optLoopTable[loopInd].lpFirst = first;
1164 optLoopTable[loopInd].lpTop = top;
1165 optLoopTable[loopInd].lpBottom = bottom;
1166 optLoopTable[loopInd].lpEntry = entry;
1167 optLoopTable[loopInd].lpExit = exit;
1168 optLoopTable[loopInd].lpExitCnt = exitCnt;
1170 optLoopTable[loopInd].lpParent = BasicBlock::NOT_IN_LOOP;
1171 optLoopTable[loopInd].lpChild = BasicBlock::NOT_IN_LOOP;
1172 optLoopTable[loopInd].lpSibling = BasicBlock::NOT_IN_LOOP;
1174 optLoopTable[loopInd].lpFlags = 0;
1176 // We haven't yet recorded any side effects.
1177 optLoopTable[loopInd].lpLoopHasHeapHavoc = false;
1178 optLoopTable[loopInd].lpFieldsModified = nullptr;
1179 optLoopTable[loopInd].lpArrayElemTypesModified = nullptr;
1181 // If DO-WHILE loop mark it as such.
1182 if (head->bbNext == entry)
1184 optLoopTable[loopInd].lpFlags |= LPFLG_DO_WHILE;
1187 // If single exit loop mark it as such.
1191 optLoopTable[loopInd].lpFlags |= LPFLG_ONE_EXIT;
1195 // Try to find loops that have an iterator (i.e. for-like loops) "for (init; test; incr){ ... }"
1196 // We have the following restrictions:
1197 // 1. The loop condition must be a simple one i.e. only one JTRUE node
1198 // 2. There must be a loop iterator (a local var) that is
1199 // incremented (decremented or lsh, rsh, mul) with a constant value
1200 // 3. The iterator is incremented exactly once
1201 // 4. The loop condition must use the iterator.
1203 if (bottom->bbJumpKind == BBJ_COND)
1208 if (!optExtractInitTestIncr(head, bottom, top, &init, &test, &incr))
1213 unsigned iterVar = BAD_VAR_NUM;
1214 if (!optComputeIterInfo(incr, head->bbNext, bottom, &iterVar))
1219 // Make sure the "iterVar" initialization is never skipped,
1220 // i.e. HEAD dominates the ENTRY.
1221 if (!fgDominate(head, entry))
1226 if (!optPopulateInitInfo(loopInd, init, iterVar))
1231 // Check that the iterator is used in the loop condition.
1232 if (!optCheckIterInLoopTest(loopInd, test, head->bbNext, bottom, iterVar))
1237 // We know the loop has an iterator at this point ->flag it as LPFLG_ITER
1238 // Record the iterator, the pointer to the test node
1239 // and the initial value of the iterator (constant or local var)
1240 optLoopTable[loopInd].lpFlags |= LPFLG_ITER;
1243 optLoopTable[loopInd].lpIterTree = incr;
1246 // Save the initial value of the iterator - can be lclVar or constant
1247 // Flag the loop accordingly.
// (DEBUG statistics counter; guard elided from this view.)
1253 simpleTestLoopCount++;
1256 // Check if a constant iteration loop.
1257 if ((optLoopTable[loopInd].lpFlags & LPFLG_CONST_INIT) && (optLoopTable[loopInd].lpFlags & LPFLG_CONST_LIMIT))
1259 // This is a constant loop.
1260 optLoopTable[loopInd].lpFlags |= LPFLG_CONST;
1262 constIterLoopCount++;
// (DEBUG) verbose dump of the constant loop's initializer and body.
1269 printf("\nConstant loop initializer:\n");
1272 printf("\nConstant loop body:\n");
1274 BasicBlock* block = head;
1277 block = block->bbNext;
1278 for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
1280 if (stmt->gtStmt.gtStmtExpr == incr)
1285 gtDispTree(stmt->gtStmt.gtStmtExpr);
1287 } while (block != bottom);
1293 DBEXEC(verbose, optPrintLoopRecording(loopInd));
1298 //------------------------------------------------------------------------
1299 // optPrintLoopRecording: Print a recording of the loop.
1302 // loopInd - loop index.
// optPrintLoopRecording: Debug dump of a just-recorded loop: its block layout
// plus, for iterator loops, the iterator variable, update operation/constant,
// initial value, and the test operator with its limit.
1304 void Compiler::optPrintLoopRecording(unsigned loopInd)
1306 printf("Recorded loop %s", (loopInd != optLoopCount ? "(extended) " : ""));
1307 optPrintLoopInfo(optLoopCount, // Not necessarily the loop index, but the number of loops that have been added.
1308 optLoopTable[loopInd].lpHead, optLoopTable[loopInd].lpFirst, optLoopTable[loopInd].lpTop,
1309 optLoopTable[loopInd].lpEntry, optLoopTable[loopInd].lpBottom, optLoopTable[loopInd].lpExitCnt,
1310 optLoopTable[loopInd].lpExit);
1312 // If an iterator loop print the iterator and the initialization.
1313 if (optLoopTable[loopInd].lpFlags & LPFLG_ITER)
1315 printf(" [over V%02u", optLoopTable[loopInd].lpIterVar());
1317 printf(GenTree::NodeName(optLoopTable[loopInd].lpIterOper()));
1319 printf("%d )", optLoopTable[loopInd].lpIterConst());
1321 if (optLoopTable[loopInd].lpFlags & LPFLG_CONST_INIT)
1323 printf(" from %d", optLoopTable[loopInd].lpConstInit);
1325 if (optLoopTable[loopInd].lpFlags & LPFLG_VAR_INIT)
1327 printf(" from V%02u", optLoopTable[loopInd].lpVarInit);
1330 // If a simple test condition print operator and the limits */
1331 printf(GenTree::NodeName(optLoopTable[loopInd].lpTestOper()));
1333 if (optLoopTable[loopInd].lpFlags & LPFLG_CONST_LIMIT)
1335 printf("%d ", optLoopTable[loopInd].lpConstLimit());
1338 if (optLoopTable[loopInd].lpFlags & LPFLG_VAR_LIMIT)
1340 printf("V%02u ", optLoopTable[loopInd].lpVarLimit());
// optCheckPreds: (DEBUG) Validate the predecessor lists: every recorded pred
// must be a block in the bbNext chain, and its jump kind must actually be able
// to transfer control to the block that lists it as a pred.
1349 void Compiler::optCheckPreds()
1352 BasicBlock* blockPred;
1355 for (block = fgFirstBB; block; block = block->bbNext)
1357 for (pred = block->bbPreds; pred; pred = pred->flNext)
1359 // make sure this pred is part of the BB list
1360 for (blockPred = fgFirstBB; blockPred; blockPred = blockPred->bbNext)
1362 if (blockPred == pred->flBlock)
1367 noway_assert(blockPred);
// Cross-check the pred's jump kind against the edge it claims
// (case labels for this switch are on lines elided from this view).
1368 switch (blockPred->bbJumpKind)
1371 if (blockPred->bbJumpDest == block)
1377 noway_assert(blockPred->bbNext == block);
1379 case BBJ_EHFILTERRET:
1381 case BBJ_EHCATCHRET:
1382 noway_assert(blockPred->bbJumpDest == block);
1393 /*****************************************************************************
1394 * Find the natural loops, using dominators. Note that the test for
1395 * a loop is slightly different from the standard one, because we have
1396 * not done a depth first reordering of the basic blocks.
1399 void Compiler::optFindNaturalLoops()
// Scans the flow graph for "back edges" (edges to a lexically earlier block),
// validates each candidate loop (unique ENTRY, ENTRY dominates all loop
// blocks, loop is actually iterable, EH constraints hold), records accepted
// loops via optRecordLoop, then computes parent/child/sibling relations,
// labels blocks with their innermost loop, and canonicalizes loop tops.
1404 printf("*************** In optFindNaturalLoops()\n");
1410 flowList* predEntry;
// Dominator information must already be available — the loop test below
// relies on fgDominate.
1412 noway_assert(fgDomsComputed);
1416 hasMethodLoops = false;
1417 loopsThisMethod = 0;
1418 loopOverflowThisMethod = false;
1421 /* We will use the following terminology:
1422 * HEAD - the basic block that flows into the loop ENTRY block (Currently MUST be lexically before entry).
1423 Not part of the looping of the loop.
1424 * FIRST - the lexically first basic block (in bbNext order) within this loop. (May be part of a nested loop,
1425 * but not the outer loop. ???)
1426 * TOP - the target of the backward edge from BOTTOM. In most cases FIRST and TOP are the same.
1427 * BOTTOM - the lexically last block in the loop (i.e. the block from which we jump to the top)
1428 * EXIT - the loop exit or the block right after the bottom
1429 * ENTRY - the entry in the loop (not necessarily the TOP), but there must be only one entry
1431 * We (currently) require the body of a loop to be a contiguous (in bbNext order) sequence of basic blocks.
1465 unsigned char exitCount;
// Consider every block (except the last) as a potential HEAD; TOP would be
// head->bbNext (elided assignment not visible in this view).
1467 for (head = fgFirstBB; head->bbNext; head = head->bbNext)
1473 // Blocks that are rarely run have a zero bbWeight and should
1474 // never be optimized here
1476 if (top->bbWeight == BB_ZERO_WEIGHT)
// Look through TOP's predecessors for a backward edge that would make TOP the
// target of a loop back edge.
1481 for (pred = top->bbPreds; pred; pred = pred->flNext)
1483 /* Is this a loop candidate? - We look for "back edges", i.e. an edge from BOTTOM
1484 * to TOP (note that this is an abuse of notation since this is not necessarily a back edge
1485 * as the definition says, but merely an indication that we have a loop there).
1486 * Thus, we have to be very careful and after entry discovery check that it is indeed
1487 * the only place we enter the loop (especially for non-reducible flow graphs).
1490 bottom = pred->flBlock;
1493 if (top->bbNum <= bottom->bbNum) // is this a backward edge? (from BOTTOM to TOP)
1495 if ((bottom->bbJumpKind == BBJ_EHFINALLYRET) || (bottom->bbJumpKind == BBJ_EHFILTERRET) ||
1496 (bottom->bbJumpKind == BBJ_EHCATCHRET) || (bottom->bbJumpKind == BBJ_CALLFINALLY) ||
1497 (bottom->bbJumpKind == BBJ_SWITCH))
1499 /* BBJ_EHFINALLYRET, BBJ_EHFILTERRET, BBJ_EHCATCHRET, and BBJ_CALLFINALLY can never form a loop.
1500 * BBJ_SWITCH that has a backward jump appears only for labeled break. */
1504 BasicBlock* loopBlock;
1506 /* The presence of a "back edge" is an indication that a loop might be present here
1509 * 1. A collection of STRONGLY CONNECTED nodes i.e. there is a path from any
1510 * node in the loop to any other node in the loop (wholly within the loop)
1511 * 2. The loop has a unique ENTRY, i.e. there is only one way to reach a node
1512 * in the loop from outside the loop, and that is through the ENTRY
1515 /* Let's find the loop ENTRY */
1517 if (head->bbJumpKind == BBJ_ALWAYS)
// HEAD jumps unconditionally: the loop is entered at the jump target, which
// must lie within [top..bottom] for this to be a loop entry.
1519 if (head->bbJumpDest->bbNum <= bottom->bbNum && head->bbJumpDest->bbNum >= top->bbNum)
1521 /* OK - we enter somewhere within the loop */
1522 entry = head->bbJumpDest;
1524 /* some useful asserts
1525 * Cannot enter at the top - should have being caught by redundant jumps */
1527 assert((entry != top) || (head->bbFlags & BBF_KEEP_BBJ_ALWAYS));
1531 /* special case - don't consider now */
1532 // assert (!"Loop entered in weird way!");
1536 // Can we fall through into the loop?
1537 else if (head->bbJumpKind == BBJ_NONE || head->bbJumpKind == BBJ_COND)
1539 /* The ENTRY is at the TOP (a do-while loop) */
1544 goto NO_LOOP; // head does not flow into the loop bail for now
1547 // Now we find the "first" block -- the earliest block reachable within the loop.
1548 // This is usually the same as "top", but can differ in rare cases where "top" is
1549 // the entry block of a nested loop, and that nested loop branches backwards to a
1550 // a block before "top". We find this by searching for such backwards branches
1551 // in the loop known so far.
1552 BasicBlock* first = top;
1553 BasicBlock* newFirst;
1554 bool blocksToSearch = true;
1555 BasicBlock* validatedAfter = bottom->bbNext;
// Fixed-point search: keep extending 'first' backwards until no successor of
// a loop block precedes it.
1556 while (blocksToSearch)
1558 blocksToSearch = false;
// NOTE(review): an elided line between these two presumably resets
// 'newFirst' to nullptr each iteration — confirm against the full file.
1560 blocksToSearch = false;
1561 for (loopBlock = first; loopBlock != validatedAfter; loopBlock = loopBlock->bbNext)
1563 unsigned nSucc = loopBlock->NumSucc();
1564 for (unsigned j = 0; j < nSucc; j++)
1566 BasicBlock* succ = loopBlock->GetSucc(j);
// Track the earliest (lowest-numbered) successor seen so far that precedes
// the current 'first'.
1567 if ((newFirst == nullptr && succ->bbNum < first->bbNum) ||
1568 (newFirst != nullptr && succ->bbNum < newFirst->bbNum))
1574 if (newFirst != nullptr)
// Only the newly-added prefix [newFirst..first) needs scanning next round.
1576 validatedAfter = first;
1578 blocksToSearch = true;
1582 // Is "head" still before "first"? If not, we don't have a valid loop...
1583 if (head->bbNum >= first->bbNum)
1586 "Extending loop [BB%02u..BB%02u] 'first' to BB%02u captures head BB%02u. Rejecting loop.\n",
1587 top->bbNum, bottom->bbNum, first->bbNum, head->bbNum);
1591 /* Make sure ENTRY dominates all blocks in the loop
1592 * This is necessary to ensure condition 2. above
1593 * At the same time check if the loop has a single exit
1594 * point - those loops are easier to optimize */
1596 for (loopBlock = top; loopBlock != bottom->bbNext; loopBlock = loopBlock->bbNext)
1598 if (!fgDominate(entry, loopBlock))
1603 if (loopBlock == bottom)
1605 if (bottom->bbJumpKind != BBJ_ALWAYS)
1607 /* there is an exit at the bottom */
// If BOTTOM is BBJ_ALWAYS its jump must be the back edge to TOP.
1609 noway_assert(bottom->bbJumpDest == top);
1616 BasicBlock* exitPoint;
// Classify each loop block's jump: does it leave the [top..bottom] range?
1618 switch (loopBlock->bbJumpKind)
1621 case BBJ_CALLFINALLY:
1623 case BBJ_EHCATCHRET:
1624 assert(loopBlock->bbJumpDest);
1625 exitPoint = loopBlock->bbJumpDest;
1627 if (exitPoint->bbNum < top->bbNum || exitPoint->bbNum > bottom->bbNum)
1629 /* exit from a block other than BOTTOM */
1638 case BBJ_EHFINALLYRET:
1639 case BBJ_EHFILTERRET:
1640 /* The "try" associated with this "finally" must be in the
1641 * same loop, so the finally block will return control inside the loop */
1646 /* those are exits from the loop */
// BBJ_SWITCH (case label elided): check every destination in the jump table.
1654 jumpCnt = loopBlock->bbJumpSwt->bbsCount;
1655 BasicBlock** jumpTab;
1656 jumpTab = loopBlock->bbJumpSwt->bbsDstTab;
1660 noway_assert(*jumpTab);
1661 exitPoint = *jumpTab;
1663 if (exitPoint->bbNum < top->bbNum || exitPoint->bbNum > bottom->bbNum)
1668 } while (++jumpTab, --jumpCnt);
1672 noway_assert(!"Unexpected bbJumpKind");
1677 /* Make sure we can iterate the loop (i.e. there is a way back to ENTRY)
1678 * This is to ensure condition 1. above which prevents marking fake loops
1680 * Below is an example:
1688 * The example above is not a loop since we bail after the first iteration
1690 * The condition we have to check for is
1691 * 1. ENTRY must have at least one predecessor inside the loop. Since we know that that block is
1692 * reachable, it can only be reached through ENTRY, therefore we have a way back to ENTRY
1694 * 2. If we have a GOTO (BBJ_ALWAYS) outside of the loop and that block dominates the
1695 * loop bottom then we cannot iterate
1697 * NOTE that this doesn't entirely satisfy condition 1. since "break" statements are not
1698 * part of the loop nodes (as per definition they are loop exits executed only once),
1699 * but we have no choice but to include them because we consider all blocks within TOP-BOTTOM */
1701 for (loopBlock = top; loopBlock != bottom; loopBlock = loopBlock->bbNext)
1703 switch (loopBlock->bbJumpKind)
// An unconditional jump out of the loop that dominates BOTTOM means every
// path leaves — the "loop" cannot iterate (case label elided here).
1708 if (fgDominate(loopBlock, bottom))
1717 bool canIterateLoop = false;
// Condition 1: ENTRY needs at least one predecessor inside [top..bottom].
1719 for (predEntry = entry->bbPreds; predEntry; predEntry = predEntry->flNext)
1721 if (predEntry->flBlock->bbNum >= top->bbNum && predEntry->flBlock->bbNum <= bottom->bbNum)
1723 canIterateLoop = true;
1726 else if (predEntry->flBlock != head)
1728 // The entry block has multiple predecessors outside the loop; the 'head'
1729 // block isn't the only one. We only support a single 'head', so bail.
1734 if (!canIterateLoop)
1739 /* Double check - make sure that all loop blocks except ENTRY
1740 * have no predecessors outside the loop - this ensures only one loop entry and prevents
1741 * us from considering non-loops due to incorrectly assuming that we had a back edge
1744 * Loops of the form "while (a || b)" will be treated as 2 nested loops (with the same header)
1747 for (loopBlock = top; loopBlock != bottom->bbNext; loopBlock = loopBlock->bbNext)
1749 if (loopBlock == entry)
1754 for (predTop = loopBlock->bbPreds; predTop != nullptr; predTop = predTop->flNext)
1756 if (predTop->flBlock->bbNum < top->bbNum || predTop->flBlock->bbNum > bottom->bbNum)
1758 // noway_assert(!"Found loop with multiple entries");
1764 // Disqualify loops where the first block of the loop is less nested in EH than
1765 // the bottom block. That is, we don't want to handle loops where the back edge
1766 // goes from within an EH region to a first block that is outside that same EH
1767 // region. Note that we *do* handle loops where the first block is the *first*
1768 // block of a more nested EH region (since it is legal to branch to the first
1769 // block of an immediately more nested EH region). So, for example, disqualify
1776 // BB10 BBJ_COND => BB02
1780 // Here, BB10 is more nested than BB02.
1782 if (bottom->hasTryIndex() && !bbInTryRegions(bottom->getTryIndex(), first))
1784 JITDUMP("Loop 'first' BB%02u is in an outer EH region compared to loop 'bottom' BB%02u. Rejecting "
1786 first->bbNum, bottom->bbNum);
1790 #if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
1791 // Disqualify loops where the first block of the loop is a finally target.
1792 // The main problem is when multiple loops share a 'first' block that is a finally
1793 // target and we canonicalize the loops by adding a new loop head. In that case, we
1794 // need to update the blocks so the finally target bit is moved to the newly created
1795 // block, and removed from the old 'first' block. This is 'hard', so at this point
1796 // in the RyuJIT codebase (when we don't expect to keep the "old" ARM32 code generator
1797 // long-term), it's easier to disallow the loop than to update the flow graph to
1798 // support this case.
1800 if ((first->bbFlags & BBF_FINALLY_TARGET) != 0)
1802 JITDUMP("Loop 'first' BB%02u is a finally target. Rejecting loop.\n", first->bbNum);
1805 #endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
1807 /* At this point we have a loop - record it in the loop table
1808 * If we found only one exit, record it in the table too
1809 * (otherwise an exit = 0 in the loop table means multiple exits) */
1816 optRecordLoop(head, first, top, entry, bottom, exit, exitCount);
1819 if (!hasMethodLoops)
1821 /* mark the method as containing natural loops */
1823 hasMethodLoops = true;
1826 /* increment total number of loops found */
1830 /* keep track of the number of exits */
1831 loopExitCountTable.record(static_cast<unsigned>(exitCount));
1832 #endif // COUNT_LOOPS
1835 /* current predecessor not good for a loop - continue with another one, if any */
// COUNT_LOOPS statistics (the matching #if is elided from this view).
1841 loopCountTable.record(loopsThisMethod);
1842 if (maxLoopsPerMethod < loopsThisMethod)
1844 maxLoopsPerMethod = loopsThisMethod;
1846 if (loopOverflowThisMethod)
1848 totalLoopOverflows++;
1850 #endif // COUNT_LOOPS
1852 // Now the loop indices are stable. We can figure out parent/child relationships
1853 // (using table indices to name loops), and label blocks.
1854 for (unsigned char loopInd = 1; loopInd < optLoopCount; loopInd++)
// Search backwards from loopInd for the nearest enclosing loop; the first
// containing loop found becomes the parent (loops are recorded outer-first).
1856 for (unsigned char possibleParent = loopInd; possibleParent > 0;)
1859 if (optLoopTable[possibleParent].lpContains(optLoopTable[loopInd]))
1861 optLoopTable[loopInd].lpParent = possibleParent;
// Push loopInd onto the parent's child list (siblings form a linked list).
1862 optLoopTable[loopInd].lpSibling = optLoopTable[possibleParent].lpChild;
1863 optLoopTable[possibleParent].lpChild = loopInd;
1869 // Now label the blocks with the innermost loop to which they belong. Since parents
1870 // precede children in the table, doing the labeling for each loop in order will achieve
1871 // this -- the innermost loop labeling will be done last.
1872 for (unsigned char loopInd = 0; loopInd < optLoopCount; loopInd++)
1874 BasicBlock* first = optLoopTable[loopInd].lpFirst;
1875 BasicBlock* bottom = optLoopTable[loopInd].lpBottom;
1876 for (BasicBlock* blk = first; blk != nullptr; blk = blk->bbNext)
1878 blk->bbNatLoopNum = loopInd;
1883 assert(blk->bbNext != nullptr); // We should never reach nullptr.
1887 // Make sure that loops are canonical: that every loop has a unique "top", by creating an empty "nop"
1888 // one, if necessary, for loops containing others that share a "top."
1890 for (unsigned char loopInd = 0; loopInd < optLoopCount; loopInd++)
1892 // Traverse the outermost loops as entries into the loop nest; so skip non-outermost.
1893 if (optLoopTable[loopInd].lpParent != BasicBlock::NOT_IN_LOOP)
1899 if (optCanonicalizeLoopNest(loopInd))
// Canonicalization added blocks; recompute flow-graph annotations.
1906 fgUpdateChangedFlowGraph();
1910 if (verbose && optLoopCount > 0)
1912 printf("\nFinal natural loop table:\n");
1913 for (unsigned loopInd = 0; loopInd < optLoopCount; loopInd++)
1915 optPrintLoopInfo(loopInd);
1922 void Compiler::optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap)
// Rewrites blk's jump destination(s): any destination that appears as a key
// in 'redirectMap' is replaced by the mapped block. Predecessor lists are NOT
// updated here (callers rely on that — see optCanonicalizeLoop).
1924 BasicBlock* newJumpDest = nullptr;
1925 switch (blk->bbJumpKind)
1930 case BBJ_EHFILTERRET:
1931 case BBJ_EHFINALLYRET:
1932 case BBJ_EHCATCHRET:
1933 // These have no jump destination to update.
1938 case BBJ_CALLFINALLY:
1940 // All of these have a single jump destination to update.
1941 if (redirectMap->Lookup(blk->bbJumpDest, &newJumpDest))
1943 blk->bbJumpDest = newJumpDest;
// Switch case (label elided): rewrite each entry of the destination table.
1949 bool redirected = false;
1950 for (unsigned i = 0; i < blk->bbJumpSwt->bbsCount; i++)
1952 if (redirectMap->Lookup(blk->bbJumpSwt->bbsDstTab[i], &newJumpDest))
1954 blk->bbJumpSwt->bbsDstTab[i] = newJumpDest;
1958 // If any redirections happened, invalidate the switch table map for the switch.
1961 GetSwitchDescMap()->Remove(blk);
1971 // TODO-Cleanup: This should be a static member of the BasicBlock class.
1972 void Compiler::optCopyBlkDest(BasicBlock* from, BasicBlock* to)
// Copies the jump destination(s) of "from" onto "to". Both blocks must have
// the same jump kind (asserted below). For switches a fresh BBswtDesc and
// destination table are allocated so the two blocks never share storage.
1974 assert(from->bbJumpKind == to->bbJumpKind); // Precondition.
1976 // copy the jump destination(s) from "from" to "to".
1977 switch (to->bbJumpKind)
1981 case BBJ_CALLFINALLY:
1983 // All of these have a single jump destination to update.
1984 to->bbJumpDest = from->bbJumpDest;
// Switch case (label elided): deep-copy the switch descriptor and table.
1989 to->bbJumpSwt = new (this, CMK_BasicBlock) BBswtDesc();
1990 to->bbJumpSwt->bbsCount = from->bbJumpSwt->bbsCount;
1991 to->bbJumpSwt->bbsDstTab = new (this, CMK_BasicBlock) BasicBlock*[from->bbJumpSwt->bbsCount];
1993 for (unsigned i = 0; i < from->bbJumpSwt->bbsCount; i++)
1995 to->bbJumpSwt->bbsDstTab[i] = from->bbJumpSwt->bbsDstTab[i];
2005 // Canonicalize the loop nest rooted at parent loop 'loopInd'.
2006 // Returns 'true' if the flow graph is modified.
2007 bool Compiler::optCanonicalizeLoopNest(unsigned char loopInd)
2009 bool modified = false;
2011 // Is the top of the current loop not in any nested loop?
2012 if (optLoopTable[loopInd].lpTop->bbNatLoopNum != loopInd)
// The top block is shared with an inner loop — give this loop its own top.
2014 if (optCanonicalizeLoop(loopInd))
// Recurse into each child; canonicalizing any child counts as modifying
// the nest as a whole.
2020 for (unsigned char child = optLoopTable[loopInd].lpChild; child != BasicBlock::NOT_IN_LOOP;
2021 child = optLoopTable[child].lpSibling)
2023 if (optCanonicalizeLoopNest(child))
2032 bool Compiler::optCanonicalizeLoop(unsigned char loopInd)
// Ensures loop 'loopInd' has a "top" block that belongs to it uniquely. If
// the current top is shared with a nested loop, inserts a new empty block
// (in the correct EH region) as the loop's unique top/first, redirects the
// loop's bottom and the relevant outside predecessors to it, and fixes up
// head/entry/child-loop bookkeeping. Returns false if no change was needed.
2034 // Is the top uniquely part of the current loop?
2035 BasicBlock* t = optLoopTable[loopInd].lpTop;
2037 if (t->bbNatLoopNum == loopInd)
// Already canonical — nothing to do.
2042 JITDUMP("in optCanonicalizeLoop: L%02u has top BB%02u (bottom BB%02u) with natural loop number L%02u: need to "
2044 loopInd, t->bbNum, optLoopTable[loopInd].lpBottom->bbNum, t->bbNatLoopNum);
2046 // Otherwise, the top of this loop is also part of a nested loop.
2048 // Insert a new unique top for this loop. We must be careful to put this new
2049 // block in the correct EH region. Note that f->bbPrev might be in a different
2050 // EH region. For example:
2058 // In this case, first->bbPrev is BB07, which is in a different 'try' region.
2059 // On the other hand, the first block of multiple loops might be the first
2060 // block of a 'try' region that is completely contained in the multiple loops.
2065 // BB10 BBJ_ALWAYS => BB08
2067 // BB12 BBJ_ALWAYS => BB08
2069 // Here, we have two loops, both with BB08 as the "first" block. Block BB08
2070 // is a single-block "try" region. Neither loop "bottom" block is in the same
2071 // "try" region as BB08. This is legal because you can jump to the first block
2072 // of a try region. With EH normalization, no two "try" regions will share
2073 // this block. In this case, we need to insert a new block for the outer loop
2074 // in the same EH region as the branch from the "bottom":
2079 // BB10 BBJ_ALWAYS => BB08
2081 // BB12 BBJ_ALWAYS => BB30
2083 // Another possibility is that the "first" block of the loop nest can be the first block
2084 // of a "try" region that also has other predecessors than those in the loop, or even in
2085 // the "try" region (since blocks can target the first block of a "try" region). For example:
2089 // BB10 BBJ_ALWAYS => BB08
2091 // BB12 BBJ_ALWAYS => BB08
2094 // BB20 BBJ_ALWAYS => BB08
2096 // BB25 BBJ_ALWAYS => BB08
2098 // Here, BB08 has 4 flow graph predecessors: BB10, BB12, BB20, BB25. These are all potential loop
2099 // bottoms, for four possible nested loops. However, we require all the loop bottoms to be in the
2100 // same EH region. For loops BB08..BB10 and BB08..BB12, we need to add a new "top" block within
2101 // the try region, immediately before BB08. The bottom of the loop BB08..BB10 loop will target the
2102 // old BB08, and the bottom of the BB08..BB12 loop will target the new loop header. The other branches
2103 // (BB20, BB25) must target the new loop header, both for correctness, and to avoid the illegal
2104 // situation of branching to a non-first block of a 'try' region.
2106 // We can also have a loop nest where the "first" block is outside of a "try" region
2107 // and the back edges are inside a "try" region, for example:
2111 // BB09 try { BBJ_COND => BB02
2113 // BB15 BBJ_COND => BB02
2115 // BB21 } // end of "try"
2117 // In this case, both loop back edges were formed by "leave" instructions that were
2118 // imported into branches that were later made conditional. In this case, we don't
2119 // want to copy the EH region of the back edge, since that would create a block
2120 // outside of and disjoint with the "try" region of the back edge. However, to
2121 // simplify things, we disqualify this type of loop, so we should never see this here.
2123 BasicBlock* h = optLoopTable[loopInd].lpHead;
2124 BasicBlock* f = optLoopTable[loopInd].lpFirst;
2125 BasicBlock* b = optLoopTable[loopInd].lpBottom;
2127 // The loop must be entirely contained within a single handler region.
2128 assert(BasicBlock::sameHndRegion(f, b));
2130 // If the bottom block is in the same "try" region, then we extend the EH
2131 // region. Otherwise, we add the new block outside the "try" region.
2132 bool extendRegion = BasicBlock::sameTryRegion(f, b);
2133 BasicBlock* newT = fgNewBBbefore(BBJ_NONE, f, extendRegion);
2136 // We need to set the EH region manually. Set it to be the same
2137 // as the bottom block.
2138 newT->copyEHRegion(b);
// The new top is reachable from exactly the places the old top was.
2141 BlockSetOps::Assign(this, newT->bbReach, t->bbReach);
2143 // Redirect the "bottom" of the current loop to "newT".
2144 BlockToBlockMap* blockMap = new (getAllocatorLoopHoist()) BlockToBlockMap(getAllocatorLoopHoist());
2145 blockMap->Set(t, newT);
2146 optRedirectBlock(b, blockMap);
2148 // Redirect non-loop preds of "t" to also go to "newT". Inner loops that also branch to "t" should continue
2149 // to do so. However, there may be other predecessors from outside the loop nest that need to be updated
2150 // to point to "newT". This normally wouldn't happen, since they too would be part of the loop nest. However,
2151 // they might have been prevented from participating in the loop nest due to different EH nesting, or some
2154 // Note that optRedirectBlock doesn't update the predecessors list. So, if the same 't' block is processed
2155 // multiple times while canonicalizing multiple loop nests, we'll attempt to redirect a predecessor multiple times.
2156 // This is ok, because after the first redirection, the topPredBlock branch target will no longer match the source
2157 // edge of the blockMap, so nothing will happen.
2158 for (flowList* topPred = t->bbPreds; topPred != nullptr; topPred = topPred->flNext)
2160 BasicBlock* topPredBlock = topPred->flBlock;
2162 // Skip if topPredBlock is in the loop.
2163 // Note that this uses block number to detect membership in the loop. We are adding blocks during
2164 // canonicalization, and those block numbers will be new, and larger than previous blocks. However, we work
2165 // outside-in, so we shouldn't encounter the new blocks at the loop boundaries, or in the predecessor lists.
2166 if (t->bbNum <= topPredBlock->bbNum && topPredBlock->bbNum <= b->bbNum)
2168 JITDUMP("in optCanonicalizeLoop: 'top' predecessor BB%02u is in the range of L%02u (BB%02u..BB%02u); not "
2169 "redirecting its bottom edge\n",
2170 topPredBlock->bbNum, loopInd, t->bbNum, b->bbNum);
2174 JITDUMP("in optCanonicalizeLoop: redirect top predecessor BB%02u to BB%02u\n", topPredBlock->bbNum,
2176 optRedirectBlock(topPredBlock, blockMap);
2179 assert(newT->bbNext == f);
// Elided condition above (presumably newT != t's fall-through case): make the
// new top explicitly jump to the old top and give it an empty statement list.
2182 newT->bbJumpKind = BBJ_ALWAYS;
2183 newT->bbJumpDest = t;
2184 newT->bbTreeList = nullptr;
2185 fgInsertStmtAtEnd(newT, fgNewStmtFromTree(gtNewOperNode(GT_NOP, TYP_VOID, nullptr)));
2188 // If it had been a do-while loop (top == entry), update entry, as well.
2189 BasicBlock* origE = optLoopTable[loopInd].lpEntry;
2190 if (optLoopTable[loopInd].lpTop == origE)
2192 optLoopTable[loopInd].lpEntry = newT;
2194 optLoopTable[loopInd].lpTop = newT;
2195 optLoopTable[loopInd].lpFirst = newT;
2197 newT->bbNatLoopNum = loopInd;
2199 JITDUMP("in optCanonicalizeLoop: made new block BB%02u [%p] the new unique top of loop %d.\n", newT->bbNum,
2200 dspPtr(newT), loopInd);
2202 // Make sure the head block still goes to the entry...
2203 if (h->bbJumpKind == BBJ_NONE && h->bbNext != optLoopTable[loopInd].lpEntry)
2205 h->bbJumpKind = BBJ_ALWAYS;
2206 h->bbJumpDest = optLoopTable[loopInd].lpEntry;
2208 else if (h->bbJumpKind == BBJ_COND && h->bbNext == newT && newT != optLoopTable[loopInd].lpEntry)
// A conditional head falls through into newT but the entry moved: insert an
// intermediate always-jump block (h2) as the new head.
2210 BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, h, /*extendRegion*/ true);
2211 optLoopTable[loopInd].lpHead = h2;
2212 h2->bbJumpDest = optLoopTable[loopInd].lpEntry;
2213 h2->bbTreeList = nullptr;
2214 fgInsertStmtAtEnd(h2, fgNewStmtFromTree(gtNewOperNode(GT_NOP, TYP_VOID, nullptr)));
2217 // If any loops nested in "loopInd" have the same head and entry as "loopInd",
2218 // it must be the case that they were do-while's (since "h" fell through to the entry).
2219 // The new node "newT" becomes the head of such loops.
2220 for (unsigned char childLoop = optLoopTable[loopInd].lpChild; childLoop != BasicBlock::NOT_IN_LOOP;
2221 childLoop = optLoopTable[childLoop].lpSibling)
2223 if (optLoopTable[childLoop].lpEntry == origE && optLoopTable[childLoop].lpHead == h &&
2224 newT->bbJumpKind == BBJ_NONE && newT->bbNext == origE)
2226 optUpdateLoopHead(childLoop, h, newT);
2232 bool Compiler::optLoopContains(unsigned l1, unsigned l2)
// Returns true iff loop 'l1' contains loop 'l2', by walking l2's parent
// chain (a loop contains l2 iff it contains l2 or one of l2's ancestors).
2234 assert(l1 != BasicBlock::NOT_IN_LOOP);
// Reached the top of l2's ancestor chain without meeting l1 (the true-return
// for l1 == l2 is elided in this view).
2239 else if (l2 == BasicBlock::NOT_IN_LOOP)
2245 return optLoopContains(l1, optLoopTable[l2].lpParent);
2249 void Compiler::optUpdateLoopHead(unsigned loopInd, BasicBlock* from, BasicBlock* to)
// Replaces the head block of loop 'loopInd' ('from' -> 'to'), and recursively
// updates any child loops that shared that same head block.
2251 assert(optLoopTable[loopInd].lpHead == from);
2252 optLoopTable[loopInd].lpHead = to;
2253 for (unsigned char childLoop = optLoopTable[loopInd].lpChild; childLoop != BasicBlock::NOT_IN_LOOP;
2254 childLoop = optLoopTable[childLoop].lpSibling)
2256 if (optLoopTable[childLoop].lpHead == from)
2258 optUpdateLoopHead(childLoop, from, to);
2263 /*****************************************************************************
2264 * If the "i += const" will cause an overflow exception for the small types.
2267 bool jitIterSmallOverflow(int iterAtExit, var_types incrType)
// Returns true when the iterator's value at loop exit exceeds the maximum
// representable value of the (small) increment type, i.e. the increment
// would have wrapped. The case labels for the small types are elided in this
// view; each arm sets type_MAX to the matching limit from <climits>.
2274 type_MAX = SCHAR_MAX;
2277 type_MAX = UCHAR_MAX;
2280 type_MAX = SHRT_MAX;
2283 type_MAX = USHRT_MAX;
2286 case TYP_UINT: // Detected by checking for 32bit ....
2288 return false; // ... overflow same as done for TYP_INT
2294 if (iterAtExit > type_MAX)
2304 /*****************************************************************************
2305 * If the "i -= const" will cause an underflow exception for the small types
2308 bool jitIterSmallUnderflow(int iterAtExit, var_types decrType)
// Returns true when the iterator's value at loop exit is below the minimum
// representable value of the (small) decrement type. Case labels are elided
// in this view; unsigned small types (min 0) are presumably handled in the
// elided arms — confirm against the full file.
2315 type_MIN = SCHAR_MIN;
2318 type_MIN = SHRT_MIN;
2327 case TYP_UINT: // Detected by checking for 32bit ....
2329 return false; // ... underflow same as done for TYP_INT
2335 if (iterAtExit < type_MIN)
2345 /*****************************************************************************
2347 * Helper for unroll loops - Computes the number of repetitions
2348 * in a constant loop. If it cannot prove the number is constant returns false
2351 bool Compiler::optComputeLoopRep(int constInit,
2354 genTreeOps iterOper,
2355 var_types iterOperType,
2356 genTreeOps testOper,
2359 unsigned* iterCount)
// Given a loop with constant init/limit and a constant-step iterator, tries
// to compute the exact trip count into *iterCount; returns false when the
// count is not provably constant (infinite loops, wrap-around, small-type
// overflow/underflow at exit, etc.). Several parameters (constLimit, iterInc,
// unsTest, dupCond) are elided from this view of the signature.
2361 noway_assert(genActualType(iterOperType) == TYP_INT);
2364 __int64 constLimitX;
2369 // Using this, we can just do a signed comparison with other 32 bit values.
// Widen the limit to 64 bits: zero-extend for unsigned tests, sign-extend
// otherwise, so all later comparisons can be done signed in 64 bits.
2372 constLimitX = (unsigned int)constLimit;
2376 constLimitX = (signed int)constLimit;
2379 switch (iterOperType)
2381 // For small types, the iteration operator will narrow these values if big
2383 #define INIT_ITER_BY_TYPE(type) \
2384 constInitX = (type)constInit; \
2385 iterInc = (type)iterInc;
2388 INIT_ITER_BY_TYPE(signed char);
2391 INIT_ITER_BY_TYPE(unsigned char);
2394 INIT_ITER_BY_TYPE(signed short);
2397 INIT_ITER_BY_TYPE(unsigned short);
2400 // For the big types, 32 bit arithmetic is performed
2406 constInitX = (unsigned int)constInit;
2410 constInitX = (signed int)constInit;
2415 noway_assert(!"Bad type");
2419 /* If iterInc is zero we have an infinite loop */
2425 /* Set iterSign to +1 for positive iterInc and -1 for negative iterInc */
2426 iterSign = (iterInc > 0) ? +1 : -1;
2428 /* Initialize loopCount to zero */
2431 // If dupCond is true then the loop head contains a test which skips
2432 // this loop, if the constInit does not pass the loop test
2433 // Such a loop can execute zero times.
2434 // If dupCond is false then we have a true do-while loop which we
2435 // always execute the loop once before performing the loop test
// do-while: account for the guaranteed first iteration before the test.
2439 constInitX += iterInc;
2442 // bail if count is based on wrap-around math
2445 if (constLimitX < constInitX)
2450 else if (constLimitX > constInitX)
2455 /* Compute the number of repetitions */
2459 __int64 iterAtExitX;
// Outer switch on testOper (elided); first arm is the GT_EQ case:
2462 /* something like "for (i=init; i == lim; i++)" doesn't make any sense */
// GT_NE arm: the limit must be hit exactly, else the loop wraps forever.
2466 /* "for (i=init; i != lim; i+=const)" - this is tricky since it may
2467 * have a constant number of iterations or loop forever -
2468 * we have to compute (lim-init) mod iterInc to see if it is zero.
2469 * If mod iterInc is not zero then the limit test will miss an a wrap will occur
2470 * which is probably not what the end user wanted, but it is legal.
2475 /* Stepping by one, i.e. Mod with 1 is always zero */
2478 if (((constLimitX - constInitX) % iterInc) != 0)
2486 noway_assert(iterInc < 0);
2487 /* Stepping by -1, i.e. Mod with 1 is always zero */
2490 if (((constInitX - constLimitX) % (-iterInc)) != 0)
// "!=" test trip count (only iterates when init != limit):
2506 if (constInitX != constLimitX)
2508 loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1;
2511 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2515 iterAtExitX = (unsigned)iterAtExitX;
2518 // Check if iteration incr will cause overflow for small types
2519 if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
2524 // iterator with 32bit overflow. Bad for TYP_(U)INT
2525 if (iterAtExitX < constLimitX)
2530 *iterCount = loopCount;
2546 noway_assert(!"Unknown operator for loop iterator")
// "<" test arm (elided label); same exit-value overflow screening as above:
2560 if (constInitX < constLimitX)
2562 loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1;
2565 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2569 iterAtExitX = (unsigned)iterAtExitX;
2572 // Check if iteration incr will cause overflow for small types
2573 if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
2578 // iterator with 32bit overflow. Bad for TYP_(U)INT
2579 if (iterAtExitX < constLimitX)
2584 *iterCount = loopCount;
2600 noway_assert(!"Unknown operator for loop iterator")
// "<=" test arm (elided label):
2614 if (constInitX <= constLimitX)
2616 loopCount += (unsigned)((constLimitX - constInitX) / iterInc) + 1;
2619 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2623 iterAtExitX = (unsigned)iterAtExitX;
2626 // Check if iteration incr will cause overflow for small types
2627 if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
2632 // iterator with 32bit overflow. Bad for TYP_(U)INT
2633 if (iterAtExitX <= constLimitX)
2638 *iterCount = loopCount;
2654 noway_assert(!"Unknown operator for loop iterator")
// ">" test arm (elided label); decrementing loops screen for underflow:
2668 if (constInitX > constLimitX)
2670 loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1;
2673 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2677 iterAtExitX = (unsigned)iterAtExitX;
2680 // Check if small types will underflow
2681 if (jitIterSmallUnderflow((int)iterAtExitX, iterOperType))
2686 // iterator with 32bit underflow. Bad for TYP_INT and unsigneds
2687 if (iterAtExitX > constLimitX)
2692 *iterCount = loopCount;
2708 noway_assert(!"Unknown operator for loop iterator")
// ">=" test arm (elided label):
2722 if (constInitX >= constLimitX)
2724 loopCount += (unsigned)((constLimitX - constInitX) / iterInc) + 1;
2727 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2731 iterAtExitX = (unsigned)iterAtExitX;
2734 // Check if small types will underflow
2735 if (jitIterSmallUnderflow((int)iterAtExitX, iterOperType))
2740 // iterator with 32bit underflow. Bad for TYP_INT and unsigneds
2741 if (iterAtExitX >= constLimitX)
2746 *iterCount = loopCount;
2762 noway_assert(!"Unknown operator for loop iterator")
2767 noway_assert(!"Unknown operator for loop condition");
2773 /*****************************************************************************
2775 * Look for loop unrolling candidates and unroll them
2779 #pragma warning(push)
2780 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
2782 void Compiler::optUnrollLoops()
2784 if (compCodeOpt() == SMALL_CODE)
2789 if (optLoopCount == 0)
2795 if (JitConfig.JitNoUnroll())
2801 if (optCanCloneLoops())
2809 printf("*************** In optUnrollLoops()\n");
2812 /* Look for loop unrolling candidates */
2814 /* Double loop so that after unrolling an inner loop we set change to true
2815 * and we then go back over all of the loop candidates and try to unroll
2816 * the next outer loop, until we don't unroll any loops,
2817 * then change will be false and we are done.
2821 bool change = false;
2823 for (unsigned lnum = 0; lnum < optLoopCount; lnum++)
2837 int lbeg; // initial value for iterator
2838 int llim; // limit value for iterator
2839 unsigned lvar; // iterator lclVar #
2840 int iterInc; // value to increment the iterator
2841 genTreeOps iterOper; // type of iterator increment (i.e. ASG_ADD, ASG_SUB, etc.)
2842 var_types iterOperType; // type result of the oper (for overflow instrs)
2843 genTreeOps testOper; // type of loop test (i.e. GT_LE, GT_GE, etc.)
2844 bool unsTest; // Is the comparison u/int
2846 unsigned totalIter; // total number of iterations in the constant loop
2847 unsigned loopCostSz; // Cost is size of one iteration
2848 unsigned loopFlags; // actual lpFlags
2849 unsigned requiredFlags; // required lpFlags
2851 GenTree* loopList; // new stmt list of the unrolled loop
2854 static const int ITER_LIMIT[COUNT_OPT_CODE + 1] = {
2861 noway_assert(ITER_LIMIT[SMALL_CODE] == 0);
2862 noway_assert(ITER_LIMIT[COUNT_OPT_CODE] == 0);
2864 unsigned iterLimit = (unsigned)ITER_LIMIT[compCodeOpt()];
2867 if (compStressCompile(STRESS_UNROLL_LOOPS, 50))
2873 static const int UNROLL_LIMIT_SZ[COUNT_OPT_CODE + 1] = {
2880 noway_assert(UNROLL_LIMIT_SZ[SMALL_CODE] == 0);
2881 noway_assert(UNROLL_LIMIT_SZ[COUNT_OPT_CODE] == 0);
2883 int unrollLimitSz = (unsigned)UNROLL_LIMIT_SZ[compCodeOpt()];
2886 if (compStressCompile(STRESS_UNROLL_LOOPS, 50))
2888 unrollLimitSz *= 10;
2892 loopFlags = optLoopTable[lnum].lpFlags;
2893 requiredFlags = LPFLG_DO_WHILE | LPFLG_ONE_EXIT | LPFLG_CONST;
2895 /* Ignore the loop if we don't have a do-while with a single exit
2896 that has a constant number of iterations */
2898 if ((loopFlags & requiredFlags) != requiredFlags)
2903 /* ignore if removed or marked as not unrollable */
2905 if (optLoopTable[lnum].lpFlags & (LPFLG_DONT_UNROLL | LPFLG_REMOVED))
2910 head = optLoopTable[lnum].lpHead;
2912 bottom = optLoopTable[lnum].lpBottom;
2913 noway_assert(bottom);
2915 /* The single exit must be at the bottom of the loop */
2916 noway_assert(optLoopTable[lnum].lpExit);
2917 if (optLoopTable[lnum].lpExit != bottom)
2922 /* Unrolling loops with jumps in them is not worth the headache
2923 * Later we might consider unrolling loops after un-switching */
2928 block = block->bbNext;
2929 noway_assert(block);
2931 if (block->bbJumpKind != BBJ_NONE)
2933 if (block != bottom)
2938 } while (block != bottom);
2940 /* Get the loop data:
2944 - iterator increment
2945 - increment operation type (i.e. ASG_ADD, ASG_SUB, etc...)
2946 - loop test type (i.e. GT_GE, GT_LT, etc...)
2949 lbeg = optLoopTable[lnum].lpConstInit;
2950 llim = optLoopTable[lnum].lpConstLimit();
2951 testOper = optLoopTable[lnum].lpTestOper();
2953 lvar = optLoopTable[lnum].lpIterVar();
2954 iterInc = optLoopTable[lnum].lpIterConst();
2955 iterOper = optLoopTable[lnum].lpIterOper();
2957 iterOperType = optLoopTable[lnum].lpIterOperType();
2958 unsTest = (optLoopTable[lnum].lpTestTree->gtFlags & GTF_UNSIGNED) != 0;
2960 if (lvaTable[lvar].lvAddrExposed)
2961 { // If the loop iteration variable is address-exposed then bail
2964 if (lvaTable[lvar].lvIsStructField)
2965 { // If the loop iteration variable is a promoted field from a struct then
2970 /* Locate the pre-header and initialization and increment/test statements */
2972 phdr = head->bbTreeList;
2974 loop = bottom->bbTreeList;
2977 init = head->lastStmt();
2978 noway_assert(init && (init->gtNext == nullptr));
2979 test = bottom->lastStmt();
2980 noway_assert(test && (test->gtNext == nullptr));
2981 incr = test->gtPrev;
2984 if (init->gtFlags & GTF_STMT_CMPADD)
2986 /* Must be a duplicated loop condition */
2987 noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
2990 init = init->gtPrev;
2998 /* Find the number of iterations - the function returns false if not a constant number */
3000 if (!optComputeLoopRep(lbeg, llim, iterInc, iterOper, iterOperType, testOper, unsTest, dupCond, &totalIter))
3005 /* Forget it if there are too many repetitions or not a constant loop */
3007 if (totalIter > iterLimit)
3012 noway_assert(init->gtOper == GT_STMT);
3013 init = init->gtStmt.gtStmtExpr;
3014 noway_assert(test->gtOper == GT_STMT);
3015 test = test->gtStmt.gtStmtExpr;
3016 noway_assert(incr->gtOper == GT_STMT);
3017 incr = incr->gtStmt.gtStmtExpr;
3019 // Don't unroll loops we don't understand.
3020 if (incr->gtOper == GT_ASG)
3025 /* Make sure everything looks ok */
3026 if ((init->gtOper != GT_ASG) || (init->gtOp.gtOp1->gtOper != GT_LCL_VAR) ||
3027 (init->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) || (init->gtOp.gtOp2->gtOper != GT_CNS_INT) ||
3028 (init->gtOp.gtOp2->gtIntCon.gtIconVal != lbeg) ||
3030 !((incr->gtOper == GT_ASG_ADD) || (incr->gtOper == GT_ASG_SUB)) ||
3031 (incr->gtOp.gtOp1->gtOper != GT_LCL_VAR) || (incr->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) ||
3032 (incr->gtOp.gtOp2->gtOper != GT_CNS_INT) || (incr->gtOp.gtOp2->gtIntCon.gtIconVal != iterInc) ||
3034 (test->gtOper != GT_JTRUE))
3036 noway_assert(!"Bad precondition in Compiler::optUnrollLoops()");
3040 /* heuristic - Estimated cost in code size of the unrolled loop */
3048 block = block->bbNext;
3050 /* Visit all the statements in the block */
3052 for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
3054 /* Get the expression and stop if end reached */
3056 GenTreePtr expr = stmt->gtStmtExpr;
3062 /* Calculate gtCostSz */
3063 gtSetStmtInfo(stmt);
3065 /* Update loopCostSz */
3066 loopCostSz += stmt->gtCostSz;
3068 } while (block != bottom);
3070 /* Compute the estimated increase in code size for the unrolled loop */
3072 unsigned int fixedLoopCostSz;
3073 fixedLoopCostSz = 8;
3076 unrollCostSz = (loopCostSz * totalIter) - (loopCostSz + fixedLoopCostSz);
3078 /* Don't unroll if too much code duplication would result. */
3080 if (unrollCostSz > unrollLimitSz)
3082 /* prevent this loop from being revisited */
3083 optLoopTable[lnum].lpFlags |= LPFLG_DONT_UNROLL;
3087 /* Looks like a good idea to unroll this loop, let's do it! */
3088 CLANG_FORMAT_COMMENT_ANCHOR;
3093 printf("\nUnrolling loop BB%02u", head->bbNext->bbNum);
3094 if (head->bbNext->bbNum != bottom->bbNum)
3096 printf("..BB%02u", bottom->bbNum);
3098 printf(" over V%02u from %u to %u", lvar, lbeg, llim);
3099 printf(" unrollCostSz = %d\n", unrollCostSz);
3104 /* Create the unrolled loop statement list */
3106 loopList = loopLast = nullptr;
3108 for (lval = lbeg; totalIter; totalIter--)
3117 block = block->bbNext;
3118 noway_assert(block);
3120 /* Visit all the statements in the block */
3122 for (stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
3124 /* Stop if we've reached the end of the loop */
3126 if (stmt->gtStmtExpr == incr)
3131 /* Clone/substitute the expression */
3133 expr = gtCloneExpr(stmt, 0, lvar, lval);
3135 // cloneExpr doesn't handle everything
3139 optLoopTable[lnum].lpFlags |= LPFLG_DONT_UNROLL;
3143 /* Append the expression to our list */
3147 loopLast->gtNext = expr;
3154 expr->gtPrev = loopLast;
3157 } while (block != bottom);
3159 /* update the new value for the unrolled iterator */
3173 noway_assert(!"Unrolling not implemented for this loop iterator");
3177 noway_assert(!"Unknown operator for constant loop iterator");
3182 /* Finish the linked list */
3186 loopList->gtPrev = loopLast;
3187 loopLast->gtNext = nullptr;
3190 /* Replace the body with the unrolled one */
3196 block = block->bbNext;
3197 noway_assert(block);
3198 block->bbTreeList = nullptr;
3199 block->bbJumpKind = BBJ_NONE;
3200 block->bbFlags &= ~BBF_NEEDS_GCPOLL;
3201 } while (block != bottom);
3203 bottom->bbJumpKind = BBJ_NONE;
3204 bottom->bbTreeList = loopList;
3205 bottom->bbFlags &= ~BBF_NEEDS_GCPOLL;
3206 bottom->modifyBBWeight(bottom->bbWeight / BB_LOOP_WEIGHT);
3210 fgMorphStmts(bottom, &dummy, &dummy, &dummy);
3212 /* Update bbRefs and bbPreds */
3213 /* Here head->bbNext is bottom !!! - Replace it */
3215 fgRemoveRefPred(head->bbNext, bottom);
3217 /* Now change the initialization statement in the HEAD to "lvar = lval;"
3218 * (the last value of the iterator in the loop)
3219 * and drop the jump condition since the unrolled loop will always execute */
3221 init->gtOp.gtOp2->gtIntCon.gtIconVal = lval;
3223 /* if the HEAD is a BBJ_COND drop the condition (and make HEAD a BBJ_NONE block) */
3225 if (head->bbJumpKind == BBJ_COND)
3227 phdr = head->bbTreeList;
3229 test = phdr->gtPrev;
3231 noway_assert(test && (test->gtNext == nullptr));
3232 noway_assert(test->gtOper == GT_STMT);
3233 noway_assert(test->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
3235 init = test->gtPrev;
3236 noway_assert(init && (init->gtNext == test));
3237 noway_assert(init->gtOper == GT_STMT);
3239 init->gtNext = nullptr;
3240 phdr->gtPrev = init;
3241 head->bbJumpKind = BBJ_NONE;
3242 head->bbFlags &= ~BBF_NEEDS_GCPOLL;
3244 /* Update bbRefs and bbPreds */
3246 fgRemoveRefPred(head->bbJumpDest, head);
3250 /* the loop must execute */
3251 noway_assert(head->bbJumpKind == BBJ_NONE);
3257 printf("Whole unrolled loop:\n");
3259 GenTreePtr s = loopList;
3263 noway_assert(s->gtOper == GT_STMT);
3274 /* Remember that something has changed */
3278 /* Make sure to update loop table */
/* Use the LPFLG_REMOVED flag and update the bbLoopMask accordingly
3281 * (also make head and bottom NULL - to hit an assert or GPF) */
3283 optLoopTable[lnum].lpFlags |= LPFLG_REMOVED;
3284 optLoopTable[lnum].lpHead = optLoopTable[lnum].lpBottom = nullptr;
3296 fgDebugCheckBBlist();
3300 #pragma warning(pop)
3303 /*****************************************************************************
* Return true if there is a code path from 'topBB' to 'botBB' that will
3306 * not execute a method call.
// Returns true if some path from 'topBB' down to 'botBB' may execute without
// passing through a GC safe point (i.e. without a block flagged BBF_GC_SAFE_POINT).
// Returns false only when a gc-safe block provably lies on every loop path
// (it dominates 'botBB' and is reachable from 'topBB').
bool Compiler::optReachWithoutCall(BasicBlock* topBB, BasicBlock* botBB)
    // TODO-Cleanup: Currently BBF_GC_SAFE_POINT is not set for helper calls,
    // as some helper calls are neither interruptible nor hijackable.
    // When we can determine this, then we can set BBF_GC_SAFE_POINT for
    // those helpers too.

    // Caller guarantees a lexically-forward range: topBB precedes botBB in bbNum order.
    noway_assert(topBB->bbNum <= botBB->bbNum);

    // We can always check topBB and botBB for any gc safe points and early out

    if ((topBB->bbFlags | botBB->bbFlags) & BBF_GC_SAFE_POINT)

    // Otherwise we will need to rely upon the dominator sets

    if (!fgDomsComputed)
        // return a conservative answer of true when we don't have the dominator sets

    // Walk every block in the lexical range [topBB .. botBB].
    BasicBlock* curBB = topBB;

        noway_assert(curBB);

        // If we added a loop pre-header block then we will
        // have a bbNum greater than fgLastBB, and we won't have
        // any dominator information about this block, so skip it.

        if (curBB->bbNum <= fgLastBB->bbNum)
            noway_assert(curBB->bbNum <= botBB->bbNum);

            // Does this block contain a gc safe point?

            if (curBB->bbFlags & BBF_GC_SAFE_POINT)
                // Will this block always execute on the way to botBB ?
                //
                // Since we are checking every block in [topBB .. botBB] and we are using
                // a lexical definition of a loop
                // (all that we know is that botBB is a back-edge to topBB)
                // Thus while walking blocks in this range we may encounter some blocks
                // that are not really part of the loop, and so we need to perform
                // some additional checks:
                //
                // We will check that the current 'curBB' is reachable from 'topBB'
                // and that it dominates the block containing the back-edge 'botBB'
                // When both of these are true then we know that the gcsafe point in 'curBB'
                // will be encountered in the loop and we can return false

                if (fgDominate(curBB, botBB) && fgReachable(topBB, curBB))

        // If we've reached the destination block, then we're done

        curBB = curBB->bbNext;

    // If we didn't find any blocks that contained a gc safe point and
    // also met the fgDominate and fgReachable criteria then we must return true
3389 /*****************************************************************************
3391 * Find the loop termination test at the bottom of the loop
// Returns the loop-termination test statement of 'bottom', which is expected to
// be the last statement of the block. Walks the statement list forward and
// asserts that the last statement is indeed bbTreeList->gtPrev (the list is
// circular in the gtPrev direction, so gtPrev of the head is the tail).
static GenTreePtr optFindLoopTermTest(BasicBlock* bottom)
    GenTreePtr testt = bottom->bbTreeList;

    assert(testt && testt->gtOper == GT_STMT);

    // Candidate result: the tail of the statement list via the circular prev link.
    GenTreePtr result = testt->gtPrev;

    // Walk forward to the actual last statement to validate the prev-link shortcut.
    while (testt->gtNext)
        testt = testt->gtNext;

    assert(testt == result);
3414 /*****************************************************************************
* Optimize "jmp C; do{} C:while(cond);" loops to "if (cond){ do{}while(cond); }"
// Loop inversion: transform "jmp C; do{} C:while(cond);" (a while-loop shape)
// into "if (cond) { do{}while(cond); }" by cloning the loop-termination test
// into 'block'. Bails out (early return, elided here) whenever the shape,
// EH regions, or the duplication-cost heuristic do not allow it.
void Compiler::fgOptWhileLoop(BasicBlock* block)
    noway_assert(!opts.MinOpts() && !opts.compDbgCode);
    noway_assert(compCodeOpt() != SMALL_CODE);

    Optimize while loops into do { } while loop
    Our loop hoisting logic requires do { } while loops.
    Specifically, we're looking for the following case:

    If we find this, and the condition is simple enough, we change
    the loop to the following:

    // else fall-through

    /* Does the BB end with an unconditional jump? */

    if (block->bbJumpKind != BBJ_ALWAYS || (block->bbFlags & BBF_KEEP_BBJ_ALWAYS))
    { // It can't be one of the ones we use for our exception magic

    // It has to be a forward jump
    // TODO-CQ: Check if we can also optimize the backwards jump as well.

    if (fgIsForwardBranch(block) == false)

    // Get hold of the jump target
    BasicBlock* bTest = block->bbJumpDest;

    // Does the block consist of 'jtrue(cond) block' ?
    if (bTest->bbJumpKind != BBJ_COND)

    // bTest must be a backwards jump to block->bbNext
    if (bTest->bbJumpDest != block->bbNext)

    // Since test is a BBJ_COND it will have a bbNext
    noway_assert(bTest->bbNext);

    // 'block' must be in the same try region as the condition, since we're going to insert
    // a duplicated condition in 'block', and the condition might include exception throwing code.
    if (!BasicBlock::sameTryRegion(block, bTest))

    // We're going to change 'block' to branch to bTest->bbNext, so that also better be in the
    // same try region (or no try region) to avoid generating illegal flow.
    BasicBlock* bTestNext = bTest->bbNext;
    if (bTestNext->hasTryIndex() && !BasicBlock::sameTryRegion(block, bTestNext))

    GenTreePtr condStmt = optFindLoopTermTest(bTest);

    // bTest must contain only a jtrue with no other stmts, we will only clone
    // the conditional, so any other statements will not get cloned
    // TODO-CQ: consider cloning the whole bTest block and inserting it after block.

    if (bTest->bbTreeList != condStmt)

    /* Get to the condition node from the statement tree */

    noway_assert(condStmt->gtOper == GT_STMT);

    GenTreePtr condTree = condStmt->gtStmt.gtStmtExpr;
    noway_assert(condTree->gtOper == GT_JTRUE);

    condTree = condTree->gtOp.gtOp1;

    // The condTree has to be a RelOp comparison
    // TODO-CQ: Check if we can also optimize the backwards jump as well.

    if (condTree->OperIsCompare() == false)

    /* We call gtPrepareCost to measure the cost of duplicating this tree */

    gtPrepareCost(condTree);
    unsigned estDupCostSz = condTree->gtCostSz;

    double loopIterations = (double)BB_LOOP_WEIGHT;

    bool allProfileWeightsAreValid = false;
    BasicBlock::weight_t weightBlock = block->bbWeight;
    BasicBlock::weight_t weightTest = bTest->bbWeight;
    BasicBlock::weight_t weightNext = block->bbNext->bbWeight;

    // If we have profile data then we calculate the number of times
    // the loop will iterate into loopIterations
    if (fgIsUsingProfileWeights())
        // Only rely upon the profile weight when all three of these blocks
        // have good profile weights
        if ((block->bbFlags & BBF_PROF_WEIGHT) && (bTest->bbFlags & BBF_PROF_WEIGHT) &&
            (block->bbNext->bbFlags & BBF_PROF_WEIGHT))
            allProfileWeightsAreValid = true;

            // If this while loop never iterates then don't bother transforming
            if (weightNext == 0)

            // with (weightNext > 0) we should also have (weightTest >= weightBlock)
            // if the profile weights are all valid.
            //
            // weightNext is the number of times this loop iterates
            // weightBlock is the number of times that we enter the while loop
            // loopIterations is the average number of times that this loop iterates

            if (weightTest >= weightBlock)
                loopIterations = (double)block->bbNext->bbWeight / (double)block->bbWeight;

    unsigned maxDupCostSz = 32;

    // optFastCodeOrBlendedLoop(bTest->bbWeight) does not work here as we have not
    // set loop weights yet
    if ((compCodeOpt() == FAST_CODE) || compStressCompile(STRESS_DO_WHILE_LOOPS, 30))

    // If this loop iterates a lot then raise the maxDupCost
    if (loopIterations >= 12.0)
    if (loopIterations >= 96.0)

    // If the loop condition has a shared static helper, we really want this loop converted
    // as not converting the loop will disable loop hoisting, meaning the shared helper will
    // be executed on every loop iteration.
    int countOfHelpers = 0;
    fgWalkTreePre(&condTree, CountSharedStaticHelper, &countOfHelpers);

    if (countOfHelpers > 0 && compCodeOpt() != SMALL_CODE)
        maxDupCostSz += 24 * min(countOfHelpers, (int)(loopIterations + 1.5));

    // If the compare has too high cost then we don't want to dup

    bool costIsTooHigh = (estDupCostSz > maxDupCostSz);

            printf("\nDuplication of loop condition [%06u] is %s, because the cost of duplication (%i) is %s than %i,"
                   "\n   loopIterations = %7.3f, countOfHelpers = %d, validProfileWeights = %s\n",
                   condTree->gtTreeID, costIsTooHigh ? "not done" : "performed", estDupCostSz,
                   costIsTooHigh ? "greater" : "less or equal", maxDupCostSz, loopIterations, countOfHelpers,
                   allProfileWeightsAreValid ? "true" : "false");

    /* Looks good - duplicate the condition test */

    // GTF_RELOP_ZTT marks the relop as a loop test so the clone is recognizable.
    condTree->gtFlags |= GTF_RELOP_ZTT;

    condTree = gtCloneExpr(condTree);
    gtReverseCond(condTree);

    // Make sure clone expr copied the flag
    assert(condTree->gtFlags & GTF_RELOP_ZTT);

    condTree = gtNewOperNode(GT_JTRUE, TYP_VOID, condTree);

    /* Create a statement entry out of the condition and
       append the condition test at the end of 'block' */

    GenTreePtr copyOfCondStmt = fgInsertStmtAtEnd(block, condTree);

    // GTF_STMT_CMPADD marks this as a compiler-added (duplicated) condition.
    copyOfCondStmt->gtFlags |= GTF_STMT_CMPADD;

#ifdef DEBUGGING_SUPPORT
    if (opts.compDbgInfo)
        // Preserve the IL offset of the original test for debug info.
        copyOfCondStmt->gtStmt.gtStmtILoffsx = condStmt->gtStmt.gtStmtILoffsx;

    // Flag the block that received the copy as potentially having an array/vtable
    // reference if the block copied from did; this is a conservative guess.
    if (auto copyFlags = bTest->bbFlags & (BBF_HAS_VTABREF | BBF_HAS_IDX_LEN))
        block->bbFlags |= copyFlags;

    // If we have profile data for all blocks and we know that we are cloning the
    // bTest block into block and thus changing the control flow from block so
    // that it no longer goes directly to bTest anymore, we have to adjust the
    // weight of bTest by subtracting out the weight of block.

    if (allProfileWeightsAreValid)
        // Some additional sanity checks before adjusting the weight of bTest
        if ((weightNext > 0) && (weightTest >= weightBlock) && (weightTest != BB_MAX_WEIGHT))
            // Get the two edges that flow out of bTest
            flowList* edgeToNext = fgGetPredForBlock(bTest->bbNext, bTest);
            flowList* edgeToJump = fgGetPredForBlock(bTest->bbJumpDest, bTest);

            // Calculate the new weight for block bTest

            BasicBlock::weight_t newWeightTest =
                (weightTest > weightBlock) ? (weightTest - weightBlock) : BB_ZERO_WEIGHT;
            bTest->bbWeight = newWeightTest;

            if (newWeightTest == BB_ZERO_WEIGHT)
                bTest->bbFlags |= BBF_RUN_RARELY;
                // All out edge weights are set to zero
                edgeToNext->flEdgeWeightMin = BB_ZERO_WEIGHT;
                edgeToNext->flEdgeWeightMax = BB_ZERO_WEIGHT;
                edgeToJump->flEdgeWeightMin = BB_ZERO_WEIGHT;
                edgeToJump->flEdgeWeightMax = BB_ZERO_WEIGHT;

                // Update our edge weights
                edgeToNext->flEdgeWeightMin = BB_ZERO_WEIGHT;
                edgeToNext->flEdgeWeightMax = min(edgeToNext->flEdgeWeightMax, newWeightTest);
                edgeToJump->flEdgeWeightMin = BB_ZERO_WEIGHT;
                edgeToJump->flEdgeWeightMax = min(edgeToJump->flEdgeWeightMax, newWeightTest);

    /* Change the block to end with a conditional jump */

    block->bbJumpKind = BBJ_COND;
    block->bbJumpDest = bTest->bbNext;

    /* Mark the jump dest block as being a jump target */
    block->bbJumpDest->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;

    /* Update bbRefs and bbPreds for 'block->bbNext' 'bTest' and 'bTest->bbNext' */

    fgAddRefPred(block->bbNext, block);

    fgRemoveRefPred(bTest, block);
    fgAddRefPred(bTest->bbNext, block);

        printf("\nDuplicating loop condition in BB%02u for loop (BB%02u - BB%02u)", block->bbNum, block->bbNext->bbNum,
        printf("\nEstimated code size expansion is %d\n ", estDupCostSz);

        gtDispTree(copyOfCondStmt);
3726 /*****************************************************************************
3728 * Optimize the BasicBlock layout of the method
// Optimize the BasicBlock layout of the method: run the while-loop inversion
// (fgOptWhileLoop) over every block, then recompute edge weights and update
// the flow graph if anything changed.
void Compiler::optOptimizeLayout()
    noway_assert(!opts.MinOpts() && !opts.compDbgCode);

        printf("*************** In optOptimizeLayout()\n");

    /* Check that the flowgraph data (bbNum, bbRefs, bbPreds) is up-to-date */
    fgDebugCheckBBlist();

    noway_assert(fgModified == false);

    for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
        /* Make sure the appropriate fields are initialized */

        if (block->bbWeight == BB_ZERO_WEIGHT)
            /* Zero weighted block can't have a LOOP_HEAD flag */
            noway_assert(block->isLoopHead() == false);

        // Loop numbering has not been assigned yet at this point.
        assert(block->bbLoopNum == 0);

        // Inversion grows code size, so it is skipped for SMALL_CODE.
        if (compCodeOpt() != SMALL_CODE)
            /* Optimize "while(cond){}" loops to "cond; do{}while(cond);" */

            fgOptWhileLoop(block);

    // Recompute the edge weight if we have modified the flow graph in fgOptWhileLoop
    fgComputeEdgeWeights();

    fgUpdateFlowGraph(true);
    fgUpdateFlowGraph();
3780 /*****************************************************************************
3782 * Perform loop inversion, find and classify natural loops
// Find and classify natural loops: set block weights, run natural loop
// recognition, then scan the flow graph for back-edges, numbering each loop
// header (at most 255 loops) and marking the blocks between 'top' and 'bottom'.
void Compiler::optOptimizeLoops()
    noway_assert(!opts.MinOpts() && !opts.compDbgCode);

        printf("*************** In optOptimizeLoops()\n");

    optSetBlockWeights();

    /* Were there any loops in the flow graph? */

    /* now that we have dominator information we can find loops */

    optFindNaturalLoops();

    unsigned loopNum = 0;

    /* Iterate over the flow graph, marking all loops */

    /* We will use the following terminology:
     * top        - the first basic block in the loop (i.e. the head of the backward edge)
     * bottom     - the last block in the loop (i.e. the block from which we jump to the top)
     * lastBottom - used when we have multiple back-edges to the same top
     */

    for (top = fgFirstBB; top; top = top->bbNext)
        BasicBlock* foundBottom = nullptr;

        // Examine every predecessor of 'top' looking for back-edges into it.
        for (pred = top->bbPreds; pred; pred = pred->flNext)
            /* Is this a loop candidate? - We look for "back edges" */

            BasicBlock* bottom = pred->flBlock;

            /* is this a backward edge? (from BOTTOM to TOP) */

            if (top->bbNum > bottom->bbNum)

            /* 'top' also must have the BBF_LOOP_HEAD flag set */

            if (top->isLoopHead() == false)

            /* We only consider back-edges that are BBJ_COND or BBJ_ALWAYS for loops */

            if ((bottom->bbJumpKind != BBJ_COND) && (bottom->bbJumpKind != BBJ_ALWAYS))

            /* the top block must be able to reach the bottom block */
            if (!fgReachable(top, bottom))

            /* Found a new loop, record the longest backedge in foundBottom */

            if ((foundBottom == nullptr) || (bottom->bbNum > foundBottom->bbNum))
                foundBottom = bottom;

            /* Mark the loop header as such */
            assert(FitsIn<unsigned char>(loopNum));
            top->bbLoopNum = (unsigned char)loopNum;

            /* Mark all blocks between 'top' and 'bottom' */

            optMarkLoopBlocks(top, foundBottom, false);

    // We track at most 255 loops
            totalUnnatLoopOverflows++;

        totalUnnatLoopCount += loopNum;

        printf("\nFound a total of %d loops.", loopNum);
        printf("\nAfter loop weight marking:\n");
        fgDispBasicBlocks();

    optLoopsMarked = true;
3909 //------------------------------------------------------------------------
3910 // optDeriveLoopCloningConditions: Derive loop cloning conditions.
3913 // loopNum - the current loop index for which conditions are derived.
3914 // context - data structure where all loop cloning info is kept.
3917 // "false" if conditions cannot be obtained. "true" otherwise.
3918 // The cloning conditions are updated in the "conditions"[loopNum] field
3919 // of the "context" parameter.
3922 // Inspect the loop cloning optimization candidates and populate the conditions necessary
3923 // for each optimization candidate. Checks if the loop stride is "> 0" if the loop
3924 // condition is "less than". If the initializer is "var" init then adds condition
3925 // "var >= 0", and if the loop is var limit then, "var >= 0" and "var <= a.len"
3926 // are added to "context". These conditions are checked in the pre-header block
3927 // and the cloning choice is made.
3930 // Callers should assume AND operation is used i.e., if all conditions are
3931 // true, then take the fast path.
// Derive the cloning conditions for loop 'loopNum' into 'context':
// stride > 0 check for GT_LT loops, init >= 0, limit >= 0 / limit <= a.len,
// plus one "limit <= arrLen" condition per optimization candidate.
// Returns false when conditions cannot be derived.
bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext* context)
    JITDUMP("------------------------------------------------------------\n");
    JITDUMP("Deriving cloning conditions for L%02u\n", loopNum);

    LoopDsc* loop = &optLoopTable[loopNum];
    ExpandArrayStack<LcOptInfo*>* optInfos = context->GetLoopOptInfo(loopNum);

    if (loop->lpTestOper() == GT_LT)
        // Stride conditions
        if (loop->lpIterConst() <= 0)
            JITDUMP("> Stride %d is invalid\n", loop->lpIterConst());

        // Init conditions: the initializer must be provably non-negative.
        if (loop->lpFlags & LPFLG_CONST_INIT)
            // Only allowing const init at this time.
            if (loop->lpConstInit < 0)
                JITDUMP("> Init %d is invalid\n", loop->lpConstInit);
        else if (loop->lpFlags & LPFLG_VAR_INIT)
            // For a variable initializer, add a runtime condition: initVar >= 0.
            LC_Condition geZero(GT_GE, LC_Expr(LC_Ident(loop->lpVarInit, LC_Ident::Var)),
                                LC_Expr(LC_Ident(0, LC_Ident::Const)));
            context->EnsureConditions(loopNum)->Push(geZero);
            JITDUMP("> Not variable init\n");

        // Limit conditions: identify the loop limit as a constant, variable, or array length.
        if (loop->lpFlags & LPFLG_CONST_LIMIT)
            int limit = loop->lpConstLimit();
                JITDUMP("> limit %d is invalid\n", limit);
            ident = LC_Ident(limit, LC_Ident::Const);
        else if (loop->lpFlags & LPFLG_VAR_LIMIT)
            unsigned limitLcl = loop->lpVarLimit();
            ident              = LC_Ident(limitLcl, LC_Ident::Var);

            // Runtime condition for a variable limit: limitVar >= 0.
            LC_Condition geZero(GT_GE, LC_Expr(ident), LC_Expr(LC_Ident(0, LC_Ident::Const)));

            context->EnsureConditions(loopNum)->Push(geZero);
        else if (loop->lpFlags & LPFLG_ARRLEN_LIMIT)
            ArrIndex* index = new (getAllocator()) ArrIndex(getAllocator());
            if (!loop->lpArrLenLimit(this, index))
                JITDUMP("> ArrLen not matching");
            ident = LC_Ident(LC_Array(LC_Array::Jagged, index, LC_Array::ArrLen));

            // Ensure that this array must be dereference-able, before executing the actual condition.
            LC_Array array(LC_Array::Jagged, index, LC_Array::None);
            context->EnsureDerefs(loopNum)->Push(array);
            JITDUMP("> Undetected limit\n");

    // For each candidate, add the "limit <= array length" fast-path condition.
    for (unsigned i = 0; i < optInfos->Size(); ++i)
        LcOptInfo* optInfo = optInfos->GetRef(i);
        switch (optInfo->GetOptType())
            case LcOptInfo::LcJaggedArray:
                LcJaggedArrayOptInfo* arrIndexInfo = optInfo->AsLcJaggedArrayOptInfo();
                LC_Array arrLen(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::ArrLen);
                LC_Ident arrLenIdent = LC_Ident(arrLen);

                LC_Condition cond(GT_LE, LC_Expr(ident), LC_Expr(arrLenIdent));
                context->EnsureConditions(loopNum)->Push(cond);

                // Ensure that this array must be dereference-able, before executing the actual condition.
                LC_Array array(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::None);
                context->EnsureDerefs(loopNum)->Push(array);
            case LcOptInfo::LcMdArray:
                // limit <= mdArrLen
                LcMdArrayOptInfo* mdArrInfo = optInfo->AsLcMdArrayOptInfo();
                LC_Condition cond(GT_LE, LC_Expr(ident),
                                  LC_Expr(LC_Ident(LC_Array(LC_Array::MdArray,
                                                            mdArrInfo->GetArrIndexForDim(getAllocator()),
                                                            mdArrInfo->dim, LC_Array::None))));
                context->EnsureConditions(loopNum)->Push(cond);
                JITDUMP("Unknown opt\n");

    JITDUMP("Conditions: (");
    DBEXEC(verbose, context->PrintConditions(loopNum));
4059 //------------------------------------------------------------------------------------
4060 // optComputeDerefConditions: Derive loop cloning conditions for dereferencing arrays.
4063 // loopNum - the current loop index for which conditions are derived.
4064 // context - data structure where all loop cloning info is kept.
4067 // "false" if conditions cannot be obtained. "true" otherwise.
4068 // The deref conditions are updated in the "derefConditions"[loopNum] field
4069 // of the "context" parameter.
4071 // Definition of Deref Conditions:
4072 // To be able to check for the loop cloning condition that (limitVar <= a.len)
4073 // we should first be able to dereference "a". i.e., "a" is non-null.
4079 // for (k in 0..n) // Inner most loop is being cloned. Cloning needs to check if
4080 // // (n <= a[i][j].len) and other safer conditions to take the fast path
4083 // Now, we want to deref a[i][j] to invoke length operator on it to perform the cloning fast path check.
4084 // This involves deref of (a), (a[i]), (a[i][j]), therefore, the following should first
4085 // be true to do the deref.
4087 // (a != null) && (i < a.len) && (a[i] != null) && (j < a[i].len) && (a[i][j] != null) --> (1)
4089 // Note the short circuiting AND. Implication: these conditions should be performed in separate
4090 // blocks each of which will branch to slow path if the condition evaluates to false.
4092 // Now, imagine a situation where we have
4093 // a[x][y][k] = 20 and a[i][j][k] = 0
4094 // also in the inner most loop where x, y are parameters, then our conditions will have
4098 // in addition to the above conditions (1) to get rid of bounds check on index 'k'
4100 // But these conditions can be checked together with conditions
4101 // (i < a.len) without a need for a separate block. In summary, the conditions will be:
4104 // ((i < a.len) & (x < a.len)) && <-- Note the bitwise AND here.
4105 // (a[i] != null & a[x] != null) && <-- Note the bitwise AND here.
4106 // (j < a[i].len & y < a[x].len) && <-- Note the bitwise AND here.
4107 // (a[i][j] != null & a[x][y] != null) <-- Note the bitwise AND here.
4109 // This naturally yields a tree style pattern, where the nodes of the tree are
4110 // the array and indices respectively.
4126 // Notice that the variables in the same levels can have their conditions combined in the
4127 // same block with a bitwise AND. Whereas, the conditions in consecutive levels will be
4128 // combined with a short-circuiting AND (i.e., different basic blocks).
4131 // Construct a tree of array indices and the array which will generate the optimal
4132 // conditions for loop cloning.
4134 // a[i][j][k], b[i] and a[i][y][k] are the occurrences in the loop. Then, the tree should be:
4149 // In this method, we will construct such a tree by descending depth first into the array
4150 // index operation and forming a tree structure as we encounter the array or the index variables.
4152 // This tree structure will then be used to generate conditions like below:
4153 // (a != null) & (b != null) && // from the first level of the tree.
4155 // (i < a.len) & (i < b.len) && // from the second level of the tree. Levels can be combined.
4156 // (a[i] != null) & (b[i] != null) && // from the second level of the tree.
4158 // (j < a[i].len) & (y < a[i].len) && // from the third level.
4159 // (a[i][j] != null) & (a[i][y] != null) && // from the third level.
// Derive the array-dereference (null-check / length-check) conditions for loop
// 'loopNum' into 'context'. Builds a tree of array-base and index variables for
// every array use in the deref list, then flattens it into per-level condition
// sets ("block conditions") so each level can be checked in its own block with
// short-circuiting between levels. Returns false if too many condition blocks
// would be needed (heuristic).
bool Compiler::optComputeDerefConditions(unsigned loopNum, LoopCloneContext* context)
    ExpandArrayStack<LC_Deref*> nodes(getAllocator());

    // Get the dereference-able arrays.
    ExpandArrayStack<LC_Array>* deref = context->EnsureDerefs(loopNum);

    // For each array in the dereference list, construct a tree,
    // where the nodes are array and index variables and an edge 'u-v'
    // exists if a node 'v' indexes node 'u' directly as in u[v] or an edge
    // 'u-v-w' transitively if u[v][w] occurs.
    for (unsigned i = 0; i < deref->Size(); ++i)
        LC_Array& array = (*deref)[i];

        // First populate the array base variable.
        LC_Deref* node = LC_Deref::Find(&nodes, array.arrIndex->arrLcl);
        if (node == nullptr)
            node = new (getAllocator()) LC_Deref(array, 0 /*level*/);

        // For each dimension (level) for the array, populate the tree with the variable
        // from that dimension.
        unsigned rank = (unsigned)array.GetDimRank();
        for (unsigned i = 0; i < rank; ++i)
            node->EnsureChildren(getAllocator());
            LC_Deref* tmp = node->Find(array.arrIndex->indLcls[i]);
                tmp = new (getAllocator()) LC_Deref(array, node->level + 1);
                node->children->Push(tmp);

            // Descend one level down.

        // Keep the maxRank of all array dereferences.
        maxRank = max((int)rank, maxRank);

    for (unsigned i = 0; i < nodes.Size(); ++i)

    // First level will always yield the null-check, since it is made of the array base variables.
    // All other levels (dimensions) will yield two conditions ex: (i < a.length && a[i] != null)
    // So add 1 after rank * 2.
    unsigned condBlocks = (unsigned)maxRank * 2 + 1;

    // Heuristic to not create too many blocks.

    // Derive conditions into an 'array of level x array of conditions' i.e., levelCond[levels][conds]
    ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* levelCond = context->EnsureBlockConditions(loopNum, condBlocks);
    for (unsigned i = 0; i < nodes.Size(); ++i)
        nodes[i]->DeriveLevelConditions(levelCond);

    DBEXEC(verbose, context->PrintBlockConditions(loopNum));
//----------------------------------------------------------------------------
// optDebugLogLoopCloning: Insert a call to jithelper that prints a message.
//
// Arguments:
//      block        - the block in which the helper call needs to be inserted.
//      insertBefore - the tree before which the helper call will be inserted.
//
void Compiler::optDebugLogLoopCloning(BasicBlock* block, GenTreePtr insertBefore)
    // Only emit the logging call when the COMPlus_JitDebugLogLoopCloning knob is set.
    if (JitConfig.JitDebugLogLoopCloning() == 0)
    // Build a void call to the debug-log JIT helper, wrap it in a statement,
    // splice it in ahead of 'insertBefore', and morph it in place.
    GenTreePtr logCall = gtNewHelperCallNode(CORINFO_HELP_DEBUG_LOG_LOOP_CLONING, TYP_VOID);
    GenTreePtr stmt    = fgNewStmtFromTree(logCall);
    fgInsertStmtBefore(block, insertBefore, stmt);
    fgMorphBlockStmt(block, stmt DEBUGARG("Debug log loop cloning"));
//------------------------------------------------------------------------
// optPerformStaticOptimizations: Perform the optimizations for the optimization
//      candidates gathered during the cloning phase.
//
// Arguments:
//     loopNum     - the current loop index for which the optimizations are performed.
//     context     - data structure where all loop cloning info is kept.
//     dynamicPath - If true, the optimization is performed in the fast path among the
//                   cloned loops. If false, it means this is the only path (i.e.,
//                   there is no slow path.)
//
// Operation:
//      Perform the optimizations on the fast path i.e., the path in which the
//      optimization candidates were collected at the time of identifying them.
//      The candidates store all the information necessary (the tree/stmt/block
//      they are from) to perform the optimization.
//
// Assumption:
//      The unoptimized path is either already cloned when this method is called or
//      there is no unoptimized path (got eliminated statically.) So this method
//      performs the optimizations assuming that the path in which the candidates
//      were collected is the fast path in which the optimizations will be performed.
//
void Compiler::optPerformStaticOptimizations(unsigned loopNum, LoopCloneContext* context DEBUGARG(bool dynamicPath))
    // Walk every recorded candidate for this loop and apply its optimization kind.
    ExpandArrayStack<LcOptInfo*>* optInfos = context->GetLoopOptInfo(loopNum);
    for (unsigned i = 0; i < optInfos->Size(); ++i)
        LcOptInfo* optInfo = optInfos->GetRef(i);
        switch (optInfo->GetOptType())
            case LcOptInfo::LcJaggedArray:
                LcJaggedArrayOptInfo* arrIndexInfo = optInfo->AsLcJaggedArrayOptInfo();
                // optRemoveRangeCheck uses compCurBB; point it at the candidate's block.
                compCurBB = arrIndexInfo->arrIndex.useBlock;
                // The cloning conditions prove the index in bounds, so the range
                // check for this dimension can be removed from the fast path.
                optRemoveRangeCheck(arrIndexInfo->arrIndex.bndsChks[arrIndexInfo->dim], arrIndexInfo->stmt, true,
                // Under debug config, emit a run-time log call on the optimized path.
                DBEXEC(dynamicPath, optDebugLogLoopCloning(arrIndexInfo->arrIndex.useBlock, arrIndexInfo->stmt));
            case LcOptInfo::LcMdArray:
                // TODO-CQ: CLONE: Implement.
//----------------------------------------------------------------------------
// optCanCloneLoops: Use the environment flag to determine whether loop
//      cloning is allowed to be performed.
//
// Return Value:
//      Returns true in debug builds if COMPlus_JitCloneLoops flag is set.
//      Disabled for retail for now.
//
bool Compiler::optCanCloneLoops()
    // Enabled for retail builds now.
    unsigned cloneLoopsFlag = 1;
    // In debug builds the JitCloneLoops config knob can override the default.
    // NOTE(review): this read is presumably wrapped in #ifdef DEBUG (not visible
    // here) -- confirm against the full file before relying on retail behavior.
    cloneLoopsFlag = JitConfig.JitCloneLoops();
    return (cloneLoopsFlag != 0);
//----------------------------------------------------------------------------
// optIsLoopClonable: Determine whether this loop can be cloned.
//
// Arguments:
//      loopInd     loop index which needs to be checked if it can be cloned.
//
// Return Value:
//      Returns true if the loop can be cloned. If it returns false
//      prints a message in debug as why the loop can't be cloned.
//
bool Compiler::optIsLoopClonable(unsigned loopInd)
    // First, for now, make sure the loop doesn't have any embedded exception handling -- I don't want to tackle
    // inserting new EH regions in the exception table yet.
    BasicBlock* stopAt       = optLoopTable[loopInd].lpBottom->bbNext;
    unsigned    loopRetCount = 0;
    for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != stopAt; blk = blk->bbNext)
        // Count BBJ_RETURN blocks inside the loop: cloning duplicates them, and
        // the epilog-count limit below depends on this tally.
        if (blk->bbJumpKind == BBJ_RETURN)
        // Any 'try' beginning inside the loop would require duplicating EH regions.
        if (bbIsTryBeg(blk))
            JITDUMP("Loop cloning: rejecting loop %d in %s, because it has a try begin.\n", loopInd, info.compFullName);
    // Is the entry block a handler or filter start?  If so, then if we cloned, we could create a jump
    // into the middle of a handler (to go to the cloned copy.)  Reject.
    if (bbIsHandlerBeg(optLoopTable[loopInd].lpEntry))
        JITDUMP("Loop cloning: rejecting loop because entry block is a handler start.\n");
    // If the head and entry are in different EH regions, reject.
    if (!BasicBlock::sameEHRegion(optLoopTable[loopInd].lpHead, optLoopTable[loopInd].lpEntry))
        JITDUMP("Loop cloning: rejecting loop because head and entry blocks are in different EH regions.\n");
    // Is the first block after the last block of the loop a handler or filter start?
    // Usually, we create a dummy block after the orginal loop, to skip over the loop clone
    // and go to where the original loop did.  That raises problems when we don't actually go to
    // that block; this is one of those cases.  This could be fixed fairly easily; for example,
    // we could add a dummy nop block after the (cloned) loop bottom, in the same handler scope as the
    // loop.  This is just a corner to cut to get this working faster.
    BasicBlock* bbAfterLoop = optLoopTable[loopInd].lpBottom->bbNext;
    if (bbAfterLoop != nullptr && bbIsHandlerBeg(bbAfterLoop))
        JITDUMP("Loop cloning: rejecting loop because next block after bottom is a handler start.\n");
    // We've previously made a decision whether to have separate return epilogs, or branch to one.
    // There's a GCInfo limitation in the x86 case, so that there can be no more than 4 separate epilogs.
    // (I thought this was x86-specific, but it's not if-d.  On other architectures, the decision should be made as a
    // heuristic tradeoff; perhaps we're just choosing to live with 4 as the limit.)
    if (fgReturnCount + loopRetCount > 4)
        JITDUMP("Loop cloning: rejecting loop because it has %d returns; if added to previously-existing %d returns, "
                "would exceed the limit of 4.\n",
                loopRetCount, fgReturnCount);
    // Otherwise, we're going to add those return blocks.
    fgReturnCount += loopRetCount;
/*****************************************************************************
 *
 *  Identify loop cloning opportunities, derive loop cloning conditions,
 *  perform loop cloning, use the derived conditions to choose which
 *  path (fast vs. slow) to take at run time.
 */
void Compiler::optCloneLoops()
    JITDUMP("\n*************** In optCloneLoops()\n");
    // Nothing to do if there are no loops or cloning is disabled by config.
    if (optLoopCount == 0 || !optCanCloneLoops())
    // (Debug-only dump of the flow graph before cloning.)
    printf("Blocks/Trees at start of phase\n");
    fgDispBasicBlocks(true);
    LoopCloneContext context(optLoopCount, getAllocator());
    // Obtain array optimization candidates in the context.
    optObtainLoopCloningOpts(&context);
    // For each loop, derive cloning conditions for the optimization candidates.
    for (unsigned i = 0; i < optLoopCount; ++i)
        ExpandArrayStack<LcOptInfo*>* optInfos = context.GetLoopOptInfo(i);
        if (optInfos == nullptr)
        // If we can't derive the run-time conditions (or the dereference
        // conditions), this loop is not a cloning candidate after all.
        if (!optDeriveLoopCloningConditions(i, &context) || !optComputeDerefConditions(i, &context))
            JITDUMP("> Conditions could not be obtained\n");
            context.CancelLoopOptInfo(i);
        // Statically evaluate the conditions where possible: all-true means we can
        // optimize without cloning; any-false means cloning would never help.
        bool allTrue  = false;
        bool anyFalse = false;
        context.EvaluateConditions(i, &allTrue, &anyFalse DEBUGARG(verbose));
        context.CancelLoopOptInfo(i);
        // Perform static optimizations on the fast path since we always
        // have to take the cloned path.
        optPerformStaticOptimizations(i, &context DEBUGARG(false));
        // No need to clone.
        context.CancelLoopOptInfo(i);
    // The code in this #if has been useful in debugging loop cloning issues, by
    // enabling selective enablement of the loop cloning optimization according to
    // method hash (via the loopclonehashlo/loopclonehashhi environment variables).
    unsigned methHash = info.compMethodHash();
    char* lostr = getenv("loopclonehashlo");
    unsigned methHashLo = 0;
    sscanf_s(lostr, "%x", &methHashLo);
    // methHashLo = (unsigned(atoi(lostr)) << 2);  // So we don't have to use negative numbers.
    char* histr = getenv("loopclonehashhi");
    unsigned methHashHi = UINT32_MAX;
    sscanf_s(histr, "%x", &methHashHi);
    // methHashHi = (unsigned(atoi(histr)) << 2);  // So we don't have to use negative numbers.
    if (methHash < methHashLo || methHash > methHashHi)
    // For surviving candidates: tighten the conditions and perform the clone.
    for (unsigned i = 0; i < optLoopCount; ++i)
        if (context.GetLoopOptInfo(i) != nullptr)
            context.OptimizeConditions(i DEBUGARG(verbose));
            context.OptimizeBlockConditions(i DEBUGARG(verbose));
            optCloneLoop(i, &context);
    // (Debug-only dump of the flow graph after cloning.)
    printf("\nAfter loop cloning:\n");
    fgDispBasicBlocks(/*dumpTrees*/ true);
4521 void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context)
4523 assert(loopInd < optLoopCount);
4525 JITDUMP("\nCloning loop %d: [h: %d, f: %d, t: %d, e: %d, b: %d].\n", loopInd, optLoopTable[loopInd].lpHead->bbNum,
4526 optLoopTable[loopInd].lpFirst->bbNum, optLoopTable[loopInd].lpTop->bbNum,
4527 optLoopTable[loopInd].lpEntry->bbNum, optLoopTable[loopInd].lpBottom->bbNum);
4529 // Determine the depth of the loop, so we can properly weight blocks added (outside the cloned loop blocks).
4530 unsigned depth = optLoopDepth(loopInd);
4531 unsigned ambientWeight = 1;
4532 for (unsigned j = 0; j < depth; j++)
4534 unsigned lastWeight = ambientWeight;
4535 ambientWeight *= BB_LOOP_WEIGHT;
4536 // If the multiplication overflowed, stick at max.
4537 // (Strictly speaking, a multiplication could overflow and still have a result
4538 // that is >= lastWeight...but if so, the original weight must be pretty large,
4539 // and it got bigger, so that's OK.)
4540 if (ambientWeight < lastWeight)
4542 ambientWeight = BB_MAX_WEIGHT;
4547 // If we're in a non-natural loop, the ambient weight might be higher than we computed above.
4548 // Be safe by taking the max with the head block's weight.
4549 ambientWeight = max(ambientWeight, optLoopTable[loopInd].lpHead->bbWeight);
4551 // This is the containing loop, if any -- to label any blocks we create that are outside
4552 // the loop being cloned.
4553 unsigned char ambientLoop = optLoopTable[loopInd].lpParent;
4555 // First, make sure that the loop has a unique header block, creating an empty one if necessary.
4556 optEnsureUniqueHead(loopInd, ambientWeight);
4558 // We're going to make
4570 // H2--> E (Optional; if E == T == F, let H fall through to F/T/E)
4582 BasicBlock* h = optLoopTable[loopInd].lpHead;
4583 if (h->bbJumpKind != BBJ_NONE && h->bbJumpKind != BBJ_ALWAYS)
4585 // Make a new block to be the unique entry to the loop.
4586 assert(h->bbJumpKind == BBJ_COND && h->bbNext == optLoopTable[loopInd].lpEntry);
4587 BasicBlock* newH = fgNewBBafter(BBJ_NONE, h,
4588 /*extendRegion*/ true);
4589 newH->bbWeight = (newH->isRunRarely() ? 0 : ambientWeight);
4590 BlockSetOps::Assign(this, newH->bbReach, h->bbReach);
4591 // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
4592 newH->bbNatLoopNum = ambientLoop;
4594 optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h);
4597 // First, make X2 after B, if necessary. (Not necessary if b is a BBJ_ALWAYS.)
4598 // "newPred" will be the predecessor of the blocks of the cloned loop.
4599 BasicBlock* b = optLoopTable[loopInd].lpBottom;
4600 BasicBlock* newPred = b;
4601 if (b->bbJumpKind != BBJ_ALWAYS)
4603 BasicBlock* x = b->bbNext;
4606 BasicBlock* x2 = fgNewBBafter(BBJ_ALWAYS, b, /*extendRegion*/ true);
4607 x2->bbWeight = (x2->isRunRarely() ? 0 : ambientWeight);
4609 // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
4610 x2->bbNatLoopNum = ambientLoop;
4613 BlockSetOps::Assign(this, x2->bbReach, h->bbReach);
4618 // Now we'll make "h2", after "h" to go to "e" -- unless the loop is a do-while,
4619 // so that "h" already falls through to "e" (e == t == f).
4620 BasicBlock* h2 = nullptr;
4621 if (optLoopTable[loopInd].lpHead->bbNext != optLoopTable[loopInd].lpEntry)
4623 BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, optLoopTable[loopInd].lpHead,
4624 /*extendRegion*/ true);
4625 h2->bbWeight = (h2->isRunRarely() ? 0 : ambientWeight);
4627 // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
4628 h2->bbNatLoopNum = ambientLoop;
4630 h2->bbJumpDest = optLoopTable[loopInd].lpEntry;
4631 optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h2);
4634 // Now we'll clone the blocks of the loop body.
4635 BasicBlock* newFirst = nullptr;
4636 BasicBlock* newBot = nullptr;
4638 BlockToBlockMap* blockMap = new (getAllocator()) BlockToBlockMap(getAllocator());
4639 for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != optLoopTable[loopInd].lpBottom->bbNext;
4642 BasicBlock* newBlk = fgNewBBafter(blk->bbJumpKind, newPred,
4643 /*extendRegion*/ true);
4645 BasicBlock::CloneBlockState(this, newBlk, blk);
4646 // TODO-Cleanup: The above clones the bbNatLoopNum, which is incorrect. Eventually, we should probably insert
4647 // the cloned loop in the loop table. For now, however, we'll just make these blocks be part of the surrounding
4648 // loop, if one exists -- the parent of the loop we're cloning.
4649 newBlk->bbNatLoopNum = optLoopTable[loopInd].lpParent;
4651 if (newFirst == nullptr)
4655 newBot = newBlk; // Continually overwrite to make sure we get the last one.
4657 blockMap->Set(blk, newBlk);
4660 // Perform the static optimizations on the fast path.
4661 optPerformStaticOptimizations(loopInd, context DEBUGARG(true));
4663 // Now go through the new blocks, remapping their jump targets within the loop.
4664 for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != optLoopTable[loopInd].lpBottom->bbNext;
4668 BasicBlock* newblk = nullptr;
4669 bool b = blockMap->Lookup(blk, &newblk);
4670 assert(b && newblk != nullptr);
4672 assert(blk->bbJumpKind == newblk->bbJumpKind);
4674 // First copy the jump destination(s) from "blk".
4675 optCopyBlkDest(blk, newblk);
4677 // Now redirect the new block according to "blockMap".
4678 optRedirectBlock(newblk, blockMap);
4681 assert((h->bbJumpKind == BBJ_NONE && (h->bbNext == h2 || h->bbNext == optLoopTable[loopInd].lpEntry)) ||
4682 (h->bbJumpKind == BBJ_ALWAYS));
4684 // If all the conditions are true, go to E2.
4685 BasicBlock* e2 = nullptr;
4686 bool foundIt = blockMap->Lookup(optLoopTable[loopInd].lpEntry, &e2);
4688 h->bbJumpKind = BBJ_COND;
4690 // We will create the following structure
4692 // cond0 (in h) -?> cond1
4693 // slow --> e2 (slow) always
4700 // We should always have block conditions, at the minimum, the array should be deref-able
4701 assert(context->HasBlockConditions(loopInd));
4703 // Create a unique header for the slow path.
4704 BasicBlock* slowHead = fgNewBBafter(BBJ_ALWAYS, h, true);
4705 slowHead->bbWeight = (h->isRunRarely() ? 0 : ambientWeight);
4706 slowHead->bbNatLoopNum = ambientLoop;
4707 slowHead->bbJumpDest = e2;
4709 BasicBlock* condLast = optInsertLoopChoiceConditions(context, loopInd, h, slowHead);
4710 condLast->bbJumpDest = slowHead;
4712 // If h2 is present it is already the head or replace 'h' by 'condLast'.
4715 optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, condLast);
4717 assert(foundIt && e2 != nullptr);
4719 fgUpdateChangedFlowGraph();
//--------------------------------------------------------------------------------------------------
// optInsertLoopChoiceConditions - Insert the loop conditions for a loop between loop head and entry
//
// Arguments:
//      context     loop cloning context variable
//      loopNum     the loop index
//      head        loop head for "loopNum"
//      slowHead    the slow path loop head
//
// Return Value:
//      The last condition block created (its bbJumpDest is wired up by the caller).
//
// Operation:
//      Create the following structure.
//
//      Note below that the cond0 is inverted in head i.e., if true jump to cond1. This is because
//      condn cannot jtrue to loop head h2. It has to be from a direct pred block.
//
//      cond0 (in h)  -?> cond1
//      slowHead      --> e2 (slowHead) always
//      !cond1        -?> slowHead
//      !cond2        -?> slowHead
//      !condn        -?> slowHead
//
//      Insert condition 0 in 'h' and create other condition blocks and insert conditions in them.
//
BasicBlock* Compiler::optInsertLoopChoiceConditions(LoopCloneContext* context,
                                                    BasicBlock*       slowHead)
    JITDUMP("Inserting loop cloning conditions\n");
    assert(context->HasBlockConditions(loopNum));
    // Start in the head block; each level of deref conditions gets its own block.
    BasicBlock* curCond = head;
    ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* levelCond = context->GetBlockConditions(loopNum);
    for (unsigned i = 0; i < levelCond->Size(); ++i)
        bool isHeaderBlock = (curCond == head);
        // Flip the condition if header block.
        context->CondToStmtInBlock(this, *((*levelCond)[i]), curCond, isHeaderBlock);
        // Create each condition block ensuring wiring between them.
        BasicBlock* tmp     = fgNewBBafter(BBJ_COND, isHeaderBlock ? slowHead : curCond, true);
        curCond->bbJumpDest = isHeaderBlock ? tmp : slowHead;
        // New condition blocks inherit the head's weight and natural-loop number.
        curCond->inheritWeight(head);
        curCond->bbNatLoopNum = head->bbNatLoopNum;
        JITDUMP("Created new block %02d for new level\n", curCond->bbNum);
    // Finally insert cloning conditions after all deref conditions have been inserted.
    context->CondToStmtInBlock(this, *(context->GetConditions(loopNum)), curCond, false);
// optEnsureUniqueHead: Ensure that loop "loopInd" has a unique head block
// (one that dominates the loop entry), creating an empty header block with
// weight "ambientWeight" if necessary and redirecting the entry's
// out-of-loop predecessors to it.
void Compiler::optEnsureUniqueHead(unsigned loopInd, unsigned ambientWeight)
    BasicBlock* h = optLoopTable[loopInd].lpHead;
    BasicBlock* t = optLoopTable[loopInd].lpTop;
    BasicBlock* e = optLoopTable[loopInd].lpEntry;
    BasicBlock* b = optLoopTable[loopInd].lpBottom;
    // If "h" dominates the entry block, then it is the unique header.
    if (fgDominate(h, e))
    // Otherwise, create a new empty header block, make it the pred of the entry block,
    // and redirect the preds of the entry block to go to this.
    BasicBlock* beforeTop = t->bbPrev;
    // Make sure that the new block is in the same region as the loop.
    // (We will only create loops that are entirely within a region.)
    BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, beforeTop, true);
    // This is in the containing loop.
    h2->bbNatLoopNum = optLoopTable[loopInd].lpParent;
    h2->bbWeight     = (h2->isRunRarely() ? 0 : ambientWeight);
    // We don't care where it was put; splice it between beforeTop and top.
    if (beforeTop->bbNext != h2)
        h2->bbPrev->setNext(h2->bbNext); // Splice h2 out.
        beforeTop->setNext(h2);          // Splice h2 in, between beforeTop and t.
    // If h2 doesn't fall through to the entry, it must jump to it explicitly.
    if (h2->bbNext != e)
        h2->bbJumpKind = BBJ_ALWAYS;
    // The new header reaches exactly what the entry reached.
    BlockSetOps::Assign(this, h2->bbReach, e->bbReach);
    // Redirect paths from preds of "e" to go to "h2" instead of "e".
    BlockToBlockMap* blockMap = new (getAllocator()) BlockToBlockMap(getAllocator());
    blockMap->Set(e, h2);
    for (flowList* predEntry = e->bbPreds; predEntry; predEntry = predEntry->flNext)
        BasicBlock* predBlock = predEntry->flBlock;
        // Skip if predBlock is in the loop.
        if (t->bbNum <= predBlock->bbNum && predBlock->bbNum <= b->bbNum)
        optRedirectBlock(predBlock, blockMap);
    optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h2);
/*****************************************************************************
 *
 *  Determine the kind of interference for the call: which categories of
 *  indirections (GC-ref, scalar, all, or none) the call may write to.
 */

/* static */ inline Compiler::callInterf Compiler::optCallInterf(GenTreePtr call)
    assert(call->gtOper == GT_CALL);
    // if not a helper, kills everything
    if (call->gtCall.gtCallType != CT_HELPER)
    // setfield and array address store kill all indirections
    switch (eeGetHelperNum(call->gtCall.gtCallMethHnd))
        case CORINFO_HELP_ASSIGN_REF:         // Not strictly needed as we don't make a GT_CALL with this
        case CORINFO_HELP_CHECKED_ASSIGN_REF: // Not strictly needed as we don't make a GT_CALL with this
        case CORINFO_HELP_ASSIGN_BYREF:       // Not strictly needed as we don't make a GT_CALL with this
        case CORINFO_HELP_SETFIELDOBJ:
        case CORINFO_HELP_ARRADDR_ST:
            // Object-reference stores may alias any GC-ref indirection.
            return CALLINT_REF_INDIRS;
        case CORINFO_HELP_SETFIELDFLOAT:
        case CORINFO_HELP_SETFIELDDOUBLE:
        case CORINFO_HELP_SETFIELD8:
        case CORINFO_HELP_SETFIELD16:
        case CORINFO_HELP_SETFIELD32:
        case CORINFO_HELP_SETFIELD64:
            // Scalar field stores may alias any non-ref (scalar) indirection.
            return CALLINT_SCL_INDIRS;
        case CORINFO_HELP_ASSIGN_STRUCT: // Not strictly needed as we don't use this in Jit32
        case CORINFO_HELP_MEMSET:        // Not strictly needed as we don't make a GT_CALL with this
        case CORINFO_HELP_MEMCPY:        // Not strictly needed as we don't make a GT_CALL with this
        case CORINFO_HELP_SETFIELDSTRUCT:
            // Struct/memory block stores may alias both ref and scalar indirections.
            return CALLINT_ALL_INDIRS;
    // other helpers kill nothing
    return CALLINT_NONE;
/*****************************************************************************
 *
 *  See if the given tree can be computed in the given precision (which must
 *  be smaller than the type of the tree for this to make sense). If 'doit'
 *  is false, we merely check to see whether narrowing is possible; if we
 *  get called with 'doit' being true, we actually perform the narrowing.
 */

bool Compiler::optNarrowTree(GenTreePtr tree, var_types srct, var_types dstt, ValueNumPair vnpNarrow, bool doit)
    noway_assert(genActualType(tree->gtType) == genActualType(srct));
    /* Assume we're only handling integer types */
    noway_assert(varTypeIsIntegral(srct));
    noway_assert(varTypeIsIntegral(dstt));
    unsigned srcSize = genTypeSize(srct);
    unsigned dstSize = genTypeSize(dstt);
    /* dstt must be smaller than srct to narrow */
    if (dstSize >= srcSize)
    /* Figure out what kind of a node we have */
    oper = tree->OperGet();
    kind = tree->OperKind();
    // Assignment operators are never narrowed; callers must not ask us to.
    if (kind & GTK_ASGOP)
        noway_assert(doit == false);
    ValueNumPair NoVNPair = ValueNumPair();
    if (kind & GTK_LEAF)
        /* Constants can usually be narrowed by changing their value */
        CLANG_FORMAT_COMMENT_ANCHOR;
#ifndef _TARGET_64BIT_
        // On 32-bit targets a long constant narrows to an int constant,
        // provided the value round-trips through the narrower mask.
        lval = tree->gtIntConCommon.LngValue();
        if ((lval & lmask) != lval)
        // Rewrite the GT_CNS_LNG in place as a GT_CNS_INT and re-number it.
        tree->ChangeOperConst(GT_CNS_INT);
        tree->gtType             = TYP_INT;
        tree->gtIntCon.gtIconVal = (int)lval;
        if (vnStore != nullptr)
            fgValueNumberTreeConst(tree);
        // Int constants: check the value fits in the destination's mask.
        ival = tree->gtIntCon.gtIconVal;
#ifdef _TARGET_64BIT_
#endif // _TARGET_64BIT_
        if ((ival & imask) != ival)
#ifdef _TARGET_64BIT_
        tree->gtType             = TYP_INT;
        tree->gtIntCon.gtIconVal = (int)ival;
        if (vnStore != nullptr)
            fgValueNumberTreeConst(tree);
#endif // _TARGET_64BIT_
        /* Operands that are in memory can usually be narrowed
           simply by changing their gtType */
        /* We only allow narrowing long -> int for a GT_LCL_VAR */
        if (dstSize == sizeof(int))
        // Any other leaf kind cannot be narrowed.
        noway_assert(doit == false);
    if (kind & (GTK_BINOP | GTK_UNOP))
        op1 = tree->gtOp.gtOp1;
        op2 = tree->gtOp.gtOp2;
        switch (tree->gtOper)
            // NOTE(review): this assert sits under the GT_AND case in the full
            // file -- confirm; several case labels are elided in this view.
            noway_assert(genActualType(tree->gtType) == genActualType(op2->gtType));
            // Is op2 a small constant than can be narrowed into dstt?
            // if so the result of the GT_AND will also fit into 'dstt' and can be narrowed
            if ((op2->gtOper == GT_CNS_INT) && optNarrowTree(op2, srct, dstt, NoVNPair, false))
                // We will change the type of the tree and narrow op2
                tree->gtType = genActualType(dstt);
                tree->SetVNs(vnpNarrow);
                optNarrowTree(op2, srct, dstt, NoVNPair, true);
                // We may also need to cast away the upper bits of op1
                assert(tree->gtType == TYP_INT);
                op1 = gtNewCastNode(TYP_INT, op1, TYP_INT);
                // Mark the new cast as already morphed so it isn't morphed twice.
                op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
                tree->gtOp.gtOp1 = op1;
            // Overflow ops and small destination types can't be safely narrowed here.
            if (tree->gtOverflow() || varTypeIsSmall(dstt))
                noway_assert(doit == false);
            noway_assert(genActualType(tree->gtType) == genActualType(op1->gtType));
            noway_assert(genActualType(tree->gtType) == genActualType(op2->gtType));
            // Both operands must themselves be narrowable (and not live CSE candidates).
            if (gtIsActiveCSE_Candidate(op1) || gtIsActiveCSE_Candidate(op2) ||
                !optNarrowTree(op1, srct, dstt, NoVNPair, doit) || !optNarrowTree(op2, srct, dstt, NoVNPair, doit))
                noway_assert(doit == false);
            /* Simply change the type of the tree */
            if (tree->gtOper == GT_MUL && (tree->gtFlags & GTF_MUL_64RSLT))
                tree->gtFlags &= ~GTF_MUL_64RSLT;
            tree->gtType = genActualType(dstt);
            tree->SetVNs(vnpNarrow);
            /* Simply change the type of the tree */
            if (doit && (dstSize <= genTypeSize(tree->gtType)))
                tree->gtType = genSignedType(dstt);
                tree->SetVNs(vnpNarrow);
                /* Make sure we don't mess up the variable type */
                if ((oper == GT_LCL_VAR) || (oper == GT_LCL_FLD))
                    tree->gtFlags |= GTF_VAR_CAST;
            /* These can always be narrowed since they only represent 0 or 1 */
            // GT_CAST: examine the cast's source and destination types.
            var_types cast    = tree->CastToType();
            var_types oprt    = op1->TypeGet();
            unsigned  oprSize = genTypeSize(oprt);
            // Cannot narrow across the integral / floating boundary.
            if (varTypeIsIntegralOrI(dstt) != varTypeIsIntegralOrI(oprt))
            if (tree->gtOverflow())
            /* Is this a cast from the type we're narrowing to or a smaller one? */
            if (oprSize <= dstSize)
                /* Bash the target type of the cast */
                dstt = genSignedType(dstt);
                if (oprSize == dstSize)
                    // Same size: change the CAST into a NOP
                    tree->ChangeOper(GT_NOP);
                    tree->gtType     = dstt;
                    tree->gtOp.gtOp2 = nullptr;
                    tree->gtVNPair   = op1->gtVNPair; // Set to op1's ValueNumber
                // oprSize is smaller
                assert(oprSize < dstSize);
                // Change the CastToType in the GT_CAST node
                tree->CastToType() = dstt;
                // The result type of a GT_CAST is never a small type.
                // Use genActualType to widen dstt when it is a small types.
                tree->gtType = genActualType(dstt);
                tree->SetVNs(vnpNarrow);
            // Comma-like shapes: only the value-producing operand (op2) must narrow.
            if (!gtIsActiveCSE_Candidate(op2) && optNarrowTree(op2, srct, dstt, vnpNarrow, doit))
                /* Simply change the type of the tree */
                tree->gtType = genActualType(dstt);
                tree->SetVNs(vnpNarrow);
            noway_assert(doit == false);
/*****************************************************************************
 *
 *  The following logic figures out whether the given variable is assigned
 *  somewhere in a list of basic blocks (or in an entire loop).
 *
 *  Tree-walk callback: records, in the isVarAssgDsc passed via
 *  data->pCallbackData, every local/indirection/call that may write state.
 */

Compiler::fgWalkResult Compiler::optIsVarAssgCB(GenTreePtr* pTree, fgWalkData* data)
    GenTreePtr tree = *pTree;
    if (tree->OperKind() & GTK_ASGOP)
        GenTreePtr dest     = tree->gtOp.gtOp1;
        genTreeOps destOper = dest->OperGet();
        isVarAssgDsc* desc = (isVarAssgDsc*)data->pCallbackData;
        assert(desc && desc->ivaSelf == desc);
        if (destOper == GT_LCL_VAR)
            unsigned tvar = dest->gtLclVarCommon.gtLclNum;
            if (tvar < lclMAX_ALLSET_TRACKED)
                AllVarSetOps::AddElemD(data->compiler, desc->ivaMaskVal, tvar);
            // Variable number too large for the tracked set: the mask is incomplete.
                desc->ivaMaskIncomplete = true;
            // Is this an assignment to the specific variable the caller asked about
            // (ignoring the one tree the caller told us to skip)?
            if (tvar == desc->ivaVar)
                if (tree != desc->ivaSkip)
        else if (destOper == GT_LCL_FLD)
            /* We can't track every field of every var. Moreover, indirections
               may access different parts of the var as different (but
               overlapping) fields. So just treat them as indirect accesses */
            // unsigned    lclNum = dest->gtLclFld.gtLclNum;
            // noway_assert(lvaTable[lclNum].lvAddrTaken);
            varRefKinds refs  = varTypeIsGC(tree->TypeGet()) ? VR_IND_REF : VR_IND_SCL;
            desc->ivaMaskInd  = varRefKinds(desc->ivaMaskInd | refs);
        else if (destOper == GT_CLS_VAR)
            // Static field store: treat as a global-variable reference.
            desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | VR_GLB_VAR);
        else if (destOper == GT_IND)
            /* Set the proper indirection bits */
            varRefKinds refs = varTypeIsGC(tree->TypeGet()) ? VR_IND_REF : VR_IND_SCL;
            desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | refs);
    else if (tree->gtOper == GT_CALL)
        isVarAssgDsc* desc = (isVarAssgDsc*)data->pCallbackData;
        assert(desc && desc->ivaSelf == desc);
        // Record the strongest interference kind seen from any call.
        desc->ivaMaskCall = optCallInterf(tree);
    return WALK_CONTINUE;
/*****************************************************************************/
// optIsVarAssigned: Return true if local variable "var" is assigned anywhere
// in the block range [beg..end], ignoring the single tree "skip" (typically
// the assignment under analysis itself). Walks each statement with
// optIsVarAssgCB.

bool Compiler::optIsVarAssigned(BasicBlock* beg, BasicBlock* end, GenTreePtr skip, unsigned var)
    // Set up the walker descriptor: which tree to skip and self-check pointer.
    desc.ivaSkip = skip;
    desc.ivaSelf = &desc;
    desc.ivaMaskCall = CALLINT_NONE;
    AllVarSetOps::AssignNoCopy(this, desc.ivaMaskVal, AllVarSetOps::MakeEmpty(this));
    // Walk every statement of every block in the range; the callback aborts
    // the walk (non-zero result) when it sees an assignment to "var".
    for (GenTreeStmt* stmt = beg->firstStmt(); stmt; stmt = stmt->gtNextStmt)
        noway_assert(stmt->gtOper == GT_STMT);
        if (fgWalkTreePre(&stmt->gtStmtExpr, optIsVarAssgCB, &desc))
/*****************************************************************************/
// optIsSetAssgLoop: Determine whether loop "lnum" may assign to any of the
// variables in "vars" or to indirections of the kinds in "inds". Caches the
// per-loop assigned-variable summary in the loop descriptor on first use.
int Compiler::optIsSetAssgLoop(unsigned lnum, ALLVARSET_VALARG_TP vars, varRefKinds inds)
    /* Get hold of the loop descriptor */
    noway_assert(lnum < optLoopCount);
    loop = optLoopTable + lnum;
    /* Do we already know what variables are assigned within this loop? */
    if (!(loop->lpFlags & LPFLG_ASGVARS_YES))
        /* Prepare the descriptor used by the tree walker call-back */
        desc.ivaVar  = (unsigned)-1;
        desc.ivaSkip = nullptr;
        desc.ivaSelf = &desc;
        AllVarSetOps::AssignNoCopy(this, desc.ivaMaskVal, AllVarSetOps::MakeEmpty(this));
        desc.ivaMaskInd        = VR_NONE;
        desc.ivaMaskCall       = CALLINT_NONE;
        desc.ivaMaskIncomplete = false;
        /* Now walk all the statements of the loop */
        beg = loop->lpHead->bbNext;
        end = loop->lpBottom;
        for (/**/; /**/; beg = beg->bbNext)
            for (GenTreeStmt* stmt = beg->FirstNonPhiDef(); stmt; stmt = stmt->gtNextStmt)
                noway_assert(stmt->gtOper == GT_STMT);
                fgWalkTreePre(&stmt->gtStmtExpr, optIsVarAssgCB, &desc);
                // Some locals couldn't be tracked in the mask -- remember that.
                if (desc.ivaMaskIncomplete)
                    loop->lpFlags |= LPFLG_ASGVARS_INC;
        // Cache the computed summary on the loop descriptor.
        AllVarSetOps::Assign(this, loop->lpAsgVars, desc.ivaMaskVal);
        loop->lpAsgInds = desc.ivaMaskInd;
        loop->lpAsgCall = desc.ivaMaskCall;
        /* Now we know what variables are assigned in the loop */
        loop->lpFlags |= LPFLG_ASGVARS_YES;
    /* Now we can finally test the caller's mask against the loop's */
    if (!AllVarSetOps::IsEmptyIntersection(this, loop->lpAsgVars, vars) || (loop->lpAsgInds & inds))
    // Calls in the loop may clobber indirections of various kinds; combine the
    // call-interference summary with the kinds the caller cares about.
    switch (loop->lpAsgCall)
            /* Can't hoist if the call might have side effect on an indirection. */
            if (loop->lpAsgInds != VR_NONE)
        case CALLINT_REF_INDIRS:
            /* Can't hoist if the call might have side effect on an ref indirection. */
            if (loop->lpAsgInds & VR_IND_REF)
        case CALLINT_SCL_INDIRS:
            /* Can't hoist if the call might have side effect on an non-ref indirection. */
            if (loop->lpAsgInds & VR_IND_SCL)
        case CALLINT_ALL_INDIRS:
            /* Can't hoist if the call might have side effect on any indirection. */
            if (loop->lpAsgInds & (VR_IND_REF | VR_IND_SCL))
            /* Other helpers kill nothing */
            noway_assert(!"Unexpected lpAsgCall value");
// optPerformHoistExpr: hoist a copy of "origExpr" out of loop "lnum".
// Clones the tree (marking the clone for CSE), morphs it, and appends it as a new
// statement at the end of the loop's pre-header block, creating that pre-header
// first via fgCreateLoopPreHeader(). The original tree is left in place in the
// loop body; the expectation is that CSE will later commandeer the duplicated value.
5501 void Compiler::optPerformHoistExpr(GenTreePtr origExpr, unsigned lnum)
5506 printf("\nHoisting a copy of ");
5507 printTreeID(origExpr);
5508 printf(" into PreHeader for loop L%02u <BB%02u..BB%02u>:\n", lnum, optLoopTable[lnum].lpFirst->bbNum,
5509 optLoopTable[lnum].lpBottom->bbNum);
5510 gtDispTree(origExpr);
5515 // This loop has to be in a form that is approved for hoisting.
5516 assert(optLoopTable[lnum].lpFlags & LPFLG_HOISTABLE);
5518 // Create a copy of the expression and mark it for CSE's.
5519 GenTreePtr hoistExpr = gtCloneExpr(origExpr, GTF_MAKE_CSE);
5521 // At this point we should have a cloned expression, marked with the GTF_MAKE_CSE flag
5522 assert(hoistExpr != origExpr);
5523 assert(hoistExpr->gtFlags & GTF_MAKE_CSE);
5525 GenTreePtr hoist = hoistExpr;
5526 // The value of the expression isn't used (unless it's an assignment).
5527 if (hoistExpr->OperGet() != GT_ASG)
// Wrap the clone so its value is explicitly discarded in the pre-header.
5529 hoist = gtUnusedValNode(hoistExpr);
5532 /* Put the statement in the preheader */
5534 fgCreateLoopPreHeader(lnum);
// After fgCreateLoopPreHeader, lpHead is the (BBJ_NONE) pre-header block.
5536 BasicBlock* preHead = optLoopTable[lnum].lpHead;
5537 assert(preHead->bbJumpKind == BBJ_NONE);
5539 // fgMorphTree and lvaRecursiveIncRefCounts requires that compCurBB be the block that contains
5540 // (or in this case, will contain) the expression.
5541 compCurBB = preHead;
5543 // Increment the ref counts of any local vars appearing in "hoist".
5544 // Note that we need to do this before fgMorphTree() as fgMorph() could constant
5545 // fold away some of the lcl vars referenced by "hoist".
5546 lvaRecursiveIncRefCounts(hoist);
5548 hoist = fgMorphTree(hoist);
5550 GenTreePtr hoistStmt = gtNewStmt(hoist);
5551 hoistStmt->gtFlags |= GTF_STMT_CMPADD;
5553 /* simply append the statement at the end of the preHead's list */
5555 GenTreePtr treeList = preHead->bbTreeList;
5559 /* append after last statement */
// Statement list is doubly linked; the head's gtPrev points at the last statement.
5561 GenTreePtr last = treeList->gtPrev;
5562 assert(last->gtNext == nullptr);
5564 last->gtNext = hoistStmt;
5565 hoistStmt->gtPrev = last;
5566 treeList->gtPrev = hoistStmt;
5570 /* Empty pre-header - store the single statement in the block */
5572 preHead->bbTreeList = hoistStmt;
5573 hoistStmt->gtPrev = hoistStmt;
5576 hoistStmt->gtNext = nullptr;
5581 printf("This hoisted copy placed in PreHeader (BB%02u):\n", preHead->bbNum);
// Re-derive costs/ordering for the new statement when the node list is threaded.
5586 if (fgStmtListThreaded)
5588 gtSetStmtInfo(hoistStmt);
5589 fgSetStmtSeq(hoistStmt);
// JitTestLabel support: validate "do not hoist" / expected-depth annotations on
// origExpr, then re-annotate the clone so a later phase checks it is CSE'd.
5593 if (m_nodeTestData != nullptr)
5596 // What is the depth of the loop "lnum"?
5598 unsigned lnumIter = lnum;
5599 while (optLoopTable[lnumIter].lpParent != BasicBlock::NOT_IN_LOOP)
5602 lnumIter = optLoopTable[lnumIter].lpParent;
5605 NodeToTestDataMap* testData = GetNodeTestData();
5607 TestLabelAndNum tlAndN;
5608 if (testData->Lookup(origExpr, &tlAndN) && tlAndN.m_tl == TL_LoopHoist)
5610 if (tlAndN.m_num == -1)
5613 printTreeID(origExpr);
5614 printf(" was declared 'do not hoist', but is being hoisted.\n");
5617 else if (tlAndN.m_num != depth)
5620 printTreeID(origExpr);
5621 printf(" was declared as hoistable from loop at nesting depth %d; actually hoisted from loop at depth "
5623 tlAndN.m_num, depth);
5628 // We've correctly hoisted this, so remove the annotation. Later, we'll check for any remaining "must
5629 // hoist" annotations.
5630 testData->Remove(origExpr);
5631 // Now we insert an annotation to make sure that "hoistExpr" is actually CSE'd.
5632 tlAndN.m_tl = TL_CSE_Def;
5633 tlAndN.m_num = m_loopHoistCSEClass++;
5634 testData->Set(hoistExpr, tlAndN);
5640 #if LOOP_HOIST_STATS
// Per-loop and per-method hoisting statistics (only under LOOP_HOIST_STATS).
5641 if (!m_curLoopHasHoistedExpression)
5643 m_loopsWithHoistedExpressions++;
5644 m_curLoopHasHoistedExpression = true;
5646 m_totalHoistedExpressions++;
5647 #endif // LOOP_HOIST_STATS
// optHoistLoopCode: phase driver for loop-invariant code hoisting.
// Visits every outermost loop (nested loops are handled recursively by
// optHoistLoopNest), then, in checked builds, verifies that all remaining
// TL_LoopHoist "must hoist" test annotations were satisfied.
5650 void Compiler::optHoistLoopCode()
5652 // If we don't have any loops in the method then take an early out now.
5653 if (optLoopCount == 0)
// COMPlus_JitNoHoist allows the phase to be disabled from the environment.
5659 unsigned jitNoHoist = JitConfig.JitNoHoist();
5667 // The code in this #if has been useful in debugging loop cloning issues, by
5668 // enabling selective enablement of the loop cloning optimization according to
// Method-hash range filter (debug-only, normally compiled out): hoist only when
// the method hash falls within [loophoisthashlo, loophoisthashhi].
5671 unsigned methHash = info.compMethodHash();
5672 char* lostr = getenv("loophoisthashlo");
5673 unsigned methHashLo = 0;
5676 sscanf_s(lostr, "%x", &methHashLo);
5677 // methHashLo = (unsigned(atoi(lostr)) << 2); // So we don't have to use negative numbers.
5679 char* histr = getenv("loophoisthashhi");
5680 unsigned methHashHi = UINT32_MAX;
5683 sscanf_s(histr, "%x", &methHashHi);
5684 // methHashHi = (unsigned(atoi(histr)) << 2); // So we don't have to use negative numbers.
5686 if (methHash < methHashLo || methHash > methHashHi)
5688 printf("Doing loop hoisting in %s (0x%x).\n", info.compFullName, methHash);
5690 #endif // 0 -- debugging loop cloning issues
5695 printf("\n*************** In optHoistLoopCode()\n");
5696 printf("Blocks/Trees before phase\n");
5697 fgDispBasicBlocks(true);
5702 // Consider all the loop nests, in outer-to-inner order (thus hoisting expressions outside the largest loop in which
5703 // they are invariant.)
5704 LoopHoistContext hoistCtxt(this);
5705 for (unsigned lnum = 0; lnum < optLoopCount; lnum++)
5707 if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED)
// Only start recursion at outermost loops; children are visited by optHoistLoopNest.
5712 if (optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP)
5714 optHoistLoopNest(lnum, &hoistCtxt);
5723 printf("Blocks/Trees after optHoistLoopCode() modified flowgraph\n");
5724 fgDispBasicBlocks(true);
5728 // Make sure that the predecessor lists are accurate
5729 fgDebugCheckBBlist();
5734 // Test Data stuff..
5735 // If we have no test data, early out.
5736 if (m_nodeTestData == nullptr)
// Any TL_LoopHoist annotation still present with m_num >= 0 marks a tree that was
// declared "must hoist" but was never hoisted — report it.
5740 NodeToTestDataMap* testData = GetNodeTestData();
5741 for (NodeToTestDataMap::KeyIterator ki = testData->Begin(); !ki.Equal(testData->End()); ++ki)
5743 TestLabelAndNum tlAndN;
5744 GenTreePtr node = ki.Get();
5745 bool b = testData->Lookup(node, &tlAndN);
5747 if (tlAndN.m_tl != TL_LoopHoist)
5751 // Otherwise, it is a loop hoist annotation.
5752 assert(tlAndN.m_num < 100); // >= 100 indicates nested static field address, should already have been moved.
5753 if (tlAndN.m_num >= 0)
5757 printf(" was declared 'must hoist', but has not been hoisted.\n");
// optHoistLoopNest: hoist from loop "lnum", then recurse into its nested loops.
// The VNs hoisted in this loop are temporarily added to the context's
// m_hoistedInParentLoops set while children are processed (so a child does not
// re-hoist the same value), and removed again afterwards.
5764 void Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt)
5766 // Do this loop, then recursively do all nested loops.
5767 CLANG_FORMAT_COMMENT_ANCHOR;
5769 #if LOOP_HOIST_STATS
// Reset the per-loop stats flag used by optPerformHoistExpr's bookkeeping.
5771 m_curLoopHasHoistedExpression = false;
5772 m_loopsConsidered++;
5773 #endif // LOOP_HOIST_STATS
5775 optHoistThisLoop(lnum, hoistCtxt);
// Take ownership of the set of VNs hoisted for this loop (context resets its copy).
5777 VNSet* hoistedInCurLoop = hoistCtxt->ExtractHoistedInCurLoop();
5779 if (optLoopTable[lnum].lpChild != BasicBlock::NOT_IN_LOOP)
5781 // Add the ones hoisted in "lnum" to "hoistedInParents" for any nested loops.
5782 // TODO-Cleanup: we should have a set abstraction for loops.
5783 if (hoistedInCurLoop != nullptr)
5785 for (VNSet::KeyIterator keys = hoistedInCurLoop->Begin(); !keys.Equal(hoistedInCurLoop->End()); ++keys)
5789 assert(!hoistCtxt->m_hoistedInParentLoops.Lookup(keys.Get(), &b));
5791 hoistCtxt->m_hoistedInParentLoops.Set(keys.Get(), true);
// Recurse over the sibling-linked list of child loops.
5795 for (unsigned child = optLoopTable[lnum].lpChild; child != BasicBlock::NOT_IN_LOOP;
5796 child = optLoopTable[child].lpSibling)
5798 optHoistLoopNest(child, hoistCtxt);
5802 // TODO-Cleanup: we should have a set abstraction for loops.
5803 if (hoistedInCurLoop != nullptr)
5805 for (VNSet::KeyIterator keys = hoistedInCurLoop->Begin(); !keys.Equal(hoistedInCurLoop->End()); ++keys)
5807 // Note that we asserted when we added these that they hadn't been members, so removing is appropriate.
5808 hoistCtxt->m_hoistedInParentLoops.Remove(keys.Get());
// optHoistThisLoop: attempt hoisting for the single loop "lnum".
// Legality gates (each bails out early): loop not removed; loop is in do-while
// form; lpHead dominates lpEntry; head/entry in the same try region; entry not
// inside a catch (finally is allowed). If all pass, LPFLG_HOISTABLE is set,
// register-pressure counts (int/FP, with long doubling on 32-bit) are computed,
// and each definitely-executed block is scanned for hoistable expressions.
5814 void Compiler::optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt)
5816 LoopDsc* pLoopDsc = &optLoopTable[lnum];
5818 /* If loop was removed continue */
5820 if (pLoopDsc->lpFlags & LPFLG_REMOVED)
5825 /* Get the head and tail of the loop */
5827 BasicBlock* head = pLoopDsc->lpHead;
5828 BasicBlock* tail = pLoopDsc->lpBottom;
5829 BasicBlock* lbeg = pLoopDsc->lpEntry;
5832 // We must have a do-while loop
5833 if ((pLoopDsc->lpFlags & LPFLG_DO_WHILE) == 0)
5838 // The loop-head must dominate the loop-entry.
5839 // TODO-CQ: Couldn't we make this true if it's not?
5840 if (!fgDominate(head, lbeg))
5845 // if lbeg is the start of a new try block then we won't be able to hoist
5846 if (!BasicBlock::sameTryRegion(head, lbeg))
5851 // We don't bother hoisting when inside of a catch block
5852 if ((lbeg->bbCatchTyp != BBCT_NONE) && (lbeg->bbCatchTyp != BBCT_FINALLY))
// All legality checks passed: mark the loop as a valid hoist target.
5857 pLoopDsc->lpFlags |= LPFLG_HOISTABLE;
5859 unsigned begn = lbeg->bbNum;
5860 unsigned endn = tail->bbNum;
5862 // Ensure the per-loop sets/tables are empty.
5863 hoistCtxt->m_curLoopVnInvariantCache.RemoveAll();
5868 printf("optHoistLoopCode for loop L%02u <BB%02u..BB%02u>:\n", lnum, begn, endn);
5869 printf(" Loop body %s a call\n", pLoopDsc->lpContainsCall ? "contains" : "does not contain");
// loopVars = locals both live across the loop boundary (in/out) and used or
// defined inside the loop; these are the profitability counts' inputs.
5873 VARSET_TP VARSET_INIT_NOCOPY(loopVars, VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, pLoopDsc->lpVarUseDef));
5875 pLoopDsc->lpVarInOutCount = VarSetOps::Count(this, pLoopDsc->lpVarInOut);
5876 pLoopDsc->lpLoopVarCount = VarSetOps::Count(this, loopVars);
5877 pLoopDsc->lpHoistedExprCount = 0;
5879 #ifndef _TARGET_64BIT_
5880 unsigned longVarsCount = VarSetOps::Count(this, lvaLongVars);
5882 if (longVarsCount > 0)
5884 // Since 64-bit variables take up two registers on 32-bit targets, we increase
5885 // the Counts such that each TYP_LONG variable counts twice.
5887 VARSET_TP VARSET_INIT_NOCOPY(loopLongVars, VarSetOps::Intersection(this, loopVars, lvaLongVars));
5888 VARSET_TP VARSET_INIT_NOCOPY(inOutLongVars, VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, lvaLongVars));
5893 printf("\n LONGVARS(%d)=", VarSetOps::Count(this, lvaLongVars));
5894 lvaDispVarSet(lvaLongVars);
5897 pLoopDsc->lpLoopVarCount += VarSetOps::Count(this, loopLongVars);
5898 pLoopDsc->lpVarInOutCount += VarSetOps::Count(this, inOutLongVars);
5900 #endif // !_TARGET_64BIT_
5905 printf("\n USEDEF (%d)=", VarSetOps::Count(this, pLoopDsc->lpVarUseDef));
5906 lvaDispVarSet(pLoopDsc->lpVarUseDef);
5908 printf("\n INOUT (%d)=", pLoopDsc->lpVarInOutCount);
5909 lvaDispVarSet(pLoopDsc->lpVarInOut);
5911 printf("\n LOOPVARS(%d)=", pLoopDsc->lpLoopVarCount);
5912 lvaDispVarSet(loopVars);
// Floating-point locals are counted separately (FP registers are a separate
// resource); the FP counts are subtracted back out of the integer counts.
5917 unsigned floatVarsCount = VarSetOps::Count(this, lvaFloatVars);
5919 if (floatVarsCount > 0)
5921 VARSET_TP VARSET_INIT_NOCOPY(loopFPVars, VarSetOps::Intersection(this, loopVars, lvaFloatVars));
5922 VARSET_TP VARSET_INIT_NOCOPY(inOutFPVars, VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, lvaFloatVars));
5924 pLoopDsc->lpLoopVarFPCount = VarSetOps::Count(this, loopFPVars);
5925 pLoopDsc->lpVarInOutFPCount = VarSetOps::Count(this, inOutFPVars);
5926 pLoopDsc->lpHoistedFPExprCount = 0;
5928 pLoopDsc->lpLoopVarCount -= pLoopDsc->lpLoopVarFPCount;
5929 pLoopDsc->lpVarInOutCount -= pLoopDsc->lpVarInOutFPCount;
5934 printf(" INOUT-FP(%d)=", pLoopDsc->lpVarInOutFPCount);
5935 lvaDispVarSet(inOutFPVars);
5937 printf("\n LOOPV-FP(%d)=", pLoopDsc->lpLoopVarFPCount);
5938 lvaDispVarSet(loopFPVars);
5942 else // (floatVarsCount == 0)
5944 pLoopDsc->lpLoopVarFPCount = 0;
5945 pLoopDsc->lpVarInOutFPCount = 0;
5946 pLoopDsc->lpHoistedFPExprCount = 0;
5949 // Find the set of definitely-executed blocks.
5950 // Ideally, the definitely-executed blocks are the ones that post-dominate the entry block.
5951 // Until we have post-dominators, we'll special-case for single-exit blocks.
5952 ExpandArrayStack<BasicBlock*> defExec(getAllocatorLoopHoist());
5953 if (pLoopDsc->lpFlags & LPFLG_ONE_EXIT)
5955 assert(pLoopDsc->lpExit != nullptr);
5956 BasicBlock* cur = pLoopDsc->lpExit;
5957 // Push dominators, until we reach "entry" or exit the loop.
5958 while (cur != nullptr && pLoopDsc->lpContains(cur) && cur != pLoopDsc->lpEntry)
5963 // If we didn't reach the entry block, give up and *just* push the entry block.
5964 if (cur != pLoopDsc->lpEntry)
5968 defExec.Push(pLoopDsc->lpEntry);
5970 else // More than one exit
5972 // We'll assume that only the entry block is definitely executed.
5973 // We could in the future do better.
5974 defExec.Push(pLoopDsc->lpEntry);
// Pop in reverse push order so each dominator is processed before its dominatee.
5977 while (defExec.Size() > 0)
5979 // Consider in reverse order: dominator before dominatee.
5980 BasicBlock* blk = defExec.Pop();
5981 optHoistLoopExprsForBlock(blk, lnum, hoistCtxt);
5985 // Hoist any expressions in "blk" that are invariant in loop "lnum" outside of "blk" and into a PreHead for loop "lnum".
5986 void Compiler::optHoistLoopExprsForBlock(BasicBlock* blk, unsigned lnum, LoopHoistContext* hoistCtxt)
5988 LoopDsc* pLoopDsc = &optLoopTable[lnum];
// "first block and before side effect" starts true only for the loop entry block;
// optHoistLoopExprsForTree clears it once a possible global side effect is seen.
5989 bool firstBlockAndBeforeSideEffect = (blk == pLoopDsc->lpEntry);
5990 unsigned blkWeight = blk->getBBWeight(this);
5995 printf(" optHoistLoopExprsForBlock BB%02u (weight=%6s) of loop L%02u <BB%02u..BB%02u>, firstBlock is %s\n",
5996 blk->bbNum, refCntWtd2str(blkWeight), lnum, pLoopDsc->lpFirst->bbNum, pLoopDsc->lpBottom->bbNum,
5997 firstBlockAndBeforeSideEffect ? "true" : "false");
5998 if (blkWeight < (BB_UNITY_WEIGHT / 10))
6000 printf(" block weight is too small to perform hoisting.\n");
// Skip rarely-executed blocks: hoisting from them is unlikely to pay off.
6005 if (blkWeight < (BB_UNITY_WEIGHT / 10))
6007 // Block weight is too small to perform hoisting.
// Walk each statement (skipping SSA phi definitions) and recursively look for
// hoistable subtrees; a fully-hoistable top-level tree is offered as a candidate.
6011 for (GenTreeStmt* stmt = blk->FirstNonPhiDef(); stmt; stmt = stmt->gtNextStmt)
6013 GenTreePtr stmtTree = stmt->gtStmtExpr;
6015 (void)optHoistLoopExprsForTree(stmtTree, lnum, hoistCtxt, &firstBlockAndBeforeSideEffect, &hoistable);
6018 // we will try to hoist the top-level stmtTree
6019 optHoistCandidate(stmtTree, lnum, hoistCtxt);
// optIsProfitableToHoistableTree: register-pressure heuristic deciding whether
// hoisting "tree" out of loop "lnum" is worthwhile.
// Estimates the registers available for the hoisted value (callee-saved, plus
// callee-trash when the loop makes no calls, minus expressions already hoisted),
// then requires progressively higher gtCostEx thresholds as estimated pressure
// (loopVarCount / varInOutCount vs. availRegCount) increases.
6024 bool Compiler::optIsProfitableToHoistableTree(GenTreePtr tree, unsigned lnum)
6026 LoopDsc* pLoopDsc = &optLoopTable[lnum];
6028 bool loopContainsCall = pLoopDsc->lpContainsCall;
6031 int hoistedExprCount;
// FP trees use the FP counts/registers; everything else uses the integer side.
6035 if (varTypeIsFloating(tree->TypeGet()))
6037 hoistedExprCount = pLoopDsc->lpHoistedFPExprCount;
6038 loopVarCount = pLoopDsc->lpLoopVarFPCount;
6039 varInOutCount = pLoopDsc->lpVarInOutFPCount;
6041 availRegCount = CNT_CALLEE_SAVED_FLOAT;
6042 if (!loopContainsCall)
6044 availRegCount += CNT_CALLEE_TRASH_FLOAT - 1;
6047 // For ARM each double takes two FP registers
6048 // For now on ARM we won't track singles/doubles
6049 // and instead just assume that we always have doubles.
6056 hoistedExprCount = pLoopDsc->lpHoistedExprCount;
6057 loopVarCount = pLoopDsc->lpLoopVarCount;
6058 varInOutCount = pLoopDsc->lpVarInOutCount;
6060 availRegCount = CNT_CALLEE_SAVED - 1;
6061 if (!loopContainsCall)
6063 availRegCount += CNT_CALLEE_TRASH - 1;
6065 #ifndef _TARGET_64BIT_
6066 // For our 32-bit targets Long types take two registers.
6067 if (varTypeIsLong(tree->TypeGet()))
6069 availRegCount = (availRegCount + 1) / 2;
6074 // decrement the availRegCount by the count of expression that we have already hoisted.
6075 availRegCount -= hoistedExprCount;
6077 // the variables that are read/written inside the loop should
6078 // always be a subset of the InOut variables for the loop
6079 assert(loopVarCount <= varInOutCount);
6081 // When loopVarCount >= availRegCount we believe that all of the
6082 // available registers will get used to hold LclVars inside the loop.
6083 // This pessimistically assumes that each loopVar has a conflicting
6084 // lifetime with every other loopVar.
6085 // For this case we will hoist the expression only if is profitable
6086 // to place it in a stack home location (gtCostEx >= 2*IND_COST_EX)
6087 // as we believe it will be placed in the stack or one of the other
6088 // loopVars will be spilled into the stack
6090 if (loopVarCount >= availRegCount)
6092 // Don't hoist expressions that are not heavy: tree->gtCostEx < (2*IND_COST_EX)
6093 if (tree->gtCostEx < (2 * IND_COST_EX))
6099 // When varInOutCount < availRegCount we are know that there are
6100 // some available register(s) when we enter the loop body.
6101 // When varInOutCount == availRegCount there often will be a register
6102 // available when we enter the loop body, since a loop often defines a
6103 // LclVar on exit or there is often at least one LclVar that is worth
6104 // spilling to the stack to make way for this hoisted expression.
6105 // So we are willing hoist an expression with gtCostEx == MIN_CSE_COST
6107 if (varInOutCount > availRegCount)
6109 // Don't hoist expressions that barely meet CSE cost requirements: tree->gtCostEx == MIN_CSE_COST
6110 if (tree->gtCostEx <= MIN_CSE_COST + 1)
6120 // This function returns true if 'tree' is a loop invariant expression.
6121 // It also sets '*pHoistable' to true if 'tree' can be hoisted into a loop PreHeader block
// Recursive post-order walk: children first, so a non-hoistable parent can still
// hoist its hoistable children individually at the end.
6123 bool Compiler::optHoistLoopExprsForTree(
6124 GenTreePtr tree, unsigned lnum, LoopHoistContext* hoistCtxt, bool* pFirstBlockAndBeforeSideEffect, bool* pHoistable)
6126 // First do the children.
6127 // We must keep track of whether each child node was hoistable or not
6129 unsigned nChildren = tree->NumChildren();
6130 bool childrenHoistable[GenTree::MAX_CHILDREN];
6132 // Initialize the array elements for childrenHoistable[] to false
6133 for (unsigned i = 0; i < nChildren; i++)
6135 childrenHoistable[i] = false;
// A tree is invariant only if every child is invariant.
6138 bool treeIsInvariant = true;
6139 for (unsigned childNum = 0; childNum < nChildren; childNum++)
6141 if (!optHoistLoopExprsForTree(tree->GetChild(childNum), lnum, hoistCtxt, pFirstBlockAndBeforeSideEffect,
6142 &childrenHoistable[childNum]))
6144 treeIsInvariant = false;
6148 // If all the children of "tree" are hoistable, then "tree" itself can be hoisted
6150 bool treeIsHoistable = treeIsInvariant;
6152 // But we must see if anything else prevents "tree" from being hoisted.
6154 if (treeIsInvariant)
6156 // Tree must be a suitable CSE candidate for us to be able to hoist it.
6157 treeIsHoistable = optIsCSEcandidate(tree);
6159 // If it's a call, it must be a helper call, and be pure.
6160 // Further, if it may run a cctor, it must be labeled as "Hoistable"
6161 // (meaning it won't run a cctor because the class is not precise-init).
6162 if (treeIsHoistable && tree->OperGet() == GT_CALL)
6164 GenTreeCall* call = tree->AsCall();
6165 if (call->gtCallType != CT_HELPER)
6167 treeIsHoistable = false;
6171 CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
6172 if (!s_helperCallProperties.IsPure(helpFunc))
6174 treeIsHoistable = false;
6176 else if (s_helperCallProperties.MayRunCctor(helpFunc) && (call->gtFlags & GTF_CALL_HOISTABLE) == 0)
6178 treeIsHoistable = false;
6183 if (treeIsHoistable)
6185 if (!(*pFirstBlockAndBeforeSideEffect))
6187 // For now, we give up on an expression that might raise an exception if it is after the
6188 // first possible global side effect (and we assume we're after that if we're not in the first block).
6189 // TODO-CQ: this is when we might do loop cloning.
6191 if ((tree->gtFlags & GTF_EXCEPT) != 0)
6193 treeIsHoistable = false;
6196 // Currently we must give up on reads from static variables (even if we are in the first block).
6198 if (tree->OperGet() == GT_CLS_VAR)
6200 // TODO-CQ: test that fails if we hoist GT_CLS_VAR: JIT\Directed\Languages\ComponentPascal\pi_r.exe
6202 treeIsHoistable = false;
6206 // Is the value of the whole tree loop invariant?
// Value-number-based invariance check, memoized in m_curLoopVnInvariantCache.
6208 optVNIsLoopInvariant(tree->gtVNPair.GetLiberal(), lnum, &hoistCtxt->m_curLoopVnInvariantCache);
6210 // Is the value of the whole tree loop invariant?
6211 if (!treeIsInvariant)
6213 treeIsHoistable = false;
6217 // Check if we need to set '*pFirstBlockAndBeforeSideEffect' to false.
6218 // If we encounter a tree with a call in it
6219 // or if we see an assignment to global we set it to false.
6221 // If we are already set to false then we can skip these checks
6223 if (*pFirstBlockAndBeforeSideEffect)
6225 // For this purpose, we only care about memory side effects. We assume that expressions will
6226 // be hoisted so that they are evaluated in the same order as they would have been in the loop,
6227 // and therefore throw exceptions in the same order. (So we don't use GTF_GLOBALLY_VISIBLE_SIDE_EFFECTS
6228 // here, since that includes exceptions.)
6231 // If it's a call, it must be a helper call that does not mutate the heap.
6232 // Further, if it may run a cctor, it must be labeled as "Hoistable"
6233 // (meaning it won't run a cctor because the class is not precise-init).
6234 GenTreeCall* call = tree->AsCall();
6235 if (call->gtCallType != CT_HELPER)
6237 *pFirstBlockAndBeforeSideEffect = false;
6241 CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
6242 if (s_helperCallProperties.MutatesHeap(helpFunc))
6244 *pFirstBlockAndBeforeSideEffect = false;
6246 else if (s_helperCallProperties.MayRunCctor(helpFunc) && (call->gtFlags & GTF_CALL_HOISTABLE) == 0)
6248 *pFirstBlockAndBeforeSideEffect = false;
6252 else if (tree->OperIsAssignment())
6254 // If the LHS of the assignment has a global reference, then assume it's a global side effect.
6255 GenTreePtr lhs = tree->gtOp.gtOp1;
6256 if (lhs->gtFlags & GTF_GLOB_REF)
6258 *pFirstBlockAndBeforeSideEffect = false;
6261 else if (tree->OperIsCopyBlkOp())
6263 GenTreePtr args = tree->gtOp.gtOp1;
6264 assert(args->OperGet() == GT_LIST);
6265 if (args->gtOp.gtOp1->gtFlags & GTF_GLOB_REF)
6267 *pFirstBlockAndBeforeSideEffect = false;
6272 // If this 'tree' is hoistable then we return and the caller will
6273 // decide to hoist it as part of larger hoistable expression.
6275 if (!treeIsHoistable)
6277 // We are not hoistable so we will now hoist any hoistable children.
6279 for (unsigned childNum = 0; childNum < nChildren; childNum++)
6281 if (childrenHoistable[childNum])
6283 // We can't hoist the LHS of an assignment, isn't a real use.
6284 if (childNum == 0 && (tree->OperIsAssignment()))
6289 GenTreePtr child = tree->GetChild(childNum);
6291 // We try to hoist this 'child' tree
6292 optHoistCandidate(child, lnum, hoistCtxt);
// Report hoistability to the caller; the return value is invariance.
6297 *pHoistable = treeIsHoistable;
6298 return treeIsInvariant;
// optHoistCandidate: final gatekeeper for hoisting "tree" out of loop "lnum".
// Checks loop validity (in a loop, LPFLG_HOISTABLE), tree validity at the loop
// head, profitability, and that the tree's VN wasn't already hoisted in this
// loop or a parent; then performs the hoist and updates the per-loop counters
// and the hoisted-VN set.
6301 void Compiler::optHoistCandidate(GenTreePtr tree, unsigned lnum, LoopHoistContext* hoistCtxt)
6303 if (lnum == BasicBlock::NOT_IN_LOOP)
6305 // The hoisted expression isn't valid at any loop head so don't hoist this expression.
6309 // The outer loop also must be suitable for hoisting...
6310 if ((optLoopTable[lnum].lpFlags & LPFLG_HOISTABLE) == 0)
6315 // If the hoisted expression isn't valid at this loop head then break
6316 if (!optTreeIsValidAtLoopHead(tree, lnum))
6321 // It must pass the hoistable profitablity tests for this loop level
6322 if (!optIsProfitableToHoistableTree(tree, lnum))
// De-duplicate by liberal value number against parent loops and this loop.
6328 if (hoistCtxt->m_hoistedInParentLoops.Lookup(tree->gtVNPair.GetLiberal(), &b))
6330 // already hoisted in a parent loop, so don't hoist this expression.
6334 if (hoistCtxt->GetHoistedInCurLoop(this)->Lookup(tree->gtVNPair.GetLiberal(), &b))
6336 // already hoisted this expression in the current loop, so don't hoist this expression.
6340 // Expression can be hoisted
6341 optPerformHoistExpr(tree, lnum);
6343 // Increment lpHoistedExprCount or lpHoistedFPExprCount
6344 if (!varTypeIsFloating(tree->TypeGet()))
6346 optLoopTable[lnum].lpHoistedExprCount++;
6347 #ifndef _TARGET_64BIT_
6348 // For our 32-bit targets Long types take two registers.
6349 if (varTypeIsLong(tree->TypeGet()))
6351 optLoopTable[lnum].lpHoistedExprCount++;
6355 else // Floating point expr hoisted
6357 optLoopTable[lnum].lpHoistedFPExprCount++;
6360 // Record the hoisted expression in hoistCtxt
6361 hoistCtxt->GetHoistedInCurLoop(this)->Set(tree->gtVNPair.GetLiberal(), true);
// optVNIsLoopInvariant: is value number "vn" invariant with respect to loop "lnum"?
// Constants are trivially invariant. Phi definitions (local or heap) are invariant
// iff their defining block is outside the loop; other VN functions are invariant
// iff all their arguments are. Results are memoized in "loopVnInvariantCache".
6364 bool Compiler::optVNIsLoopInvariant(ValueNum vn, unsigned lnum, VNToBoolMap* loopVnInvariantCache)
6366 // If it is not a VN, is not loop-invariant.
6367 if (vn == ValueNumStore::NoVN)
6372 // We'll always short-circuit constants.
6373 if (vnStore->IsVNConstant(vn) || vn == vnStore->VNForVoid())
6378 // If we've done this query previously, don't repeat.
6379 bool previousRes = false;
6380 if (loopVnInvariantCache->Lookup(vn, &previousRes))
6387 if (vnStore->GetVNFunc(vn, &funcApp))
6389 if (funcApp.m_func == VNF_PhiDef)
6391 // First, make sure it's a "proper" phi -- the definition is a Phi application.
6392 VNFuncApp phiDefValFuncApp;
6393 if (!vnStore->GetVNFunc(funcApp.m_args[2], &phiDefValFuncApp) || phiDefValFuncApp.m_func != VNF_Phi)
6395 // It's not *really* a definition, rather a pass-through of some other VN.
6396 // (This could occur, say if both sides of an if-then-else diamond made the
6397 // same assignment to a variable.)
6398 res = optVNIsLoopInvariant(funcApp.m_args[2], lnum, loopVnInvariantCache)
6402 // Is the definition within the loop? If so, is not loop-invariant.
6403 unsigned lclNum = funcApp.m_args[0];
6404 unsigned ssaNum = funcApp.m_args[1];
6405 LclSsaVarDsc* ssaDef = lvaTable[lclNum].GetPerSsaData(ssaNum);
6406 res = !optLoopContains(lnum, ssaDef->m_defLoc.m_blk->bbNatLoopNum);
6409 else if (funcApp.m_func == VNF_PhiHeapDef)
// Heap phi: the defining block is encoded as a constant VN argument.
6411 BasicBlock* defnBlk = reinterpret_cast<BasicBlock*>(vnStore->ConstantValue<ssize_t>(funcApp.m_args[0]));
6412 res = !optLoopContains(lnum, defnBlk->bbNatLoopNum);
6416 for (unsigned i = 0; i < funcApp.m_arity; i++)
6418 // TODO-CQ: We need to either make sure that *all* VN functions
6419 // always take VN args, or else have a list of arg positions to exempt, as implicitly
6421 if (!optVNIsLoopInvariant(funcApp.m_args[i], lnum, loopVnInvariantCache))
6431 // Non-function "new, unique" VN's may be annotated with the loop nest where
6432 // their definition occurs.
6433 BasicBlock::loopNumber vnLoopNum = vnStore->LoopOfVN(vn);
6435 if (vnLoopNum == MAX_LOOP_NUM)
6441 res = !optLoopContains(lnum, vnLoopNum);
// Cache the result before returning so repeat queries are O(1).
6445 loopVnInvariantCache->Set(vn, res);
// optTreeIsValidAtLoopHead: would "tree" compute the same value if evaluated at
// the head of loop "lnum"? Locals must be SSA-tracked with their SSA def outside
// the loop; constants are always valid; otherwise all children must be valid.
6449 bool Compiler::optTreeIsValidAtLoopHead(GenTreePtr tree, unsigned lnum)
6451 if (tree->OperIsLocal())
6453 GenTreeLclVarCommon* lclVar = tree->AsLclVarCommon();
6454 unsigned lclNum = lclVar->gtLclNum;
6456 // The lvlVar must be have an Ssa tracked lifetime
6457 if (fgExcludeFromSsa(lclNum))
6462 // If the loop does not contains the SSA def we can hoist it.
6463 if (!optLoopTable[lnum].lpContains(lvaTable[lclNum].GetPerSsaData(lclVar->GetSsaNum())->m_defLoc.m_blk))
6468 else if (tree->OperIsConst())
6472 else // If every one of the children nodes are valid at this Loop's Head.
6474 unsigned nChildren = tree->NumChildren();
6475 for (unsigned childNum = 0; childNum < nChildren; childNum++)
6477 if (!optTreeIsValidAtLoopHead(tree->GetChild(childNum), lnum))
6487 /*****************************************************************************
6489  * Creates a pre-header block for the given loop - a preheader is a BBJ_NONE
6490  * header. The pre-header will replace the current lpHead in the loop table.
6491  * The loop has to be a do-while loop. Thus, all blocks dominated by lpHead
6492  * will also be dominated by the loop-top, lpHead->bbNext.
6496 void Compiler::fgCreateLoopPreHeader(unsigned lnum)
6498 LoopDsc* pLoopDsc = &optLoopTable[lnum];
6500 /* This loop has to be a "do-while" loop */
6502 assert(pLoopDsc->lpFlags & LPFLG_DO_WHILE)
6504 /* Have we already created a loop-preheader block? */
// Idempotent: if LPFLG_HAS_PREHEAD is already set there is nothing to do.
6506 if (pLoopDsc->lpFlags & LPFLG_HAS_PREHEAD)
6511 BasicBlock* head = pLoopDsc->lpHead;
6512 BasicBlock* top = pLoopDsc->lpTop;
6513 BasicBlock* entry = pLoopDsc->lpEntry;
6515 // if 'entry' and 'head' are in different try regions then we won't be able to hoist
6516 if (!BasicBlock::sameTryRegion(head, entry))
6521 // Ensure that lpHead always dominates lpEntry
6523 noway_assert(fgDominate(head, entry));
6525 /* Get hold of the first block of the loop body */
6527 assert(top == entry);
6529 /* Allocate a new basic block */
6531 BasicBlock* preHead = bbNewBasicBlock(BBJ_NONE);
6532 preHead->bbFlags |= BBF_INTERNAL | BBF_LOOP_PREHEADER;
6534 // Must set IL code offset
6535 preHead->bbCodeOffs = top->bbCodeOffs;
6537 // Set the default value of the preHead weight in case we don't have
6538 // valid profile data and since this blocks weight is just an estimate
6539 // we clear any BBF_PROF_WEIGHT flag that we may have picked up from head.
6541 preHead->inheritWeight(head);
6542 preHead->bbFlags &= ~BBF_PROF_WEIGHT;
6547 printf("\nCreated PreHeader (BB%02u) for loop L%02u (BB%02u - BB%02u), with weight = %s\n", preHead->bbNum,
6548 lnum, top->bbNum, pLoopDsc->lpBottom->bbNum, refCntWtd2str(preHead->getBBWeight(this)));
6552 // The preheader block is part of the containing loop (if any).
6553 preHead->bbNatLoopNum = pLoopDsc->lpParent;
// With profile data and a conditional head, refine the pre-header weight using
// the fraction of head's flow that actually enters the loop.
6555 if (fgIsUsingProfileWeights() && (head->bbJumpKind == BBJ_COND))
6557 if ((head->bbWeight == 0) || (head->bbNext->bbWeight == 0))
6559 preHead->bbWeight = 0;
6560 preHead->bbFlags |= BBF_RUN_RARELY;
6564 bool allValidProfileWeights = ((head->bbFlags & BBF_PROF_WEIGHT) != 0) &&
6565 ((head->bbJumpDest->bbFlags & BBF_PROF_WEIGHT) != 0) &&
6566 ((head->bbNext->bbFlags & BBF_PROF_WEIGHT) != 0);
6568 if (allValidProfileWeights)
6570 double loopEnteredCount;
6571 double loopSkippedCount;
6573 if (fgHaveValidEdgeWeights)
6575 flowList* edgeToNext = fgGetPredForBlock(head->bbNext, head);
6576 flowList* edgeToJump = fgGetPredForBlock(head->bbJumpDest, head);
6577 noway_assert(edgeToNext != nullptr);
6578 noway_assert(edgeToJump != nullptr);
// Edge weights are ranges; use the midpoint of [min, max] as the estimate.
6581 ((double)edgeToNext->flEdgeWeightMin + (double)edgeToNext->flEdgeWeightMax) / 2.0;
6583 ((double)edgeToJump->flEdgeWeightMin + (double)edgeToJump->flEdgeWeightMax) / 2.0;
6587 loopEnteredCount = (double)head->bbNext->bbWeight;
6588 loopSkippedCount = (double)head->bbJumpDest->bbWeight;
6591 double loopTakenRatio = loopEnteredCount / (loopEnteredCount + loopSkippedCount);
6593 // Calculate a good approximation of the preHead's block weight
6594 unsigned preHeadWeight = (unsigned)(((double)head->bbWeight * loopTakenRatio) + 0.5);
6595 preHead->setBBWeight(max(preHeadWeight, 1));
6596 noway_assert(!preHead->isRunRarely());
6601 // Link in the preHead block.
6602 fgInsertBBbefore(top, preHead);
6604 // Ideally we would re-run SSA and VN if we optimized by doing loop hoisting.
6605 // However, that is too expensive at this point. Instead, we update the phi
6606 // node block references, if we created pre-header block due to hoisting.
6607 // This is sufficient because any definition participating in SSA that flowed
6608 // into the phi via the loop header block will now flow through the preheader
6609 // block from the header block.
6611 for (GenTreePtr stmt = top->bbTreeList; stmt; stmt = stmt->gtNext)
6613 GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
6614 if (tree->OperGet() != GT_ASG)
6618 GenTreePtr op2 = tree->gtGetOp2();
6619 if (op2->OperGet() != GT_PHI)
// Retarget phi args that named 'head' as their predecessor to 'preHead'.
6623 GenTreeArgList* args = op2->gtGetOp1()->AsArgList();
6624 while (args != nullptr)
6626 GenTreePhiArg* phiArg = args->Current()->AsPhiArg();
6627 if (phiArg->gtPredBB == head)
6629 phiArg->gtPredBB = preHead;
6631 args = args->Rest();
6635 // The handler can't begin at the top of the loop. If it did, it would be incorrect
6636 // to set the handler index on the pre header without updating the exception table.
6637 noway_assert(!top->hasHndIndex() || fgFirstBlockOfHandler(top) != top);
6639 // Update the EH table to make the hoisted block part of the loop's EH block.
6640 fgExtendEHRegionBefore(top);
6642 // TODO-CQ: set dominators for this block, to allow loop optimizations requiring them
6643 // (e.g: hoisting expression in a loop with the same 'head' as this one)
6645 /* Update the loop entry */
6647 pLoopDsc->lpHead = preHead;
6648 pLoopDsc->lpFlags |= LPFLG_HAS_PREHEAD;
6650 /* The new block becomes the 'head' of the loop - update bbRefs and bbPreds
6651 All predecessors of 'beg', (which is the entry in the loop)
6652 now have to jump to 'preHead', unless they are dominated by 'head' */
6654 preHead->bbRefs = 0;
6655 fgAddRefPred(preHead, head);
6656 bool checkNestedLoops = false;
// Rewrite each predecessor of 'top' so that non-loop flow goes through preHead.
6658 for (flowList* pred = top->bbPreds; pred; pred = pred->flNext)
6660 BasicBlock* predBlock = pred->flBlock;
6662 if (fgDominate(top, predBlock))
6664 // note: if 'top' dominates predBlock, 'head' dominates predBlock too
6665 // (we know that 'head' dominates 'top'), but using 'top' instead of
6666 // 'head' in the test allows us to not enter here if 'predBlock == head'
6668 if (predBlock != pLoopDsc->lpBottom)
// A back-edge from a block other than this loop's bottom suggests nested
// loops sharing the same entry; remember to fix their loop-table entries.
6670 noway_assert(predBlock != head);
6671 checkNestedLoops = true;
6676 switch (predBlock->bbJumpKind)
6679 noway_assert(predBlock == head);
6683 if (predBlock == head)
6685 noway_assert(predBlock->bbJumpDest != top);
6691 case BBJ_EHCATCHRET:
6692 noway_assert(predBlock->bbJumpDest == top);
6693 predBlock->bbJumpDest = preHead;
6694 preHead->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
6696 if (predBlock == head)
6698 // This is essentially the same case of predBlock being a BBJ_NONE. We may not be
6699 // able to make this a BBJ_NONE if it's an internal block (for example, a leave).
6700 // Just break, pred will be removed after switch.
6704 fgRemoveRefPred(top, predBlock);
6705 fgAddRefPred(preHead, predBlock);
6711 jumpCnt = predBlock->bbJumpSwt->bbsCount;
6712 BasicBlock** jumpTab;
6713 jumpTab = predBlock->bbJumpSwt->bbsDstTab;
// Scan every switch target; each arm that jumped to 'top' now jumps to preHead.
6718 if ((*jumpTab) == top)
6720 (*jumpTab) = preHead;
6722 fgRemoveRefPred(top, predBlock);
6723 fgAddRefPred(preHead, predBlock);
6724 preHead->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
6726 } while (++jumpTab, --jumpCnt);
6729 noway_assert(!"Unexpected bbJumpKind");
6734 noway_assert(!fgGetPredForBlock(top, preHead));
6735 fgRemoveRefPred(top, head);
6736 fgAddRefPred(top, preHead);
6739 If we found at least one back-edge in the flowgraph pointing to the top/entry of the loop
6740 (other than the back-edge of the loop we are considering) then we likely have nested
6741 do-while loops with the same entry block and inserting the preheader block changes the head
6742 of all the nested loops. Now we will update this piece of information in the loop table, and
6743 mark all nested loops as having a preheader (the preheader block can be shared among all nested
6744 do-while loops with the same entry block).
6746 if (checkNestedLoops)
6748 for (unsigned l = 0; l < optLoopCount; l++)
6750 if (optLoopTable[l].lpHead == head)
6752 noway_assert(l != lnum); // pLoopDsc->lpHead was already changed from 'head' to 'preHead'
6753 noway_assert(optLoopTable[l].lpEntry == top);
6754 optUpdateLoopHead(l, optLoopTable[l].lpHead, preHead);
6755 optLoopTable[l].lpFlags |= LPFLG_HAS_PREHEAD;
6759 printf("Same PreHeader (BB%02u) can be used for loop L%02u (BB%02u - BB%02u)\n\n", preHead->bbNum,
6760 l, top->bbNum, optLoopTable[l].lpBottom->bbNum);
// Returns true if 'blk' is the entry block of some loop in the loop table,
// searching from the block's innermost natural loop outward via lpParent.
// On success the matching loop number is reported through 'pLnum' (out param).
bool Compiler::optBlockIsLoopEntry(BasicBlock* blk, unsigned* pLnum)
    // Start at the most-nested loop that contains this block.
    unsigned lnum = blk->bbNatLoopNum;
    while (lnum != BasicBlock::NOT_IN_LOOP)
        // Is 'blk' the entry of this loop?
        if (optLoopTable[lnum].lpEntry == blk)
        // Not this loop's entry; climb to the enclosing loop.
        lnum = optLoopTable[lnum].lpParent;
// Computes side-effect summaries for every loop in the loop table: resets each
// loop's lpVarInOut/lpVarUseDef liveness sets and lpContainsCall flag, then walks
// each live outermost loop nest via optComputeLoopNestSideEffects. Finally builds
// the per-method sets of tracked floating-point locals (lvaFloatVars) and, on
// 32-bit targets, tracked long locals (lvaLongVars).
void Compiler::optComputeLoopSideEffects()
    // First pass: reset every loop's summary sets and call flag to an empty state.
    for (lnum = 0; lnum < optLoopCount; lnum++)
        VarSetOps::AssignNoCopy(this, optLoopTable[lnum].lpVarInOut, VarSetOps::MakeEmpty(this));
        VarSetOps::AssignNoCopy(this, optLoopTable[lnum].lpVarUseDef, VarSetOps::MakeEmpty(this));
        optLoopTable[lnum].lpContainsCall = false;

    // Second pass: process only live, outermost loops; the nest walk covers all
    // loops contained within each outermost one.
    for (lnum = 0; lnum < optLoopCount; lnum++)
        // Skip loops that have been removed from the table.
        if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED)

        if (optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP)
        { // Is outermost...
            optComputeLoopNestSideEffects(lnum);

    // Build the tracked-variable sets of float (and, on 32-bit, long) locals.
    VarSetOps::AssignNoCopy(this, lvaFloatVars, VarSetOps::MakeEmpty(this));
#ifndef _TARGET_64BIT_
    VarSetOps::AssignNoCopy(this, lvaLongVars, VarSetOps::MakeEmpty(this));

    for (unsigned i = 0; i < lvaCount; i++)
        LclVarDsc* varDsc = &lvaTable[i];
        // Only tracked locals participate in these sets (lvVarIndex is the tracked index).
        if (varDsc->lvTracked)
            if (varTypeIsFloating(varDsc->lvType))
                VarSetOps::AddElemD(this, lvaFloatVars, varDsc->lvVarIndex);
#ifndef _TARGET_64BIT_
            else if (varTypeIsLong(varDsc->lvType))
                VarSetOps::AddElemD(this, lvaLongVars, varDsc->lvVarIndex);
// Computes the side effects of an entire outermost loop nest 'lnum' by visiting
// every basic block in the lexical range [lpFirst, lpBottom] of the nest.
void Compiler::optComputeLoopNestSideEffects(unsigned lnum)
    assert(optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP); // Requires: lnum is outermost.
    // Exclusive end of the lexical block walk: the block just past lpBottom.
    BasicBlock* botNext = optLoopTable[lnum].lpBottom->bbNext;
    for (BasicBlock* bbInLoop = optLoopTable[lnum].lpFirst; bbInLoop != botNext; bbInLoop = bbInLoop->bbNext)
        optComputeLoopSideEffectsOfBlock(bbInLoop);
// Summarizes the side effects of all trees in block 'blk' into the loop-table
// entries of every loop containing the block: accumulates variable liveness,
// records contained calls, modified fields and modified array element types,
// and as a conservative fallback flags arbitrary heap havoc.
void Compiler::optComputeLoopSideEffectsOfBlock(BasicBlock* blk)
    unsigned mostNestedLoop = blk->bbNatLoopNum;
    assert(mostNestedLoop != BasicBlock::NOT_IN_LOOP);

    // Liveness is accumulated into the block's loop and all enclosing loops.
    AddVariableLivenessAllContainingLoops(mostNestedLoop, blk);

    bool heapHavoc = false; // True ==> there's a call or a memory store that has arbitrary heap effects.

    // Now iterate over the remaining statements, and their trees.
    for (GenTreePtr stmts = blk->FirstNonPhiDef(); (stmts != nullptr); stmts = stmts->gtNext)
        for (GenTreePtr tree = stmts->gtStmt.gtStmtList; (tree != nullptr); tree = tree->gtNext)
            genTreeOps oper = tree->OperGet();

            // Even after we set heapHavoc we still may want to know if a loop contains calls
            if (oper == GT_CALL)
                // Record that this loop contains a call
                AddContainsCallAllContainingLoops(mostNestedLoop);

                // If we just set lpContainsCall or it was previously set
                if (optLoopTable[mostNestedLoop].lpContainsCall)
                    // We can early exit after both heapHavoc and lpContainsCall are both set to true.

                // We are just looking for GT_CALL nodes after heapHavoc was set.

            // otherwise heapHavoc is not set

            // This body is a distillation of the heap-side effect code of value numbering.
            // We also do a very limited analysis of byref PtrTo values, to cover some cases
            // that the compiler creates.

            if (GenTree::OperIsAssignment(oper))
                GenTreePtr lhs = tree->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true);

                if (lhs->OperGet() == GT_IND)
                    GenTreePtr    arg           = lhs->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true);
                    FieldSeqNode* fldSeqArrElem = nullptr;

                    if ((tree->gtFlags & GTF_IND_VOLATILE) != 0)

                    if (arg->TypeGet() == TYP_BYREF && arg->OperGet() == GT_LCL_VAR)
                        // If it's a local byref for which we recorded a value number, use that...
                        GenTreeLclVar* argLcl = arg->AsLclVar();
                        if (!fgExcludeFromSsa(argLcl->GetLclNum()))
                            lvaTable[argLcl->GetLclNum()].GetPerSsaData(argLcl->GetSsaNum())->m_vnPair.GetLiberal();
                            // A PtrToArrElem VN identifies exactly which array element type is
                            // being modified, so we can record that instead of heap havoc.
                            if (argVN != ValueNumStore::NoVN && vnStore->GetVNFunc(argVN, &funcApp) &&
                                funcApp.m_func == VNF_PtrToArrElem)
                                assert(vnStore->IsVNHandle(funcApp.m_args[0]));
                                CORINFO_CLASS_HANDLE elemType =
                                    CORINFO_CLASS_HANDLE(vnStore->ConstantValue<size_t>(funcApp.m_args[0]));
                                AddModifiedElemTypeAllContainingLoops(mostNestedLoop, elemType);
                                // Don't set heapHavoc below.

                    // Is the LHS an array index expression?
                    else if (lhs->ParseArrayElemForm(this, &arrInfo, &fldSeqArrElem))
                        // We actually ignore "fldSeq" -- any modification to an S[], at any
                        // field of "S", will lose all information about the array type.
                        CORINFO_CLASS_HANDLE elemTypeEq = EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType);
                        AddModifiedElemTypeAllContainingLoops(mostNestedLoop, elemTypeEq);

                        // We are only interested in IsFieldAddr()'s fldSeq out parameter.
                        GenTreePtr    obj          = nullptr; // unused
                        GenTreePtr    staticOffset = nullptr; // unused
                        FieldSeqNode* fldSeq       = nullptr;

                        if (arg->IsFieldAddr(this, &obj, &staticOffset, &fldSeq) &&
                            (fldSeq != FieldSeqStore::NotAField()))
                            // Get the first (object) field from field seq. Heap[field] will yield the "field map".
                            assert(fldSeq != nullptr);
                            if (fldSeq->IsFirstElemFieldSeq())
                                fldSeq = fldSeq->m_next;
                                assert(fldSeq != nullptr);

                            AddModifiedFieldAllContainingLoops(mostNestedLoop, fldSeq->m_fieldHnd);

                else if (lhs->OperIsBlk())
                    GenTreeLclVarCommon* lclVarTree;
                    if (!tree->DefinesLocal(this, &lclVarTree, &isEntire))
                        // For now, assume arbitrary side effects on the heap...

                else if (lhs->OperGet() == GT_CLS_VAR)
                    // A static field store: record the modified field precisely.
                    AddModifiedFieldAllContainingLoops(mostNestedLoop, lhs->gtClsVar.gtClsVarHnd);

                // Otherwise, must be local lhs form. I should assert that.
                else if (lhs->OperGet() == GT_LCL_VAR)
                    GenTreeLclVar* lhsLcl = lhs->AsLclVar();
                    GenTreePtr     rhs    = tree->gtOp.gtOp2;
                    ValueNum       rhsVN  = rhs->gtVNPair.GetLiberal();
                    // If we gave the RHS a value number, propagate it.
                    if (rhsVN != ValueNumStore::NoVN)
                        rhsVN = vnStore->VNNormVal(rhsVN);
                        if (!fgExcludeFromSsa(lhsLcl->GetLclNum()))
                            lvaTable[lhsLcl->GetLclNum()]
                                .GetPerSsaData(lhsLcl->GetSsaNum())
                                ->m_vnPair.SetLiberal(rhsVN);

            else // not GenTree::OperIsAssignment(oper)
                    tree->gtVNPair = tree->gtOp.gtOp2->gtVNPair;

                    // Is it an addr of a array index expression?
                    GenTreePtr addrArg = tree->gtOp.gtOp1;
                    if (addrArg->OperGet() == GT_IND)
                        // Is the LHS an array index expression?
                        if (addrArg->gtFlags & GTF_IND_ARR_INDEX)
                            bool b = GetArrayInfoMap()->Lookup(addrArg, &arrInfo);
                            CORINFO_CLASS_HANDLE elemType =
                                EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType);
                            // Give the address a PtrToArrElem VN so a later store through it can
                            // be attributed to this element type; only the element type operand
                            // is meaningful here.
                            tree->gtVNPair.SetBoth(
                                vnStore->VNForFunc(TYP_BYREF, VNF_PtrToArrElem,
                                                   vnStore->VNForHandle(ssize_t(elemType), GTF_ICON_CLASS_HDL),
                                                   // The rest are dummy arguments.
                                                   vnStore->VNForNull(), vnStore->VNForNull(),
                                                   vnStore->VNForNull()));

                // Interlocked operations mutate memory.
                case GT_LOCKADD: // Binop
                case GT_XADD:    // Binop
                case GT_XCHG:    // Binop
                case GT_CMPXCHG: // Specialop

                    GenTreeCall* call = tree->AsCall();

                    // Record that this loop contains a call
                    AddContainsCallAllContainingLoops(mostNestedLoop);

                    if (call->gtCallType == CT_HELPER)
                        CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
                        if (s_helperCallProperties.MutatesHeap(helpFunc))
                        else if (s_helperCallProperties.MayRunCctor(helpFunc))
                            // If the call is labeled as "Hoistable", then we've checked the
                            // class that would be constructed, and it is not precise-init, so
                            // the cctor will not be run by this call. Otherwise, it might be,
                            // and might have arbitrary side effects.
                            if ((tree->gtFlags & GTF_CALL_HOISTABLE) == 0)

                // All other gtOper node kinds, leave 'heapHavoc' unchanged (i.e. false)

    // Record that all loops containing this block have heap havoc effects.
    unsigned lnum = mostNestedLoop;
    while (lnum != BasicBlock::NOT_IN_LOOP)
        optLoopTable[lnum].lpLoopHasHeapHavoc = true;
        lnum = optLoopTable[lnum].lpParent;
7086 // Marks the containsCall information to "lnum" and any parent loops.
// Sets lpContainsCall on loop 'lnum' and every one of its ancestor loops.
void Compiler::AddContainsCallAllContainingLoops(unsigned lnum)
    assert(0 <= lnum && lnum < optLoopCount);
    while (lnum != BasicBlock::NOT_IN_LOOP)
        optLoopTable[lnum].lpContainsCall = true;
        // Propagate outward to the enclosing loop.
        lnum = optLoopTable[lnum].lpParent;
7097 // Adds the variable liveness information for 'blk' to 'this' LoopDsc
// Adds the variable liveness information for 'blk' to 'this' LoopDsc:
// the block's live-in/live-out sets widen lpVarInOut, and its use/def
// sets widen lpVarUseDef.
void Compiler::LoopDsc::AddVariableLiveness(Compiler* comp, BasicBlock* blk)
    VarSetOps::UnionD(comp, this->lpVarInOut, blk->bbLiveIn);
    VarSetOps::UnionD(comp, this->lpVarInOut, blk->bbLiveOut);

    VarSetOps::UnionD(comp, this->lpVarUseDef, blk->bbVarUse);
    VarSetOps::UnionD(comp, this->lpVarUseDef, blk->bbVarDef);
7107 // Adds the variable liveness information for 'blk' to "lnum" and any parent loops.
// Adds the variable liveness information of 'blk' to loop "lnum" and every
// one of its ancestor loops.
void Compiler::AddVariableLivenessAllContainingLoops(unsigned lnum, BasicBlock* blk)
    assert(0 <= lnum && lnum < optLoopCount);
    while (lnum != BasicBlock::NOT_IN_LOOP)
        optLoopTable[lnum].AddVariableLiveness(this, blk);
        // Propagate outward to the enclosing loop.
        lnum = optLoopTable[lnum].lpParent;
7118 // Adds "fldHnd" to the set of modified fields of "lnum" and any parent loops.
// Adds "fldHnd" to the set of modified fields of loop "lnum" and every one of
// its ancestor loops.
void Compiler::AddModifiedFieldAllContainingLoops(unsigned lnum, CORINFO_FIELD_HANDLE fldHnd)
    assert(0 <= lnum && lnum < optLoopCount);
    while (lnum != BasicBlock::NOT_IN_LOOP)
        optLoopTable[lnum].AddModifiedField(this, fldHnd);
        // Propagate outward to the enclosing loop.
        lnum = optLoopTable[lnum].lpParent;
7129 // Adds "elemType" to the set of modified array element types of "lnum" and any parent loops.
// Adds "elemClsHnd" to the set of modified array element types of loop "lnum"
// and every one of its ancestor loops.
void Compiler::AddModifiedElemTypeAllContainingLoops(unsigned lnum, CORINFO_CLASS_HANDLE elemClsHnd)
    assert(0 <= lnum && lnum < optLoopCount);
    while (lnum != BasicBlock::NOT_IN_LOOP)
        optLoopTable[lnum].AddModifiedElemType(this, elemClsHnd);
        // Propagate outward to the enclosing loop.
        lnum = optLoopTable[lnum].lpParent;
7140 /*****************************************************************************
7142 * Helper passed to Compiler::fgWalkAllTreesPre() to decrement the LclVar usage counts
7143 * The 'keepList'is either a single tree or a list of trees that are formed by
7144 * one or more GT_COMMA nodes. It is the kept side-effects as returned by the
7145 * gtExtractSideEffList method.
// Tree-walk callback (see header comment above): decrements LclVar ref counts
// for nodes being deleted, but skips (keeps) any subtree that appears in the
// kept side-effect list passed via the walk's pCallbackData.
Compiler::fgWalkResult Compiler::optRemoveTreeVisitor(GenTreePtr* pTree, fgWalkData* data)
    GenTreePtr tree     = *pTree;
    Compiler*  comp     = data->compiler;
    GenTreePtr keepList = (GenTreePtr)(data->pCallbackData);

    // We may have a non-NULL side effect list that is being kept
        GenTreePtr keptTree = keepList;
        while (keptTree->OperGet() == GT_COMMA)
            assert(keptTree->OperKind() & GTK_SMPOP);
            GenTreePtr op1 = keptTree->gtOp.gtOp1;
            GenTreePtr op2 = keptTree->gtGetOp2();

            // For the GT_COMMA case the op1 is part of the original CSE tree
            // that is being kept because it contains some side-effect
                // This tree and all of its sub trees are being kept.
                return WALK_SKIP_SUBTREES;

            // For the GT_COMMA case the op2 are the remaining side-effects of the original CSE tree
            // which can again be another GT_COMMA or the final side-effect part
        if (tree == keptTree)
            // This tree and all of its sub trees are being kept.
            return WALK_SKIP_SUBTREES;

    // This node is being removed from the graph of GenTreePtr

    // Look for any local variable references

    if (tree->gtOper == GT_LCL_VAR && comp->lvaLocalVarRefCounted)
        /* This variable ref is going away, decrease its ref counts */

        lclNum = tree->gtLclVarCommon.gtLclNum;
        assert(lclNum < comp->lvaCount);
        varDsc = comp->lvaTable + lclNum;

        // make sure it's been initialized
        assert(comp->compCurBB != nullptr);
        assert(comp->compCurBB->bbWeight <= BB_MAX_WEIGHT);

        /* Decrement its lvRefCnt and lvRefCntWtd */

        // Use getBBWeight to determine the proper block weight.
        // This impacts the block weights when we have IBC data.
        varDsc->decRefCnts(comp->compCurBB->getBBWeight(comp), comp);

    return WALK_CONTINUE;
7216 /*****************************************************************************
7218 * Routine called to decrement the LclVar ref counts when removing a tree
7219 * during the remove RangeCheck phase.
7220 * This method will decrement the refcounts for any LclVars used below 'deadTree',
7221 * unless the node is found in the 'keepList' (which are saved side effects)
7222 * The keepList is communicated using the walkData.pCallbackData field
7223 * Also the compCurBB must be set to the current BasicBlock which contains
7224 * 'deadTree' as we need to fetch the block weight when decrementing the ref counts.
// Walks 'deadTree' pre-order with optRemoveTreeVisitor so the ref counts of
// LclVars under it are decremented; nodes present in 'keepList' (the saved
// side effects) are skipped. See the header comment above for preconditions.
void Compiler::optRemoveTree(GenTreePtr deadTree, GenTreePtr keepList)
    // We communicate this value using the walkData.pCallbackData field
    fgWalkTreePre(&deadTree, optRemoveTreeVisitor, (void*)keepList);
7234 /*****************************************************************************
7236 * Given an array index node, mark it as not needing a range check.
// Given an array index tree (a GT_COMMA whose op1 is a GT_ARR_BOUNDS_CHECK),
// removes the bounds check: any side effects of the check are extracted and
// kept in its place, otherwise the check is replaced with a nothing node.
// 'stmt' is the containing statement (re-costed/re-threaded afterwards);
// 'forceRemove' bypasses the optIsRangeCheckRemovable check.
void Compiler::optRemoveRangeCheck(
    GenTreePtr tree, GenTreePtr stmt, bool updateCSEcounts, unsigned sideEffFlags, bool forceRemove)
    // This transformation is only valid in a build with REARRANGE_ADDS.
    noway_assert(!"can't remove range checks without REARRANGE_ADDS right now");

    noway_assert(stmt->gtOper == GT_STMT);
    noway_assert(tree->gtOper == GT_COMMA);
    noway_assert(tree->gtOp.gtOp1->gtOper == GT_ARR_BOUNDS_CHECK);
    noway_assert(forceRemove || optIsRangeCheckRemovable(tree->gtOp.gtOp1));

    GenTreeBoundsChk* bndsChk = tree->gtOp.gtOp1->AsBoundsChk();

        printf("Before optRemoveRangeCheck:\n");

    GenTreePtr sideEffList = nullptr;
        // Keep any side effects of the bounds check expression (per sideEffFlags).
        gtExtractSideEffList(tree->gtOp.gtOp1, &sideEffList, sideEffFlags);

    // Decrement the ref counts for any LclVars that are being deleted
    optRemoveTree(tree->gtOp.gtOp1, sideEffList);

    // Just replace the bndsChk with a NOP as an operand to the GT_COMMA, if there are no side effects.
    tree->gtOp.gtOp1 = (sideEffList != nullptr) ? sideEffList : gtNewNothingNode();

    // TODO-CQ: We should also remove the GT_COMMA, but in any case we can no longer CSE the GT_COMMA.
    tree->gtFlags |= GTF_DONT_CSE;

    /* Recalculate the gtCostSz, etc... */
    gtSetStmtInfo(stmt);

    /* Re-thread the nodes if necessary */
    if (fgStmtListThreaded)

        printf("After optRemoveRangeCheck:\n");
7308 /*****************************************************************************
7309 * Return the scale in an array reference, given a pointer to the
7310 * multiplication node.
// Returns the scale of an array-reference multiplication node 'mul' (a GT_MUL
// by a constant, or a GT_LSH by a constant shift amount), and reports the index
// expression through 'pIndex'. Cascaded constant multiplications are folded
// into a single scale.
ssize_t Compiler::optGetArrayRefScaleAndIndex(GenTreePtr mul, GenTreePtr* pIndex DEBUGARG(bool bRngChk))
    assert(mul->gtOper == GT_MUL || mul->gtOper == GT_LSH);
    assert(mul->gtOp.gtOp2->IsCnsIntOrI());

    ssize_t scale = mul->gtOp.gtOp2->gtIntConCommon.IconValue();

    // For a shift, the scale is 2^shiftAmount.
    if (mul->gtOper == GT_LSH)
        scale = ((ssize_t)1) << scale;

    GenTreePtr index = mul->gtOp.gtOp1;

    if (index->gtOper == GT_MUL && index->gtOp.gtOp2->IsCnsIntOrI())
        // case of two cascading multiplications for constant int (e.g. * 20 morphed to * 5 * 4):
        // When index->gtOper is GT_MUL and index->gtOp.gtOp2->gtOper is GT_CNS_INT (i.e. * 5),
        // we can bump up the scale from 4 to 5*4, and then change index to index->gtOp.gtOp1.
        // Otherwise, we cannot optimize it. We will simply keep the original scale and index.
        scale *= index->gtOp.gtOp2->gtIntConCommon.IconValue();
        index = index->gtOp.gtOp1;

    // A range-checked index must not be a comma (the check would have been stripped).
    assert(!bRngChk || index->gtOper != GT_COMMA);
7348 /*****************************************************************************
7349 * Find the last assignment to of the local variable in the block. Return
7350 * RHS or NULL. If any local variable in the RHS has been killed in
7351 * intervening code, return NULL. If the variable being searched for is killed
7352 * in the intervening code, return NULL.
// Finds the last unconditional '=' assignment to the given local in 'block',
// scanning statements backwards, and returns its RHS (see header comment above).
// Returns nullptr if the local is killed by an 'op=' / conditional assignment,
// or if any local used in the RHS is killed (or indirectly referenced) between
// the assignment and the end of the block. 'pKilledInOut' accumulates the
// tracked locals assigned in the scanned range; '*pLhsRhsKilledAfterInit' is
// set when such a kill invalidates the result.
GenTreePtr Compiler::optFindLocalInit(BasicBlock* block,
                                      VARSET_TP*  pKilledInOut,
                                      bool*       pLhsRhsKilledAfterInit)
    assert(pKilledInOut);
    assert(pLhsRhsKilledAfterInit);

    *pLhsRhsKilledAfterInit = false;

    unsigned LclNum = local->gtLclVarCommon.gtLclNum;

    GenTreePtr list = block->bbTreeList;
    // Empty block: nothing to find.
    if (list == nullptr)

    GenTreePtr rhs  = nullptr;
    GenTreePtr stmt = list;
        // Walk statements in reverse order (gtPrev).
        stmt = stmt->gtPrev;
        if (stmt == nullptr)

        GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
        // If we encounter an assignment to a local variable,
        if ((tree->OperKind() & GTK_ASGOP) && tree->gtOp.gtOp1->gtOper == GT_LCL_VAR)
            // And the assigned variable equals the input local,
            if (tree->gtOp.gtOp1->gtLclVarCommon.gtLclNum == LclNum)
                // If the assignment is '=' and it is not a conditional, then return rhs.
                if (tree->gtOper == GT_ASG && !(tree->gtFlags & GTF_COLON_COND))
                    rhs = tree->gtOp.gtOp2;
                // If the assignment is 'op=' or a conditional equal, then the search ends here,
                // as we found a kill to the input local.
                    *pLhsRhsKilledAfterInit = true;
                    assert(rhs == nullptr);

                // Record any other assigned (tracked) local as killed in this range.
                LclVarDsc* varDsc = optIsTrackedLocal(tree->gtOp.gtOp1);
                if (varDsc == nullptr)
                VarSetOps::AddElemD(this, *pKilledInOut, varDsc->lvVarIndex);
    } while (stmt != list);

    // If any local in the RHS is killed in intervening code, or RHS has an indirection, return NULL.
    varRefKinds rhsRefs = VR_NONE;
    VARSET_TP VARSET_INIT_NOCOPY(rhsLocals, VarSetOps::UninitVal());
    bool b = lvaLclVarRefs(rhs, nullptr, &rhsRefs, &rhsLocals);
    if (!b || !VarSetOps::IsEmptyIntersection(this, rhsLocals, *pKilledInOut) || (rhsRefs != VR_NONE))
        // If RHS has been indirectly referenced, consider it a write and a kill.
        *pLhsRhsKilledAfterInit = true;
7436 /*****************************************************************************
7438 * Return true if "op1" is guaranteed to be less then or equal to "op2".
// Returns true if "op1 + add1" is guaranteed to be no more than "op2 + add2".
// Handles two constant operands directly; otherwise strips a "+ constant" off
// each operand and requires both to reduce to the SAME local variable (whose
// single-def property the caller guarantees), comparing the adjusted offsets.
bool Compiler::optIsNoMore(GenTreePtr op1, GenTreePtr op2, int add1, int add2)
    // Both constants: fold them into the adjustments and compare directly.
    if (op1->gtOper == GT_CNS_INT && op2->gtOper == GT_CNS_INT)
        add1 += op1->gtIntCon.gtIconVal;
        add2 += op2->gtIntCon.gtIconVal;

        /* Check for +/- constant on either operand */

        if (op1->gtOper == GT_ADD && op1->gtOp.gtOp2->gtOper == GT_CNS_INT)
            add1 += op1->gtOp.gtOp2->gtIntCon.gtIconVal;
            op1 = op1->gtOp.gtOp1;

        if (op2->gtOper == GT_ADD && op2->gtOp.gtOp2->gtOper == GT_CNS_INT)
            add2 += op2->gtOp.gtOp2->gtIntCon.gtIconVal;
            op2 = op2->gtOp.gtOp1;

        /* We only allow local variable references */

        if (op1->gtOper != GT_LCL_VAR)
        if (op2->gtOper != GT_LCL_VAR)
        if (op1->gtLclVarCommon.gtLclNum != op2->gtLclVarCommon.gtLclNum)

        /* NOTE: Caller ensures that this variable has only one def */

        // printf("limit [%d]:\n", add1); gtDispTree(op1);
        // printf("size  [%d]:\n", add2); gtDispTree(op2);

    return (bool)(add1 <= add2);
7487 //------------------------------------------------------------------------------
7488 // optObtainLoopCloningOpts: Identify optimization candidates and update
7489 // the "context" for array optimizations.
7492 // context - data structure where all loop cloning info is kept. The
7493 // optInfo fields of the context are updated with the
7494 // identified optimization candidates.
// For each loop in the loop table that is clonable and not removed, identifies
// loop-cloning optimization candidates and records them in 'context'.
// (See the header comment above for the 'context' parameter.)
void Compiler::optObtainLoopCloningOpts(LoopCloneContext* context)
    for (unsigned i = 0; i < optLoopCount; i++)
        JITDUMP("Considering loop %d to clone for optimizations.\n", i);
        if (optIsLoopClonable(i))
            // Removed loops are never candidates.
            if (!(optLoopTable[i].lpFlags & LPFLG_REMOVED))
                optIdentifyLoopOptInfo(i, context);
        JITDUMP("------------------------------------------------------------\n");
7513 //------------------------------------------------------------------------
7514 // optIdentifyLoopOptInfo: Identify loop optimization candidates an also
7515 // check if the loop is suitable for the optimizations performed.
7518 // loopNum - the current loop index for which conditions are derived.
7519 // context - data structure where all loop cloning candidates will be
7523 // If the loop is not suitable for the optimizations, return false - context
7524 // should not contain any optimization candidate for the loop if false.
7525 // Else return true.
7528 // Check if the loop is well formed for this optimization and identify the
7529 // optimization candidates and update the "context" parameter with all the
7530 // contextual information necessary to perform the optimization later.
// Checks that loop 'loopNum' is well-formed for loop cloning (see the header
// comment above) and, if so, walks its blocks collecting optimization
// candidates into 'context' via optCanOptimizeByLoopCloningVisitor.
// Each early-out below rejects the loop with a JITDUMP explaining why.
bool Compiler::optIdentifyLoopOptInfo(unsigned loopNum, LoopCloneContext* context)
    noway_assert(loopNum < optLoopCount);

    LoopDsc* pLoop = &optLoopTable[loopNum];

    // The loop must have a recognized "for-like" iterator.
    if (!(pLoop->lpFlags & LPFLG_ITER))
        JITDUMP("> No iter flag on loop %d.\n", loopNum);

    unsigned ivLclNum = pLoop->lpIterVar();
    // An address-exposed iteration variable could be modified through memory.
    if (lvaVarAddrExposed(ivLclNum))
        JITDUMP("> Rejected V%02u as iter var because is address-exposed.\n", ivLclNum);

    BasicBlock* head = pLoop->lpHead;
    BasicBlock* end  = pLoop->lpBottom;
    BasicBlock* beg  = head->bbNext;

    // The bottom block must end with the loop's conditional back-edge.
    if (end->bbJumpKind != BBJ_COND)
        JITDUMP("> Couldn't find termination test.\n");

    if (end->bbJumpDest != beg)
        JITDUMP("> Branch at loop 'end' not looping to 'begin'.\n");

    // TODO-CQ: CLONE: Mark increasing or decreasing loops.
    // Only "i += 1" style iterators are currently handled.
    if ((pLoop->lpIterOper() != GT_ASG_ADD && pLoop->lpIterOper() != GT_ADD) || (pLoop->lpIterConst() != 1))
        JITDUMP("> Loop iteration operator not matching\n");

    if ((pLoop->lpFlags & LPFLG_CONST_LIMIT) == 0 && (pLoop->lpFlags & LPFLG_VAR_LIMIT) == 0 &&
        (pLoop->lpFlags & LPFLG_ARRLEN_LIMIT) == 0)
        JITDUMP("> Loop limit is neither constant, variable or array length\n");

    // The test direction (< / <= vs > / >=) must agree with the iterator
    // direction (add vs sub).
    if (!(((pLoop->lpTestOper() == GT_LT || pLoop->lpTestOper() == GT_LE) &&
           (pLoop->lpIterOper() == GT_ADD || pLoop->lpIterOper() == GT_ASG_ADD)) ||
          ((pLoop->lpTestOper() == GT_GT || pLoop->lpTestOper() == GT_GE) &&
           (pLoop->lpIterOper() == GT_SUB || pLoop->lpIterOper() == GT_ASG_SUB))))
        JITDUMP("> Loop test (%s) doesn't agree with the direction (%s) of the pLoop->\n",
                GenTree::NodeName(pLoop->lpTestOper()), GenTree::NodeName(pLoop->lpIterOper()));

    // Require a zero-trip-test (inverted) loop so the entry is guarded.
    if (!(pLoop->lpTestTree->OperKind() & GTK_RELOP) || !(pLoop->lpTestTree->gtFlags & GTF_RELOP_ZTT))
        JITDUMP("> Loop inversion NOT present, loop test [%06u] may not protect entry from head.\n",
                pLoop->lpTestTree->gtTreeID);

    GenTreePtr op1 = pLoop->lpIterator();
    noway_assert((op1->gtOper == GT_LCL_VAR) && (op1->gtLclVarCommon.gtLclNum == ivLclNum));

    JITDUMP("Checking blocks BB%02d..BB%02d for optimization candidates\n", beg->bbNum,
            end->bbNext ? end->bbNext->bbNum : 0);

    // Walk every tree in the loop body looking for cloning candidates.
    LoopCloneVisitorInfo info(context, loopNum, nullptr);
    for (BasicBlock* block = beg; block != end->bbNext; block = block->bbNext)
        for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
            fgWalkTreePre(&stmt->gtStmt.gtStmtExpr, optCanOptimizeByLoopCloningVisitor, &info, false, false);
7620 //---------------------------------------------------------------------------------------------------------------
7621 // optExtractArrIndex: Try to extract the array index from "tree".
7624 // tree the tree to be checked if it is the array [] operation.
7625 // result the extracted GT_INDEX information is updated in result.
7626 // lhsNum for the root level (function is recursive) callers should be BAD_VAR_NUM.
7629 // Returns true if array index can be extracted, else, return false. See assumption about
7630 // what will be extracted. The "result" variable's rank parameter is advanced for every
7631 // dimension of [] encountered.
7634 // Given a "tree" extract the GT_INDEX node in "result" as ArrIndex. In FlowGraph morph
7635 // we have converted a GT_INDEX tree into a scaled index base offset expression. We need
7636 // to reconstruct this to be able to know if this is an array access.
7639 // The method extracts only if the array base and indices are GT_LCL_VAR.
7641 // TODO-CQ: CLONE: After morph make sure this method extracts values before morph.
7643 // [000000001AF828D8] ---XG------- indir int
7644 // [000000001AF872C8] ------------ const long 16 Fseq[#FirstElem]
7645 // [000000001AF87340] ------------ + byref
7646 // [000000001AF87160] -------N---- const long 2
7647 // [000000001AF871D8] ------------ << long
7648 // [000000001AF870C0] ------------ cast long <- int
7649 // [000000001AF86F30] i----------- lclVar int V04 loc0
7650 // [000000001AF87250] ------------ + byref
7651 // [000000001AF86EB8] ------------ lclVar ref V01 arg1
7652 // [000000001AF87468] ---XG------- comma int
7653 // [000000001AF87020] ---X-------- arrBndsChk void
7654 // [000000001AF86FA8] ---X-------- arrLen int
7655 // [000000001AF827E8] ------------ lclVar ref V01 arg1
7656 // [000000001AF82860] ------------ lclVar int V04 loc0
7657 // [000000001AF829F0] -A-XG------- = int
7658 // [000000001AF82978] D------N---- lclVar int V06 tmp0
// Tries to recognize the morphed single-dimension array access pattern shown in
// the header comment above — COMMA(ARR_BOUNDS_CHECK(arrLcl, indLcl),
// IND(arrLcl + (indLcl << scale) + offset)) — and, on success, records the
// array/index locals and the bounds-check tree into 'result'. Returns false as
// soon as any sub-tree deviates from the expected shape.
bool Compiler::optExtractArrIndex(GenTreePtr tree, ArrIndex* result, unsigned lhsNum)
    if (tree->gtOper != GT_COMMA)
    GenTreePtr before = tree->gtGetOp1();
    if (before->gtOper != GT_ARR_BOUNDS_CHECK)

    // Both the array and the index of the bounds check must be plain locals.
    GenTreeBoundsChk* arrBndsChk = before->AsBoundsChk();
    if (arrBndsChk->gtArrLen->gtGetOp1()->gtOper != GT_LCL_VAR)
    if (arrBndsChk->gtIndex->gtOper != GT_LCL_VAR)
    unsigned arrLcl = arrBndsChk->gtArrLen->gtGetOp1()->gtLclVarCommon.gtLclNum;
    // In the recursive (inner-dimension) case, the array must be the LHS local
    // produced by the outer dimension.
    if (lhsNum != BAD_VAR_NUM && arrLcl != lhsNum)

    unsigned indLcl = arrBndsChk->gtIndex->gtLclVarCommon.gtLclNum;

    GenTreePtr after = tree->gtGetOp2();

    if (after->gtOper != GT_IND)
    // It used to be the case that arrBndsChks for struct types would fail the previous check because
    // after->gtOper was an address (for a block op). In order to avoid asmDiffs we will for now
    // return false if the type of 'after' is a struct type. (This was causing us to clone loops
    // that we were not previously cloning.)
    // TODO-1stClassStructs: Remove this check to enable optimization of array bounds checks for struct
    if (varTypeIsStruct(after))

    // Decompose the address: (base + (index << scale)) + offset.
    GenTreePtr sibo = after->gtGetOp1();
    if (sibo->gtOper != GT_ADD)
    GenTreePtr sib = sibo->gtGetOp1();
    GenTreePtr ofs = sibo->gtGetOp2();
    if (ofs->gtOper != GT_CNS_INT)
    if (sib->gtOper != GT_ADD)
    GenTreePtr si   = sib->gtGetOp2();
    GenTreePtr base = sib->gtGetOp1();
    if (si->gtOper != GT_LSH)
    // The base must be the same array local checked by the bounds check.
    if (base->OperGet() != GT_LCL_VAR || base->gtLclVarCommon.gtLclNum != arrLcl)
    GenTreePtr scale = si->gtGetOp2();
    GenTreePtr index = si->gtGetOp1();
    if (scale->gtOper != GT_CNS_INT)
#ifdef _TARGET_AMD64_
    // On AMD64 the int index is widened with a cast; look through it.
    if (index->gtOper != GT_CAST)
    GenTreePtr indexVar = index->gtGetOp1();
    GenTreePtr indexVar = index;
    // The index must be the same local checked by the bounds check.
    if (indexVar->gtOper != GT_LCL_VAR || indexVar->gtLclVarCommon.gtLclNum != indLcl)

    // Success: record this dimension (arrLcl only at the outermost level).
    if (lhsNum == BAD_VAR_NUM)
        result->arrLcl = arrLcl;
    result->indLcls.Push(indLcl);
    result->bndsChks.Push(tree);
    result->useBlock = compCurBB;
7761 //---------------------------------------------------------------------------------------------------------------
7762 // optReconstructArrIndex: Reconstruct array index.
7765 // tree the tree to be checked if it is an array [][][] operation.
7766 // result the extracted GT_INDEX information.
7767 // lhsNum for the root level (function is recursive) callers should be BAD_VAR_NUM.
7770 // Returns true if array index can be extracted, else, return false. "rank" field in
7771 // "result" contains the array access depth. The "indLcls" fields contain the indices.
7774 // Recursively look for a list of array indices. In the example below, we encounter,
7775 // V03 = ((V05 = V00[V01]), (V05[V02])) which corresponds to access of V00[V01][V02]
7776 // The return value would then be:
7777 // ArrIndex result { arrLcl: V00, indLcls: [V01, V02], rank: 2 }
7779 // V00[V01][V02] would be morphed as:
7781 // [000000001B366848] ---XG------- indir int
7782 // [000000001B36BC50] ------------ V05 + (V02 << 2) + 16
7783 // [000000001B36C200] ---XG------- comma int
7784 // [000000001B36BDB8] ---X-------- arrBndsChk(V05, V02)
7785 // [000000001B36C278] -A-XG------- comma int
7786 // [000000001B366730] R--XG------- indir ref
7787 // [000000001B36C2F0] ------------ V00 + (V01 << 3) + 24
7788 // [000000001B36C818] ---XG------- comma ref
7789 // [000000001B36C458] ---X-------- arrBndsChk(V00, V01)
7790 // [000000001B36BB60] -A-XG------- = ref
7791 // [000000001B36BAE8] D------N---- lclVar ref V05 tmp2
7792 // [000000001B36A668] -A-XG------- = int
7793 // [000000001B36A5F0] D------N---- lclVar int V03 tmp0
7796 // The method extracts only if the array base and indices are GT_LCL_VAR.
// Recursively reconstructs a (possibly multi-dimensional / jagged) array access
// from a morphed comma tree; see the header comment above for the shape and the
// contents of 'result'. 'lhsNum' threads the temp holding the previous
// dimension's array reference (BAD_VAR_NUM at the root).
bool Compiler::optReconstructArrIndex(GenTreePtr tree, ArrIndex* result, unsigned lhsNum)
    // If we can extract "tree" (which is a top level comma) return.
    if (optExtractArrIndex(tree, result, lhsNum))
    // We have a comma (check if array base expr is computed in "before"), descend further.
    else if (tree->OperGet() == GT_COMMA)
        GenTreePtr before = tree->gtGetOp1();
        // "before" should evaluate an array base for the "after" indexing.
        if (before->OperGet() != GT_ASG)
        GenTreePtr lhs = before->gtGetOp1();
        GenTreePtr rhs = before->gtGetOp2();

        // "rhs" should contain an GT_INDEX
        if (!lhs->IsLocal() || !optReconstructArrIndex(rhs, result, lhsNum))
        // The temp assigned here becomes the array base for the next dimension.
        unsigned lhsNum = lhs->gtLclVarCommon.gtLclNum;
        GenTreePtr after = tree->gtGetOp2();
        // Pass the "lhsNum", so we can verify if indeed it is used as the array base.
        return optExtractArrIndex(after, result, lhsNum);
// fgWalkTreePre callback: forwards each visited tree to
// optCanOptimizeByLoopCloning, passing along the LoopCloneVisitorInfo
// carried in the walk's pCallbackData.
Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloningVisitor(GenTreePtr* pTree, Compiler::fgWalkData* data)
    return data->compiler->optCanOptimizeByLoopCloning(*pTree, (LoopCloneVisitorInfo*)data->pCallbackData);
7836 //-------------------------------------------------------------------------
7837 // optIsStackLocalInvariant: Is stack local invariant in loop.
7840 // loopNum The loop in which the variable is tested for invariance.
7841 // lclNum The local that is tested for invariance in the loop.
7844 // Returns true if the variable is loop invariant in loopNum.
// Returns true if local 'lclNum' is invariant in loop 'loopNum': it must not
// be address-exposed (could be written through memory) and must not be
// assigned anywhere in the loop.
bool Compiler::optIsStackLocalInvariant(unsigned loopNum, unsigned lclNum)
    if (lvaVarAddrExposed(lclNum))

    if (optIsVarAssgLoop(loopNum, lclNum))
7859 //----------------------------------------------------------------------------------------------
7860 // optCanOptimizeByLoopCloning: Check if the tree can be optimized by loop cloning and if so,
7861 // identify as potential candidate and update the loop context.
7864 // tree The tree encountered during the tree walk.
7865 // info Supplies information about the current block or stmt in which the tree is.
7866 // Also supplies the "context" pointer for updating with loop cloning
7867 // candidates. Also supplies loopNum.
7870 // If array index can be reconstructed, check if the iter var of the loop matches the
7871 // array index var in some dim. Also ensure other index vars before the identified
7872 // dim are loop invariant.
7875 // Skip sub trees if the optimization candidate is identified or else continue walking
// Tree-walk worker for loop cloning (see the header comment above): if 'tree'
// is a reconstructible jagged-array access whose base is loop invariant and
// whose index at some dimension is the loop's iteration variable (with all
// prior dimensions' indices loop invariant), records a LcJaggedArrayOptInfo
// candidate in the clone context.
Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTreePtr tree, LoopCloneVisitorInfo* info)
    ArrIndex arrIndex(getAllocator());

    // Check if array index can be optimized.
    if (optReconstructArrIndex(tree, &arrIndex, BAD_VAR_NUM))
        assert(tree->gtOper == GT_COMMA);
            JITDUMP("Found ArrIndex at tree ");
            printf(" which is equivalent to: ");
        // The array base itself must be loop invariant.
        if (!optIsStackLocalInvariant(info->loopNum, arrIndex.arrLcl))
            return WALK_SKIP_SUBTREES;

        // Walk the dimensions and see if iterVar of the loop is used as index.
        for (unsigned dim = 0; dim < arrIndex.rank; ++dim)
            // Is index variable also used as the loop iter var.
            if (arrIndex.indLcls[dim] == optLoopTable[info->loopNum].lpIterVar())
                // Check the previous indices are all loop invariant.
                for (unsigned dim2 = 0; dim2 < dim; ++dim2)
                    if (optIsVarAssgLoop(info->loopNum, arrIndex.indLcls[dim2]))
                        JITDUMP("V%02d is assigned in loop\n", arrIndex.indLcls[dim2]);
                        return WALK_SKIP_SUBTREES;
                    JITDUMP("Loop %d can be cloned for ArrIndex ", info->loopNum);
                    JITDUMP(" on dim %d\n", dim);
                // Update the loop context.
                info->context->EnsureLoopOptInfo(info->loopNum)
                    ->Push(new (this, CMK_LoopOpt) LcJaggedArrayOptInfo(arrIndex, dim, info->stmt));
                JITDUMP("Induction V%02d is not used as index on dim %d\n", optLoopTable[info->loopNum].lpIterVar(),
        return WALK_SKIP_SUBTREES;
    else if (tree->gtOper == GT_ARR_ELEM)
        // TODO-CQ: CLONE: Implement.
        return WALK_SKIP_SUBTREES;
    return WALK_CONTINUE;
// optRangeCheckDsc: callback data for the optValidRangeCheckIndex tree walk. Carries
// the compiler instance (for lvaTable access) and the running verdict on whether the
// index expression is safe for range-check removal.
7943 struct optRangeCheckDsc
7945 Compiler* pCompiler;
7949 Walk to make sure that only locals and constants are contained in the index
7952 Compiler::fgWalkResult Compiler::optValidRangeCheckIndex(GenTreePtr* pTree, fgWalkData* data)
7954 GenTreePtr tree = *pTree;
7955 optRangeCheckDsc* pData = (optRangeCheckDsc*)data->pCallbackData;
// Any indirection, class variable, field, or local field read could be changed by
// another thread between the check and the access, so the index is rejected.
7957 if (tree->gtOper == GT_IND || tree->gtOper == GT_CLS_VAR || tree->gtOper == GT_FIELD || tree->gtOper == GT_LCL_FLD)
7959 pData->bValidIndex = false;
7963 if (tree->gtOper == GT_LCL_VAR)
// An address-exposed local can be written through an alias; treat it as unsafe too.
7965 if (pData->pCompiler->lvaTable[tree->gtLclVarCommon.gtLclNum].lvAddrExposed)
7967 pData->bValidIndex = false;
7972 return WALK_CONTINUE;
7976 returns true if a range check can legally be removed (for the moment it checks
7977 that the array is a local array (not subject to race conditions) and that the
7978 index is either a constant or a local
// optIsRangeCheckRemovable: returns true when the given GT_ARR_BOUNDS_CHECK may
// legally be elided. Requires either a constant array length (pArray == nullptr case)
// or an array reference that is a non-address-exposed local, and an index expression
// containing only safe locals/constants (verified by the optValidRangeCheckIndex walk).
7980 bool Compiler::optIsRangeCheckRemovable(GenTreePtr tree)
7982 noway_assert(tree->gtOper == GT_ARR_BOUNDS_CHECK);
7983 GenTreeBoundsChk* bndsChk = tree->AsBoundsChk();
7984 GenTreePtr pArray = bndsChk->GetArray();
// No array node and a non-constant length: nothing we can reason about safely.
7985 if (pArray == nullptr && !bndsChk->gtArrLen->IsCnsIntOrI())
7989 GenTreePtr pIndex = bndsChk->gtIndex;
7991 // The length must be a constant (the pArray == NULL case) or the array reference must be a local.
7992 // Otherwise we can be targeted by malicious race-conditions.
7993 if (pArray != nullptr)
7995 if (pArray->gtOper != GT_LCL_VAR)
8001 printf("Can't remove range check if the array isn't referenced with a local\n");
8009 noway_assert(pArray->gtType == TYP_REF);
8010 noway_assert(pArray->gtLclVarCommon.gtLclNum < lvaCount);
8012 if (lvaTable[pArray->gtLclVarCommon.gtLclNum].lvAddrExposed)
8014 // If the array address has been taken, don't do the optimization
8015 // (this restriction can be lowered a bit, but i don't think it's worth it)
8016 CLANG_FORMAT_COMMENT_ANCHOR;
8020 printf("Can't remove range check if the array has its address taken\n");
// Walk the index expression and reject anything other than safe locals/constants.
8029 optRangeCheckDsc Data;
8030 Data.pCompiler = this;
8031 Data.bValidIndex = true;
8033 fgWalkTreePre(&pIndex, optValidRangeCheckIndex, &Data);
8035 if (!Data.bValidIndex)
8040 printf("Can't remove range check with this index");
8051 /******************************************************************************
8053 * Replace x==null with (x|x)==0 if x is a GC-type.
8054 * This will stress code-gen and the emitter to make sure they support such trees.
8059 void Compiler::optOptimizeBoolsGcStress(BasicBlock* condBlock)
// Only transform a fraction of eligible blocks (20% stress weighting).
8061 if (!compStressCompile(STRESS_OPT_BOOLS_GC, 20))
8066 noway_assert(condBlock->bbJumpKind == BBJ_COND);
8067 GenTreePtr condStmt = condBlock->bbTreeList->gtPrev->gtStmt.gtStmtExpr;
8069 noway_assert(condStmt->gtOper == GT_JTRUE);
8074 GenTreePtr comparand = optIsBoolCond(condStmt, &relop, &isBool);
// Only GC-typed comparands are interesting for this stress mode.
8076 if (comparand == nullptr || !varTypeIsGC(comparand->TypeGet()))
// The comparand will be evaluated twice below, so it must be side-effect free.
8081 if (comparand->gtFlags & (GTF_ASG | GTF_CALL | GTF_ORDER_SIDEEFF))
8086 GenTreePtr comparandClone = gtCloneExpr(comparand);
8088 // Bump up the ref-counts of any variables in 'comparandClone'
8089 compCurBB = condBlock;
8090 fgWalkTreePre(&comparandClone, Compiler::lvaIncRefCntsCB, (void*)this, true);
// Rewrite "x ==/!= null" as "(x|x) ==/!= 0" (GT_OR or GT_AND picked by a second
// stress roll) to exercise codegen/emitter support for such trees.
8092 noway_assert(relop->gtOp.gtOp1 == comparand);
8093 genTreeOps oper = compStressCompile(STRESS_OPT_BOOLS_GC, 50) ? GT_OR : GT_AND;
8094 relop->gtOp.gtOp1 = gtNewOperNode(oper, TYP_I_IMPL, comparand, comparandClone);
8096 // Comparand type is already checked, and we have const int, there is no harm
8097 // morphing it into a TYP_I_IMPL.
8098 noway_assert(relop->gtOp.gtOp2->gtOper == GT_CNS_INT);
8099 relop->gtOp.gtOp2->gtType = TYP_I_IMPL;
8104 /******************************************************************************
8105 * Function used by folding of boolean conditionals
8106 * Given a GT_JTRUE node, checks that it is a boolean comparison of the form
8107 * "if (boolVal ==/!= 0/1)". This is translated into a GT_EQ node with "op1"
8108 * being a boolean lclVar and "op2" the const 0/1.
8109 * On success, the comparand (ie. boolVal) is returned. Else NULL.
8110 * compPtr returns the compare node (i.e. GT_EQ or GT_NE node)
8111 * boolPtr returns whether the comparand is a boolean value (must be 0 or 1).
8112 * When return boolPtr equal to true, if the comparison was against a 1 (i.e true)
8113 * value then we morph the tree by reversing the GT_EQ/GT_NE and change the 1 to 0.
8116 GenTree* Compiler::optIsBoolCond(GenTree* condBranch, GenTree** compPtr, bool* boolPtr)
8118 bool isBool = false;
8120 noway_assert(condBranch->gtOper == GT_JTRUE);
8121 GenTree* cond = condBranch->gtOp.gtOp1;
8123 /* The condition must be "!= 0" or "== 0" */
8125 if ((cond->gtOper != GT_EQ) && (cond->gtOper != GT_NE))
8130 /* Return the compare node to the caller */
8134 /* Get hold of the comparands */
8136 GenTree* opr1 = cond->gtOp.gtOp1;
8137 GenTree* opr2 = cond->gtOp.gtOp2;
// The right-hand operand must be the integer constant 0 or 1.
8139 if (opr2->gtOper != GT_CNS_INT)
8144 if (!opr2->IsIntegralConst(0) && !opr2->IsIntegralConst(1))
8149 ssize_t ival2 = opr2->gtIntCon.gtIconVal;
8151 /* Is the value a boolean?
8152 * We can either have a boolean expression (marked GTF_BOOLEAN) or
8153 * a local variable that is marked as being boolean (lvIsBoolean) */
8155 if (opr1->gtFlags & GTF_BOOLEAN)
8159 else if ((opr1->gtOper == GT_CNS_INT) && (opr1->IsIntegralConst(0) || opr1->IsIntegralConst(1)))
8163 else if (opr1->gtOper == GT_LCL_VAR)
8165 /* is it a boolean local variable */
8167 unsigned lclNum = opr1->gtLclVarCommon.gtLclNum;
8168 noway_assert(lclNum < lvaCount);
8170 if (lvaTable[lclNum].lvIsBoolean)
8176 /* Was our comparison against the constant 1 (i.e. true) */
8179 // If this is a boolean expression tree we can reverse the relop
8180 // and change the true to false.
// Normalize "boolVal ==/!= 1" to "boolVal !=/== 0" so callers only see compares with 0.
8183 gtReverseCond(cond);
8184 opr2->gtIntCon.gtIconVal = 0;
// optOptimizeBools: folds pairs of adjacent BBJ_COND blocks whose conditions are
// boolean tests against 0/1 (per optIsBoolCond) into a single conditional branch
// using an AND/OR of the two comparands, then removes the second block and merges
// its flow edges into the first.
8196 void Compiler::optOptimizeBools()
8201 printf("*************** In optOptimizeBools()\n");
8204 printf("Blocks/Trees before phase\n");
8205 fgDispBasicBlocks(true);
8215 for (BasicBlock* b1 = fgFirstBB; b1; b1 = b1->bbNext)
8217 /* We're only interested in conditional jumps here */
8219 if (b1->bbJumpKind != BBJ_COND)
8224 /* If there is no next block, we're done */
8226 BasicBlock* b2 = b1->bbNext;
8232 /* The next block must not be marked as BBF_DONT_REMOVE */
8233 if (b2->bbFlags & BBF_DONT_REMOVE)
8238 /* The next block also needs to be a condition */
8240 if (b2->bbJumpKind != BBJ_COND)
8243 optOptimizeBoolsGcStress(b1);
8248 bool sameTarget; // Do b1 and b2 have the same bbJumpDest?
8250 if (b1->bbJumpDest == b2->bbJumpDest)
8252 /* Given the following sequence of blocks :
8256 we will try to fold it to :
8257 B1: brtrue(t1|t2, BX)
8263 else if (b1->bbJumpDest == b2->bbNext) /*b1->bbJumpDest->bbNum == n1+2*/
8265 /* Given the following sequence of blocks :
8269 we will try to fold it to :
8270 B1: brtrue((!t1)&&t2, B3)
8281 /* The second block must contain a single statement */
8283 GenTreePtr s2 = b2->bbTreeList;
8284 if (s2->gtPrev != s2)
8289 noway_assert(s2->gtOper == GT_STMT);
8290 GenTreePtr t2 = s2->gtStmt.gtStmtExpr;
8291 noway_assert(t2->gtOper == GT_JTRUE);
8293 /* Find the condition for the first block */
8295 GenTreePtr s1 = b1->bbTreeList->gtPrev;
8297 noway_assert(s1->gtOper == GT_STMT);
8298 GenTreePtr t1 = s1->gtStmt.gtStmtExpr;
8299 noway_assert(t1->gtOper == GT_JTRUE);
// b2 must be reachable only from b1; other predecessors would still need b2's test.
8301 if (b2->countOfInEdges() > 1)
8306 /* Find the branch conditions of b1 and b2 */
8310 GenTreePtr c1 = optIsBoolCond(t1, &t1, &bool1);
8316 GenTreePtr c2 = optIsBoolCond(t2, &t2, &bool2);
// NOTE(review): '&&' binds tighter than '||', so these asserts check
// "GT_EQ || (GT_NE && gtOp1 == cN)" -- the gtOp1 check is skipped in the GT_EQ
// case. The intent is presumably ((GT_EQ || GT_NE) && gtOp1 == cN); confirm and
// parenthesize before relying on these asserts.
8322 noway_assert(t1->gtOper == GT_EQ || t1->gtOper == GT_NE && t1->gtOp.gtOp1 == c1);
8323 noway_assert(t2->gtOper == GT_EQ || t2->gtOper == GT_NE && t2->gtOp.gtOp1 == c2);
8325 // Leave out floats where the bit-representation is more complicated
8326 // - there are two representations for 0.
8328 if (varTypeIsFloating(c1->TypeGet()) || varTypeIsFloating(c2->TypeGet()))
8333 // Make sure the types involved are of the same sizes
8334 if (genTypeSize(c1->TypeGet()) != genTypeSize(c2->TypeGet()))
8338 if (genTypeSize(t1->TypeGet()) != genTypeSize(t2->TypeGet()))
8342 #ifdef _TARGET_ARMARCH_
8343 // Skip the small operand which we cannot encode.
8344 if (varTypeIsSmall(c1->TypeGet()))
8347 /* The second condition must not contain side effects */
8349 if (c2->gtFlags & GTF_GLOB_EFFECT)
8354 /* The second condition must not be too expensive */
8358 if (c2->gtCostEx > 12)
8365 var_types foldType = c1->TypeGet();
8366 if (varTypeIsGC(foldType))
8368 foldType = TYP_I_IMPL;
8373 /* Both conditions must be the same */
8375 if (t1->gtOper != t2->gtOper)
8380 if (t1->gtOper == GT_EQ)
8382 /* t1:c1==0 t2:c2==0 ==> Branch to BX if either value is 0
8383 So we will branch to BX if (c1&c2)==0 */
8390 /* t1:c1!=0 t2:c2!=0 ==> Branch to BX if either value is non-0
8391 So we will branch to BX if (c1|c2)!=0 */
8399 /* The b1 condition must be the reverse of the b2 condition */
8401 if (t1->gtOper == t2->gtOper)
8406 if (t1->gtOper == GT_EQ)
8408 /* t1:c1==0 t2:c2!=0 ==> Branch to BX if both values are non-0
8409 So we will branch to BX if (c1&c2)!=0 */
8416 /* t1:c1!=0 t2:c2==0 ==> Branch to BX if both values are 0
8417 So we will branch to BX if (c1|c2)==0 */
8424 // Anding requires both values to be 0 or 1
8426 if ((foldOp == GT_AND) && (!bool1 || !bool2))
8432 // Now update the trees
8434 GenTreePtr cmpOp1 = gtNewOperNode(foldOp, foldType, c1, c2);
8437 /* When we 'OR'/'AND' two booleans, the result is boolean as well */
8438 cmpOp1->gtFlags |= GTF_BOOLEAN;
8442 t1->gtOp.gtOp1 = cmpOp1;
8443 t1->gtOp.gtOp2->gtType = foldType; // Could have been varTypeIsGC()
8445 #if FEATURE_SET_FLAGS
8446 // For comparisons against zero we will have the GTF_SET_FLAGS set
8447 // and this can cause an assert to fire in fgMoveOpsLeft(GenTreePtr tree)
8448 // during the CSE phase.
8450 // So make sure to clear any GTF_SET_FLAGS bit on these operations
8451 // as they are no longer feeding directly into a comparison against zero
8453 // Make sure that the GTF_SET_FLAGS bit is cleared.
8454 // Fix 388436 ARM JitStress WP7
8455 c1->gtFlags &= ~GTF_SET_FLAGS;
8456 c2->gtFlags &= ~GTF_SET_FLAGS;
8458 // The new top level node that we just created does feed directly into
8459 // a comparison against zero, so set the GTF_SET_FLAGS bit so that
8460 // we generate an instruction that sets the flags, which allows us
8461 // to omit the cmp with zero instruction.
8463 // Request that the codegen for cmpOp1 sets the condition flags
8464 // when it generates the code for cmpOp1.
8466 cmpOp1->gtRequestSetFlags();
8469 flowList* edge1 = fgGetPredForBlock(b1->bbJumpDest, b1);
8472 /* Modify the target of the conditional jump and update bbRefs and bbPreds */
8476 edge2 = fgGetPredForBlock(b2->bbJumpDest, b2);
8480 edge2 = fgGetPredForBlock(b2->bbNext, b2);
8482 fgRemoveRefPred(b1->bbJumpDest, b1);
8484 b1->bbJumpDest = b2->bbJumpDest;
8486 fgAddRefPred(b2->bbJumpDest, b1);
8489 noway_assert(edge1 != nullptr);
8490 noway_assert(edge2 != nullptr);
// Merge the two edges' weight ranges, guarding against overflow of the max sum.
8492 BasicBlock::weight_t edgeSumMin = edge1->flEdgeWeightMin + edge2->flEdgeWeightMin;
8493 BasicBlock::weight_t edgeSumMax = edge1->flEdgeWeightMax + edge2->flEdgeWeightMax;
8494 if ((edgeSumMax >= edge1->flEdgeWeightMax) && (edgeSumMax >= edge2->flEdgeWeightMax))
8496 edge1->flEdgeWeightMin = edgeSumMin;
8497 edge1->flEdgeWeightMax = edgeSumMax;
8501 edge1->flEdgeWeightMin = BB_ZERO_WEIGHT;
8502 edge1->flEdgeWeightMax = BB_MAX_WEIGHT;
8505 /* Get rid of the second block (which is a BBJ_COND) */
8507 noway_assert(b1->bbJumpKind == BBJ_COND);
8508 noway_assert(b2->bbJumpKind == BBJ_COND);
8509 noway_assert(b1->bbJumpDest == b2->bbJumpDest);
8510 noway_assert(b1->bbNext == b2);
8511 noway_assert(b2->bbNext);
8514 b2->bbFlags |= BBF_REMOVED;
8516 // If b2 was the last block of a try or handler, update the EH table.
8518 ehUpdateForDeletedBlock(b2);
8520 /* Update bbRefs and bbPreds */
8522 /* Replace pred 'b2' for 'b2->bbNext' with 'b1'
8523 * Remove pred 'b2' for 'b2->bbJumpDest' */
8525 fgReplacePred(b2->bbNext, b2, b1);
8527 fgRemoveRefPred(b2->bbJumpDest, b2);
8529 /* Update the block numbers and try again */
8541 // Update loop table
8542 fgUpdateLoopsAfterCompacting(b1, b2);
8547 printf("Folded %sboolean conditions of BB%02u and BB%02u to :\n", c2->OperIsLeaf() ? "" : "non-leaf ",
8548 b1->bbNum, b2->bbNum);
8557 fgDebugCheckBBlist();