1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
10 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
11 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
17 #pragma warning(disable : 4701)
20 /*****************************************************************************/
// Statistics counters for range-check elimination. NOTE(review): these are
// static Compiler members, so they accumulate across compilations — confirm
// they are only used for JIT statistics/dump output.
24 unsigned Compiler::optRangeChkRmv = 0;
26 unsigned Compiler::optRangeChkAll = 0;
29 /*****************************************************************************/
31 void Compiler::optInit()
// Reset the per-method optimizer state. Called before any optimization
// phase runs on a new method.
33 optLoopsMarked = false;
36 /* Initialize the # of tracked loops to 0 */
38 /* Keep track of the number of calls and indirect calls made by this method */
40 optIndirectCallCount = 0;
41 optNativeCallCount = 0;
// Assertion-prop starts empty; the dependency table is allocated lazily.
42 optAssertionCount = 0;
43 optAssertionDep = nullptr;
// CSE bookkeeping: UINT_MAX marks "CSE numbering not started yet".
45 optCSECandidateTotal = 0;
46 optCSEstart = UINT_MAX;
48 #endif // FEATURE_ANYCSE
51 DataFlow::DataFlow(Compiler* pCompiler) : m_pCompiler(pCompiler)
55 /*****************************************************************************
59 void Compiler::optSetBlockWeights()
// Adjust bbWeight for every block: blocks unreachable from the method entry
// are marked run-rarely, and — when we are NOT using profile data — blocks
// that do not dominate a BBJ_RETURN block get their weight halved.
// Requires dominators to be computed (asserted below).
61 noway_assert(!opts.MinOpts() && !opts.compDbgCode);
62 assert(fgDomsComputed);
68 bool firstBBdomsRets = true;
72 for (block = fgFirstBB; (block != nullptr); block = block->bbNext)
74 /* Blocks that can't be reached via the first block are rarely executed */
75 if (!fgReachable(fgFirstBB, block))
77 block->bbSetRunRarely();
80 if (block->bbWeight != BB_ZERO_WEIGHT)
82 // Calculate our bbWeight:
84 // o BB_UNITY_WEIGHT if we dominate all BBJ_RETURN blocks
85 // o otherwise BB_UNITY_WEIGHT / 2
87 bool domsRets = true; // Assume that we will dominate
89 for (BasicBlockList* retBlocks = fgReturnBlocks; retBlocks != nullptr; retBlocks = retBlocks->next)
91 if (!fgDominate(block, retBlocks->block))
// Remember whether the entry block itself dominates all returns; if it
// doesn't (unusual flow), we skip the halving heuristic for every block.
98 if (block == fgFirstBB)
100 firstBBdomsRets = domsRets;
103 // If we are not using profile weight then we lower the weight
104 // of blocks that do not dominate a return block
106 if (firstBBdomsRets && (fgIsUsingProfileWeights() == false) && (domsRets == false))
111 block->modifyBBWeight(block->bbWeight / 2);
// Halving a non-zero weight must never produce zero.
112 noway_assert(block->bbWeight);
118 if (changed && verbose)
120 printf("\nAfter optSetBlockWeights:\n");
125 /* Check that the flowgraph data (bbNum, bbRefs, bbPreds) is up-to-date */
126 fgDebugCheckBBlist();
130 /*****************************************************************************
132 * Marks the blocks between 'begBlk' and 'endBlk' as part of a loop.
135 void Compiler::optMarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk, bool excludeEndBlk)
// Scale up the bbWeight of every block in the lexical range [begBlk, endBlk]
// (optionally excluding endBlk) that is truly inside the loop, i.e. on a
// cycle through the loop head. Blocks with real profile weights are left
// unchanged; blocks that dominate (and reach) a backedge get the full
// BB_LOOP_WEIGHT multiplier, others get half.
137 /* Calculate the 'loopWeight',
138 this is the amount to increase each block in the loop
139 Our heuristic is that loops are weighted eight times more
140 than straight line code.
141 Thus we increase each block by 7 times the weight of
142 the loop header block,
143 if the loops are all properly formed gives us:
144 (assuming that BB_LOOP_WEIGHT is 8)
146 1 -- non loop basic block
147 8 -- single loop nesting
148 64 -- double loop nesting
149 512 -- triple loop nesting
153 noway_assert(begBlk->bbNum <= endBlk->bbNum);
154 noway_assert(begBlk->isLoopHead());
155 noway_assert(fgReachable(begBlk, endBlk));
160 printf("\nMarking loop L%02u", begBlk->bbLoopNum);
164 noway_assert(!opts.MinOpts());
166 /* Build list of backedges for block begBlk */
167 flowList* backedgeList = nullptr;
// A predecessor with bbNum >= begBlk->bbNum lies at or after the head in
// lexical order, so its edge into begBlk is treated as a backedge.
169 for (flowList* pred = begBlk->bbPreds; pred != nullptr; pred = pred->flNext)
171 /* Is this a backedge? */
172 if (pred->flBlock->bbNum >= begBlk->bbNum)
174 flowList* flow = new (this, CMK_FlowList) flowList();
176 #if MEASURE_BLOCK_SIZE
178 genFlowNodeSize += sizeof(flowList);
179 #endif // MEASURE_BLOCK_SIZE
181 flow->flNext = backedgeList;
182 flow->flBlock = pred->flBlock;
187 /* At least one backedge must have been found (the one from endBlk) */
188 noway_assert(backedgeList);
// Walk the lexical block range, weighting only blocks on a begBlk-cycle.
190 BasicBlock* curBlk = begBlk;
194 noway_assert(curBlk);
196 // For curBlk to be part of a loop that starts at begBlk
197 // curBlk must be reachable from begBlk and (since this is a loop)
198 // likewise begBlk must be reachable from curBlk.
201 if (fgReachable(curBlk, begBlk) && fgReachable(begBlk, curBlk))
203 /* If this block reaches any of the backedge blocks we set reachable */
204 /* If this block dominates any of the backedge blocks we set dominates */
205 bool reachable = false;
206 bool dominates = false;
208 for (flowList* tmp = backedgeList; tmp != nullptr; tmp = tmp->flNext)
210 BasicBlock* backedge = tmp->flBlock;
// Run-rarely blocks keep their (zero) weight; no need to query reachability.
212 if (!curBlk->isRunRarely())
214 reachable |= fgReachable(curBlk, backedge);
215 dominates |= fgDominate(curBlk, backedge);
// Both answers can only become true, so stop scanning backedges early.
217 if (dominates && reachable)
226 noway_assert(curBlk->bbWeight > BB_ZERO_WEIGHT);
230 if ((curBlk->bbFlags & BBF_PROF_WEIGHT) != 0)
232 // We have real profile weights, so we aren't going to change this blocks weight
233 weight = curBlk->bbWeight;
// Full multiplier when the block dominates a backedge (executed every
// iteration); half when it merely reaches one (conditionally executed).
239 weight = curBlk->bbWeight * BB_LOOP_WEIGHT;
243 weight = curBlk->bbWeight * (BB_LOOP_WEIGHT / 2);
247 // The multiplication may have caused us to overflow
249 if (weight < curBlk->bbWeight)
251 // The multiplication caused us to overflow
252 weight = BB_MAX_WEIGHT;
255 // Set the new weight
257 curBlk->modifyBBWeight(weight);
262 printf("\n BB%02u(wt=%s)", curBlk->bbNum, refCntWtd2str(curBlk->getBBWeight(this)));
268 /* Stop if we've reached the last block in the loop */
270 if (curBlk == endBlk)
275 curBlk = curBlk->bbNext;
277 /* If we are excluding the endBlk then stop if we've reached endBlk */
279 if (excludeEndBlk && (curBlk == endBlk))
286 /*****************************************************************************
288 * Unmark the blocks between 'begBlk' and 'endBlk' as part of a loop.
291 void Compiler::optUnmarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk)
// Inverse of optMarkLoopBlocks: divide the loop-weight multiplier back out
// of every block in [begBlk, endBlk] because the loop is being dissolved.
// Only done when the loop has exactly one remaining backedge — with more
// than one, other loops still keep these blocks hot.
293 /* A set of blocks that were previously marked as a loop are now
294 to be unmarked, since we have decided that for some reason this
295 loop no longer exists.
296 Basically we are just reseting the blocks bbWeight to their
300 noway_assert(begBlk->bbNum <= endBlk->bbNum);
301 noway_assert(begBlk->isLoopHead());
303 noway_assert(!opts.MinOpts());
306 unsigned backEdgeCount = 0;
308 for (flowList* pred = begBlk->bbPreds; pred != nullptr; pred = pred->flNext)
310 curBlk = pred->flBlock;
312 /* is this a backward edge? (from curBlk to begBlk) */
314 if (begBlk->bbNum > curBlk->bbNum)
319 /* We only consider back-edges that are BBJ_COND or BBJ_ALWAYS for loops */
321 if ((curBlk->bbJumpKind != BBJ_COND) && (curBlk->bbJumpKind != BBJ_ALWAYS))
329 /* Only unmark the loop blocks if we have exactly one loop back edge */
330 if (backEdgeCount != 1)
335 if (backEdgeCount > 0)
337 printf("\nNot removing loop L%02u, due to an additional back edge", begBlk->bbLoopNum);
339 else if (backEdgeCount == 0)
341 printf("\nNot removing loop L%02u, due to no back edge", begBlk->bbLoopNum);
347 noway_assert(backEdgeCount == 1);
348 noway_assert(fgReachable(begBlk, endBlk));
353 printf("\nUnmarking loop L%02u", begBlk->bbLoopNum);
360 noway_assert(curBlk);
362 // For curBlk to be part of a loop that starts at begBlk
363 // curBlk must be reachable from begBlk and (since this is a loop)
364 // likewise begBlk must be reachable from curBlk.
366 if (!curBlk->isRunRarely() && fgReachable(curBlk, begBlk) && fgReachable(begBlk, curBlk))
368 unsigned weight = curBlk->bbWeight;
370 // Don't unmark blocks that are set to BB_MAX_WEIGHT
371 // Don't unmark blocks when we are using profile weights
373 if (!curBlk->isMaxBBWeight() && ((curBlk->bbFlags & BBF_PROF_WEIGHT) == 0))
// Blocks that don't dominate the loop end only received half the
// multiplier when marked, so they are scaled back differently.
375 if (!fgDominate(curBlk, endBlk))
381 /* Merging of blocks can disturb the Dominates
382 information (see RAID #46649) */
383 if (weight < BB_LOOP_WEIGHT)
389 // We can overflow here so check for it
390 if (weight < curBlk->bbWeight)
392 weight = BB_MAX_WEIGHT;
395 assert(weight >= BB_LOOP_WEIGHT);
397 curBlk->modifyBBWeight(weight / BB_LOOP_WEIGHT);
403 printf("\n BB%02u(wt=%s)", curBlk->bbNum, refCntWtd2str(curBlk->getBBWeight(this)));
407 /* Stop if we've reached the last block in the loop */
409 if (curBlk == endBlk)
414 curBlk = curBlk->bbNext;
416 /* Stop if we go past the last block in the loop, as it may have been deleted */
417 if (curBlk->bbNum > endBlk->bbNum)
424 /*****************************************************************************************************
426 * Function called to update the loop table and bbWeight before removing a block
429 void Compiler::optUpdateLoopsBeforeRemoveBlock(BasicBlock* block, bool skipUnmarkLoop)
// Keep the loop table consistent when 'block' is about to be removed from
// the flowgraph: loops whose entry/bottom is the block are flagged
// LPFLG_REMOVED, exits/heads referring to it are patched, and — unless
// skipUnmarkLoop — a loop whose sole backedge came from 'block' has its
// block weights unmarked.
436 noway_assert(!opts.MinOpts());
438 bool removeLoop = false;
440 /* If an unreachable block was part of a loop entry or bottom then the loop is unreachable */
441 /* Special case: the block was the head of a loop - or pointing to a loop entry */
443 for (unsigned loopNum = 0; loopNum < optLoopCount; loopNum++)
445 /* Some loops may have been already removed by
446 * loop unrolling or conditional folding */
448 if (optLoopTable[loopNum].lpFlags & LPFLG_REMOVED)
// Removing the entry or bottom block destroys the loop structure itself.
453 if (block == optLoopTable[loopNum].lpEntry || block == optLoopTable[loopNum].lpBottom)
455 optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED;
462 printf("\nUpdateLoopsBeforeRemoveBlock Before: ");
463 optPrintLoopInfo(loopNum);
467 /* If the loop is still in the table
468 * any block in the loop must be reachable !!! */
470 noway_assert(optLoopTable[loopNum].lpEntry != block);
471 noway_assert(optLoopTable[loopNum].lpBottom != block);
// Losing the recorded unique exit only clears the one-exit property; the
// loop itself survives.
473 if (optLoopTable[loopNum].lpExit == block)
475 optLoopTable[loopNum].lpExit = nullptr;
476 optLoopTable[loopNum].lpFlags &= ~LPFLG_ONE_EXIT;
480 /* If this points to the actual entry in the loop
481 * then the whole loop may become unreachable */
// Check every successor of the dying block (fallthrough, jump target, or
// each switch case) to see whether it was feeding the loop entry.
483 switch (block->bbJumpKind)
486 BasicBlock** jumpTab;
490 if (block->bbNext == optLoopTable[loopNum].lpEntry)
495 if (block->bbJumpKind == BBJ_NONE)
503 noway_assert(block->bbJumpDest);
504 if (block->bbJumpDest == optLoopTable[loopNum].lpEntry)
511 jumpCnt = block->bbJumpSwt->bbsCount;
512 jumpTab = block->bbJumpSwt->bbsDstTab;
516 noway_assert(*jumpTab);
517 if ((*jumpTab) == optLoopTable[loopNum].lpEntry)
521 } while (++jumpTab, --jumpCnt);
530 /* Check if the entry has other predecessors outside the loop
531 * TODO: Replace this when predecessors are available */
// Scan the whole block list for another out-of-loop predecessor of the
// entry; if one exists the loop stays reachable and is kept.
533 BasicBlock* auxBlock;
534 for (auxBlock = fgFirstBB; auxBlock; auxBlock = auxBlock->bbNext)
536 /* Ignore blocks in the loop */
538 if (auxBlock->bbNum > optLoopTable[loopNum].lpHead->bbNum &&
539 auxBlock->bbNum <= optLoopTable[loopNum].lpBottom->bbNum)
544 switch (auxBlock->bbJumpKind)
547 BasicBlock** jumpTab;
551 if (auxBlock->bbNext == optLoopTable[loopNum].lpEntry)
556 if (auxBlock->bbJumpKind == BBJ_NONE)
564 noway_assert(auxBlock->bbJumpDest);
565 if (auxBlock->bbJumpDest == optLoopTable[loopNum].lpEntry)
572 jumpCnt = auxBlock->bbJumpSwt->bbsCount;
573 jumpTab = auxBlock->bbJumpSwt->bbsDstTab;
577 noway_assert(*jumpTab);
578 if ((*jumpTab) == optLoopTable[loopNum].lpEntry)
582 } while (++jumpTab, --jumpCnt);
592 optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED;
595 else if (optLoopTable[loopNum].lpHead == block)
597 /* The loop has a new head - Just update the loop table */
598 optLoopTable[loopNum].lpHead = block->bbPrev;
604 printf("\nUpdateLoopsBeforeRemoveBlock After: ");
605 optPrintLoopInfo(loopNum);
// If the dying block was the (sole) backedge of a marked loop, unscale the
// loop blocks' weights. The epoch/dom-count check guards that dominator
// info is still valid before calling fgReachable.
610 if ((skipUnmarkLoop == false) && ((block->bbJumpKind == BBJ_ALWAYS) || (block->bbJumpKind == BBJ_COND)) &&
611 (block->bbJumpDest->isLoopHead()) && (block->bbJumpDest->bbNum <= block->bbNum) && fgDomsComputed &&
612 (fgCurBBEpochSize == fgDomBBcount + 1) && fgReachable(block->bbJumpDest, block))
614 optUnmarkLoopBlocks(block->bbJumpDest, block);
620 /*****************************************************************************
622 * Given the beginBlock of the loop, return the index of this loop
626 unsigned Compiler::optFindLoopNumberFromBeginBlock(BasicBlock* begBlk)
// Linear scan of the loop table; a loop's begin block is the block
// immediately after its head (lpHead->bbNext).
630 for (lnum = 0; lnum < optLoopCount; lnum++)
632 if (optLoopTable[lnum].lpHead->bbNext == begBlk)
// Callers guarantee begBlk starts a recorded loop; not finding it is a bug.
639 noway_assert(!"Loop number not found.");
644 /*****************************************************************************
646 * Print loop info in an uniform way.
649 void Compiler::optPrintLoopInfo(unsigned loopInd,
// Debug-only dump of one loop's shape (first/top/bottom/head/entry/exit and
// parent), taking every field explicitly so it can print loops not yet
// fully recorded in the table.
654 BasicBlock* lpBottom,
655 unsigned char lpExitCnt,
659 noway_assert(lpHead);
662 // NOTE: we take "loopInd" as an argument instead of using the one
663 // stored in begBlk->bbLoopNum because sometimes begBlk->bbLoopNum
664 // has not be set correctly. For example, in optRecordLoop().
665 // However, in most of the cases, loops should have been recorded.
666 // Therefore the correct way is to call the Compiler::optPrintLoopInfo(unsigned lnum)
667 // version of this method.
669 printf("L%02u, from BB%02u", loopInd, lpFirst->bbNum);
// FIRST and TOP differ only when the lexically-first block is not the
// backedge target.
670 if (lpTop != lpFirst)
672 printf(" (loop top is BB%02u)", lpTop->bbNum);
675 printf(" to BB%02u (Head=BB%02u, Entry=BB%02u, ExitCnt=%d", lpBottom->bbNum, lpHead->bbNum, lpEntry->bbNum,
// The unique exit block is only printed when one is recorded.
680 printf(" at BB%02u", lpExit->bbNum);
683 if (parentLoop != BasicBlock::NOT_IN_LOOP)
685 printf(", parent loop = L%02u", parentLoop);
690 /*****************************************************************************
692 * Print loop information given the index of the loop in the loop table.
695 void Compiler::optPrintLoopInfo(unsigned lnum)
// Convenience overload: fetch the loop's fields from the loop table and
// forward to the explicit-argument version above.
697 noway_assert(lnum < optLoopCount);
699 LoopDsc* ldsc = &optLoopTable[lnum]; // lnum is the INDEX to the loop table.
701 optPrintLoopInfo(lnum, ldsc->lpHead, ldsc->lpFirst, ldsc->lpTop, ldsc->lpEntry, ldsc->lpBottom, ldsc->lpExitCnt,
702 ldsc->lpExit, ldsc->lpParent);
707 //------------------------------------------------------------------------
708 // optPopulateInitInfo: Populate loop init info in the loop table.
711 // init - the tree that is supposed to initialize the loop iterator.
712 // iterVar - loop iteration variable.
715 // "false" if the loop table could not be populated with the loop iterVar init info.
718 // The 'init' tree is checked if its lhs is a local and rhs is either
719 // a const or a local.
721 bool Compiler::optPopulateInitInfo(unsigned loopInd, GenTreePtr init, unsigned iterVar)
// Recognize "iterVar = <const int>" or "iterVar = <lclVar>" as the loop's
// init statement and record it (LPFLG_CONST_INIT / LPFLG_VAR_INIT) in the
// loop table. Returns false when the tree doesn't match either shape.
723 // Operator should be =
724 if (init->gtOper != GT_ASG)
729 GenTreePtr lhs = init->gtOp.gtOp1;
730 GenTreePtr rhs = init->gtOp.gtOp2;
731 // LHS has to be local and should equal iterVar.
732 if (lhs->gtOper != GT_LCL_VAR || lhs->gtLclVarCommon.gtLclNum != iterVar)
737 // RHS can be constant or local var.
738 // TODO-CQ: CLONE: Add arr length for descending loops.
739 if (rhs->gtOper == GT_CNS_INT && rhs->TypeGet() == TYP_INT)
741 optLoopTable[loopInd].lpFlags |= LPFLG_CONST_INIT;
742 optLoopTable[loopInd].lpConstInit = (int)rhs->gtIntCon.gtIconVal;
744 else if (rhs->gtOper == GT_LCL_VAR)
746 optLoopTable[loopInd].lpFlags |= LPFLG_VAR_INIT;
747 optLoopTable[loopInd].lpVarInit = rhs->gtLclVarCommon.gtLclNum;
756 //----------------------------------------------------------------------------------
757 // optCheckIterInLoopTest: Check if iter var is used in loop test.
760 // test "jtrue" tree or an asg of the loop iter termination condition
761 // from/to blocks (beg, end) which are part of the loop.
762 // iterVar loop iteration variable.
763 // loopInd loop index.
766 // The test tree is parsed to check if "iterVar" matches the lhs of the condition
767 // and the rhs limit is extracted from the "test" tree. The limit information is
768 // added to the loop table.
771 // "false" if the loop table could not be populated with the loop test info or
772 // if the test condition doesn't involve iterVar.
774 bool Compiler::optCheckIterInLoopTest(
775 unsigned loopInd, GenTreePtr test, BasicBlock* from, BasicBlock* to, unsigned iterVar)
// Verify that the loop's termination condition compares iterVar against a
// limit, classify the limit (constant / loop-invariant local / array
// length) into the loop-table flags, and record the relop as lpTestTree.
777 // Obtain the relop from the "test" tree.
779 if (test->gtOper == GT_JTRUE)
781 relop = test->gtGetOp1();
// Otherwise the test was evaluated into a temp: take the RHS of the ASG.
785 assert(test->gtOper == GT_ASG);
786 relop = test->gtGetOp2();
789 noway_assert(relop->OperKind() & GTK_RELOP);
791 GenTreePtr opr1 = relop->gtOp.gtOp1;
792 GenTreePtr opr2 = relop->gtOp.gtOp2;
797 // Make sure op1 or op2 is the iterVar.
798 if (opr1->gtOper == GT_LCL_VAR && opr1->gtLclVarCommon.gtLclNum == iterVar)
803 else if (opr2->gtOper == GT_LCL_VAR && opr2->gtLclVarCommon.gtLclNum == iterVar)
// Only TYP_INT iterators are supported by the downstream loop analyses.
813 if (iterOp->gtType != TYP_INT)
818 // Mark the iterator node.
819 iterOp->gtFlags |= GTF_VAR_ITERATOR;
821 // Check what type of limit we have - constant, variable or arr-len.
822 if (limitOp->gtOper == GT_CNS_INT)
824 optLoopTable[loopInd].lpFlags |= LPFLG_CONST_LIMIT;
// Remember limits that came from Vector<T>.Count so they can be treated
// specially (the constant differs across target ISAs).
825 if ((limitOp->gtFlags & GTF_ICON_SIMD_COUNT) != 0)
827 optLoopTable[loopInd].lpFlags |= LPFLG_SIMD_LIMIT;
// A local-var limit only counts if it is not assigned anywhere in the loop.
830 else if (limitOp->gtOper == GT_LCL_VAR && !optIsVarAssigned(from, to, nullptr, limitOp->gtLclVarCommon.gtLclNum))
832 optLoopTable[loopInd].lpFlags |= LPFLG_VAR_LIMIT;
834 else if (limitOp->gtOper == GT_ARR_LENGTH)
836 optLoopTable[loopInd].lpFlags |= LPFLG_ARRLEN_LIMIT;
842 // Save the type of the comparison between the iterator and the limit.
843 optLoopTable[loopInd].lpTestTree = relop;
847 //----------------------------------------------------------------------------------
848 // optIsLoopIncrTree: Check if loop is a tree of form v += 1 or v = v + 1
851 // incr The incr tree to be checked. Whether incr tree is
852 // oper-equal(+=, -=...) type nodes or v=v+1 type ASG nodes.
855 // The test tree is parsed to check if "iterVar" matches the lhs of the condition
856 // and the rhs limit is extracted from the "test" tree. The limit information is
857 // added to the loop table.
860 // iterVar local num if the iterVar is found, otherwise BAD_VAR_NUM.
862 unsigned Compiler::optIsLoopIncrTree(GenTreePtr incr)
// Return the local number of the iterator if 'incr' is an update of a local
// by a constant int ("v += c" style or "v = v op c"); BAD_VAR_NUM otherwise.
865 genTreeOps updateOper;
866 unsigned iterVar = incr->IsLclVarUpdateTree(&incrVal, &updateOper);
867 if (iterVar != BAD_VAR_NUM)
869 // We have v = v op y type asg node.
882 // Increment should be by a const int.
883 // TODO-CQ: CLONE: allow variable increments.
884 if ((incrVal->gtOper != GT_CNS_INT) || (incrVal->TypeGet() != TYP_INT))
893 //----------------------------------------------------------------------------------
894 // optComputeIterInfo: Check tree is loop increment of a lcl that is loop-invariant.
897 // from, to - are blocks (beg, end) which are part of the loop.
898 // incr - tree that increments the loop iterator. v+=1 or v=v+1.
899 // pIterVar - see return value.
902 // Returns true if iterVar "v" can be returned in "pIterVar", otherwise returns
906 // Check if the "incr" tree is a "v=v+1 or v+=1" type tree and make sure it is not
907 // assigned in the loop.
909 bool Compiler::optComputeIterInfo(GenTreePtr incr, BasicBlock* from, BasicBlock* to, unsigned* pIterVar)
// Identify the iterator from the incr tree and reject it if it is assigned
// anywhere else in the loop body [from, to] (the incr itself is excluded).
912 unsigned iterVar = optIsLoopIncrTree(incr);
913 if (iterVar == BAD_VAR_NUM)
917 if (optIsVarAssigned(from, to, incr, iterVar))
919 JITDUMP("iterVar is assigned in loop\n");
927 //----------------------------------------------------------------------------------
928 // optIsLoopTestEvalIntoTemp:
929 // Pattern match if the test tree is computed into a tmp
930 // and the "tmp" is used as jump condition for loop termination.
933 // testStmt - is the JTRUE statement that is of the form: jmpTrue (Vtmp != 0)
934 // where Vtmp contains the actual loop test result.
935 // newStmt - contains the statement that is the actual test stmt involving
936 // the loop iterator.
939 // Returns true if a new test tree can be obtained.
942 // Scan if the current stmt is a jtrue with (Vtmp != 0) as condition
943 // Then returns the rhs for def of Vtmp as the "test" node.
946 // This method just retrieves what it thinks is the "test" node,
947 // the callers are expected to verify that "iterVar" is used in the test.
949 bool Compiler::optIsLoopTestEvalIntoTemp(GenTreePtr testStmt, GenTreePtr* newTest)
// Pattern-match "tmp = <relop>; jmpTrue(tmp != 0)" and hand back the
// statement defining tmp as the real loop test. The caller still has to
// verify that the iterator appears in that relop.
951 GenTreePtr test = testStmt->gtStmt.gtStmtExpr;
953 if (test->gtOper != GT_JTRUE)
958 GenTreePtr relop = test->gtGetOp1();
959 noway_assert(relop->OperIsCompare());
961 GenTreePtr opr1 = relop->gtOp.gtOp1;
962 GenTreePtr opr2 = relop->gtOp.gtOp2;
964 // Make sure we have jtrue (vtmp != 0)
965 if ((relop->OperGet() == GT_NE) && (opr1->OperGet() == GT_LCL_VAR) && (opr2->OperGet() == GT_CNS_INT) &&
966 opr2->IsIntegralConst(0))
968 // Get the previous statement to get the def (rhs) of Vtmp to see
969 // if the "test" is evaluated into Vtmp.
970 GenTreePtr prevStmt = testStmt->gtPrev;
971 if (prevStmt == nullptr)
976 GenTreePtr tree = prevStmt->gtStmt.gtStmtExpr;
977 if (tree->OperGet() == GT_ASG)
979 GenTreePtr lhs = tree->gtOp.gtOp1;
980 GenTreePtr rhs = tree->gtOp.gtOp2;
982 // Return as the new test node.
// The temp defined here must be the same local tested by the JTRUE, and
// its definition must itself be a comparison.
983 if (lhs->gtOper == GT_LCL_VAR && lhs->AsLclVarCommon()->GetLclNum() == opr1->AsLclVarCommon()->GetLclNum())
985 if (rhs->OperIsCompare())
996 //----------------------------------------------------------------------------------
997 // optExtractInitTestIncr:
998 // Extract the "init", "test" and "incr" nodes of the loop.
1001 // head - Loop head block
1002 // bottom - Loop bottom block
1003 // top - Loop top block
1004 // ppInit - The init stmt of the loop if found.
1005 // ppTest - The test stmt of the loop if found.
1006 // ppIncr - The incr stmt of the loop if found.
1009 // The results are put in "ppInit", "ppTest" and "ppIncr" if the method
1010 // returns true. Returns false if the information can't be extracted.
1013 // Check if the "test" stmt is last stmt in the loop "bottom". If found good,
1014 // "test" stmt is found. Try to find the "incr" stmt. Check previous stmt of
1015 // "test" to get the "incr" stmt. If it is not found it could be a loop of the
1018 // +-------<-----------------<-----------+
1021 // BBinit(head) -> BBcond(top) -> BBLoopBody(bottom) ---^
1023 // Check if the "incr" tree is present in the loop "top" node as the last stmt.
1024 // Also check if the "test" tree is assigned to a tmp node and the tmp is used
1025 // in the jtrue condition.
1028 // This method just retrieves what it thinks is the "test" node,
1029 // the callers are expected to verify that "iterVar" is used in the test.
1031 bool Compiler::optExtractInitTestIncr(
1032 BasicBlock* head, BasicBlock* bottom, BasicBlock* top, GenTreePtr* ppInit, GenTreePtr* ppTest, GenTreePtr* ppIncr)
// Locate the three statements of a for-like loop: "test" is the last stmt
// of 'bottom', "incr" precedes it (or is the last stmt of 'top' for
// rotated loops), and "init" is the last stmt of the pre-header 'head'.
// Returns false when any of the three cannot be identified.
1034 assert(ppInit != nullptr);
1035 assert(ppTest != nullptr);
1036 assert(ppIncr != nullptr);
1038 // Check if last two statements in the loop body are the increment of the iterator
1039 // and the loop termination test.
1040 noway_assert(bottom->bbTreeList != nullptr);
// bbTreeList is circular: gtPrev of the head statement is the last one.
1041 GenTreePtr test = bottom->bbTreeList->gtPrev;
1042 noway_assert(test != nullptr && test->gtNext == nullptr);
// The JTRUE may test a temp holding the real condition; if so, follow it.
1045 if (optIsLoopTestEvalIntoTemp(test, &newTest))
1050 // Check if we have the incr tree before the test tree, if we don't,
1051 // check if incr is part of the loop "top".
1052 GenTreePtr incr = test->gtPrev;
1053 if (incr == nullptr || optIsLoopIncrTree(incr->gtStmt.gtStmtExpr) == BAD_VAR_NUM)
1055 if (top == nullptr || top->bbTreeList == nullptr || top->bbTreeList->gtPrev == nullptr)
1060 // If the prev stmt to loop test is not incr, then check if we have loop test evaluated into a tmp.
1061 GenTreePtr topLast = top->bbTreeList->gtPrev;
1062 if (optIsLoopIncrTree(topLast->gtStmt.gtStmtExpr) != BAD_VAR_NUM)
1072 assert(test != incr);
1074 // Find the last statement in the loop pre-header which we expect to be the initialization of
1075 // the loop iterator.
1076 GenTreePtr phdr = head->bbTreeList;
1077 if (phdr == nullptr)
1082 GenTreePtr init = phdr->gtPrev;
1083 noway_assert(init != nullptr && (init->gtNext == nullptr));
1085 // If it is a duplicated loop condition, skip it.
1086 if (init->gtFlags & GTF_STMT_CMPADD)
1088 // Must be a duplicated loop condition.
1089 noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
1090 init = init->gtPrev;
1091 noway_assert(init != nullptr);
1094 noway_assert(init->gtOper == GT_STMT);
1095 noway_assert(test->gtOper == GT_STMT);
1096 noway_assert(incr->gtOper == GT_STMT);
// Hand back the expression trees, not the GT_STMT wrappers.
1098 *ppInit = init->gtStmt.gtStmtExpr;
1099 *ppTest = test->gtStmt.gtStmtExpr;
1100 *ppIncr = incr->gtStmt.gtStmtExpr;
1105 /*****************************************************************************
1107 * Record the loop in the loop table.
1110 void Compiler::optRecordLoop(BasicBlock* head,
// Add a newly-discovered natural loop to optLoopTable, keeping the table
// sorted so that any loop that contains another appears before it. If the
// loop looks like "for (init; test; incr)" the iterator information is
// analyzed and recorded as well (LPFLG_ITER, and LPFLG_CONST for loops with
// constant init and limit).
1116 unsigned char exitCnt)
1118 // Record this loop in the table, if there's room.
1120 assert(optLoopCount <= MAX_LOOP_NUM);
// Table full: remember the overflow so later phases know the table is
// incomplete, and drop this loop.
1121 if (optLoopCount == MAX_LOOP_NUM)
1124 loopOverflowThisMethod = true;
1129 // Assumed preconditions on the loop we're adding.
1130 assert(first->bbNum <= top->bbNum);
1131 assert(top->bbNum <= entry->bbNum);
1132 assert(entry->bbNum <= bottom->bbNum);
1133 assert(head->bbNum < top->bbNum || head->bbNum > bottom->bbNum);
1135 // If the new loop contains any existing ones, add it in the right place.
1136 unsigned char loopInd = optLoopCount;
1137 for (unsigned char prevPlus1 = optLoopCount; prevPlus1 > 0; prevPlus1--)
1139 unsigned char prev = prevPlus1 - 1;
1140 if (optLoopTable[prev].lpContainedBy(first, bottom))
1145 // Move up any loops if necessary.
1146 for (unsigned j = optLoopCount; j > loopInd; j--)
1148 optLoopTable[j] = optLoopTable[j - 1];
// Debug-only sanity: every later loop is either disjoint from or contained
// in the new one.
1152 for (unsigned i = loopInd + 1; i < optLoopCount; i++)
1154 // The loop is well-formed.
1155 assert(optLoopTable[i].lpWellFormed());
1156 // Check for disjoint.
1157 if (optLoopTable[i].lpDisjoint(first, bottom))
1161 // Otherwise, assert complete containment (of optLoopTable[i] in new loop).
1162 assert(optLoopTable[i].lpContainedBy(first, bottom));
1166 optLoopTable[loopInd].lpHead = head;
1167 optLoopTable[loopInd].lpFirst = first;
1168 optLoopTable[loopInd].lpTop = top;
1169 optLoopTable[loopInd].lpBottom = bottom;
1170 optLoopTable[loopInd].lpEntry = entry;
1171 optLoopTable[loopInd].lpExit = exit;
1172 optLoopTable[loopInd].lpExitCnt = exitCnt;
// Parent/child/sibling links are filled in later by loop-nesting analysis.
1174 optLoopTable[loopInd].lpParent = BasicBlock::NOT_IN_LOOP;
1175 optLoopTable[loopInd].lpChild = BasicBlock::NOT_IN_LOOP;
1176 optLoopTable[loopInd].lpSibling = BasicBlock::NOT_IN_LOOP;
1178 optLoopTable[loopInd].lpFlags = 0;
1180 // We haven't yet recorded any side effects.
1181 optLoopTable[loopInd].lpLoopHasHeapHavoc = false;
1182 optLoopTable[loopInd].lpFieldsModified = nullptr;
1183 optLoopTable[loopInd].lpArrayElemTypesModified = nullptr;
1185 // If DO-WHILE loop mark it as such.
1186 if (head->bbNext == entry)
1188 optLoopTable[loopInd].lpFlags |= LPFLG_DO_WHILE;
1191 // If single exit loop mark it as such.
1195 optLoopTable[loopInd].lpFlags |= LPFLG_ONE_EXIT;
1199 // Try to find loops that have an iterator (i.e. for-like loops) "for (init; test; incr){ ... }"
1200 // We have the following restrictions:
1201 // 1. The loop condition must be a simple one i.e. only one JTRUE node
1202 // 2. There must be a loop iterator (a local var) that is
1203 // incremented (decremented or lsh, rsh, mul) with a constant value
1204 // 3. The iterator is incremented exactly once
1205 // 4. The loop condition must use the iterator.
1207 if (bottom->bbJumpKind == BBJ_COND)
1212 if (!optExtractInitTestIncr(head, bottom, top, &init, &test, &incr))
1217 unsigned iterVar = BAD_VAR_NUM;
1218 if (!optComputeIterInfo(incr, head->bbNext, bottom, &iterVar))
1223 // Make sure the "iterVar" initialization is never skipped,
1224 // i.e. every pred of ENTRY other than HEAD is in the loop.
1225 for (flowList* predEdge = entry->bbPreds; predEdge; predEdge = predEdge->flNext)
1227 BasicBlock* predBlock = predEdge->flBlock;
1228 if ((predBlock != head) && !optLoopTable[loopInd].lpContains(predBlock))
1234 if (!optPopulateInitInfo(loopInd, init, iterVar))
1239 // Check that the iterator is used in the loop condition.
1240 if (!optCheckIterInLoopTest(loopInd, test, head->bbNext, bottom, iterVar))
1245 // We know the loop has an iterator at this point ->flag it as LPFLG_ITER
1246 // Record the iterator, the pointer to the test node
1247 // and the initial value of the iterator (constant or local var)
1248 optLoopTable[loopInd].lpFlags |= LPFLG_ITER;
1251 optLoopTable[loopInd].lpIterTree = incr;
1254 // Save the initial value of the iterator - can be lclVar or constant
1255 // Flag the loop accordingly.
1261 simpleTestLoopCount++;
1264 // Check if a constant iteration loop.
1265 if ((optLoopTable[loopInd].lpFlags & LPFLG_CONST_INIT) && (optLoopTable[loopInd].lpFlags & LPFLG_CONST_LIMIT))
1267 // This is a constant loop.
1268 optLoopTable[loopInd].lpFlags |= LPFLG_CONST;
1270 constIterLoopCount++;
1277 printf("\nConstant loop initializer:\n");
1280 printf("\nConstant loop body:\n");
// Debug dump of every statement in the loop body, skipping the incr tree
// (already printed as part of the iterator info).
1282 BasicBlock* block = head;
1285 block = block->bbNext;
1286 for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
1288 if (stmt->gtStmt.gtStmtExpr == incr)
1293 gtDispTree(stmt->gtStmt.gtStmtExpr);
1295 } while (block != bottom);
1301 DBEXEC(verbose, optPrintLoopRecording(loopInd));
1306 //------------------------------------------------------------------------
1307 // optPrintLoopRecording: Print a recording of the loop.
1310 // loopInd - loop index.
1312 void Compiler::optPrintLoopRecording(unsigned loopInd)
// Debug-only dump of a just-recorded loop: its block structure plus, for
// iterator loops, the iterator variable, its init value, and the test
// operator/limit.
1314 printf("Recorded loop %s", (loopInd != optLoopCount ? "(extended) " : ""));
1315 optPrintLoopInfo(optLoopCount, // Not necessarily the loop index, but the number of loops that have been added.
1316 optLoopTable[loopInd].lpHead, optLoopTable[loopInd].lpFirst, optLoopTable[loopInd].lpTop,
1317 optLoopTable[loopInd].lpEntry, optLoopTable[loopInd].lpBottom, optLoopTable[loopInd].lpExitCnt,
1318 optLoopTable[loopInd].lpExit);
1320 // If an iterator loop print the iterator and the initialization.
1321 if (optLoopTable[loopInd].lpFlags & LPFLG_ITER)
1323 printf(" [over V%02u", optLoopTable[loopInd].lpIterVar());
1325 printf(GenTree::NodeName(optLoopTable[loopInd].lpIterOper()));
1327 printf("%d )", optLoopTable[loopInd].lpIterConst());
1329 if (optLoopTable[loopInd].lpFlags & LPFLG_CONST_INIT)
1331 printf(" from %d", optLoopTable[loopInd].lpConstInit);
1333 if (optLoopTable[loopInd].lpFlags & LPFLG_VAR_INIT)
1335 printf(" from V%02u", optLoopTable[loopInd].lpVarInit);
1338 // If a simple test condition print operator and the limits */
1339 printf(GenTree::NodeName(optLoopTable[loopInd].lpTestOper()));
1341 if (optLoopTable[loopInd].lpFlags & LPFLG_CONST_LIMIT)
1343 printf("%d ", optLoopTable[loopInd].lpConstLimit());
1346 if (optLoopTable[loopInd].lpFlags & LPFLG_VAR_LIMIT)
1348 printf("V%02u ", optLoopTable[loopInd].lpVarLimit());
1357 void Compiler::optCheckPreds()
// Debug-only consistency check: every recorded predecessor edge must point
// at a block that is on the block list and whose jump kind can actually
// transfer control to the successor.
1360 BasicBlock* blockPred;
1363 for (block = fgFirstBB; block; block = block->bbNext)
1365 for (pred = block->bbPreds; pred; pred = pred->flNext)
1367 // make sure this pred is part of the BB list
1368 for (blockPred = fgFirstBB; blockPred; blockPred = blockPred->bbNext)
1370 if (blockPred == pred->flBlock)
1375 noway_assert(blockPred);
// Validate that the predecessor's terminator really targets 'block'.
1376 switch (blockPred->bbJumpKind)
1379 if (blockPred->bbJumpDest == block)
1385 noway_assert(blockPred->bbNext == block);
1387 case BBJ_EHFILTERRET:
1389 case BBJ_EHCATCHRET:
1390 noway_assert(blockPred->bbJumpDest == block);
1401 /*****************************************************************************
1402 * Find the natural loops, using dominators. Note that the test for
1403 * a loop is slightly different from the standard one, because we have
1404 * not done a depth first reordering of the basic blocks.
1407 void Compiler::optFindNaturalLoops()
1412 printf("*************** In optFindNaturalLoops()\n");
1418 flowList* predEntry;
1420 noway_assert(fgDomsComputed);
1424 hasMethodLoops = false;
1425 loopsThisMethod = 0;
1426 loopOverflowThisMethod = false;
1429 /* We will use the following terminology:
1430 * HEAD - the basic block that flows into the loop ENTRY block (Currently MUST be lexically before entry).
1431 Not part of the looping of the loop.
1432 * FIRST - the lexically first basic block (in bbNext order) within this loop. (May be part of a nested loop,
1433 * but not the outer loop. ???)
1434 * TOP - the target of the backward edge from BOTTOM. In most cases FIRST and TOP are the same.
1435 * BOTTOM - the lexically last block in the loop (i.e. the block from which we jump to the top)
1436 * EXIT - the loop exit or the block right after the bottom
1437 * ENTRY - the entry in the loop (not necessarly the TOP), but there must be only one entry
1439 * We (currently) require the body of a loop to be a contiguous (in bbNext order) sequence of basic blocks.
1473 unsigned char exitCount;
1475 for (head = fgFirstBB; head->bbNext; head = head->bbNext)
1481 // Blocks that are rarely run have a zero bbWeight and should
1482 // never be optimized here
1484 if (top->bbWeight == BB_ZERO_WEIGHT)
1489 for (pred = top->bbPreds; pred; pred = pred->flNext)
1491 /* Is this a loop candidate? - We look for "back edges", i.e. an edge from BOTTOM
1492 * to TOP (note that this is an abuse of notation since this is not necessarily a back edge
1493 * as the definition says, but merely an indication that we have a loop there).
1494 * Thus, we have to be very careful and after entry discovery check that it is indeed
1495 * the only place we enter the loop (especially for non-reducible flow graphs).
1498 bottom = pred->flBlock;
1501 if (top->bbNum <= bottom->bbNum) // is this a backward edge? (from BOTTOM to TOP)
1503 if ((bottom->bbJumpKind == BBJ_EHFINALLYRET) || (bottom->bbJumpKind == BBJ_EHFILTERRET) ||
1504 (bottom->bbJumpKind == BBJ_EHCATCHRET) || (bottom->bbJumpKind == BBJ_CALLFINALLY) ||
1505 (bottom->bbJumpKind == BBJ_SWITCH))
1507 /* BBJ_EHFINALLYRET, BBJ_EHFILTERRET, BBJ_EHCATCHRET, and BBJ_CALLFINALLY can never form a loop.
1508 * BBJ_SWITCH that has a backward jump appears only for labeled break. */
1512 BasicBlock* loopBlock;
1514 /* The presence of a "back edge" is an indication that a loop might be present here
1517 * 1. A collection of STRONGLY CONNECTED nodes i.e. there is a path from any
1518 * node in the loop to any other node in the loop (wholly within the loop)
1519 * 2. The loop has a unique ENTRY, i.e. there is only one way to reach a node
1520 * in the loop from outside the loop, and that is through the ENTRY
1523 /* Let's find the loop ENTRY */
1525 if (head->bbJumpKind == BBJ_ALWAYS)
1527 if (head->bbJumpDest->bbNum <= bottom->bbNum && head->bbJumpDest->bbNum >= top->bbNum)
1529 /* OK - we enter somewhere within the loop */
1530 entry = head->bbJumpDest;
1532 /* some useful asserts
1533 * Cannot enter at the top - should have being caught by redundant jumps */
1535 assert((entry != top) || (head->bbFlags & BBF_KEEP_BBJ_ALWAYS));
1539 /* special case - don't consider now */
1540 // assert (!"Loop entered in weird way!");
1544 // Can we fall through into the loop?
1545 else if (head->bbJumpKind == BBJ_NONE || head->bbJumpKind == BBJ_COND)
1547 /* The ENTRY is at the TOP (a do-while loop) */
1552 goto NO_LOOP; // head does not flow into the loop bail for now
1555 // Now we find the "first" block -- the earliest block reachable within the loop.
1556 // This is usually the same as "top", but can differ in rare cases where "top" is
1557 // the entry block of a nested loop, and that nested loop branches backwards to a
1558 // a block before "top". We find this by searching for such backwards branches
1559 // in the loop known so far.
1560 BasicBlock* first = top;
1561 BasicBlock* newFirst;
1562 bool blocksToSearch = true;
1563 BasicBlock* validatedAfter = bottom->bbNext;
1564 while (blocksToSearch)
1566 blocksToSearch = false;
1568 blocksToSearch = false;
1569 for (loopBlock = first; loopBlock != validatedAfter; loopBlock = loopBlock->bbNext)
1571 unsigned nSucc = loopBlock->NumSucc();
1572 for (unsigned j = 0; j < nSucc; j++)
1574 BasicBlock* succ = loopBlock->GetSucc(j);
1575 if ((newFirst == nullptr && succ->bbNum < first->bbNum) ||
1576 (newFirst != nullptr && succ->bbNum < newFirst->bbNum))
1582 if (newFirst != nullptr)
1584 validatedAfter = first;
1586 blocksToSearch = true;
1590 // Is "head" still before "first"? If not, we don't have a valid loop...
1591 if (head->bbNum >= first->bbNum)
1594 "Extending loop [BB%02u..BB%02u] 'first' to BB%02u captures head BB%02u. Rejecting loop.\n",
1595 top->bbNum, bottom->bbNum, first->bbNum, head->bbNum);
1599 /* Make sure ENTRY dominates all blocks in the loop
1600 * This is necessary to ensure condition 2. above
1601 * At the same time check if the loop has a single exit
1602 * point - those loops are easier to optimize */
1604 for (loopBlock = top; loopBlock != bottom->bbNext; loopBlock = loopBlock->bbNext)
1606 if (!fgDominate(entry, loopBlock))
1611 if (loopBlock == bottom)
1613 if (bottom->bbJumpKind != BBJ_ALWAYS)
1615 /* there is an exit at the bottom */
1617 noway_assert(bottom->bbJumpDest == top);
1624 BasicBlock* exitPoint;
1626 switch (loopBlock->bbJumpKind)
1629 case BBJ_CALLFINALLY:
1631 case BBJ_EHCATCHRET:
1632 assert(loopBlock->bbJumpDest);
1633 exitPoint = loopBlock->bbJumpDest;
1635 if (exitPoint->bbNum < top->bbNum || exitPoint->bbNum > bottom->bbNum)
1637 /* exit from a block other than BOTTOM */
1646 case BBJ_EHFINALLYRET:
1647 case BBJ_EHFILTERRET:
1648 /* The "try" associated with this "finally" must be in the
1649 * same loop, so the finally block will return control inside the loop */
1654 /* those are exits from the loop */
1662 jumpCnt = loopBlock->bbJumpSwt->bbsCount;
1663 BasicBlock** jumpTab;
1664 jumpTab = loopBlock->bbJumpSwt->bbsDstTab;
1668 noway_assert(*jumpTab);
1669 exitPoint = *jumpTab;
1671 if (exitPoint->bbNum < top->bbNum || exitPoint->bbNum > bottom->bbNum)
1676 } while (++jumpTab, --jumpCnt);
1680 noway_assert(!"Unexpected bbJumpKind");
1685 /* Make sure we can iterate the loop (i.e. there is a way back to ENTRY)
1686 * This is to ensure condition 1. above which prevents marking fake loops
1688 * Below is an example:
1696 * The example above is not a loop since we bail after the first iteration
1698 * The condition we have to check for is
1699 * 1. ENTRY must have at least one predecessor inside the loop. Since we know that that block is
1700 * reachable, it can only be reached through ENTRY, therefore we have a way back to ENTRY
1702 * 2. If we have a GOTO (BBJ_ALWAYS) outside of the loop and that block dominates the
1703 * loop bottom then we cannot iterate
1705 * NOTE that this doesn't entirely satisfy condition 1. since "break" statements are not
1706 * part of the loop nodes (as per definition they are loop exits executed only once),
1707 * but we have no choice but to include them because we consider all blocks within TOP-BOTTOM */
1709 for (loopBlock = top; loopBlock != bottom; loopBlock = loopBlock->bbNext)
1711 switch (loopBlock->bbJumpKind)
1716 if (fgDominate(loopBlock, bottom))
1725 bool canIterateLoop = false;
1727 for (predEntry = entry->bbPreds; predEntry; predEntry = predEntry->flNext)
1729 if (predEntry->flBlock->bbNum >= top->bbNum && predEntry->flBlock->bbNum <= bottom->bbNum)
1731 canIterateLoop = true;
1734 else if (predEntry->flBlock != head)
1736 // The entry block has multiple predecessors outside the loop; the 'head'
1737 // block isn't the only one. We only support a single 'head', so bail.
1742 if (!canIterateLoop)
1747 /* Double check - make sure that all loop blocks except ENTRY
1748 * have no predecessors outside the loop - this ensures only one loop entry and prevents
1749 * us from considering non-loops due to incorrectly assuming that we had a back edge
1752 * Loops of the form "while (a || b)" will be treated as 2 nested loops (with the same header)
1755 for (loopBlock = top; loopBlock != bottom->bbNext; loopBlock = loopBlock->bbNext)
1757 if (loopBlock == entry)
1762 for (predTop = loopBlock->bbPreds; predTop != nullptr; predTop = predTop->flNext)
1764 if (predTop->flBlock->bbNum < top->bbNum || predTop->flBlock->bbNum > bottom->bbNum)
1766 // noway_assert(!"Found loop with multiple entries");
1772 // Disqualify loops where the first block of the loop is less nested in EH than
1773 // the bottom block. That is, we don't want to handle loops where the back edge
1774 // goes from within an EH region to a first block that is outside that same EH
1775 // region. Note that we *do* handle loops where the first block is the *first*
1776 // block of a more nested EH region (since it is legal to branch to the first
1777 // block of an immediately more nested EH region). So, for example, disqualify
1784 // BB10 BBJ_COND => BB02
1788 // Here, BB10 is more nested than BB02.
1790 if (bottom->hasTryIndex() && !bbInTryRegions(bottom->getTryIndex(), first))
1792 JITDUMP("Loop 'first' BB%02u is in an outer EH region compared to loop 'bottom' BB%02u. Rejecting "
1794 first->bbNum, bottom->bbNum);
1798 #if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
1799 // Disqualify loops where the first block of the loop is a finally target.
1800 // The main problem is when multiple loops share a 'first' block that is a finally
1801 // target and we canonicalize the loops by adding a new loop head. In that case, we
1802 // need to update the blocks so the finally target bit is moved to the newly created
1803 // block, and removed from the old 'first' block. This is 'hard', so at this point
1804 // in the RyuJIT codebase (when we don't expect to keep the "old" ARM32 code generator
1805 // long-term), it's easier to disallow the loop than to update the flow graph to
1806 // support this case.
1808 if ((first->bbFlags & BBF_FINALLY_TARGET) != 0)
1810 JITDUMP("Loop 'first' BB%02u is a finally target. Rejecting loop.\n", first->bbNum);
1813 #endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
1815 /* At this point we have a loop - record it in the loop table
1816 * If we found only one exit, record it in the table too
1817 * (otherwise an exit = 0 in the loop table means multiple exits) */
1824 optRecordLoop(head, first, top, entry, bottom, exit, exitCount);
1827 if (!hasMethodLoops)
1829 /* mark the method as containing natural loops */
1831 hasMethodLoops = true;
1834 /* increment total number of loops found */
1838 /* keep track of the number of exits */
1839 loopExitCountTable.record(static_cast<unsigned>(exitCount));
1840 #endif // COUNT_LOOPS
1843 /* current predecessor not good for a loop - continue with another one, if any */
1849 loopCountTable.record(loopsThisMethod);
1850 if (maxLoopsPerMethod < loopsThisMethod)
1852 maxLoopsPerMethod = loopsThisMethod;
1854 if (loopOverflowThisMethod)
1856 totalLoopOverflows++;
1858 #endif // COUNT_LOOPS
1860 // Now the loop indices are stable. We can figure out parent/child relationships
1861 // (using table indices to name loops), and label blocks.
1862 for (unsigned char loopInd = 1; loopInd < optLoopCount; loopInd++)
1864 for (unsigned char possibleParent = loopInd; possibleParent > 0;)
1867 if (optLoopTable[possibleParent].lpContains(optLoopTable[loopInd]))
1869 optLoopTable[loopInd].lpParent = possibleParent;
1870 optLoopTable[loopInd].lpSibling = optLoopTable[possibleParent].lpChild;
1871 optLoopTable[possibleParent].lpChild = loopInd;
1877 // Now label the blocks with the innermost loop to which they belong. Since parents
1878 // precede children in the table, doing the labeling for each loop in order will achieve
1879 // this -- the innermost loop labeling will be done last.
1880 for (unsigned char loopInd = 0; loopInd < optLoopCount; loopInd++)
1882 BasicBlock* first = optLoopTable[loopInd].lpFirst;
1883 BasicBlock* bottom = optLoopTable[loopInd].lpBottom;
1884 for (BasicBlock* blk = first; blk != nullptr; blk = blk->bbNext)
1886 blk->bbNatLoopNum = loopInd;
1891 assert(blk->bbNext != nullptr); // We should never reach nullptr.
1895 // Make sure that loops are canonical: that every loop has a unique "top", by creating an empty "nop"
1896 // one, if necessary, for loops containing others that share a "top."
1898 for (unsigned char loopInd = 0; loopInd < optLoopCount; loopInd++)
1900 // Traverse the outermost loops as entries into the loop nest; so skip non-outermost.
1901 if (optLoopTable[loopInd].lpParent != BasicBlock::NOT_IN_LOOP)
1907 if (optCanonicalizeLoopNest(loopInd))
1914 fgUpdateChangedFlowGraph();
1918 if (verbose && optLoopCount > 0)
1920 printf("\nFinal natural loop table:\n");
1921 for (unsigned loopInd = 0; loopInd < optLoopCount; loopInd++)
1923 optPrintLoopInfo(loopInd);
1930 void Compiler::optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap)
1932 BasicBlock* newJumpDest = nullptr;
1933 switch (blk->bbJumpKind)
1938 case BBJ_EHFILTERRET:
1939 case BBJ_EHFINALLYRET:
1940 case BBJ_EHCATCHRET:
1941 // These have no jump destination to update.
1946 case BBJ_CALLFINALLY:
1948 // All of these have a single jump destination to update.
1949 if (redirectMap->Lookup(blk->bbJumpDest, &newJumpDest))
1951 blk->bbJumpDest = newJumpDest;
1957 bool redirected = false;
1958 for (unsigned i = 0; i < blk->bbJumpSwt->bbsCount; i++)
1960 if (redirectMap->Lookup(blk->bbJumpSwt->bbsDstTab[i], &newJumpDest))
1962 blk->bbJumpSwt->bbsDstTab[i] = newJumpDest;
1966 // If any redirections happend, invalidate the switch table map for the switch.
1969 GetSwitchDescMap()->Remove(blk);
1979 // TODO-Cleanup: This should be a static member of the BasicBlock class.
1980 void Compiler::optCopyBlkDest(BasicBlock* from, BasicBlock* to)
1982 assert(from->bbJumpKind == to->bbJumpKind); // Precondition.
1984 // copy the jump destination(s) from "from" to "to".
1985 switch (to->bbJumpKind)
1989 case BBJ_CALLFINALLY:
1991 // All of these have a single jump destination to update.
1992 to->bbJumpDest = from->bbJumpDest;
1997 to->bbJumpSwt = new (this, CMK_BasicBlock) BBswtDesc();
1998 to->bbJumpSwt->bbsCount = from->bbJumpSwt->bbsCount;
1999 to->bbJumpSwt->bbsDstTab = new (this, CMK_BasicBlock) BasicBlock*[from->bbJumpSwt->bbsCount];
2001 for (unsigned i = 0; i < from->bbJumpSwt->bbsCount; i++)
2003 to->bbJumpSwt->bbsDstTab[i] = from->bbJumpSwt->bbsDstTab[i];
2013 // Canonicalize the loop nest rooted at parent loop 'loopInd'.
2014 // Returns 'true' if the flow graph is modified.
2015 bool Compiler::optCanonicalizeLoopNest(unsigned char loopInd)
2017 bool modified = false;
2019 // Is the top of the current loop not in any nested loop?
2020 if (optLoopTable[loopInd].lpTop->bbNatLoopNum != loopInd)
2022 if (optCanonicalizeLoop(loopInd))
2028 for (unsigned char child = optLoopTable[loopInd].lpChild; child != BasicBlock::NOT_IN_LOOP;
2029 child = optLoopTable[child].lpSibling)
2031 if (optCanonicalizeLoopNest(child))
2040 bool Compiler::optCanonicalizeLoop(unsigned char loopInd)
2042 // Is the top uniquely part of the current loop?
2043 BasicBlock* t = optLoopTable[loopInd].lpTop;
2045 if (t->bbNatLoopNum == loopInd)
2050 JITDUMP("in optCanonicalizeLoop: L%02u has top BB%02u (bottom BB%02u) with natural loop number L%02u: need to "
2052 loopInd, t->bbNum, optLoopTable[loopInd].lpBottom->bbNum, t->bbNatLoopNum);
2054 // Otherwise, the top of this loop is also part of a nested loop.
2056 // Insert a new unique top for this loop. We must be careful to put this new
2057 // block in the correct EH region. Note that f->bbPrev might be in a different
2058 // EH region. For example:
2066 // In this case, first->bbPrev is BB07, which is in a different 'try' region.
2067 // On the other hand, the first block of multiple loops might be the first
2068 // block of a 'try' region that is completely contained in the multiple loops.
2073 // BB10 BBJ_ALWAYS => BB08
2075 // BB12 BBJ_ALWAYS => BB08
2077 // Here, we have two loops, both with BB08 as the "first" block. Block BB08
2078 // is a single-block "try" region. Neither loop "bottom" block is in the same
2079 // "try" region as BB08. This is legal because you can jump to the first block
2080 // of a try region. With EH normalization, no two "try" regions will share
2081 // this block. In this case, we need to insert a new block for the outer loop
2082 // in the same EH region as the branch from the "bottom":
2087 // BB10 BBJ_ALWAYS => BB08
2089 // BB12 BBJ_ALWAYS => BB30
2091 // Another possibility is that the "first" block of the loop nest can be the first block
2092 // of a "try" region that also has other predecessors than those in the loop, or even in
2093 // the "try" region (since blocks can target the first block of a "try" region). For example:
2097 // BB10 BBJ_ALWAYS => BB08
2099 // BB12 BBJ_ALWAYS => BB08
2102 // BB20 BBJ_ALWAYS => BB08
2104 // BB25 BBJ_ALWAYS => BB08
2106 // Here, BB08 has 4 flow graph predecessors: BB10, BB12, BB20, BB25. These are all potential loop
2107 // bottoms, for four possible nested loops. However, we require all the loop bottoms to be in the
2108 // same EH region. For loops BB08..BB10 and BB08..BB12, we need to add a new "top" block within
2109 // the try region, immediately before BB08. The bottom of the loop BB08..BB10 loop will target the
2110 // old BB08, and the bottom of the BB08..BB12 loop will target the new loop header. The other branches
2111 // (BB20, BB25) must target the new loop header, both for correctness, and to avoid the illegal
2112 // situation of branching to a non-first block of a 'try' region.
2114 // We can also have a loop nest where the "first" block is outside of a "try" region
2115 // and the back edges are inside a "try" region, for example:
2119 // BB09 try { BBJ_COND => BB02
2121 // BB15 BBJ_COND => BB02
2123 // BB21 } // end of "try"
2125 // In this case, both loop back edges were formed by "leave" instructions that were
2126 // imported into branches that were later made conditional. In this case, we don't
2127 // want to copy the EH region of the back edge, since that would create a block
2128 // outside of and disjoint with the "try" region of the back edge. However, to
2129 // simplify things, we disqualify this type of loop, so we should never see this here.
2131 BasicBlock* h = optLoopTable[loopInd].lpHead;
2132 BasicBlock* f = optLoopTable[loopInd].lpFirst;
2133 BasicBlock* b = optLoopTable[loopInd].lpBottom;
2135 // The loop must be entirely contained within a single handler region.
2136 assert(BasicBlock::sameHndRegion(f, b));
2138 // If the bottom block is in the same "try" region, then we extend the EH
2139 // region. Otherwise, we add the new block outside the "try" region.
2140 bool extendRegion = BasicBlock::sameTryRegion(f, b);
2141 BasicBlock* newT = fgNewBBbefore(BBJ_NONE, f, extendRegion);
2144 // We need to set the EH region manually. Set it to be the same
2145 // as the bottom block.
2146 newT->copyEHRegion(b);
2149 BlockSetOps::Assign(this, newT->bbReach, t->bbReach);
2151 // Redirect the "bottom" of the current loop to "newT".
2152 BlockToBlockMap* blockMap = new (getAllocatorLoopHoist()) BlockToBlockMap(getAllocatorLoopHoist());
2153 blockMap->Set(t, newT);
2154 optRedirectBlock(b, blockMap);
2156 // Redirect non-loop preds of "t" to also go to "newT". Inner loops that also branch to "t" should continue
2157 // to do so. However, there maybe be other predecessors from outside the loop nest that need to be updated
2158 // to point to "newT". This normally wouldn't happen, since they too would be part of the loop nest. However,
2159 // they might have been prevented from participating in the loop nest due to different EH nesting, or some
2162 // Note that optRedirectBlock doesn't update the predecessors list. So, if the same 't' block is processed
2163 // multiple times while canonicalizing multiple loop nests, we'll attempt to redirect a predecessor multiple times.
2164 // This is ok, because after the first redirection, the topPredBlock branch target will no longer match the source
2165 // edge of the blockMap, so nothing will happen.
2166 for (flowList* topPred = t->bbPreds; topPred != nullptr; topPred = topPred->flNext)
2168 BasicBlock* topPredBlock = topPred->flBlock;
2170 // Skip if topPredBlock is in the loop.
2171 // Note that this uses block number to detect membership in the loop. We are adding blocks during
2172 // canonicalization, and those block numbers will be new, and larger than previous blocks. However, we work
2173 // outside-in, so we shouldn't encounter the new blocks at the loop boundaries, or in the predecessor lists.
2174 if (t->bbNum <= topPredBlock->bbNum && topPredBlock->bbNum <= b->bbNum)
2176 JITDUMP("in optCanonicalizeLoop: 'top' predecessor BB%02u is in the range of L%02u (BB%02u..BB%02u); not "
2177 "redirecting its bottom edge\n",
2178 topPredBlock->bbNum, loopInd, t->bbNum, b->bbNum);
2182 JITDUMP("in optCanonicalizeLoop: redirect top predecessor BB%02u to BB%02u\n", topPredBlock->bbNum,
2184 optRedirectBlock(topPredBlock, blockMap);
2187 assert(newT->bbNext == f);
2190 newT->bbJumpKind = BBJ_ALWAYS;
2191 newT->bbJumpDest = t;
2192 newT->bbTreeList = nullptr;
2193 fgInsertStmtAtEnd(newT, fgNewStmtFromTree(gtNewOperNode(GT_NOP, TYP_VOID, nullptr)));
2196 // If it had been a do-while loop (top == entry), update entry, as well.
2197 BasicBlock* origE = optLoopTable[loopInd].lpEntry;
2198 if (optLoopTable[loopInd].lpTop == origE)
2200 optLoopTable[loopInd].lpEntry = newT;
2202 optLoopTable[loopInd].lpTop = newT;
2203 optLoopTable[loopInd].lpFirst = newT;
2205 newT->bbNatLoopNum = loopInd;
2207 JITDUMP("in optCanonicalizeLoop: made new block BB%02u [%p] the new unique top of loop %d.\n", newT->bbNum,
2208 dspPtr(newT), loopInd);
2210 // Make sure the head block still goes to the entry...
2211 if (h->bbJumpKind == BBJ_NONE && h->bbNext != optLoopTable[loopInd].lpEntry)
2213 h->bbJumpKind = BBJ_ALWAYS;
2214 h->bbJumpDest = optLoopTable[loopInd].lpEntry;
2216 else if (h->bbJumpKind == BBJ_COND && h->bbNext == newT && newT != optLoopTable[loopInd].lpEntry)
2218 BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, h, /*extendRegion*/ true);
2219 optLoopTable[loopInd].lpHead = h2;
2220 h2->bbJumpDest = optLoopTable[loopInd].lpEntry;
2221 h2->bbTreeList = nullptr;
2222 fgInsertStmtAtEnd(h2, fgNewStmtFromTree(gtNewOperNode(GT_NOP, TYP_VOID, nullptr)));
2225 // If any loops nested in "loopInd" have the same head and entry as "loopInd",
2226 // it must be the case that they were do-while's (since "h" fell through to the entry).
2227 // The new node "newT" becomes the head of such loops.
2228 for (unsigned char childLoop = optLoopTable[loopInd].lpChild; childLoop != BasicBlock::NOT_IN_LOOP;
2229 childLoop = optLoopTable[childLoop].lpSibling)
2231 if (optLoopTable[childLoop].lpEntry == origE && optLoopTable[childLoop].lpHead == h &&
2232 newT->bbJumpKind == BBJ_NONE && newT->bbNext == origE)
2234 optUpdateLoopHead(childLoop, h, newT);
2240 bool Compiler::optLoopContains(unsigned l1, unsigned l2)
2242 assert(l1 != BasicBlock::NOT_IN_LOOP);
2247 else if (l2 == BasicBlock::NOT_IN_LOOP)
2253 return optLoopContains(l1, optLoopTable[l2].lpParent);
2257 void Compiler::optUpdateLoopHead(unsigned loopInd, BasicBlock* from, BasicBlock* to)
2259 assert(optLoopTable[loopInd].lpHead == from);
2260 optLoopTable[loopInd].lpHead = to;
2261 for (unsigned char childLoop = optLoopTable[loopInd].lpChild; childLoop != BasicBlock::NOT_IN_LOOP;
2262 childLoop = optLoopTable[childLoop].lpSibling)
2264 if (optLoopTable[childLoop].lpHead == from)
2266 optUpdateLoopHead(childLoop, from, to);
2271 /*****************************************************************************
2272 * Returns true if "i += const" would overflow the small integer type, i.e.
2272 * if the iterator value at loop exit exceeds the type's maximum.
2275 bool jitIterSmallOverflow(int iterAtExit, var_types incrType)
2282 type_MAX = SCHAR_MAX;
2285 type_MAX = UCHAR_MAX;
2288 type_MAX = SHRT_MAX;
2291 type_MAX = USHRT_MAX;
2294 case TYP_UINT: // Detected by checking for 32bit ....
2296 return false; // ... overflow same as done for TYP_INT
2302 if (iterAtExit > type_MAX)
2312 /*****************************************************************************
2313 * If the "i -= const" will cause an underflow exception for the small types
2316 bool jitIterSmallUnderflow(int iterAtExit, var_types decrType)
2323 type_MIN = SCHAR_MIN;
2326 type_MIN = SHRT_MIN;
2335 case TYP_UINT: // Detected by checking for 32bit ....
2337 return false; // ... underflow same as done for TYP_INT
2343 if (iterAtExit < type_MIN)
2353 /*****************************************************************************
2355 * Helper for unroll loops - Computes the number of repetitions
2356 * in a constant loop. If it cannot prove the number is constant returns false
2359 bool Compiler::optComputeLoopRep(int constInit,
2362 genTreeOps iterOper,
2363 var_types iterOperType,
2364 genTreeOps testOper,
2367 unsigned* iterCount)
2369 noway_assert(genActualType(iterOperType) == TYP_INT);
2372 __int64 constLimitX;
2377 // Using this, we can just do a signed comparison with other 32 bit values.
2380 constLimitX = (unsigned int)constLimit;
2384 constLimitX = (signed int)constLimit;
2387 switch (iterOperType)
2389 // For small types, the iteration operator will narrow these values if big
2391 #define INIT_ITER_BY_TYPE(type) \
2392 constInitX = (type)constInit; \
2393 iterInc = (type)iterInc;
2396 INIT_ITER_BY_TYPE(signed char);
2399 INIT_ITER_BY_TYPE(unsigned char);
2402 INIT_ITER_BY_TYPE(signed short);
2405 INIT_ITER_BY_TYPE(unsigned short);
2408 // For the big types, 32 bit arithmetic is performed
2414 constInitX = (unsigned int)constInit;
2418 constInitX = (signed int)constInit;
2423 noway_assert(!"Bad type");
2427 /* If iterInc is zero we have an infinite loop */
2433 /* Set iterSign to +1 for positive iterInc and -1 for negative iterInc */
2434 iterSign = (iterInc > 0) ? +1 : -1;
2436 /* Initialize loopCount to zero */
2439 // If dupCond is true then the loop head contains a test which skips
2440 // this loop, if the constInit does not pass the loop test
2441 // Such a loop can execute zero times.
2442 // If dupCond is false then we have a true do-while loop which we
2443 // always execute the loop once before performing the loop test
2447 constInitX += iterInc;
2450 // bail if count is based on wrap-around math
2453 if (constLimitX < constInitX)
2458 else if (constLimitX > constInitX)
2463 /* Compute the number of repetitions */
2467 __int64 iterAtExitX;
2470 /* something like "for (i=init; i == lim; i++)" doesn't make any sense */
2474 /* "for (i=init; i != lim; i+=const)" - this is tricky since it may
2475 * have a constant number of iterations or loop forever -
2476 * we have to compute (lim-init) mod iterInc to see if it is zero.
2477 * If mod iterInc is not zero then the limit test will miss an a wrap will occur
2478 * which is probably not what the end user wanted, but it is legal.
2483 /* Stepping by one, i.e. Mod with 1 is always zero */
2486 if (((constLimitX - constInitX) % iterInc) != 0)
2494 noway_assert(iterInc < 0);
2495 /* Stepping by -1, i.e. Mod with 1 is always zero */
2498 if (((constInitX - constLimitX) % (-iterInc)) != 0)
2514 if (constInitX != constLimitX)
2516 loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1;
2519 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2523 iterAtExitX = (unsigned)iterAtExitX;
2526 // Check if iteration incr will cause overflow for small types
2527 if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
2532 // iterator with 32bit overflow. Bad for TYP_(U)INT
2533 if (iterAtExitX < constLimitX)
2538 *iterCount = loopCount;
2554 noway_assert(!"Unknown operator for loop iterator");
2568 if (constInitX < constLimitX)
2570 loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1;
2573 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2577 iterAtExitX = (unsigned)iterAtExitX;
2580 // Check if iteration incr will cause overflow for small types
2581 if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
2586 // iterator with 32bit overflow. Bad for TYP_(U)INT
2587 if (iterAtExitX < constLimitX)
2592 *iterCount = loopCount;
2608 noway_assert(!"Unknown operator for loop iterator");
2622 if (constInitX <= constLimitX)
2624 loopCount += (unsigned)((constLimitX - constInitX) / iterInc) + 1;
2627 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2631 iterAtExitX = (unsigned)iterAtExitX;
2634 // Check if iteration incr will cause overflow for small types
2635 if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
2640 // iterator with 32bit overflow. Bad for TYP_(U)INT
2641 if (iterAtExitX <= constLimitX)
2646 *iterCount = loopCount;
2662 noway_assert(!"Unknown operator for loop iterator");
2676 if (constInitX > constLimitX)
2678 loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1;
2681 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2685 iterAtExitX = (unsigned)iterAtExitX;
2688 // Check if small types will underflow
2689 if (jitIterSmallUnderflow((int)iterAtExitX, iterOperType))
2694 // iterator with 32bit underflow. Bad for TYP_INT and unsigneds
2695 if (iterAtExitX > constLimitX)
2700 *iterCount = loopCount;
2716 noway_assert(!"Unknown operator for loop iterator");
2730 if (constInitX >= constLimitX)
2732 loopCount += (unsigned)((constLimitX - constInitX) / iterInc) + 1;
2735 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2739 iterAtExitX = (unsigned)iterAtExitX;
2742 // Check if small types will underflow
2743 if (jitIterSmallUnderflow((int)iterAtExitX, iterOperType))
2748 // iterator with 32bit underflow. Bad for TYP_INT and unsigneds
2749 if (iterAtExitX >= constLimitX)
2754 *iterCount = loopCount;
2770 noway_assert(!"Unknown operator for loop iterator");
2775 noway_assert(!"Unknown operator for loop condition");
2781 /*****************************************************************************
2783 * Look for loop unrolling candidates and unroll them
2787 #pragma warning(push)
2788 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
2790 void Compiler::optUnrollLoops()
2792 if (compCodeOpt() == SMALL_CODE)
2797 if (optLoopCount == 0)
2803 if (JitConfig.JitNoUnroll())
2809 if (optCanCloneLoops())
2817 printf("*************** In optUnrollLoops()\n");
2820 /* Look for loop unrolling candidates */
2822 /* Double loop so that after unrolling an inner loop we set change to true
2823 * and we then go back over all of the loop candidates and try to unroll
2824 * the next outer loop, until we don't unroll any loops,
2825 * then change will be false and we are done.
2829 bool change = false;
2831 for (unsigned lnum = 0; lnum < optLoopCount; lnum++)
2845 int lbeg; // initial value for iterator
2846 int llim; // limit value for iterator
2847 unsigned lvar; // iterator lclVar #
2848 int iterInc; // value to increment the iterator
2849 genTreeOps iterOper; // type of iterator increment (i.e. ASG_ADD, ASG_SUB, etc.)
2850 var_types iterOperType; // type result of the oper (for overflow instrs)
2851 genTreeOps testOper; // type of loop test (i.e. GT_LE, GT_GE, etc.)
2852 bool unsTest; // Is the comparison u/int
2854 unsigned totalIter; // total number of iterations in the constant loop
2855 unsigned loopCostSz; // Cost is size of one iteration
2856 unsigned loopFlags; // actual lpFlags
2857 unsigned requiredFlags; // required lpFlags
2859 GenTree* loopList; // new stmt list of the unrolled loop
2862 static const int ITER_LIMIT[COUNT_OPT_CODE + 1] = {
2869 noway_assert(ITER_LIMIT[SMALL_CODE] == 0);
2870 noway_assert(ITER_LIMIT[COUNT_OPT_CODE] == 0);
2872 unsigned iterLimit = (unsigned)ITER_LIMIT[compCodeOpt()];
2875 if (compStressCompile(STRESS_UNROLL_LOOPS, 50))
2881 static const int UNROLL_LIMIT_SZ[COUNT_OPT_CODE + 1] = {
2888 noway_assert(UNROLL_LIMIT_SZ[SMALL_CODE] == 0);
2889 noway_assert(UNROLL_LIMIT_SZ[COUNT_OPT_CODE] == 0);
2891 int unrollLimitSz = (unsigned)UNROLL_LIMIT_SZ[compCodeOpt()];
2894 if (compStressCompile(STRESS_UNROLL_LOOPS, 50))
2896 unrollLimitSz *= 10;
2900 loopFlags = optLoopTable[lnum].lpFlags;
2901 requiredFlags = LPFLG_DO_WHILE | LPFLG_ONE_EXIT | LPFLG_CONST;
2903 /* Ignore the loop if we don't have a do-while with a single exit
2904 that has a constant number of iterations */
2906 if ((loopFlags & requiredFlags) != requiredFlags)
2911 /* ignore if removed or marked as not unrollable */
2913 if (optLoopTable[lnum].lpFlags & (LPFLG_DONT_UNROLL | LPFLG_REMOVED))
2918 head = optLoopTable[lnum].lpHead;
2920 bottom = optLoopTable[lnum].lpBottom;
2921 noway_assert(bottom);
2923 /* The single exit must be at the bottom of the loop */
2924 noway_assert(optLoopTable[lnum].lpExit);
2925 if (optLoopTable[lnum].lpExit != bottom)
2930 /* Unrolling loops with jumps in them is not worth the headache
2931 * Later we might consider unrolling loops after un-switching */
2936 block = block->bbNext;
2937 noway_assert(block);
2939 if (block->bbJumpKind != BBJ_NONE)
2941 if (block != bottom)
2946 } while (block != bottom);
2948 /* Get the loop data:
2952 - iterator increment
2953 - increment operation type (i.e. ASG_ADD, ASG_SUB, etc...)
2954 - loop test type (i.e. GT_GE, GT_LT, etc...)
2957 lbeg = optLoopTable[lnum].lpConstInit;
2958 llim = optLoopTable[lnum].lpConstLimit();
2959 testOper = optLoopTable[lnum].lpTestOper();
2961 lvar = optLoopTable[lnum].lpIterVar();
2962 iterInc = optLoopTable[lnum].lpIterConst();
2963 iterOper = optLoopTable[lnum].lpIterOper();
2965 iterOperType = optLoopTable[lnum].lpIterOperType();
2966 unsTest = (optLoopTable[lnum].lpTestTree->gtFlags & GTF_UNSIGNED) != 0;
2968 if (lvaTable[lvar].lvAddrExposed)
2969 { // If the loop iteration variable is address-exposed then bail
2972 if (lvaTable[lvar].lvIsStructField)
2973 { // If the loop iteration variable is a promoted field from a struct then
2978 /* Locate the pre-header and initialization and increment/test statements */
2980 phdr = head->bbTreeList;
2982 loop = bottom->bbTreeList;
2985 init = head->lastStmt();
2986 noway_assert(init && (init->gtNext == nullptr));
2987 test = bottom->lastStmt();
2988 noway_assert(test && (test->gtNext == nullptr));
2989 incr = test->gtPrev;
2992 if (init->gtFlags & GTF_STMT_CMPADD)
2994 /* Must be a duplicated loop condition */
2995 noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
2998 init = init->gtPrev;
3006 /* Find the number of iterations - the function returns false if not a constant number */
3008 if (!optComputeLoopRep(lbeg, llim, iterInc, iterOper, iterOperType, testOper, unsTest, dupCond, &totalIter))
3013 /* Forget it if there are too many repetitions or not a constant loop */
3015 if (totalIter > iterLimit)
3020 noway_assert(init->gtOper == GT_STMT);
3021 init = init->gtStmt.gtStmtExpr;
3022 noway_assert(test->gtOper == GT_STMT);
3023 test = test->gtStmt.gtStmtExpr;
3024 noway_assert(incr->gtOper == GT_STMT);
3025 incr = incr->gtStmt.gtStmtExpr;
3027 // Don't unroll loops we don't understand.
3028 if (incr->gtOper == GT_ASG)
3033 /* Make sure everything looks ok */
3034 if ((init->gtOper != GT_ASG) || (init->gtOp.gtOp1->gtOper != GT_LCL_VAR) ||
3035 (init->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) || (init->gtOp.gtOp2->gtOper != GT_CNS_INT) ||
3036 (init->gtOp.gtOp2->gtIntCon.gtIconVal != lbeg) ||
3038 !((incr->gtOper == GT_ASG_ADD) || (incr->gtOper == GT_ASG_SUB)) ||
3039 (incr->gtOp.gtOp1->gtOper != GT_LCL_VAR) || (incr->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) ||
3040 (incr->gtOp.gtOp2->gtOper != GT_CNS_INT) || (incr->gtOp.gtOp2->gtIntCon.gtIconVal != iterInc) ||
3042 (test->gtOper != GT_JTRUE))
3044 noway_assert(!"Bad precondition in Compiler::optUnrollLoops()");
3048 /* heuristic - Estimated cost in code size of the unrolled loop */
3056 block = block->bbNext;
3058 /* Visit all the statements in the block */
3060 for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
3062 /* Get the expression and stop if end reached */
3064 GenTreePtr expr = stmt->gtStmtExpr;
3070 /* Calculate gtCostSz */
3071 gtSetStmtInfo(stmt);
3073 /* Update loopCostSz */
3074 loopCostSz += stmt->gtCostSz;
3076 } while (block != bottom);
3078 /* Compute the estimated increase in code size for the unrolled loop */
3080 unsigned int fixedLoopCostSz;
3081 fixedLoopCostSz = 8;
3084 unrollCostSz = (loopCostSz * totalIter) - (loopCostSz + fixedLoopCostSz);
3086 /* Don't unroll if too much code duplication would result. */
3088 if (unrollCostSz > unrollLimitSz)
3090 /* prevent this loop from being revisited */
3091 optLoopTable[lnum].lpFlags |= LPFLG_DONT_UNROLL;
3095 /* Looks like a good idea to unroll this loop, let's do it! */
3096 CLANG_FORMAT_COMMENT_ANCHOR;
3101 printf("\nUnrolling loop BB%02u", head->bbNext->bbNum);
3102 if (head->bbNext->bbNum != bottom->bbNum)
3104 printf("..BB%02u", bottom->bbNum);
3106 printf(" over V%02u from %u to %u", lvar, lbeg, llim);
3107 printf(" unrollCostSz = %d\n", unrollCostSz);
3112 /* Create the unrolled loop statement list */
3114 loopList = loopLast = nullptr;
3116 for (lval = lbeg; totalIter; totalIter--)
3125 block = block->bbNext;
3126 noway_assert(block);
3128 /* Visit all the statements in the block */
3130 for (stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
3132 /* Stop if we've reached the end of the loop */
3134 if (stmt->gtStmtExpr == incr)
3139 /* Clone/substitute the expression */
3141 expr = gtCloneExpr(stmt, 0, lvar, lval);
3143 // cloneExpr doesn't handle everything
3147 optLoopTable[lnum].lpFlags |= LPFLG_DONT_UNROLL;
3151 /* Append the expression to our list */
3155 loopLast->gtNext = expr;
3162 expr->gtPrev = loopLast;
3165 } while (block != bottom);
3167 /* update the new value for the unrolled iterator */
3181 noway_assert(!"Unrolling not implemented for this loop iterator");
3185 noway_assert(!"Unknown operator for constant loop iterator");
3190 /* Finish the linked list */
3194 loopList->gtPrev = loopLast;
3195 loopLast->gtNext = nullptr;
3198 /* Replace the body with the unrolled one */
3204 block = block->bbNext;
3205 noway_assert(block);
3206 block->bbTreeList = nullptr;
3207 block->bbJumpKind = BBJ_NONE;
3208 block->bbFlags &= ~BBF_NEEDS_GCPOLL;
3209 } while (block != bottom);
3211 bottom->bbJumpKind = BBJ_NONE;
3212 bottom->bbTreeList = loopList;
3213 bottom->bbFlags &= ~BBF_NEEDS_GCPOLL;
3214 bottom->modifyBBWeight(bottom->bbWeight / BB_LOOP_WEIGHT);
3218 fgMorphStmts(bottom, &dummy, &dummy, &dummy);
3220 /* Update bbRefs and bbPreds */
3221 /* Here head->bbNext is bottom !!! - Replace it */
3223 fgRemoveRefPred(head->bbNext, bottom);
3225 /* Now change the initialization statement in the HEAD to "lvar = lval;"
3226 * (the last value of the iterator in the loop)
3227 * and drop the jump condition since the unrolled loop will always execute */
3229 init->gtOp.gtOp2->gtIntCon.gtIconVal = lval;
3231 /* if the HEAD is a BBJ_COND drop the condition (and make HEAD a BBJ_NONE block) */
3233 if (head->bbJumpKind == BBJ_COND)
3235 phdr = head->bbTreeList;
3237 test = phdr->gtPrev;
3239 noway_assert(test && (test->gtNext == nullptr));
3240 noway_assert(test->gtOper == GT_STMT);
3241 noway_assert(test->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
3243 init = test->gtPrev;
3244 noway_assert(init && (init->gtNext == test));
3245 noway_assert(init->gtOper == GT_STMT);
3247 init->gtNext = nullptr;
3248 phdr->gtPrev = init;
3249 head->bbJumpKind = BBJ_NONE;
3250 head->bbFlags &= ~BBF_NEEDS_GCPOLL;
3252 /* Update bbRefs and bbPreds */
3254 fgRemoveRefPred(head->bbJumpDest, head);
3258 /* the loop must execute */
3259 noway_assert(head->bbJumpKind == BBJ_NONE);
3265 printf("Whole unrolled loop:\n");
3267 GenTreePtr s = loopList;
3271 noway_assert(s->gtOper == GT_STMT);
3282 /* Remember that something has changed */
3286 /* Make sure to update loop table */
3288 /* Use the LPFLG_REMOVED flag and update the bbLoopMask accordingly
3289 * (also make head and bottom NULL - to hit an assert or GPF) */
3291 optLoopTable[lnum].lpFlags |= LPFLG_REMOVED;
3292 optLoopTable[lnum].lpHead = optLoopTable[lnum].lpBottom = nullptr;
3304 fgDebugCheckBBlist();
3308 #pragma warning(pop)
3311 /*****************************************************************************
3313 * Return non-zero if there is a code path from 'topBB' to 'botBB' that will
3314 * not execute a method call.
3317 bool Compiler::optReachWithoutCall(BasicBlock* topBB, BasicBlock* botBB)
3319 // TODO-Cleanup: Currently BBF_GC_SAFE_POINT is not set for helper calls,
3320 // as some helper calls are neither interruptible nor hijackable.
3321 // When we can determine this, then we can set BBF_GC_SAFE_POINT for
3322 // those helpers too.
3324 noway_assert(topBB->bbNum <= botBB->bbNum);
3326 // We can always check topBB and botBB for any gc safe points and early out
3328 if ((topBB->bbFlags | botBB->bbFlags) & BBF_GC_SAFE_POINT)
3333 // Otherwise we will need to rely upon the dominator sets
3335 if (!fgDomsComputed)
3337 // return a conservative answer of true when we don't have the dominator sets
3341 BasicBlock* curBB = topBB;
// Walk the lexical block range [topBB .. botBB], looking for a GC safe point
// that is guaranteed to execute on every path around the loop back-edge.
3344 noway_assert(curBB);
3346 // If we added a loop pre-header block then we will
3347 // have a bbNum greater than fgLastBB, and we won't have
3348 // any dominator information about this block, so skip it.
3350 if (curBB->bbNum <= fgLastBB->bbNum)
3352 noway_assert(curBB->bbNum <= botBB->bbNum);
3354 // Does this block contain a gc safe point?
3356 if (curBB->bbFlags & BBF_GC_SAFE_POINT)
3358 // Will this block always execute on the way to botBB ?
3360 // Since we are checking every block in [topBB .. botBB] and we are using
3361 // a lexical definition of a loop.
3362 // (all that we know is that botBB is a back-edge to topBB)
3363 // Thus while walking blocks in this range we may encounter some blocks
3364 // that are not really part of the loop, and so we need to perform
3365 // some additional checks:
3367 // We will check that the current 'curBB' is reachable from 'topBB'
3368 // and that it dominates the block containing the back-edge 'botBB'
3369 // When both of these are true then we know that the gcsafe point in 'curBB'
3370 // will be encountered in the loop and we can return false
3372 if (fgDominate(curBB, botBB) && fgReachable(topBB, curBB))
3379 // If we've reached the destination block, then we're done
3388 curBB = curBB->bbNext;
3391 // If we didn't find any blocks that contained a gc safe point and
3392 // also met the fgDominate and fgReachable criteria then we must return true
3397 /*****************************************************************************
3399 * Find the loop termination test at the bottom of the loop
3402 static GenTreePtr optFindLoopTermTest(BasicBlock* bottom)
3404 GenTreePtr testt = bottom->bbTreeList;
// The block must start with a statement node.
3406 assert(testt && testt->gtOper == GT_STMT);
// The first statement's gtPrev back-link yields the last statement of the
// block (i.e. the loop's termination test) — presumably the gtPrev chain is
// circular here; the debug walk below confirms it.
3408 GenTreePtr result = testt->gtPrev;
// Debug-only verification: walk the gtNext chain to the end and check that
// the last statement found matches what gtPrev gave us.
3411 while (testt->gtNext)
3413 testt = testt->gtNext;
3416 assert(testt == result);
3422 /*****************************************************************************
3423 * Optimize "jmp C; do{} C:while(cond);" loops to "if (cond){ do{}while(cond}; }"
3426 void Compiler::fgOptWhileLoop(BasicBlock* block)
// Duplicates the loop's condition test into 'block' (loop inversion) so the
// loop body becomes a do-while; this enables later loop hoisting. Not run
// for MinOpts, debuggable code, or SMALL_CODE (size would regress).
3428 noway_assert(!opts.MinOpts() && !opts.compDbgCode);
3429 noway_assert(compCodeOpt() != SMALL_CODE);
3432 Optimize while loops into do { } while loop
3433 Our loop hoisting logic requires do { } while loops.
3434 Specifically, we're looking for the following case:
3445 If we find this, and the condition is simple enough, we change
3446 the loop to the following:
3451 // else fall-through
3462 /* Does the BB end with an unconditional jump? */
3464 if (block->bbJumpKind != BBJ_ALWAYS || (block->bbFlags & BBF_KEEP_BBJ_ALWAYS))
3465 { // It can't be one of the ones we use for our exception magic
3469 // It has to be a forward jump
3470 // TODO-CQ: Check if we can also optimize the backwards jump as well.
3472 if (fgIsForwardBranch(block) == false)
3477 // Get hold of the jump target
3478 BasicBlock* bTest = block->bbJumpDest;
3480 // Does the block consist of 'jtrue(cond) block' ?
3481 if (bTest->bbJumpKind != BBJ_COND)
3486 // bTest must be a backwards jump to block->bbNext
3487 if (bTest->bbJumpDest != block->bbNext)
3492 // Since test is a BBJ_COND it will have a bbNext
3493 noway_assert(bTest->bbNext);
3495 // 'block' must be in the same try region as the condition, since we're going to insert
3496 // a duplicated condition in 'block', and the condition might include exception throwing code.
3497 if (!BasicBlock::sameTryRegion(block, bTest))
3502 // We're going to change 'block' to branch to bTest->bbNext, so that also better be in the
3503 // same try region (or no try region) to avoid generating illegal flow.
3504 BasicBlock* bTestNext = bTest->bbNext;
3505 if (bTestNext->hasTryIndex() && !BasicBlock::sameTryRegion(block, bTestNext))
3510 GenTreePtr condStmt = optFindLoopTermTest(bTest);
3512 // bTest must contain only a jtrue with no other stmts; we will only clone
3513 // the conditional, so any other statements would not get cloned
3514 // TODO-CQ: consider cloning the whole bTest block and inserting it after block.
3516 if (bTest->bbTreeList != condStmt)
3521 /* Get to the condition node from the statement tree */
3523 noway_assert(condStmt->gtOper == GT_STMT);
3525 GenTreePtr condTree = condStmt->gtStmt.gtStmtExpr;
3526 noway_assert(condTree->gtOper == GT_JTRUE);
3528 condTree = condTree->gtOp.gtOp1;
3530 // The condTree has to be a RelOp comparison
3531 // TODO-CQ: Check if we can also optimize the backwards jump as well.
3533 if (condTree->OperIsCompare() == false)
3538 /* We call gtPrepareCost to measure the cost of duplicating this tree */
3540 gtPrepareCost(condTree);
3541 unsigned estDupCostSz = condTree->gtCostSz;
3543 double loopIterations = (double)BB_LOOP_WEIGHT;
3545 bool allProfileWeightsAreValid = false;
3546 BasicBlock::weight_t weightBlock = block->bbWeight;
3547 BasicBlock::weight_t weightTest = bTest->bbWeight;
3548 BasicBlock::weight_t weightNext = block->bbNext->bbWeight;
3550 // If we have profile data then we calculate the number of times
3551 // the loop will iterate into loopIterations
3552 if (fgIsUsingProfileWeights())
3554 // Only rely upon the profile weight when all three of these blocks
3555 // have good profile weights
3556 if ((block->bbFlags & BBF_PROF_WEIGHT) && (bTest->bbFlags & BBF_PROF_WEIGHT) &&
3557 (block->bbNext->bbFlags & BBF_PROF_WEIGHT))
3559 allProfileWeightsAreValid = true;
3561 // If this while loop never iterates then don't bother transforming
3562 if (weightNext == 0)
3567 // with (weightNext > 0) we should also have (weightTest >= weightBlock)
3568 // if the profile weights are all valid.
3570 // weightNext is the number of times this loop iterates
3571 // weightBlock is the number of times that we enter the while loop
3572 // loopIterations is the average number of times that this loop iterates
3574 if (weightTest >= weightBlock)
3576 loopIterations = (double)block->bbNext->bbWeight / (double)block->bbWeight;
// Heuristic budget for the size of the duplicated condition, in gtCostSz units.
3581 unsigned maxDupCostSz = 32;
3583 // optFastCodeOrBlendedLoop(bTest->bbWeight) does not work here as we have not
3584 // set loop weights yet
3585 if ((compCodeOpt() == FAST_CODE) || compStressCompile(STRESS_DO_WHILE_LOOPS, 30))
3590 // If this loop iterates a lot then raise the maxDupCost
3591 if (loopIterations >= 12.0)
3595 if (loopIterations >= 96.0)
3600 // If the loop condition has a shared static helper, we really want this loop converted
3601 // as not converting the loop will disable loop hoisting, meaning the shared helper will
3602 // be executed on every loop iteration.
3603 int countOfHelpers = 0;
3604 fgWalkTreePre(&condTree, CountSharedStaticHelper, &countOfHelpers);
3606 if (countOfHelpers > 0 && compCodeOpt() != SMALL_CODE)
3608 maxDupCostSz += 24 * min(countOfHelpers, (int)(loopIterations + 1.5));
3611 // If the compare has too high cost then we don't want to dup
3613 bool costIsTooHigh = (estDupCostSz > maxDupCostSz);
3618 printf("\nDuplication of loop condition [%06u] is %s, because the cost of duplication (%i) is %s than %i,"
3619 "\n loopIterations = %7.3f, countOfHelpers = %d, validProfileWeights = %s\n",
3620 condTree->gtTreeID, costIsTooHigh ? "not done" : "performed", estDupCostSz,
3621 costIsTooHigh ? "greater" : "less or equal", maxDupCostSz, loopIterations, countOfHelpers,
3622 allProfileWeightsAreValid ? "true" : "false");
3631 /* Looks good - duplicate the condition test */
// GTF_RELOP_ZTT marks this relop as a duplicated loop-test compare; set it on
// the original before cloning so both copies carry the flag.
3633 condTree->gtFlags |= GTF_RELOP_ZTT;
3635 condTree = gtCloneExpr(condTree);
// The duplicated test guards loop *entry*, so its sense is inverted.
3636 gtReverseCond(condTree);
3638 // Make sure clone expr copied the flag
3639 assert(condTree->gtFlags & GTF_RELOP_ZTT);
3641 condTree = gtNewOperNode(GT_JTRUE, TYP_VOID, condTree);
3643 /* Create a statement entry out of the condition and
3644 append the condition test at the end of 'block' */
3646 GenTreePtr copyOfCondStmt = fgInsertStmtAtEnd(block, condTree);
// GTF_STMT_CMPADD marks the statement as a compiler-added duplicated condition.
3648 copyOfCondStmt->gtFlags |= GTF_STMT_CMPADD;
3650 if (opts.compDbgInfo)
3652 copyOfCondStmt->gtStmt.gtStmtILoffsx = condStmt->gtStmt.gtStmtILoffsx;
3655 // Flag the block that received the copy as potentially having an array/vtable
3656 // reference if the block copied from did; this is a conservative guess.
3657 if (auto copyFlags = bTest->bbFlags & (BBF_HAS_VTABREF | BBF_HAS_IDX_LEN))
3659 block->bbFlags |= copyFlags;
3662 // If we have profile data for all blocks and we know that we are cloning the
3663 // bTest block into block and thus changing the control flow from block so
3664 // that it no longer goes directly to bTest anymore, we have to adjust the
3665 // weight of bTest by subtracting out the weight of block.
3667 if (allProfileWeightsAreValid)
3670 // Some additional sanity checks before adjusting the weight of bTest
3672 if ((weightNext > 0) && (weightTest >= weightBlock) && (weightTest != BB_MAX_WEIGHT))
3674 // Get the two edges that flow out of bTest
3675 flowList* edgeToNext = fgGetPredForBlock(bTest->bbNext, bTest);
3676 flowList* edgeToJump = fgGetPredForBlock(bTest->bbJumpDest, bTest);
3678 // Calculate the new weight for block bTest
3680 BasicBlock::weight_t newWeightTest =
3681 (weightTest > weightBlock) ? (weightTest - weightBlock) : BB_ZERO_WEIGHT;
3682 bTest->bbWeight = newWeightTest;
3684 if (newWeightTest == BB_ZERO_WEIGHT)
3686 bTest->bbFlags |= BBF_RUN_RARELY;
3687 // All out edge weights are set to zero
3688 edgeToNext->flEdgeWeightMin = BB_ZERO_WEIGHT;
3689 edgeToNext->flEdgeWeightMax = BB_ZERO_WEIGHT;
3690 edgeToJump->flEdgeWeightMin = BB_ZERO_WEIGHT;
3691 edgeToJump->flEdgeWeightMax = BB_ZERO_WEIGHT;
3695 // Update our edge weights
3696 edgeToNext->flEdgeWeightMin = BB_ZERO_WEIGHT;
3697 edgeToNext->flEdgeWeightMax = min(edgeToNext->flEdgeWeightMax, newWeightTest);
3698 edgeToJump->flEdgeWeightMin = BB_ZERO_WEIGHT;
3699 edgeToJump->flEdgeWeightMax = min(edgeToJump->flEdgeWeightMax, newWeightTest);
3704 /* Change the block to end with a conditional jump */
3706 block->bbJumpKind = BBJ_COND;
3707 block->bbJumpDest = bTest->bbNext;
3709 /* Mark the jump dest block as being a jump target */
3710 block->bbJumpDest->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
3712 /* Update bbRefs and bbPreds for 'block->bbNext' 'bTest' and 'bTest->bbNext' */
3714 fgAddRefPred(block->bbNext, block);
3716 fgRemoveRefPred(bTest, block);
3717 fgAddRefPred(bTest->bbNext, block);
3722 printf("\nDuplicating loop condition in BB%02u for loop (BB%02u - BB%02u)", block->bbNum, block->bbNext->bbNum,
3724 printf("\nEstimated code size expansion is %d\n ", estDupCostSz);
3726 gtDispTree(copyOfCondStmt);
3732 /*****************************************************************************
3734 * Optimize the BasicBlock layout of the method
3737 void Compiler::optOptimizeLayout()
// Runs fgOptWhileLoop over every block (loop inversion), then lets
// fgUpdateFlowGraph reorder/cleanup the flowgraph. Skipped under MinOpts
// and debuggable codegen.
3739 noway_assert(!opts.MinOpts() && !opts.compDbgCode);
3744 printf("*************** In optOptimizeLayout()\n");
3748 /* Check that the flowgraph data (bbNum, bbRefs, bbPreds) is up-to-date */
3749 fgDebugCheckBBlist();
3752 noway_assert(fgModified == false);
3754 for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
3756 /* Make sure the appropriate fields are initialized */
3758 if (block->bbWeight == BB_ZERO_WEIGHT)
3760 /* Zero weighted block can't have a LOOP_HEAD flag */
3761 noway_assert(block->isLoopHead() == false);
3765 assert(block->bbLoopNum == 0);
3767 if (compCodeOpt() != SMALL_CODE)
3769 /* Optimize "while(cond){}" loops to "cond; do{}while(cond);" */
3771 fgOptWhileLoop(block);
3777 // Recompute the edge weight if we have modified the flow graph in fgOptWhileLoop
3778 fgComputeEdgeWeights();
3781 fgUpdateFlowGraph(true);
3783 fgUpdateFlowGraph();
3786 /*****************************************************************************
3788 * Perform loop inversion, find and classify natural loops
3791 void Compiler::optOptimizeLoops()
3793 noway_assert(!opts.MinOpts() && !opts.compDbgCode);
3798 printf("*************** In optOptimizeLoops()\n");
3802 optSetBlockWeights();
3804 /* Were there any loops in the flow graph? */
3808 /* now that we have dominator information we can find loops */
3810 optFindNaturalLoops();
// Number of loop headers marked below; also used as the bbLoopNum id.
3812 unsigned loopNum = 0;
3814 /* Iterate over the flow graph, marking all loops */
3816 /* We will use the following terminology:
3817 * top - the first basic block in the loop (i.e. the head of the backward edge)
3818 * bottom - the last block in the loop (i.e. the block from which we jump to the top)
3819 * lastBottom - used when we have multiple back-edges to the same top
3826 for (top = fgFirstBB; top; top = top->bbNext)
3828 BasicBlock* foundBottom = nullptr;
// Scan each predecessor of 'top' looking for back-edges (bottom -> top).
3830 for (pred = top->bbPreds; pred; pred = pred->flNext)
3832 /* Is this a loop candidate? - We look for "back edges" */
3834 BasicBlock* bottom = pred->flBlock;
3836 /* is this a backward edge? (from BOTTOM to TOP) */
3838 if (top->bbNum > bottom->bbNum)
3843 /* 'top' also must have the BBF_LOOP_HEAD flag set */
3845 if (top->isLoopHead() == false)
3850 /* We only consider back-edges that are BBJ_COND or BBJ_ALWAYS for loops */
3852 if ((bottom->bbJumpKind != BBJ_COND) && (bottom->bbJumpKind != BBJ_ALWAYS))
3857 /* the top block must be able to reach the bottom block */
3858 if (!fgReachable(top, bottom))
3863 /* Found a new loop, record the longest backedge in foundBottom */
3865 if ((foundBottom == nullptr) || (bottom->bbNum > foundBottom->bbNum))
3867 foundBottom = bottom;
3875 /* Mark the loop header as such */
3876 assert(FitsIn<unsigned char>(loopNum));
3877 top->bbLoopNum = (unsigned char)loopNum;
3880 /* Mark all blocks between 'top' and 'bottom' */
3882 optMarkLoopBlocks(top, foundBottom, false);
3885 // We track at most 255 loops
3889 totalUnnatLoopOverflows++;
3896 totalUnnatLoopCount += loopNum;
3904 printf("\nFound a total of %d loops.", loopNum);
3905 printf("\nAfter loop weight marking:\n");
3906 fgDispBasicBlocks();
3911 optLoopsMarked = true;
3915 //------------------------------------------------------------------------
3916 // optDeriveLoopCloningConditions: Derive loop cloning conditions.
3919 // loopNum - the current loop index for which conditions are derived.
3920 // context - data structure where all loop cloning info is kept.
3923 // "false" if conditions cannot be obtained. "true" otherwise.
3924 // The cloning conditions are updated in the "conditions"[loopNum] field
3925 // of the "context" parameter.
3928 // Inspect the loop cloning optimization candidates and populate the conditions necessary
3929 // for each optimization candidate. Checks if the loop stride is "> 0" if the loop
3930 // condition is "less than". If the initializer is "var" init then adds condition
3931 // "var >= 0", and if the loop is var limit then, "var >= 0" and "var <= a.len"
3932 // are added to "context". These conditions are checked in the pre-header block
3933 // and the cloning choice is made.
3936 // Callers should assume AND operation is used i.e., if all conditions are
3937 // true, then take the fast path.
3939 bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext* context)
3941 JITDUMP("------------------------------------------------------------\n");
3942 JITDUMP("Deriving cloning conditions for L%02u\n", loopNum);
3944 LoopDsc* loop = &optLoopTable[loopNum];
3945 ExpandArrayStack<LcOptInfo*>* optInfos = context->GetLoopOptInfo(loopNum);
// Only "i < limit" style loops are handled here.
3947 if (loop->lpTestOper() == GT_LT)
3949 // Stride conditions
3950 if (loop->lpIterConst() <= 0)
3952 JITDUMP("> Stride %d is invalid\n", loop->lpIterConst());
// Initial-value conditions: a constant init must be non-negative; a variable
// init contributes a runtime "init >= 0" condition.
3957 if (loop->lpFlags & LPFLG_CONST_INIT)
3959 // Only allowing const init at this time.
3960 if (loop->lpConstInit < 0)
3962 JITDUMP("> Init %d is invalid\n", loop->lpConstInit);
3966 else if (loop->lpFlags & LPFLG_VAR_INIT)
3969 LC_Condition geZero(GT_GE, LC_Expr(LC_Ident(loop->lpVarInit, LC_Ident::Var)),
3970 LC_Expr(LC_Ident(0, LC_Ident::Const)));
3971 context->EnsureConditions(loopNum)->Push(geZero);
3975 JITDUMP("> Not variable init\n");
// Limit conditions: 'ident' ends up describing the loop limit (constant,
// local var, or an array length), used below against each candidate.
3981 if (loop->lpFlags & LPFLG_CONST_LIMIT)
3983 int limit = loop->lpConstLimit();
3986 JITDUMP("> limit %d is invalid\n", limit);
3989 ident = LC_Ident(limit, LC_Ident::Const);
3991 else if (loop->lpFlags & LPFLG_VAR_LIMIT)
3993 unsigned limitLcl = loop->lpVarLimit();
3994 ident = LC_Ident(limitLcl, LC_Ident::Var);
3996 LC_Condition geZero(GT_GE, LC_Expr(ident), LC_Expr(LC_Ident(0, LC_Ident::Const)));
3998 context->EnsureConditions(loopNum)->Push(geZero);
4000 else if (loop->lpFlags & LPFLG_ARRLEN_LIMIT)
4002 ArrIndex* index = new (getAllocator()) ArrIndex(getAllocator());
4003 if (!loop->lpArrLenLimit(this, index))
4005 JITDUMP("> ArrLen not matching");
4008 ident = LC_Ident(LC_Array(LC_Array::Jagged, index, LC_Array::ArrLen));
4010 // Ensure that this array must be dereference-able, before executing the actual condition.
4011 LC_Array array(LC_Array::Jagged, index, LC_Array::None);
4012 context->EnsureDerefs(loopNum)->Push(array);
4016 JITDUMP("> Undetected limit\n");
// For each optimization candidate, add "limit <= arr.len" conditions so the
// cloned fast path can elide bounds checks.
4020 for (unsigned i = 0; i < optInfos->Size(); ++i)
4022 LcOptInfo* optInfo = optInfos->GetRef(i);
4023 switch (optInfo->GetOptType())
4025 case LcOptInfo::LcJaggedArray:
4028 LcJaggedArrayOptInfo* arrIndexInfo = optInfo->AsLcJaggedArrayOptInfo();
4029 LC_Array arrLen(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::ArrLen);
4030 LC_Ident arrLenIdent = LC_Ident(arrLen);
4032 LC_Condition cond(GT_LE, LC_Expr(ident), LC_Expr(arrLenIdent));
4033 context->EnsureConditions(loopNum)->Push(cond);
4035 // Ensure that this array must be dereference-able, before executing the actual condition.
4036 LC_Array array(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::None);
4037 context->EnsureDerefs(loopNum)->Push(array);
4040 case LcOptInfo::LcMdArray:
4042 // limit <= mdArrLen
4043 LcMdArrayOptInfo* mdArrInfo = optInfo->AsLcMdArrayOptInfo();
4044 LC_Condition cond(GT_LE, LC_Expr(ident),
4045 LC_Expr(LC_Ident(LC_Array(LC_Array::MdArray,
4046 mdArrInfo->GetArrIndexForDim(getAllocator()),
4047 mdArrInfo->dim, LC_Array::None))));
4048 context->EnsureConditions(loopNum)->Push(cond);
4053 JITDUMP("Unknown opt\n");
4057 JITDUMP("Conditions: (");
4058 DBEXEC(verbose, context->PrintConditions(loopNum));
4065 //------------------------------------------------------------------------------------
4066 // optComputeDerefConditions: Derive loop cloning conditions for dereferencing arrays.
4069 // loopNum - the current loop index for which conditions are derived.
4070 // context - data structure where all loop cloning info is kept.
4073 // "false" if conditions cannot be obtained. "true" otherwise.
4074 // The deref conditions are updated in the "derefConditions"[loopNum] field
4075 // of the "context" parameter.
4077 // Definition of Deref Conditions:
4078 // To be able to check for the loop cloning condition that (limitVar <= a.len)
4079 // we should first be able to dereference "a". i.e., "a" is non-null.
4085 // for (k in 0..n) // Inner most loop is being cloned. Cloning needs to check if
4086 // // (n <= a[i][j].len) and other safer conditions to take the fast path
4089 // Now, we want to deref a[i][j] to invoke length operator on it to perform the cloning fast path check.
4090 // This involves deref of (a), (a[i]), (a[i][j]), therefore, the following should first
4091 // be true to do the deref.
4093 // (a != null) && (i < a.len) && (a[i] != null) && (j < a[i].len) && (a[i][j] != null) --> (1)
4095 // Note the short circuiting AND. Implication: these conditions should be performed in separate
4096 // blocks each of which will branch to slow path if the condition evaluates to false.
4098 // Now, imagine a situation where we have
4099 // a[x][y][k] = 20 and a[i][j][k] = 0
4100 // also in the inner most loop where x, y are parameters, then our conditions will have
4104 // in addition to the above conditions (1) to get rid of bounds check on index 'k'
4106 // But these conditions can be checked together with conditions
4107 // (i < a.len) without a need for a separate block. In summary, the conditions will be:
4110 // ((i < a.len) & (x < a.len)) && <-- Note the bitwise AND here.
4111 // (a[i] != null & a[x] != null) && <-- Note the bitwise AND here.
4112 // (j < a[i].len & y < a[x].len) && <-- Note the bitwise AND here.
4113 // (a[i][j] != null & a[x][y] != null) <-- Note the bitwise AND here.
4115 // This naturally yields a tree style pattern, where the nodes of the tree are
4116 // the array and indices respectively.
4132 // Notice that the variables in the same levels can have their conditions combined in the
4133 // same block with a bitwise AND. Whereas, the conditions in consecutive levels will be
4134 // combined with a short-circuiting AND (i.e., different basic blocks).
4137 // Construct a tree of array indices and the array which will generate the optimal
4138 // conditions for loop cloning.
4140 // a[i][j][k], b[i] and a[i][y][k] are the occurrences in the loop. Then, the tree should be:
4155 // In this method, we will construct such a tree by descending depth first into the array
4156 // index operation and forming a tree structure as we encounter the array or the index variables.
4158 // This tree structure will then be used to generate conditions like below:
4159 // (a != null) & (b != null) && // from the first level of the tree.
4161 // (i < a.len) & (i < b.len) && // from the second level of the tree. Levels can be combined.
4162 // (a[i] != null) & (b[i] != null) && // from the second level of the tree.
4164 // (j < a[i].len) & (y < a[i].len) && // from the third level.
4165 // (a[i][j] != null) & (a[i][y] != null) && // from the third level.
4170 bool Compiler::optComputeDerefConditions(unsigned loopNum, LoopCloneContext* context)
4172 ExpandArrayStack<LC_Deref*> nodes(getAllocator());
4175 // Get the dereference-able arrays.
4176 ExpandArrayStack<LC_Array>* deref = context->EnsureDerefs(loopNum);
4178 // For each array in the dereference list, construct a tree,
4179 // where the nodes are array and index variables and an edge 'u-v'
4180 // exists if a node 'v' indexes node 'u' directly as in u[v] or an edge
4181 // 'u-v-w' transitively if u[v][w] occurs.
4182 for (unsigned i = 0; i < deref->Size(); ++i)
4184 LC_Array& array = (*deref)[i];
4186 // First populate the array base variable.
4187 LC_Deref* node = LC_Deref::Find(&nodes, array.arrIndex->arrLcl);
4188 if (node == nullptr)
4190 node = new (getAllocator()) LC_Deref(array, 0 /*level*/);
4194 // For each dimension (level) for the array, populate the tree with the variable
4195 // from that dimension.
4196 unsigned rank = (unsigned)array.GetDimRank();
// NOTE(review): this inner 'i' shadows the outer loop's 'i'; harmless here
// since the outer index is not used inside, but worth renaming.
4197 for (unsigned i = 0; i < rank; ++i)
4199 node->EnsureChildren(getAllocator());
4200 LC_Deref* tmp = node->Find(array.arrIndex->indLcls[i]);
4203 tmp = new (getAllocator()) LC_Deref(array, node->level + 1);
4204 node->children->Push(tmp);
4207 // Descend one level down.
4211 // Keep the maxRank of all array dereferences.
4212 maxRank = max((int)rank, maxRank);
4218 for (unsigned i = 0; i < nodes.Size(); ++i)
4235 // First level will always yield the null-check, since it is made of the array base variables.
4236 // All other levels (dimensions) will yield two conditions ex: (i < a.length && a[i] != null)
4237 // So add 1 after rank * 2.
4238 unsigned condBlocks = (unsigned)maxRank * 2 + 1;
4240 // Heuristic to not create too many blocks;
4246 // Derive conditions into an 'array of level x array of conditions' i.e., levelCond[levels][conds]
4247 ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* levelCond = context->EnsureBlockConditions(loopNum, condBlocks);
4248 for (unsigned i = 0; i < nodes.Size(); ++i)
4250 nodes[i]->DeriveLevelConditions(levelCond);
4253 DBEXEC(verbose, context->PrintBlockConditions(loopNum));
4258 //----------------------------------------------------------------------------
4259 // optDebugLogLoopCloning: Insert a call to jithelper that prints a message.
4262 // block - the block in which the helper call needs to be inserted.
4263 // insertBefore - the tree before which the helper call will be inserted.
4265 void Compiler::optDebugLogLoopCloning(BasicBlock* block, GenTreePtr insertBefore)
// Opt-in only: bail out unless the JitDebugLogLoopCloning config knob is set.
4267 if (JitConfig.JitDebugLogLoopCloning() == 0)
// Build the helper call, wrap it in a statement, splice it in before
// 'insertBefore', and morph it so it is in valid post-morph form.
4271 GenTreePtr logCall = gtNewHelperCallNode(CORINFO_HELP_DEBUG_LOG_LOOP_CLONING, TYP_VOID);
4272 GenTreePtr stmt = fgNewStmtFromTree(logCall);
4273 fgInsertStmtBefore(block, insertBefore, stmt);
4274 fgMorphBlockStmt(block, stmt DEBUGARG("Debug log loop cloning"));
4278 //------------------------------------------------------------------------
4279 // optPerformStaticOptimizations: Perform the optimizations for the optimization
4280 // candidates gathered during the cloning phase.
4283 // loopNum - the current loop index for which the optimizations are performed.
4284 // context - data structure where all loop cloning info is kept.
4285 // dynamicPath - If true, the optimization is performed in the fast path among the
4286 // cloned loops. If false, it means this is the only path (i.e.,
4287 // there is no slow path.)
4290 // Perform the optimizations on the fast path i.e., the path in which the
4291 // optimization candidates were collected at the time of identifying them.
4292 // The candidates store all the information necessary (the tree/stmt/block
4293 // they are from) to perform the optimization.
4296 // The unoptimized path is either already cloned when this method is called or
4297 // there is no unoptimized path (got eliminated statically.) So this method
4298 // performs the optimizations assuming that the path in which the candidates
4299 // were collected is the fast path in which the optimizations will be performed.
4301 void Compiler::optPerformStaticOptimizations(unsigned loopNum, LoopCloneContext* context DEBUGARG(bool dynamicPath))
4303 ExpandArrayStack<LcOptInfo*>* optInfos = context->GetLoopOptInfo(loopNum);
4304 for (unsigned i = 0; i < optInfos->Size(); ++i)
4306 LcOptInfo* optInfo = optInfos->GetRef(i);
4307 switch (optInfo->GetOptType())
4309 case LcOptInfo::LcJaggedArray:
4311 LcJaggedArrayOptInfo* arrIndexInfo = optInfo->AsLcJaggedArrayOptInfo();
// Range-check removal requires compCurBB to be the block containing the check.
4312 compCurBB = arrIndexInfo->arrIndex.useBlock;
4313 optRemoveRangeCheck(arrIndexInfo->arrIndex.bndsChks[arrIndexInfo->dim], arrIndexInfo->stmt, true,
// In debug, optionally log each cloning-driven optimization on the dynamic path.
4315 DBEXEC(dynamicPath, optDebugLogLoopCloning(arrIndexInfo->arrIndex.useBlock, arrIndexInfo->stmt));
4318 case LcOptInfo::LcMdArray:
4319 // TODO-CQ: CLONE: Implement.
4327 //----------------------------------------------------------------------------
4328 // optCanCloneLoops: Use the environment flag to determine whether loop
4329 // cloning is allowed to be performed.
4332 // Returns true unless the COMPlus_JitCloneLoops flag is used to disable it.
4333 // (Loop cloning is enabled by default, including retail builds -- see below.)
4335 bool Compiler::optCanCloneLoops()
4337 // Enabled for retail builds now.
4338 unsigned cloneLoopsFlag = 1;
// The JitCloneLoops config knob can override the default (0 disables cloning).
4340 cloneLoopsFlag = JitConfig.JitCloneLoops();
4342 return (cloneLoopsFlag != 0);
4345 //----------------------------------------------------------------------------
4346 // optIsLoopClonable: Determine whether this loop can be cloned.
4349 // loopInd loop index which needs to be checked if it can be cloned.
4352 // Returns true if the loop can be cloned. If it returns false
4353 // prints a message in debug as why the loop can't be cloned.
4355 bool Compiler::optIsLoopClonable(unsigned loopInd)
4357 // First, for now, make sure the loop doesn't have any embedded exception handling -- I don't want to tackle
4358 // inserting new EH regions in the exception table yet.
4359 BasicBlock* stopAt = optLoopTable[loopInd].lpBottom->bbNext;
4360 unsigned loopRetCount = 0;
// Walk every block of the loop, counting returns and rejecting any try-begin.
4361 for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != stopAt; blk = blk->bbNext)
4363 if (blk->bbJumpKind == BBJ_RETURN)
4367 if (bbIsTryBeg(blk))
4369 JITDUMP("Loop cloning: rejecting loop %d in %s, because it has a try begin.\n", loopInd, info.compFullName);
4374 // Is the entry block a handler or filter start? If so, then if we cloned, we could create a jump
4375 // into the middle of a handler (to go to the cloned copy.) Reject.
4376 if (bbIsHandlerBeg(optLoopTable[loopInd].lpEntry))
4378 JITDUMP("Loop cloning: rejecting loop because entry block is a handler start.\n");
4382 // If the head and entry are in different EH regions, reject.
4383 if (!BasicBlock::sameEHRegion(optLoopTable[loopInd].lpHead, optLoopTable[loopInd].lpEntry))
4385 JITDUMP("Loop cloning: rejecting loop because head and entry blocks are in different EH regions.\n");
4389 // Is the first block after the last block of the loop a handler or filter start?
4390 // Usually, we create a dummy block after the original loop, to skip over the loop clone
4391 // and go to where the original loop did. That raises problems when we don't actually go to
4392 // that block; this is one of those cases. This could be fixed fairly easily; for example,
4393 // we could add a dummy nop block after the (cloned) loop bottom, in the same handler scope as the
4394 // loop. This is just a corner to cut to get this working faster.
4395 BasicBlock* bbAfterLoop = optLoopTable[loopInd].lpBottom->bbNext;
4396 if (bbAfterLoop != nullptr && bbIsHandlerBeg(bbAfterLoop))
4398 JITDUMP("Loop cloning: rejecting loop because next block after bottom is a handler start.\n");
4402 // We've previously made a decision whether to have separate return epilogs, or branch to one.
4403 // There's a GCInfo limitation in the x86 case, so that there can be no more than 4 separate epilogs.
4404 // (I thought this was x86-specific, but it's not if-d. On other architectures, the decision should be made as a
4405 // heuristic tradeoff; perhaps we're just choosing to live with 4 as the limit.)
4406 if (fgReturnCount + loopRetCount > 4)
4408 JITDUMP("Loop cloning: rejecting loop because it has %d returns; if added to previously-existing %d returns, "
4409 "would exceed the limit of 4.\n",
4410 loopRetCount, fgReturnCount);
4414 // Otherwise, we're going to add those return blocks.
// Cloning duplicates the loop's return blocks, so account for them up front.
4415 fgReturnCount += loopRetCount;
4420 /*****************************************************************************
4422 * Identify loop cloning opportunities, derive loop cloning conditions,
4423 * perform loop cloning, use the derived conditions to choose which
4426 void Compiler::optCloneLoops()
4428 JITDUMP("\n*************** In optCloneLoops()\n");
// Nothing to do if there are no loops or cloning is disabled by config.
4429 if (optLoopCount == 0 || !optCanCloneLoops())
4437 printf("Blocks/Trees at start of phase\n");
4438 fgDispBasicBlocks(true);
4442 LoopCloneContext context(optLoopCount, getAllocator());
4444 // Obtain array optimization candidates in the context.
4445 optObtainLoopCloningOpts(&context);
4447 // For each loop, derive cloning conditions for the optimization candidates.
4448 for (unsigned i = 0; i < optLoopCount; ++i)
4450 ExpandArrayStack<LcOptInfo*>* optInfos = context.GetLoopOptInfo(i);
4451 if (optInfos == nullptr)
// If either the cloning conditions or the dereference conditions cannot be
// computed, give up on this loop's candidates entirely.
4456 if (!optDeriveLoopCloningConditions(i, &context) || !optComputeDerefConditions(i, &context))
4458 JITDUMP("> Conditions could not be obtained\n");
4459 context.CancelLoopOptInfo(i);
// Statically evaluate the derived conditions where possible: if any is
// provably false, cancel; if all are provably true, optimize without cloning.
4463 bool allTrue = false;
4464 bool anyFalse = false;
4465 context.EvaluateConditions(i, &allTrue, &anyFalse DEBUGARG(verbose));
4468 context.CancelLoopOptInfo(i);
4472 // Perform static optimizations on the fast path since we always
4473 // have to take the cloned path.
4474 optPerformStaticOptimizations(i, &context DEBUGARG(false));
4476 // No need to clone.
4477 context.CancelLoopOptInfo(i);
4483 // The code in this #if has been useful in debugging loop cloning issues, by
4484 // enabling selective enablement of the loop cloning optimization according to
4487 unsigned methHash = info.compMethodHash();
4488 char* lostr = getenv("loopclonehashlo");
4489 unsigned methHashLo = 0;
4492 sscanf_s(lostr, "%x", &methHashLo);
4493 // methHashLo = (unsigned(atoi(lostr)) << 2); // So we don't have to use negative numbers.
4495 char* histr = getenv("loopclonehashhi");
4496 unsigned methHashHi = UINT32_MAX;
4499 sscanf_s(histr, "%x", &methHashHi);
4500 // methHashHi = (unsigned(atoi(histr)) << 2); // So we don't have to use negative numbers.
4502 if (methHash < methHashLo || methHash > methHashHi)
// Finally, clone every loop whose candidate info survived, after first
// simplifying its cloning and block conditions.
4507 for (unsigned i = 0; i < optLoopCount; ++i)
4509 if (context.GetLoopOptInfo(i) != nullptr)
4512 context.OptimizeConditions(i DEBUGARG(verbose));
4513 context.OptimizeBlockConditions(i DEBUGARG(verbose));
4514 optCloneLoop(i, &context);
4521 printf("\nAfter loop cloning:\n");
4522 fgDispBasicBlocks(/*dumpTrees*/ true);
// optCloneLoop: Clone loop 'loopInd', producing a fast path (the original
// blocks, on which static optimizations are performed) and a slow path (the
// cloned blocks), selected at runtime by the derived cloning conditions.
4527 void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context)
4529 assert(loopInd < optLoopCount);
4531 JITDUMP("\nCloning loop %d: [h: %d, f: %d, t: %d, e: %d, b: %d].\n", loopInd, optLoopTable[loopInd].lpHead->bbNum,
4532 optLoopTable[loopInd].lpFirst->bbNum, optLoopTable[loopInd].lpTop->bbNum,
4533 optLoopTable[loopInd].lpEntry->bbNum, optLoopTable[loopInd].lpBottom->bbNum);
4535 // Determine the depth of the loop, so we can properly weight blocks added (outside the cloned loop blocks).
4536 unsigned depth = optLoopDepth(loopInd);
4537 unsigned ambientWeight = 1;
4538 for (unsigned j = 0; j < depth; j++)
4540 unsigned lastWeight = ambientWeight;
4541 ambientWeight *= BB_LOOP_WEIGHT;
4542 // If the multiplication overflowed, stick at max.
4543 // (Strictly speaking, a multiplication could overflow and still have a result
4544 // that is >= lastWeight...but if so, the original weight must be pretty large,
4545 // and it got bigger, so that's OK.)
4546 if (ambientWeight < lastWeight)
4548 ambientWeight = BB_MAX_WEIGHT;
4553 // If we're in a non-natural loop, the ambient weight might be higher than we computed above.
4554 // Be safe by taking the max with the head block's weight.
4555 ambientWeight = max(ambientWeight, optLoopTable[loopInd].lpHead->bbWeight);
4557 // This is the containing loop, if any -- to label any blocks we create that are outside
4558 // the loop being cloned.
4559 unsigned char ambientLoop = optLoopTable[loopInd].lpParent;
4561 // First, make sure that the loop has a unique header block, creating an empty one if necessary.
4562 optEnsureUniqueHead(loopInd, ambientWeight);
4564 // We're going to make
4576 // H2--> E (Optional; if E == T == F, let H fall through to F/T/E)
4588 BasicBlock* h = optLoopTable[loopInd].lpHead;
4589 if (h->bbJumpKind != BBJ_NONE && h->bbJumpKind != BBJ_ALWAYS)
4591 // Make a new block to be the unique entry to the loop.
4592 assert(h->bbJumpKind == BBJ_COND && h->bbNext == optLoopTable[loopInd].lpEntry);
4593 BasicBlock* newH = fgNewBBafter(BBJ_NONE, h,
4594 /*extendRegion*/ true);
4595 newH->bbWeight = (newH->isRunRarely() ? 0 : ambientWeight);
4596 BlockSetOps::Assign(this, newH->bbReach, h->bbReach);
4597 // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
4598 newH->bbNatLoopNum = ambientLoop;
4600 optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h);
4603 // First, make X2 after B, if necessary. (Not necessary if b is a BBJ_ALWAYS.)
4604 // "newPred" will be the predecessor of the blocks of the cloned loop.
4605 BasicBlock* b = optLoopTable[loopInd].lpBottom;
4606 BasicBlock* newPred = b;
4607 if (b->bbJumpKind != BBJ_ALWAYS)
4609 BasicBlock* x = b->bbNext;
4612 BasicBlock* x2 = fgNewBBafter(BBJ_ALWAYS, b, /*extendRegion*/ true);
4613 x2->bbWeight = (x2->isRunRarely() ? 0 : ambientWeight);
4615 // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
4616 x2->bbNatLoopNum = ambientLoop;
4619 BlockSetOps::Assign(this, x2->bbReach, h->bbReach);
4624 // Now we'll make "h2", after "h" to go to "e" -- unless the loop is a do-while,
4625 // so that "h" already falls through to "e" (e == t == f).
4626 BasicBlock* h2 = nullptr;
4627 if (optLoopTable[loopInd].lpHead->bbNext != optLoopTable[loopInd].lpEntry)
// NOTE(review): the declaration below *shadows* the outer 'h2' declared just
// above, so the outer 'h2' stays nullptr and the later assert that compares
// h->bbNext against 'h2' sees nullptr. Looks like a bug -- confirm whether the
// inner 'BasicBlock*' was meant to be a plain assignment to the outer 'h2'.
4629 BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, optLoopTable[loopInd].lpHead,
4630 /*extendRegion*/ true);
4631 h2->bbWeight = (h2->isRunRarely() ? 0 : ambientWeight);
4633 // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
4634 h2->bbNatLoopNum = ambientLoop;
4636 h2->bbJumpDest = optLoopTable[loopInd].lpEntry;
4637 optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h2);
4640 // Now we'll clone the blocks of the loop body.
4641 BasicBlock* newFirst = nullptr;
4642 BasicBlock* newBot = nullptr;
4644 BlockToBlockMap* blockMap = new (getAllocator()) BlockToBlockMap(getAllocator());
4645 for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != optLoopTable[loopInd].lpBottom->bbNext;
4648 BasicBlock* newBlk = fgNewBBafter(blk->bbJumpKind, newPred,
4649 /*extendRegion*/ true);
4651 BasicBlock::CloneBlockState(this, newBlk, blk);
4652 // TODO-Cleanup: The above clones the bbNatLoopNum, which is incorrect. Eventually, we should probably insert
4653 // the cloned loop in the loop table. For now, however, we'll just make these blocks be part of the surrounding
4654 // loop, if one exists -- the parent of the loop we're cloning.
4655 newBlk->bbNatLoopNum = optLoopTable[loopInd].lpParent;
4657 if (newFirst == nullptr)
4661 newBot = newBlk; // Continually overwrite to make sure we get the last one.
4663 blockMap->Set(blk, newBlk);
4666 // Perform the static optimizations on the fast path.
4667 optPerformStaticOptimizations(loopInd, context DEBUGARG(true));
4669 // Now go through the new blocks, remapping their jump targets within the loop.
4670 for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != optLoopTable[loopInd].lpBottom->bbNext;
4674 BasicBlock* newblk = nullptr;
4675 bool b = blockMap->Lookup(blk, &newblk);
4676 assert(b && newblk != nullptr);
4678 assert(blk->bbJumpKind == newblk->bbJumpKind);
4680 // First copy the jump destination(s) from "blk".
4681 optCopyBlkDest(blk, newblk);
4683 // Now redirect the new block according to "blockMap".
4684 optRedirectBlock(newblk, blockMap);
4687 assert((h->bbJumpKind == BBJ_NONE && (h->bbNext == h2 || h->bbNext == optLoopTable[loopInd].lpEntry)) ||
4688 (h->bbJumpKind == BBJ_ALWAYS));
4690 // If all the conditions are true, go to E2.
4691 BasicBlock* e2 = nullptr;
4692 bool foundIt = blockMap->Lookup(optLoopTable[loopInd].lpEntry, &e2);
4694 h->bbJumpKind = BBJ_COND;
4696 // We will create the following structure
4698 // cond0 (in h) -?> cond1
4699 // slow --> e2 (slow) always
4706 // We should always have block conditions, at the minimum, the array should be deref-able
4707 assert(context->HasBlockConditions(loopInd));
4709 // Create a unique header for the slow path.
4710 BasicBlock* slowHead = fgNewBBafter(BBJ_ALWAYS, h, true);
4711 slowHead->bbWeight = (h->isRunRarely() ? 0 : ambientWeight);
4712 slowHead->bbNatLoopNum = ambientLoop;
4713 slowHead->bbJumpDest = e2;
4715 BasicBlock* condLast = optInsertLoopChoiceConditions(context, loopInd, h, slowHead);
4716 condLast->bbJumpDest = slowHead;
4718 // If h2 is present it is already the head or replace 'h' by 'condLast'.
4721 optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, condLast);
4723 assert(foundIt && e2 != nullptr);
4725 fgUpdateChangedFlowGraph();
4728 //--------------------------------------------------------------------------------------------------
4729 // optInsertLoopChoiceConditions - Insert the loop conditions for a loop between loop head and entry
4732 // context loop cloning context variable
4733 // loopNum the loop index
4734 // head loop head for "loopNum"
4735 // slowHead the slow path loop head
4741 // Create the following structure.
4743 // Note below that the cond0 is inverted in head i.e., if true jump to cond1. This is because
4744 // condn cannot jtrue to loop head h2. It has to be from a direct pred block.
4746 // cond0 (in h) -?> cond1
4747 // slowHead --> e2 (slowHead) always
4748 // !cond1 -?> slowHead
4749 // !cond2 -?> slowHead
4751 // !condn -?> slowHead
4754 // Insert condition 0 in 'h' and create other condition blocks and insert conditions in them.
4756 BasicBlock* Compiler::optInsertLoopChoiceConditions(LoopCloneContext* context,
4759 BasicBlock* slowHead)
4761 JITDUMP("Inserting loop cloning conditions\n");
4762 assert(context->HasBlockConditions(loopNum));
4764 BasicBlock* curCond = head;
4765 ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* levelCond = context->GetBlockConditions(loopNum);
// One condition block per dereference level; the head block's condition is
// inverted (see structure comment above) and chains to the first new block.
4766 for (unsigned i = 0; i < levelCond->Size(); ++i)
4768 bool isHeaderBlock = (curCond == head);
4770 // Flip the condition if header block.
4771 context->CondToStmtInBlock(this, *((*levelCond)[i]), curCond, isHeaderBlock);
4773 // Create each condition block ensuring wiring between them.
4774 BasicBlock* tmp = fgNewBBafter(BBJ_COND, isHeaderBlock ? slowHead : curCond, true);
4775 curCond->bbJumpDest = isHeaderBlock ? tmp : slowHead;
4778 curCond->inheritWeight(head);
4779 curCond->bbNatLoopNum = head->bbNatLoopNum;
4780 JITDUMP("Created new block %02d for new level\n", curCond->bbNum);
4783 // Finally insert cloning conditions after all deref conditions have been inserted.
4784 context->CondToStmtInBlock(this, *(context->GetConditions(loopNum)), curCond, false);
// optEnsureUniqueHead: Ensure loop 'loopInd' has a unique header block that
// dominates the loop entry, creating an empty header block (with the given
// ambient weight) and redirecting outside predecessors to it if necessary.
4788 void Compiler::optEnsureUniqueHead(unsigned loopInd, unsigned ambientWeight)
4790 BasicBlock* h = optLoopTable[loopInd].lpHead;
4791 BasicBlock* t = optLoopTable[loopInd].lpTop;
4792 BasicBlock* e = optLoopTable[loopInd].lpEntry;
4793 BasicBlock* b = optLoopTable[loopInd].lpBottom;
4795 // If "h" dominates the entry block, then it is the unique header.
4796 if (fgDominate(h, e))
4801 // Otherwise, create a new empty header block, make it the pred of the entry block,
4802 // and redirect the preds of the entry block to go to this.
4804 BasicBlock* beforeTop = t->bbPrev;
4805 // Make sure that the new block is in the same region as the loop.
4806 // (We will only create loops that are entirely within a region.)
4807 BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, beforeTop, true);
4808 // This is in the containing loop.
4809 h2->bbNatLoopNum = optLoopTable[loopInd].lpParent;
4810 h2->bbWeight = (h2->isRunRarely() ? 0 : ambientWeight);
4812 // We don't care where it was put; splice it between beforeTop and top.
4813 if (beforeTop->bbNext != h2)
4815 h2->bbPrev->setNext(h2->bbNext); // Splice h2 out.
4816 beforeTop->setNext(h2); // Splice h2 in, between beforeTop and t.
4820 if (h2->bbNext != e)
4822 h2->bbJumpKind = BBJ_ALWAYS;
4825 BlockSetOps::Assign(this, h2->bbReach, e->bbReach);
4827 // Redirect paths from preds of "e" to go to "h2" instead of "e".
4828 BlockToBlockMap* blockMap = new (getAllocator()) BlockToBlockMap(getAllocator());
4829 blockMap->Set(e, h2);
4831 for (flowList* predEntry = e->bbPreds; predEntry; predEntry = predEntry->flNext)
4833 BasicBlock* predBlock = predEntry->flBlock;
4835 // Skip if predBlock is in the loop.
// Note: loop membership is tested by bbNum range [top..bottom]; this assumes
// the loop's blocks are contiguous in the block order.
4836 if (t->bbNum <= predBlock->bbNum && predBlock->bbNum <= b->bbNum)
4840 optRedirectBlock(predBlock, blockMap);
4843 optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h2);
4846 /*****************************************************************************
4848 * Determine the kind of interference for the call.
4851 /* static */ inline Compiler::callInterf Compiler::optCallInterf(GenTreePtr call)
4853 assert(call->gtOper == GT_CALL);
4855 // if not a helper, kills everything
4856 if (call->gtCall.gtCallType != CT_HELPER)
4861 // setfield and array address store kill all indirections
4862 switch (eeGetHelperNum(call->gtCall.gtCallMethHnd))
4864 case CORINFO_HELP_ASSIGN_REF: // Not strictly needed as we don't make a GT_CALL with this
4865 case CORINFO_HELP_CHECKED_ASSIGN_REF: // Not strictly needed as we don't make a GT_CALL with this
4866 case CORINFO_HELP_ASSIGN_BYREF: // Not strictly needed as we don't make a GT_CALL with this
4867 case CORINFO_HELP_SETFIELDOBJ:
4868 case CORINFO_HELP_ARRADDR_ST:
// These helpers store object references, so they interfere with GC-ref indirections.
4870 return CALLINT_REF_INDIRS;
4872 case CORINFO_HELP_SETFIELDFLOAT:
4873 case CORINFO_HELP_SETFIELDDOUBLE:
4874 case CORINFO_HELP_SETFIELD8:
4875 case CORINFO_HELP_SETFIELD16:
4876 case CORINFO_HELP_SETFIELD32:
4877 case CORINFO_HELP_SETFIELD64:
// These helpers store scalars, so they interfere with scalar (non-GC) indirections.
4879 return CALLINT_SCL_INDIRS;
4881 case CORINFO_HELP_ASSIGN_STRUCT: // Not strictly needed as we don't use this in Jit32
4882 case CORINFO_HELP_MEMSET: // Not strictly needed as we don't make a GT_CALL with this
4883 case CORINFO_HELP_MEMCPY: // Not strictly needed as we don't make a GT_CALL with this
4884 case CORINFO_HELP_SETFIELDSTRUCT:
// Struct/memory helpers may store both refs and scalars: all indirections interfere.
4886 return CALLINT_ALL_INDIRS;
4892 // other helpers kill nothing
4893 return CALLINT_NONE;
4896 /*****************************************************************************
4898 * See if the given tree can be computed in the given precision (which must
4899 * be smaller than the type of the tree for this to make sense). If 'doit'
4900 * is false, we merely check to see whether narrowing is possible; if we
4901 * get called with 'doit' being true, we actually perform the narrowing.
4904 bool Compiler::optNarrowTree(GenTreePtr tree, var_types srct, var_types dstt, ValueNumPair vnpNarrow, bool doit)
4910 noway_assert(genActualType(tree->gtType) == genActualType(srct));
4912 /* Assume we're only handling integer types */
4913 noway_assert(varTypeIsIntegral(srct));
4914 noway_assert(varTypeIsIntegral(dstt));
4916 unsigned srcSize = genTypeSize(srct);
4917 unsigned dstSize = genTypeSize(dstt);
4919 /* dstt must be smaller than srct to narrow */
4920 if (dstSize >= srcSize)
4925 /* Figure out what kind of a node we have */
4926 oper = tree->OperGet();
4927 kind = tree->OperKind();
// Assignment-style operators are never narrowed; callers must not ask us to.
4929 if (kind & GTK_ASGOP)
4931 noway_assert(doit == false);
4935 ValueNumPair NoVNPair = ValueNumPair();
4937 if (kind & GTK_LEAF)
4941 /* Constants can usually be narrowed by changing their value */
4942 CLANG_FORMAT_COMMENT_ANCHOR;
4944 #ifndef _TARGET_64BIT_
4949 lval = tree->gtIntConCommon.LngValue();
// A long constant narrows only if it already fits in the destination mask.
4978 if ((lval & lmask) != lval)
4983 tree->ChangeOperConst(GT_CNS_INT);
4984 tree->gtType = TYP_INT;
4985 tree->gtIntCon.gtIconVal = (int)lval;
// Re-derive the value number for the rewritten constant, if VN is active.
4986 if (vnStore != nullptr)
4988 fgValueNumberTreeConst(tree);
4998 ival = tree->gtIntCon.gtIconVal;
5017 #ifdef _TARGET_64BIT_
5024 #endif // _TARGET_64BIT_
5029 if ((ival & imask) != ival)
5034 #ifdef _TARGET_64BIT_
5037 tree->gtType = TYP_INT;
5038 tree->gtIntCon.gtIconVal = (int)ival;
5039 if (vnStore != nullptr)
5041 fgValueNumberTreeConst(tree);
5044 #endif // _TARGET_64BIT_
5048 /* Operands that are in memory can usually be narrowed
5049 simply by changing their gtType */
5052 /* We only allow narrowing long -> int for a GT_LCL_VAR */
5053 if (dstSize == sizeof(int))
5066 noway_assert(doit == false);
5070 if (kind & (GTK_BINOP | GTK_UNOP))
5073 op1 = tree->gtOp.gtOp1;
5075 op2 = tree->gtOp.gtOp2;
5077 switch (tree->gtOper)
5080 noway_assert(genActualType(tree->gtType) == genActualType(op2->gtType));
5082 // Is op2 a small constant than can be narrowed into dstt?
5083 // if so the result of the GT_AND will also fit into 'dstt' and can be narrowed
5084 if ((op2->gtOper == GT_CNS_INT) && optNarrowTree(op2, srct, dstt, NoVNPair, false))
5086 // We will change the type of the tree and narrow op2
5090 tree->gtType = genActualType(dstt);
5091 tree->SetVNs(vnpNarrow);
5093 optNarrowTree(op2, srct, dstt, NoVNPair, true);
5094 // We may also need to cast away the upper bits of op1
5097 assert(tree->gtType == TYP_INT);
5098 op1 = gtNewCastNode(TYP_INT, op1, TYP_INT);
// Mark the new cast as already morphed so it isn't re-morphed later.
5100 op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
5102 tree->gtOp.gtOp1 = op1;
// Overflow-checked ops and small destination types cannot be safely narrowed here.
5113 if (tree->gtOverflow() || varTypeIsSmall(dstt))
5115 noway_assert(doit == false);
5123 noway_assert(genActualType(tree->gtType) == genActualType(op1->gtType));
5124 noway_assert(genActualType(tree->gtType) == genActualType(op2->gtType));
// Both operands must be narrowable (and not active CSE candidates) or we fail.
5126 if (gtIsActiveCSE_Candidate(op1) || gtIsActiveCSE_Candidate(op2) ||
5127 !optNarrowTree(op1, srct, dstt, NoVNPair, doit) || !optNarrowTree(op2, srct, dstt, NoVNPair, doit))
5129 noway_assert(doit == false);
5133 /* Simply change the type of the tree */
5137 if (tree->gtOper == GT_MUL && (tree->gtFlags & GTF_MUL_64RSLT))
5139 tree->gtFlags &= ~GTF_MUL_64RSLT;
5142 tree->gtType = genActualType(dstt);
5143 tree->SetVNs(vnpNarrow);
5151 /* Simply change the type of the tree */
5153 if (doit && (dstSize <= genTypeSize(tree->gtType)))
5155 tree->gtType = genSignedType(dstt);
5156 tree->SetVNs(vnpNarrow);
5158 /* Make sure we don't mess up the variable type */
5159 if ((oper == GT_LCL_VAR) || (oper == GT_LCL_FLD))
5161 tree->gtFlags |= GTF_VAR_CAST;
5174 /* These can always be narrowed since they only represent 0 or 1 */
5179 var_types cast = tree->CastToType();
5180 var_types oprt = op1->TypeGet();
5181 unsigned oprSize = genTypeSize(oprt);
// Cannot narrow across the integral/non-integral boundary.
5188 if (varTypeIsIntegralOrI(dstt) != varTypeIsIntegralOrI(oprt))
5193 if (tree->gtOverflow())
5198 /* Is this a cast from the type we're narrowing to or a smaller one? */
5200 if (oprSize <= dstSize)
5202 /* Bash the target type of the cast */
5206 dstt = genSignedType(dstt);
5208 if (oprSize == dstSize)
5210 // Same size: change the CAST into a NOP
5211 tree->ChangeOper(GT_NOP);
5212 tree->gtType = dstt;
5213 tree->gtOp.gtOp2 = nullptr;
5214 tree->gtVNPair = op1->gtVNPair; // Set to op1's ValueNumber
5218 // oprSize is smaller
5219 assert(oprSize < dstSize);
5221 // Change the CastToType in the GT_CAST node
5222 tree->CastToType() = dstt;
5224 // The result type of a GT_CAST is never a small type.
5225 // Use genActualType to widen dstt when it is a small types.
5226 tree->gtType = genActualType(dstt);
5227 tree->SetVNs(vnpNarrow);
5237 if (!gtIsActiveCSE_Candidate(op2) && optNarrowTree(op2, srct, dstt, vnpNarrow, doit))
5239 /* Simply change the type of the tree */
5243 tree->gtType = genActualType(dstt);
5244 tree->SetVNs(vnpNarrow);
5251 noway_assert(doit == false);
5259 /*****************************************************************************
5261 * The following logic figures out whether the given variable is assigned
5262 * somewhere in a list of basic blocks (or in an entire loop).
5265 Compiler::fgWalkResult Compiler::optIsVarAssgCB(GenTreePtr* pTree, fgWalkData* data)
5267 GenTreePtr tree = *pTree;
// Assignment-style node: record what the destination clobbers in the
// isVarAssgDsc accumulator carried through pCallbackData.
5269 if (tree->OperKind() & GTK_ASGOP)
5271 GenTreePtr dest = tree->gtOp.gtOp1;
5272 genTreeOps destOper = dest->OperGet();
5274 isVarAssgDsc* desc = (isVarAssgDsc*)data->pCallbackData;
5275 assert(desc && desc->ivaSelf == desc);
5277 if (destOper == GT_LCL_VAR)
5279 unsigned tvar = dest->gtLclVarCommon.gtLclNum;
// Locals beyond the tracked-set limit can't be recorded in the mask;
// flag the mask as incomplete instead.
5280 if (tvar < lclMAX_ALLSET_TRACKED)
5282 AllVarSetOps::AddElemD(data->compiler, desc->ivaMaskVal, tvar);
5286 desc->ivaMaskIncomplete = true;
// Is this an assignment to the specific variable the caller asked about
// ('ivaVar'), excluding the 'ivaSkip' tree?
5289 if (tvar == desc->ivaVar)
5291 if (tree != desc->ivaSkip)
5297 else if (destOper == GT_LCL_FLD)
5299 /* We can't track every field of every var. Moreover, indirections
5300 may access different parts of the var as different (but
5301 overlapping) fields. So just treat them as indirect accesses */
5303 // unsigned lclNum = dest->gtLclFld.gtLclNum;
5304 // noway_assert(lvaTable[lclNum].lvAddrTaken);
5306 varRefKinds refs = varTypeIsGC(tree->TypeGet()) ? VR_IND_REF : VR_IND_SCL;
5307 desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | refs);
5309 else if (destOper == GT_CLS_VAR)
5311 desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | VR_GLB_VAR);
5313 else if (destOper == GT_IND)
5315 /* Set the proper indirection bits */
5317 varRefKinds refs = varTypeIsGC(tree->TypeGet()) ? VR_IND_REF : VR_IND_SCL;
5318 desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | refs);
// Calls are recorded separately: classify their interference kind.
5321 else if (tree->gtOper == GT_CALL)
5323 isVarAssgDsc* desc = (isVarAssgDsc*)data->pCallbackData;
5324 assert(desc && desc->ivaSelf == desc);
5326 desc->ivaMaskCall = optCallInterf(tree);
5329 return WALK_CONTINUE;
5332 /*****************************************************************************/
// optIsVarAssigned: Walk the statements of the blocks from 'beg' (through 'end',
// per the descriptor set up below) looking for an assignment to local 'var',
// ignoring the 'skip' tree. Uses optIsVarAssgCB as the tree-walk callback.
5334 bool Compiler::optIsVarAssigned(BasicBlock* beg, BasicBlock* end, GenTreePtr skip, unsigned var)
5339 desc.ivaSkip = skip;
// ivaSelf points back at the descriptor; the callback asserts this as a
// sanity check that pCallbackData is what we passed.
5341 desc.ivaSelf = &desc;
5344 desc.ivaMaskCall = CALLINT_NONE;
5345 AllVarSetOps::AssignNoCopy(this, desc.ivaMaskVal, AllVarSetOps::MakeEmpty(this));
5351 for (GenTreeStmt* stmt = beg->firstStmt(); stmt; stmt = stmt->gtNextStmt)
5353 noway_assert(stmt->gtOper == GT_STMT);
// fgWalkTreePre returns non-zero when the callback aborts the walk, i.e.
// when an assignment to 'var' was found.
5354 if (fgWalkTreePre(&stmt->gtStmtExpr, optIsVarAssgCB, &desc))
5376 /*****************************************************************************/
// optIsSetAssgLoop: Answer whether loop 'lnum' assigns any of the variables in
// 'vars' or performs any of the indirection kinds in 'inds', computing (and
// caching in the loop descriptor) the loop's assigned-variable summary on the
// first query.
5377 int Compiler::optIsSetAssgLoop(unsigned lnum, ALLVARSET_VALARG_TP vars, varRefKinds inds)
5381 /* Get hold of the loop descriptor */
5383 noway_assert(lnum < optLoopCount);
5384 loop = optLoopTable + lnum;
5386 /* Do we already know what variables are assigned within this loop? */
5388 if (!(loop->lpFlags & LPFLG_ASGVARS_YES))
5395 /* Prepare the descriptor used by the tree walker call-back */
// ivaVar == -1: we're summarizing all assignments, not querying one variable.
5397 desc.ivaVar = (unsigned)-1;
5398 desc.ivaSkip = nullptr;
5400 desc.ivaSelf = &desc;
5402 AllVarSetOps::AssignNoCopy(this, desc.ivaMaskVal, AllVarSetOps::MakeEmpty(this));
5403 desc.ivaMaskInd = VR_NONE;
5404 desc.ivaMaskCall = CALLINT_NONE;
5405 desc.ivaMaskIncomplete = false;
5407 /* Now walk all the statements of the loop */
5409 beg = loop->lpHead->bbNext;
5410 end = loop->lpBottom;
5412 for (/**/; /**/; beg = beg->bbNext)
5416 for (GenTreeStmt* stmt = beg->FirstNonPhiDef(); stmt; stmt = stmt->gtNextStmt)
5418 noway_assert(stmt->gtOper == GT_STMT);
5419 fgWalkTreePre(&stmt->gtStmtExpr, optIsVarAssgCB, &desc);
5421 if (desc.ivaMaskIncomplete)
// Some locals couldn't be recorded in the mask; remember that the
// cached summary is an under-approximation.
5423 loop->lpFlags |= LPFLG_ASGVARS_INC;
// Cache the computed summary in the loop descriptor.
5433 AllVarSetOps::Assign(this, loop->lpAsgVars, desc.ivaMaskVal);
5434 loop->lpAsgInds = desc.ivaMaskInd;
5435 loop->lpAsgCall = desc.ivaMaskCall;
5437 /* Now we know what variables are assigned in the loop */
5439 loop->lpFlags |= LPFLG_ASGVARS_YES;
5442 /* Now we can finally test the caller's mask against the loop's */
5443 if (!AllVarSetOps::IsEmptyIntersection(this, loop->lpAsgVars, vars) || (loop->lpAsgInds & inds))
// The loop contains calls: check whether the call interference kind
// conflicts with the indirection kinds the caller cares about.
5448 switch (loop->lpAsgCall)
5452 /* Can't hoist if the call might have side effect on an indirection. */
5454 if (loop->lpAsgInds != VR_NONE)
5461 case CALLINT_REF_INDIRS:
5463 /* Can't hoist if the call might have side effect on an ref indirection. */
5465 if (loop->lpAsgInds & VR_IND_REF)
5472 case CALLINT_SCL_INDIRS:
5474 /* Can't hoist if the call might have side effect on an non-ref indirection. */
5476 if (loop->lpAsgInds & VR_IND_SCL)
5483 case CALLINT_ALL_INDIRS:
5485 /* Can't hoist if the call might have side effect on any indirection. */
5487 if (loop->lpAsgInds & (VR_IND_REF | VR_IND_SCL))
5496 /* Other helpers kill nothing */
5501 noway_assert(!"Unexpected lpAsgCall value");
// optPerformHoistExpr: hoist a copy of 'origExpr' out of loop 'lnum'.
// Clones the expression, marks the clone as a CSE candidate, morphs it, and
// appends it as a new statement at the end of the loop's pre-header block
// (creating the pre-header via fgCreateLoopPreHeader if needed).
// NOTE(review): this is a numbered listing with some lines elided (braces,
// DEBUG guards, etc.); comments below describe only the visible code.
5507 void Compiler::optPerformHoistExpr(GenTreePtr origExpr, unsigned lnum)
5512 printf("\nHoisting a copy of ");
5513 printTreeID(origExpr);
5514 printf(" into PreHeader for loop L%02u <BB%02u..BB%02u>:\n", lnum, optLoopTable[lnum].lpFirst->bbNum,
5515 optLoopTable[lnum].lpBottom->bbNum);
5516 gtDispTree(origExpr);
5521 // This loop has to be in a form that is approved for hoisting.
5522 assert(optLoopTable[lnum].lpFlags & LPFLG_HOISTABLE);
5524 // Create a copy of the expression and mark it for CSE's.
5525 GenTreePtr hoistExpr = gtCloneExpr(origExpr, GTF_MAKE_CSE);
5527 // At this point we should have a cloned expression, marked with the GTF_MAKE_CSE flag
5528 assert(hoistExpr != origExpr);
5529 assert(hoistExpr->gtFlags & GTF_MAKE_CSE);
5531 GenTreePtr hoist = hoistExpr;
5532 // The value of the expression isn't used (unless it's an assignment).
// Wrap non-assignments in an "unused value" node so the clone is a valid
// top-level statement whose result is deliberately discarded.
5533 if (hoistExpr->OperGet() != GT_ASG)
5535 hoist = gtUnusedValNode(hoistExpr);
5538 /* Put the statement in the preheader */
5540 fgCreateLoopPreHeader(lnum);
// After fgCreateLoopPreHeader, lpHead is the pre-header block (a BBJ_NONE).
5542 BasicBlock* preHead = optLoopTable[lnum].lpHead;
5543 assert(preHead->bbJumpKind == BBJ_NONE);
5545 // fgMorphTree and lvaRecursiveIncRefCounts requires that compCurBB be the block that contains
5546 // (or in this case, will contain) the expression.
5547 compCurBB = preHead;
5549 // Increment the ref counts of any local vars appearing in "hoist".
5550 // Note that we need to do this before fgMorphTree() as fgMorph() could constant
5551 // fold away some of the lcl vars referenced by "hoist".
5552 lvaRecursiveIncRefCounts(hoist);
5554 hoist = fgMorphTree(hoist);
5556 GenTreePtr hoistStmt = gtNewStmt(hoist);
5557 hoistStmt->gtFlags |= GTF_STMT_CMPADD;
5559 /* simply append the statement at the end of the preHead's list */
5561 GenTreePtr treeList = preHead->bbTreeList;
// Non-empty pre-header: splice the new statement after the current last
// statement (bbTreeList->gtPrev points to the list tail by convention here).
5565 /* append after last statement */
5567 GenTreePtr last = treeList->gtPrev;
5568 assert(last->gtNext == nullptr);
5570 last->gtNext = hoistStmt;
5571 hoistStmt->gtPrev = last;
5572 treeList->gtPrev = hoistStmt;
5576 /* Empty pre-header - store the single statement in the block */
5578 preHead->bbTreeList = hoistStmt;
// Single-element circular-prev convention: the lone statement is its own tail.
5579 hoistStmt->gtPrev = hoistStmt;
5582 hoistStmt->gtNext = nullptr;
5587 printf("This hoisted copy placed in PreHeader (BB%02u):\n", preHead->bbNum);
// If statement-level node lists are threaded, recompute costs/ordering for
// the new statement so it is consistent with the rest of the block.
5592 if (fgStmtListThreaded)
5594 gtSetStmtInfo(hoistStmt);
5595 fgSetStmtSeq(hoistStmt);
// Test-data bookkeeping (JIT self-checks): validate TL_LoopHoist annotations
// attached to the original expression against what actually happened.
5599 if (m_nodeTestData != nullptr)
5602 // What is the depth of the loop "lnum"?
5604 unsigned lnumIter = lnum;
5605 while (optLoopTable[lnumIter].lpParent != BasicBlock::NOT_IN_LOOP)
5608 lnumIter = optLoopTable[lnumIter].lpParent;
5611 NodeToTestDataMap* testData = GetNodeTestData();
5613 TestLabelAndNum tlAndN;
5614 if (testData->Lookup(origExpr, &tlAndN) && tlAndN.m_tl == TL_LoopHoist)
// m_num == -1 means the test annotation said "do not hoist" — report the
// violation; a mismatched depth is likewise reported.
5616 if (tlAndN.m_num == -1)
5619 printTreeID(origExpr);
5620 printf(" was declared 'do not hoist', but is being hoisted.\n");
5623 else if (tlAndN.m_num != depth)
5626 printTreeID(origExpr);
5627 printf(" was declared as hoistable from loop at nesting depth %d; actually hoisted from loop at depth "
5629 tlAndN.m_num, depth);
5634 // We've correctly hoisted this, so remove the annotation. Later, we'll check for any remaining "must
5635 // hoist" annotations.
5636 testData->Remove(origExpr);
5637 // Now we insert an annotation to make sure that "hoistExpr" is actually CSE'd.
5638 tlAndN.m_tl = TL_CSE_Def;
5639 tlAndN.m_num = m_loopHoistCSEClass++;
5640 testData->Set(hoistExpr, tlAndN);
// Statistics: count hoisted expressions and loops that hoisted at least one.
5646 #if LOOP_HOIST_STATS
5647 if (!m_curLoopHasHoistedExpression)
5649 m_loopsWithHoistedExpressions++;
5650 m_curLoopHasHoistedExpression = true;
5652 m_totalHoistedExpressions++;
5653 #endif // LOOP_HOIST_STATS
// optHoistLoopCode: driver for the loop-hoisting phase.
// Walks all top-level (outermost) loops and hoists loop-invariant expressions
// via optHoistLoopNest; afterwards (DEBUG) validates the flowgraph and checks
// that every "must hoist" test annotation was satisfied.
// NOTE(review): numbered listing with elided lines; comments cover visible code only.
5656 void Compiler::optHoistLoopCode()
5658 // If we don't have any loops in the method then take an early out now.
5659 if (optLoopCount == 0)
// JitNoHoist config knob can disable this phase (handling elided here).
5665 unsigned jitNoHoist = JitConfig.JitNoHoist();
5673 // The code in this #if has been useful in debugging loop cloning issues, by
5674 // enabling selective enablement of the loop cloning optimization according to
// Method-hash range filter driven by environment variables, for bisecting
// which method a hoisting change affects.
5677 unsigned methHash = info.compMethodHash();
5678 char* lostr = getenv("loophoisthashlo");
5679 unsigned methHashLo = 0;
5682 sscanf_s(lostr, "%x", &methHashLo);
5683 // methHashLo = (unsigned(atoi(lostr)) << 2); // So we don't have to use negative numbers.
5685 char* histr = getenv("loophoisthashhi");
5686 unsigned methHashHi = UINT32_MAX;
5689 sscanf_s(histr, "%x", &methHashHi);
5690 // methHashHi = (unsigned(atoi(histr)) << 2); // So we don't have to use negative numbers.
5692 if (methHash < methHashLo || methHash > methHashHi)
5694 printf("Doing loop hoisting in %s (0x%x).\n", info.compFullName, methHash);
5696 #endif // 0 -- debugging loop cloning issues
5701 printf("\n*************** In optHoistLoopCode()\n");
5702 printf("Blocks/Trees before phase\n");
5703 fgDispBasicBlocks(true);
5708 // Consider all the loop nests, in outer-to-inner order (thus hoisting expressions outside the largest loop in which
5709 // they are invariant.)
5710 LoopHoistContext hoistCtxt(this);
5711 for (unsigned lnum = 0; lnum < optLoopCount; lnum++)
// Skip loops removed by earlier phases.
5713 if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED)
// Only start recursion at outermost loops; optHoistLoopNest descends into children.
5718 if (optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP)
5720 optHoistLoopNest(lnum, &hoistCtxt);
5729 printf("Blocks/Trees after optHoistLoopCode() modified flowgraph\n");
5730 fgDispBasicBlocks(true);
5734 // Make sure that the predecessor lists are accurate
5735 fgDebugCheckBBlist();
5740 // Test Data stuff..
5741 // If we have no test data, early out.
5742 if (m_nodeTestData == nullptr)
// Scan all remaining node annotations: any TL_LoopHoist annotation with a
// non-negative number that survived the phase means a declared "must hoist"
// expression was not hoisted — report it.
5746 NodeToTestDataMap* testData = GetNodeTestData();
5747 for (NodeToTestDataMap::KeyIterator ki = testData->Begin(); !ki.Equal(testData->End()); ++ki)
5749 TestLabelAndNum tlAndN;
5750 GenTreePtr node = ki.Get();
5751 bool b = testData->Lookup(node, &tlAndN);
5753 if (tlAndN.m_tl != TL_LoopHoist)
5757 // Otherwise, it is a loop hoist annotation.
5758 assert(tlAndN.m_num < 100); // >= 100 indicates nested static field address, should already have been moved.
5759 if (tlAndN.m_num >= 0)
5763 printf(" was declared 'must hoist', but has not been hoisted.\n");
// optHoistLoopNest: hoist from loop 'lnum', then recurse into its child loops.
// While recursing, the VNs hoisted in 'lnum' are temporarily added to
// hoistCtxt->m_hoistedInParentLoops so nested loops don't re-hoist them,
// and are removed again afterwards.
5770 void Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt)
5772 // Do this loop, then recursively do all nested loops.
5773 CLANG_FORMAT_COMMENT_ANCHOR;
5775 #if LOOP_HOIST_STATS
5777 m_curLoopHasHoistedExpression = false;
5778 m_loopsConsidered++;
5779 #endif // LOOP_HOIST_STATS
5781 optHoistThisLoop(lnum, hoistCtxt);
// Take ownership of the per-loop "hoisted in current loop" VN set.
5783 VNSet* hoistedInCurLoop = hoistCtxt->ExtractHoistedInCurLoop();
5785 if (optLoopTable[lnum].lpChild != BasicBlock::NOT_IN_LOOP)
5787 // Add the ones hoisted in "lnum" to "hoistedInParents" for any nested loops.
5788 // TODO-Cleanup: we should have a set abstraction for loops.
5789 if (hoistedInCurLoop != nullptr)
5791 for (VNSet::KeyIterator keys = hoistedInCurLoop->Begin(); !keys.Equal(hoistedInCurLoop->End()); ++keys)
// Each VN must not already be present in the parent set (asserted),
// since we will unconditionally Remove them below.
5795 assert(!hoistCtxt->m_hoistedInParentLoops.Lookup(keys.Get(), &b));
5797 hoistCtxt->m_hoistedInParentLoops.Set(keys.Get(), true);
// Recurse over the sibling-linked list of child loops.
5801 for (unsigned child = optLoopTable[lnum].lpChild; child != BasicBlock::NOT_IN_LOOP;
5802 child = optLoopTable[child].lpSibling)
5804 optHoistLoopNest(child, hoistCtxt);
5808 // TODO-Cleanup: we should have a set abstraction for loops.
5809 if (hoistedInCurLoop != nullptr)
5811 for (VNSet::KeyIterator keys = hoistedInCurLoop->Begin(); !keys.Equal(hoistedInCurLoop->End()); ++keys)
5813 // Note that we asserted when we added these that they hadn't been members, so removing is appropriate.
5814 hoistCtxt->m_hoistedInParentLoops.Remove(keys.Get());
// optHoistThisLoop: attempt hoisting for a single loop 'lnum'.
// Checks structural preconditions (do-while shape, head dominates entry, same
// try region, not inside a catch), computes register-pressure statistics used
// by the profitability heuristic, then processes the loop's definitely-executed
// blocks (dominators of the single exit, or just the entry block).
// NOTE(review): numbered listing with elided lines; comments cover visible code only.
5820 void Compiler::optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt)
5822 LoopDsc* pLoopDsc = &optLoopTable[lnum];
5824 /* If loop was removed continue */
5826 if (pLoopDsc->lpFlags & LPFLG_REMOVED)
5831 /* Get the head and tail of the loop */
5833 BasicBlock* head = pLoopDsc->lpHead;
5834 BasicBlock* tail = pLoopDsc->lpBottom;
5835 BasicBlock* lbeg = pLoopDsc->lpEntry;
5838 // We must have a do-while loop
5839 if ((pLoopDsc->lpFlags & LPFLG_DO_WHILE) == 0)
5844 // The loop-head must dominate the loop-entry.
5845 // TODO-CQ: Couldn't we make this true if it's not?
5846 if (!fgDominate(head, lbeg))
5851 // if lbeg is the start of a new try block then we won't be able to hoist
5852 if (!BasicBlock::sameTryRegion(head, lbeg))
5857 // We don't bother hoisting when inside of a catch block
5858 if ((lbeg->bbCatchTyp != BBCT_NONE) && (lbeg->bbCatchTyp != BBCT_FINALLY))
// All preconditions passed: this loop is eligible for hoisting.
5863 pLoopDsc->lpFlags |= LPFLG_HOISTABLE;
5865 unsigned begn = lbeg->bbNum;
5866 unsigned endn = tail->bbNum;
5868 // Ensure the per-loop sets/tables are empty.
5869 hoistCtxt->m_curLoopVnInvariantCache.RemoveAll();
5874 printf("optHoistLoopCode for loop L%02u <BB%02u..BB%02u>:\n", lnum, begn, endn);
5875 printf(" Loop body %s a call\n", pLoopDsc->lpContainsCall ? "contains" : "does not contain");
// loopVars = variables that are both live in/out of the loop and used or
// defined inside it; these counts feed optIsProfitableToHoistableTree.
5879 VARSET_TP VARSET_INIT_NOCOPY(loopVars, VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, pLoopDsc->lpVarUseDef));
5881 pLoopDsc->lpVarInOutCount = VarSetOps::Count(this, pLoopDsc->lpVarInOut);
5882 pLoopDsc->lpLoopVarCount = VarSetOps::Count(this, loopVars);
5883 pLoopDsc->lpHoistedExprCount = 0;
5885 #ifndef _TARGET_64BIT_
5886 unsigned longVarsCount = VarSetOps::Count(this, lvaLongVars);
5888 if (longVarsCount > 0)
5890 // Since 64-bit variables take up two registers on 32-bit targets, we increase
5891 // the Counts such that each TYP_LONG variable counts twice.
5893 VARSET_TP VARSET_INIT_NOCOPY(loopLongVars, VarSetOps::Intersection(this, loopVars, lvaLongVars));
5894 VARSET_TP VARSET_INIT_NOCOPY(inOutLongVars, VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, lvaLongVars));
5899 printf("\n LONGVARS(%d)=", VarSetOps::Count(this, lvaLongVars));
5900 lvaDispVarSet(lvaLongVars);
5903 pLoopDsc->lpLoopVarCount += VarSetOps::Count(this, loopLongVars);
5904 pLoopDsc->lpVarInOutCount += VarSetOps::Count(this, inOutLongVars);
5906 #endif // !_TARGET_64BIT_
5911 printf("\n USEDEF (%d)=", VarSetOps::Count(this, pLoopDsc->lpVarUseDef));
5912 lvaDispVarSet(pLoopDsc->lpVarUseDef);
5914 printf("\n INOUT (%d)=", pLoopDsc->lpVarInOutCount);
5915 lvaDispVarSet(pLoopDsc->lpVarInOut);
5917 printf("\n LOOPVARS(%d)=", pLoopDsc->lpLoopVarCount);
5918 lvaDispVarSet(loopVars);
// Floating-point variables are counted separately (they compete for FP
// registers); the integer counts are reduced by the FP counts below.
5923 unsigned floatVarsCount = VarSetOps::Count(this, lvaFloatVars);
5925 if (floatVarsCount > 0)
5927 VARSET_TP VARSET_INIT_NOCOPY(loopFPVars, VarSetOps::Intersection(this, loopVars, lvaFloatVars));
5928 VARSET_TP VARSET_INIT_NOCOPY(inOutFPVars, VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, lvaFloatVars));
5930 pLoopDsc->lpLoopVarFPCount = VarSetOps::Count(this, loopFPVars);
5931 pLoopDsc->lpVarInOutFPCount = VarSetOps::Count(this, inOutFPVars);
5932 pLoopDsc->lpHoistedFPExprCount = 0;
5934 pLoopDsc->lpLoopVarCount -= pLoopDsc->lpLoopVarFPCount;
5935 pLoopDsc->lpVarInOutCount -= pLoopDsc->lpVarInOutFPCount;
5940 printf(" INOUT-FP(%d)=", pLoopDsc->lpVarInOutFPCount);
5941 lvaDispVarSet(inOutFPVars);
5943 printf("\n LOOPV-FP(%d)=", pLoopDsc->lpLoopVarFPCount);
5944 lvaDispVarSet(loopFPVars);
5948 else // (floatVarsCount == 0)
5950 pLoopDsc->lpLoopVarFPCount = 0;
5951 pLoopDsc->lpVarInOutFPCount = 0;
5952 pLoopDsc->lpHoistedFPExprCount = 0;
5955 // Find the set of definitely-executed blocks.
5956 // Ideally, the definitely-executed blocks are the ones that post-dominate the entry block.
5957 // Until we have post-dominators, we'll special-case for single-exit blocks.
5958 ExpandArrayStack<BasicBlock*> defExec(getAllocatorLoopHoist());
5959 if (pLoopDsc->lpFlags & LPFLG_ONE_EXIT)
5961 assert(pLoopDsc->lpExit != nullptr);
5962 BasicBlock* cur = pLoopDsc->lpExit;
5963 // Push dominators, until we reach "entry" or exit the loop.
5964 while (cur != nullptr && pLoopDsc->lpContains(cur) && cur != pLoopDsc->lpEntry)
5969 // If we didn't reach the entry block, give up and *just* push the entry block.
5970 if (cur != pLoopDsc->lpEntry)
5974 defExec.Push(pLoopDsc->lpEntry)
5976 else // More than one exit
5978 // We'll assume that only the entry block is definitely executed.
5979 // We could in the future do better.
5980 defExec.Push(pLoopDsc->lpEntry);
// Process blocks in reverse push order: dominators before dominatees, so the
// "first block, before any side effect" tracking in the per-block walk is valid.
5983 while (defExec.Size() > 0)
5985 // Consider in reverse order: dominator before dominatee.
5986 BasicBlock* blk = defExec.Pop();
5987 optHoistLoopExprsForBlock(blk, lnum, hoistCtxt);
5991 // Hoist any expressions in "blk" that are invariant in loop "lnum" outside of "blk" and into a PreHead for loop "lnum".
// Walks every non-phi statement in the block; optHoistLoopExprsForTree hoists
// hoistable subtrees, and a fully-hoistable top-level tree is hoisted here.
5992 void Compiler::optHoistLoopExprsForBlock(BasicBlock* blk, unsigned lnum, LoopHoistContext* hoistCtxt)
5994 LoopDsc* pLoopDsc = &optLoopTable[lnum];
// True only while processing the loop entry block before any statement with a
// possible global side effect; exception-throwing trees can't be hoisted after.
5995 bool firstBlockAndBeforeSideEffect = (blk == pLoopDsc->lpEntry);
5996 unsigned blkWeight = blk->getBBWeight(this);
6001 printf(" optHoistLoopExprsForBlock BB%02u (weight=%6s) of loop L%02u <BB%02u..BB%02u>, firstBlock is %s\n",
6002 blk->bbNum, refCntWtd2str(blkWeight), lnum, pLoopDsc->lpFirst->bbNum, pLoopDsc->lpBottom->bbNum,
6003 firstBlockAndBeforeSideEffect ? "true" : "false");
// (DEBUG-only duplicate of the check below, just for the diagnostic message.)
6004 if (blkWeight < (BB_UNITY_WEIGHT / 10))
6006 printf(" block weight is too small to perform hoisting.\n");
6011 if (blkWeight < (BB_UNITY_WEIGHT / 10))
6013 // Block weight is too small to perform hoisting.
6017 for (GenTreeStmt* stmt = blk->FirstNonPhiDef(); stmt; stmt = stmt->gtNextStmt)
6019 GenTreePtr stmtTree = stmt->gtStmtExpr;
6021 (void)optHoistLoopExprsForTree(stmtTree, lnum, hoistCtxt, &firstBlockAndBeforeSideEffect, &hoistable);
6024 // we will try to hoist the top-level stmtTree
6025 optHoistCandidate(stmtTree, lnum, hoistCtxt);
// optIsProfitableToHoistableTree: register-pressure heuristic deciding whether
// hoisting 'tree' out of loop 'lnum' is worthwhile.
// Estimates the registers available to hold the hoisted value (integer vs FP,
// minus expressions already hoisted) and compares against the loop's variable
// counts; cheap trees are rejected when pressure is high.
6030 bool Compiler::optIsProfitableToHoistableTree(GenTreePtr tree, unsigned lnum)
6032 LoopDsc* pLoopDsc = &optLoopTable[lnum];
6034 bool loopContainsCall = pLoopDsc->lpContainsCall;
6037 int hoistedExprCount;
// FP trees use the FP counts/registers; everything else uses the integer side.
6041 if (varTypeIsFloating(tree->TypeGet()))
6043 hoistedExprCount = pLoopDsc->lpHoistedFPExprCount;
6044 loopVarCount = pLoopDsc->lpLoopVarFPCount;
6045 varInOutCount = pLoopDsc->lpVarInOutFPCount;
6047 availRegCount = CNT_CALLEE_SAVED_FLOAT;
// If the loop has no calls, caller-trash registers are also usable.
6048 if (!loopContainsCall)
6050 availRegCount += CNT_CALLEE_TRASH_FLOAT - 1;
6053 // For ARM each double takes two FP registers
6054 // For now on ARM we won't track singles/doubles
6055 // and instead just assume that we always have doubles.
6062 hoistedExprCount = pLoopDsc->lpHoistedExprCount;
6063 loopVarCount = pLoopDsc->lpLoopVarCount;
6064 varInOutCount = pLoopDsc->lpVarInOutCount;
6066 availRegCount = CNT_CALLEE_SAVED - 1;
6067 if (!loopContainsCall)
6069 availRegCount += CNT_CALLEE_TRASH - 1;
6071 #ifndef _TARGET_64BIT_
6072 // For our 32-bit targets Long types take two registers.
6073 if (varTypeIsLong(tree->TypeGet()))
6075 availRegCount = (availRegCount + 1) / 2;
6080 // decrement the availRegCount by the count of expression that we have already hoisted.
6081 availRegCount -= hoistedExprCount;
6083 // the variables that are read/written inside the loop should
6084 // always be a subset of the InOut variables for the loop
6085 assert(loopVarCount <= varInOutCount);
6087 // When loopVarCount >= availRegCount we believe that all of the
6088 // available registers will get used to hold LclVars inside the loop.
6089 // This pessimistically assumes that each loopVar has a conflicting
6090 // lifetime with every other loopVar.
6091 // For this case we will hoist the expression only if is profitable
6092 // to place it in a stack home location (gtCostEx >= 2*IND_COST_EX)
6093 // as we believe it will be placed in the stack or one of the other
6094 // loopVars will be spilled into the stack
6096 if (loopVarCount >= availRegCount)
6098 // Don't hoist expressions that are not heavy: tree->gtCostEx < (2*IND_COST_EX)
6099 if (tree->gtCostEx < (2 * IND_COST_EX))
6105 // When varInOutCount < availRegCount we are know that there are
6106 // some available register(s) when we enter the loop body.
6107 // When varInOutCount == availRegCount there often will be a register
6108 // available when we enter the loop body, since a loop often defines a
6109 // LclVar on exit or there is often at least one LclVar that is worth
6110 // spilling to the stack to make way for this hoisted expression.
6111 // So we are willing hoist an expression with gtCostEx == MIN_CSE_COST
6113 if (varInOutCount > availRegCount)
6115 // Don't hoist expressions that barely meet CSE cost requirements: tree->gtCostEx == MIN_CSE_COST
6116 if (tree->gtCostEx <= MIN_CSE_COST + 1)
6126 // This function returns true if 'tree' is a loop invariant expression.
6127 // It also sets '*pHoistable' to true if 'tree' can be hoisted into a loop PreHeader block
// Recursive post-order walk: a tree is hoistable only if all children are,
// it is a legal CSE candidate, its VN is invariant in the loop, and no earlier
// side effect / exception-ordering constraint blocks it. If the tree itself is
// not hoistable, any hoistable children are hoisted individually.
6129 bool Compiler::optHoistLoopExprsForTree(
6130 GenTreePtr tree, unsigned lnum, LoopHoistContext* hoistCtxt, bool* pFirstBlockAndBeforeSideEffect, bool* pHoistable)
6132 // First do the children.
6133 // We must keep track of whether each child node was hoistable or not
6135 unsigned nChildren = tree->NumChildren();
6136 bool childrenHoistable[GenTree::MAX_CHILDREN];
6138 // Initialize the array elements for childrenHoistable[] to false
6139 for (unsigned i = 0; i < nChildren; i++)
6141 childrenHoistable[i] = false;
6144 bool treeIsInvariant = true;
6145 for (unsigned childNum = 0; childNum < nChildren; childNum++)
6147 if (!optHoistLoopExprsForTree(tree->GetChild(childNum), lnum, hoistCtxt, pFirstBlockAndBeforeSideEffect,
6148 &childrenHoistable[childNum]))
6150 treeIsInvariant = false;
6154 // If all the children of "tree" are hoistable, then "tree" itself can be hoisted
6156 bool treeIsHoistable = treeIsInvariant;
6158 // But we must see if anything else prevents "tree" from being hoisted.
6160 if (treeIsInvariant)
6162 // Tree must be a suitable CSE candidate for us to be able to hoist it.
6163 treeIsHoistable = optIsCSEcandidate(tree);
6165 // If it's a call, it must be a helper call, and be pure.
6166 // Further, if it may run a cctor, it must be labeled as "Hoistable"
6167 // (meaning it won't run a cctor because the class is not precise-init).
6168 if (treeIsHoistable && tree->OperGet() == GT_CALL)
6170 GenTreeCall* call = tree->AsCall();
6171 if (call->gtCallType != CT_HELPER)
6173 treeIsHoistable = false;
6177 CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
6178 if (!s_helperCallProperties.IsPure(helpFunc))
6180 treeIsHoistable = false;
6182 else if (s_helperCallProperties.MayRunCctor(helpFunc) && (call->gtFlags & GTF_CALL_HOISTABLE) == 0)
6184 treeIsHoistable = false;
6189 if (treeIsHoistable)
6191 if (!(*pFirstBlockAndBeforeSideEffect))
6193 // For now, we give up on an expression that might raise an exception if it is after the
6194 // first possible global side effect (and we assume we're after that if we're not in the first block).
6195 // TODO-CQ: this is when we might do loop cloning.
6197 if ((tree->gtFlags & GTF_EXCEPT) != 0)
6199 treeIsHoistable = false;
6202 // Currently we must give up on reads from static variables (even if we are in the first block).
6204 if (tree->OperGet() == GT_CLS_VAR)
6206 // TODO-CQ: test that fails if we hoist GT_CLS_VAR: JIT\Directed\Languages\ComponentPascal\pi_r.exe
6208 treeIsHoistable = false;
6212 // Is the value of the whole tree loop invariant?
// Uses the liberal value number; result is cached per-loop in hoistCtxt.
6214 optVNIsLoopInvariant(tree->gtVNPair.GetLiberal(), lnum, &hoistCtxt->m_curLoopVnInvariantCache);
6216 // Is the value of the whole tree loop invariant?
6217 if (!treeIsInvariant)
6219 treeIsHoistable = false;
6223 // Check if we need to set '*pFirstBlockAndBeforeSideEffect' to false.
6224 // If we encounter a tree with a call in it
6225 // or if we see an assignment to global we set it to false.
6227 // If we are already set to false then we can skip these checks
6229 if (*pFirstBlockAndBeforeSideEffect)
6231 // For this purpose, we only care about memory side effects. We assume that expressions will
6232 // be hoisted so that they are evaluated in the same order as they would have been in the loop,
6233 // and therefore throw exceptions in the same order. (So we don't use GTF_GLOBALLY_VISIBLE_SIDE_EFFECTS
6234 // here, since that includes exceptions.)
6237 // If it's a call, it must be a helper call that does not mutate the heap.
6238 // Further, if it may run a cctor, it must be labeled as "Hoistable"
6239 // (meaning it won't run a cctor because the class is not precise-init).
6240 GenTreeCall* call = tree->AsCall();
6241 if (call->gtCallType != CT_HELPER)
6243 *pFirstBlockAndBeforeSideEffect = false;
6247 CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
6248 if (s_helperCallProperties.MutatesHeap(helpFunc))
6250 *pFirstBlockAndBeforeSideEffect = false;
6252 else if (s_helperCallProperties.MayRunCctor(helpFunc) && (call->gtFlags & GTF_CALL_HOISTABLE) == 0)
6254 *pFirstBlockAndBeforeSideEffect = false;
6258 else if (tree->OperIsAssignment())
6260 // If the LHS of the assignment has a global reference, then assume it's a global side effect.
6261 GenTreePtr lhs = tree->gtOp.gtOp1;
6262 if (lhs->gtFlags & GTF_GLOB_REF)
6264 *pFirstBlockAndBeforeSideEffect = false;
6267 else if (tree->OperIsCopyBlkOp())
6269 GenTreePtr args = tree->gtOp.gtOp1;
6270 assert(args->OperGet() == GT_LIST);
6271 if (args->gtOp.gtOp1->gtFlags & GTF_GLOB_REF)
6273 *pFirstBlockAndBeforeSideEffect = false;
6278 // If this 'tree' is hoistable then we return and the caller will
6279 // decide to hoist it as part of larger hoistable expression.
6281 if (!treeIsHoistable)
6283 // We are not hoistable so we will now hoist any hoistable children.
6285 for (unsigned childNum = 0; childNum < nChildren; childNum++)
6287 if (childrenHoistable[childNum])
6289 // We can't hoist the LHS of an assignment, isn't a real use.
6290 if (childNum == 0 && (tree->OperIsAssignment()))
6295 GenTreePtr child = tree->GetChild(childNum);
6297 // We try to hoist this 'child' tree
6298 optHoistCandidate(child, lnum, hoistCtxt);
6303 *pHoistable = treeIsHoistable;
6304 return treeIsInvariant;
// optHoistCandidate: final gatekeeper for hoisting 'tree' out of loop 'lnum'.
// Rejects when the loop isn't hoistable, the tree isn't valid at the loop
// head, hoisting isn't profitable, or the tree's VN was already hoisted in
// this loop or a parent loop; otherwise performs the hoist and updates counts.
6307 void Compiler::optHoistCandidate(GenTreePtr tree, unsigned lnum, LoopHoistContext* hoistCtxt)
6309 if (lnum == BasicBlock::NOT_IN_LOOP)
6311 // The hoisted expression isn't valid at any loop head so don't hoist this expression.
6315 // The outer loop also must be suitable for hoisting...
6316 if ((optLoopTable[lnum].lpFlags & LPFLG_HOISTABLE) == 0)
6321 // If the hoisted expression isn't valid at this loop head then break
6322 if (!optTreeIsValidAtLoopHead(tree, lnum))
6327 // It must pass the hoistable profitablity tests for this loop level
6328 if (!optIsProfitableToHoistableTree(tree, lnum))
// Deduplicate by liberal value number against parent loops...
6334 if (hoistCtxt->m_hoistedInParentLoops.Lookup(tree->gtVNPair.GetLiberal(), &b))
6336 // already hoisted in a parent loop, so don't hoist this expression.
// ...and against what was already hoisted from this loop.
6340 if (hoistCtxt->GetHoistedInCurLoop(this)->Lookup(tree->gtVNPair.GetLiberal(), &b))
6342 // already hoisted this expression in the current loop, so don't hoist this expression.
6346 // Expression can be hoisted
6347 optPerformHoistExpr(tree, lnum);
6349 // Increment lpHoistedExprCount or lpHoistedFPExprCount
6350 if (!varTypeIsFloating(tree->TypeGet()))
6352 optLoopTable[lnum].lpHoistedExprCount++;
6353 #ifndef _TARGET_64BIT_
6354 // For our 32-bit targets Long types take two registers.
6355 if (varTypeIsLong(tree->TypeGet()))
6357 optLoopTable[lnum].lpHoistedExprCount++;
6361 else // Floating point expr hoisted
6363 optLoopTable[lnum].lpHoistedFPExprCount++;
6366 // Record the hoisted expression in hoistCtxt
6367 hoistCtxt->GetHoistedInCurLoop(this)->Set(tree->gtVNPair.GetLiberal(), true);
// optVNIsLoopInvariant: is value number 'vn' invariant with respect to loop
// 'lnum'? Constants are trivially invariant; phi definitions are invariant iff
// they are defined outside the loop; for other VN functions, all arguments
// must be invariant. Results are memoized in 'loopVnInvariantCache'.
6370 bool Compiler::optVNIsLoopInvariant(ValueNum vn, unsigned lnum, VNToBoolMap* loopVnInvariantCache)
6372 // If it is not a VN, is not loop-invariant.
6373 if (vn == ValueNumStore::NoVN)
6378 // We'll always short-circuit constants.
6379 if (vnStore->IsVNConstant(vn) || vn == vnStore->VNForVoid())
6384 // If we've done this query previously, don't repeat.
6385 bool previousRes = false;
6386 if (loopVnInvariantCache->Lookup(vn, &previousRes))
6393 if (vnStore->GetVNFunc(vn, &funcApp))
6395 if (funcApp.m_func == VNF_PhiDef)
6397 // First, make sure it's a "proper" phi -- the definition is a Phi application.
6398 VNFuncApp phiDefValFuncApp;
6399 if (!vnStore->GetVNFunc(funcApp.m_args[2], &phiDefValFuncApp) || phiDefValFuncApp.m_func != VNF_Phi)
6401 // It's not *really* a definition, rather a pass-through of some other VN.
6402 // (This could occur, say if both sides of an if-then-else diamond made the
6403 // same assignment to a variable.)
6404 res = optVNIsLoopInvariant(funcApp.m_args[2], lnum, loopVnInvariantCache)
6408 // Is the definition within the loop? If so, is not loop-invariant.
6409 unsigned lclNum = funcApp.m_args[0];
6410 unsigned ssaNum = funcApp.m_args[1];
6411 LclSsaVarDsc* ssaDef = lvaTable[lclNum].GetPerSsaData(ssaNum);
6412 res = !optLoopContains(lnum, ssaDef->m_defLoc.m_blk->bbNatLoopNum);
6415 else if (funcApp.m_func == VNF_PhiHeapDef)
// Heap phi: invariant iff the defining block is outside the loop.
6417 BasicBlock* defnBlk = reinterpret_cast<BasicBlock*>(vnStore->ConstantValue<ssize_t>(funcApp.m_args[0]));
6418 res = !optLoopContains(lnum, defnBlk->bbNatLoopNum);
// General VN function: invariant only if every argument VN is invariant.
6422 for (unsigned i = 0; i < funcApp.m_arity; i++)
6424 // TODO-CQ: We need to either make sure that *all* VN functions
6425 // always take VN args, or else have a list of arg positions to exempt, as implicitly
6427 if (!optVNIsLoopInvariant(funcApp.m_args[i], lnum, loopVnInvariantCache))
6437 // Non-function "new, unique" VN's may be annotated with the loop nest where
6438 // their definition occurs.
6439 BasicBlock::loopNumber vnLoopNum = vnStore->LoopOfVN(vn);
6441 if (vnLoopNum == MAX_LOOP_NUM)
6447 res = !optLoopContains(lnum, vnLoopNum);
// Memoize before returning.
6451 loopVnInvariantCache->Set(vn, res);
// optTreeIsValidAtLoopHead: can 'tree' be legally evaluated at the head of
// loop 'lnum'? Locals must be SSA-tracked with their SSA def outside the loop;
// constants are always valid; otherwise every child must itself be valid.
6455 bool Compiler::optTreeIsValidAtLoopHead(GenTreePtr tree, unsigned lnum)
6457 if (tree->OperIsLocal())
6459 GenTreeLclVarCommon* lclVar = tree->AsLclVarCommon();
6460 unsigned lclNum = lclVar->gtLclNum;
6462 // The lvlVar must be have an Ssa tracked lifetime
6463 if (fgExcludeFromSsa(lclNum))
6468 // If the loop does not contains the SSA def we can hoist it.
6469 if (!optLoopTable[lnum].lpContains(lvaTable[lclNum].GetPerSsaData(lclVar->GetSsaNum())->m_defLoc.m_blk))
6474 else if (tree->OperIsConst())
6478 else // If every one of the children nodes are valid at this Loop's Head.
6480 unsigned nChildren = tree->NumChildren();
6481 for (unsigned childNum = 0; childNum < nChildren; childNum++)
6483 if (!optTreeIsValidAtLoopHead(tree->GetChild(childNum), lnum))
6493 /*****************************************************************************
6495  *  Creates a pre-header block for the given loop - a preheader is a BBJ_NONE
6496  *  header. The pre-header will replace the current lpHead in the loop table.
6497  *  The loop has to be a do-while loop. Thus, all blocks dominated by lpHead
6498  *  will also be dominated by the loop-top, lpHead->bbNext.
// NOTE(review): numbered listing with elided lines; comments cover visible code only.
6502 void Compiler::fgCreateLoopPreHeader(unsigned lnum)
6504 LoopDsc* pLoopDsc = &optLoopTable[lnum];
6506 /* This loop has to be a "do-while" loop */
6508 assert(pLoopDsc->lpFlags & LPFLG_DO_WHILE);
6510 /* Have we already created a loop-preheader block? */
6512 if (pLoopDsc->lpFlags & LPFLG_HAS_PREHEAD)
6517 BasicBlock* head = pLoopDsc->lpHead;
6518 BasicBlock* top = pLoopDsc->lpTop;
6519 BasicBlock* entry = pLoopDsc->lpEntry;
6521 // if 'entry' and 'head' are in different try regions then we won't be able to hoist
6522 if (!BasicBlock::sameTryRegion(head, entry))
6527 // Ensure that lpHead always dominates lpEntry
6529 noway_assert(fgDominate(head, entry));
6531 /* Get hold of the first block of the loop body */
6533 assert(top == entry);
6535 /* Allocate a new basic block */
6537 BasicBlock* preHead = bbNewBasicBlock(BBJ_NONE);
6538 preHead->bbFlags |= BBF_INTERNAL | BBF_LOOP_PREHEADER;
6540 // Must set IL code offset
6541 preHead->bbCodeOffs = top->bbCodeOffs;
6543 // Set the default value of the preHead weight in case we don't have
6544 // valid profile data and since this blocks weight is just an estimate
6545 // we clear any BBF_PROF_WEIGHT flag that we may have picked up from head.
6547 preHead->inheritWeight(head);
6548 preHead->bbFlags &= ~BBF_PROF_WEIGHT;
6553 printf("\nCreated PreHeader (BB%02u) for loop L%02u (BB%02u - BB%02u), with weight = %s\n", preHead->bbNum,
6554 lnum, top->bbNum, pLoopDsc->lpBottom->bbNum, refCntWtd2str(preHead->getBBWeight(this)));
6558 // The preheader block is part of the containing loop (if any).
6559 preHead->bbNatLoopNum = pLoopDsc->lpParent;
// With profile data and a conditional head, compute a better pre-header
// weight from the fraction of head's weight that actually enters the loop.
6561 if (fgIsUsingProfileWeights() && (head->bbJumpKind == BBJ_COND))
6563 if ((head->bbWeight == 0) || (head->bbNext->bbWeight == 0))
6565 preHead->bbWeight = 0;
6566 preHead->bbFlags |= BBF_RUN_RARELY;
6570 bool allValidProfileWeights = ((head->bbFlags & BBF_PROF_WEIGHT) != 0) &&
6571 ((head->bbJumpDest->bbFlags & BBF_PROF_WEIGHT) != 0) &&
6572 ((head->bbNext->bbFlags & BBF_PROF_WEIGHT) != 0);
6574 if (allValidProfileWeights)
6576 double loopEnteredCount;
6577 double loopSkippedCount;
// Prefer edge weights (midpoint of min/max) when available; otherwise
// fall back to the successor blocks' weights.
6579 if (fgHaveValidEdgeWeights)
6581 flowList* edgeToNext = fgGetPredForBlock(head->bbNext, head);
6582 flowList* edgeToJump = fgGetPredForBlock(head->bbJumpDest, head);
6583 noway_assert(edgeToNext != nullptr);
6584 noway_assert(edgeToJump != nullptr);
6587 ((double)edgeToNext->flEdgeWeightMin + (double)edgeToNext->flEdgeWeightMax) / 2.0;
6589 ((double)edgeToJump->flEdgeWeightMin + (double)edgeToJump->flEdgeWeightMax) / 2.0;
6593 loopEnteredCount = (double)head->bbNext->bbWeight;
6594 loopSkippedCount = (double)head->bbJumpDest->bbWeight;
6597 double loopTakenRatio = loopEnteredCount / (loopEnteredCount + loopSkippedCount);
6599 // Calculate a good approximation of the preHead's block weight
6600 unsigned preHeadWeight = (unsigned)(((double)head->bbWeight * loopTakenRatio) + 0.5);
6601 preHead->setBBWeight(max(preHeadWeight, 1));
6602 noway_assert(!preHead->isRunRarely());
6607 // Link in the preHead block.
6608 fgInsertBBbefore(top, preHead);
6610 // Ideally we would re-run SSA and VN if we optimized by doing loop hoisting.
6611 // However, that is too expensive at this point. Instead, we update the phi
6612 // node block references, if we created pre-header block due to hoisting.
6613 // This is sufficient because any definition participating in SSA that flowed
6614 // into the phi via the loop header block will now flow through the preheader
6615 // block from the header block.
6617 for (GenTreePtr stmt = top->bbTreeList; stmt; stmt = stmt->gtNext)
6619 GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
6620 if (tree->OperGet() != GT_ASG)
6624 GenTreePtr op2 = tree->gtGetOp2();
6625 if (op2->OperGet() != GT_PHI)
// Retarget phi args that named 'head' as predecessor to 'preHead'.
6629 GenTreeArgList* args = op2->gtGetOp1()->AsArgList();
6630 while (args != nullptr)
6632 GenTreePhiArg* phiArg = args->Current()->AsPhiArg();
6633 if (phiArg->gtPredBB == head)
6635 phiArg->gtPredBB = preHead;
6637 args = args->Rest();
6641 // The handler can't begin at the top of the loop. If it did, it would be incorrect
6642 // to set the handler index on the pre header without updating the exception table.
6643 noway_assert(!top->hasHndIndex() || fgFirstBlockOfHandler(top) != top);
6645 // Update the EH table to make the hoisted block part of the loop's EH block.
6646 fgExtendEHRegionBefore(top);
6648 // TODO-CQ: set dominators for this block, to allow loop optimizations requiring them
6649 // (e.g: hoisting expression in a loop with the same 'head' as this one)
6651 /* Update the loop entry */
6653 pLoopDsc->lpHead = preHead;
6654 pLoopDsc->lpFlags |= LPFLG_HAS_PREHEAD;
6656 /* The new block becomes the 'head' of the loop - update bbRefs and bbPreds
6657 All predecessors of 'beg', (which is the entry in the loop)
6658 now have to jump to 'preHead', unless they are dominated by 'head' */
6660 preHead->bbRefs = 0;
6661 fgAddRefPred(preHead, head);
6662 bool checkNestedLoops = false;
// Walk all predecessors of 'top' and retarget the non-loop ones to preHead.
6664 for (flowList* pred = top->bbPreds; pred; pred = pred->flNext)
6666 BasicBlock* predBlock = pred->flBlock;
6668 if (fgDominate(top, predBlock))
6670 // note: if 'top' dominates predBlock, 'head' dominates predBlock too
6671 // (we know that 'head' dominates 'top'), but using 'top' instead of
6672 // 'head' in the test allows us to not enter here if 'predBlock == head'
// A dominated predecessor other than this loop's bottom is a back-edge
// of some other (nested) loop sharing this entry — remember to fix up
// those loops' heads below.
6674 if (predBlock != pLoopDsc->lpBottom)
6676 noway_assert(predBlock != head);
6677 checkNestedLoops = true;
6682 switch (predBlock->bbJumpKind)
6685 noway_assert(predBlock == head);
6689 if (predBlock == head)
6691 noway_assert(predBlock->bbJumpDest != top);
6697 case BBJ_EHCATCHRET:
6698 noway_assert(predBlock->bbJumpDest == top);
6699 predBlock->bbJumpDest = preHead;
6700 preHead->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
6702 if (predBlock == head)
6704 // This is essentially the same case of predBlock being a BBJ_NONE. We may not be
6705 // able to make this a BBJ_NONE if it's an internal block (for example, a leave).
6706 // Just break, pred will be removed after switch.
6710 fgRemoveRefPred(top, predBlock);
6711 fgAddRefPred(preHead, predBlock);
// Switch predecessor: rewrite every jump-table slot that targets 'top'.
6717 jumpCnt = predBlock->bbJumpSwt->bbsCount;
6718 BasicBlock** jumpTab;
6719 jumpTab = predBlock->bbJumpSwt->bbsDstTab;
6724 if ((*jumpTab) == top)
6726 (*jumpTab) = preHead;
6728 fgRemoveRefPred(top, predBlock);
6729 fgAddRefPred(preHead, predBlock);
6730 preHead->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
6732 } while (++jumpTab, --jumpCnt);
6735 noway_assert(!"Unexpected bbJumpKind");
// Finally, top's only remaining non-loop predecessor is preHead.
6740 noway_assert(!fgGetPredForBlock(top, preHead));
6741 fgRemoveRefPred(top, head);
6742 fgAddRefPred(top, preHead);
6745 If we found at least one back-edge in the flowgraph pointing to the top/entry of the loop
6746 (other than the back-edge of the loop we are considering) then we likely have nested
6747 do-while loops with the same entry block and inserting the preheader block changes the head
6748 of all the nested loops. Now we will update this piece of information in the loop table, and
6749 mark all nested loops as having a preheader (the preheader block can be shared among all nested
6750 do-while loops with the same entry block).
6752 if (checkNestedLoops)
6754 for (unsigned l = 0; l < optLoopCount; l++)
6756 if (optLoopTable[l].lpHead == head)
6758 noway_assert(l != lnum); // pLoopDsc->lpHead was already changed from 'head' to 'preHead'
6759 noway_assert(optLoopTable[l].lpEntry == top);
6760 optUpdateLoopHead(l, optLoopTable[l].lpHead, preHead);
6761 optLoopTable[l].lpFlags |= LPFLG_HAS_PREHEAD;
6765 printf("Same PreHeader (BB%02u) can be used for loop L%02u (BB%02u - BB%02u)\n\n", preHead->bbNum,
6766 l, top->bbNum, optLoopTable[l].lpBottom->bbNum);
6774 bool Compiler::optBlockIsLoopEntry(BasicBlock* blk, unsigned* pLnum)
6776 for (unsigned lnum = blk->bbNatLoopNum; lnum != BasicBlock::NOT_IN_LOOP; lnum = optLoopTable[lnum].lpParent)
6778 if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED)
6782 if (optLoopTable[lnum].lpEntry == blk)
6791 void Compiler::optComputeLoopSideEffects()
6794 for (lnum = 0; lnum < optLoopCount; lnum++)
6796 VarSetOps::AssignNoCopy(this, optLoopTable[lnum].lpVarInOut, VarSetOps::MakeEmpty(this));
6797 VarSetOps::AssignNoCopy(this, optLoopTable[lnum].lpVarUseDef, VarSetOps::MakeEmpty(this));
6798 optLoopTable[lnum].lpContainsCall = false;
6801 for (lnum = 0; lnum < optLoopCount; lnum++)
6803 if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED)
6808 if (optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP)
6809 { // Is outermost...
6810 optComputeLoopNestSideEffects(lnum);
6814 VarSetOps::AssignNoCopy(this, lvaFloatVars, VarSetOps::MakeEmpty(this));
6815 #ifndef _TARGET_64BIT_
6816 VarSetOps::AssignNoCopy(this, lvaLongVars, VarSetOps::MakeEmpty(this));
6819 for (unsigned i = 0; i < lvaCount; i++)
6821 LclVarDsc* varDsc = &lvaTable[i];
6822 if (varDsc->lvTracked)
6824 if (varTypeIsFloating(varDsc->lvType))
6826 VarSetOps::AddElemD(this, lvaFloatVars, varDsc->lvVarIndex);
6828 #ifndef _TARGET_64BIT_
6829 else if (varTypeIsLong(varDsc->lvType))
6831 VarSetOps::AddElemD(this, lvaLongVars, varDsc->lvVarIndex);
6838 void Compiler::optComputeLoopNestSideEffects(unsigned lnum)
6840 assert(optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP); // Requires: lnum is outermost.
6841 BasicBlock* botNext = optLoopTable[lnum].lpBottom->bbNext;
6842 for (BasicBlock* bbInLoop = optLoopTable[lnum].lpFirst; bbInLoop != botNext; bbInLoop = bbInLoop->bbNext)
6844 optComputeLoopSideEffectsOfBlock(bbInLoop);
// optComputeLoopSideEffectsOfBlock: Summarize the side effects of all trees in "blk"
// into the loop-table entries of the block's innermost natural loop and every loop
// that encloses it: variable liveness, contains-call, modified fields, modified
// array element types, and (conservatively) arbitrary heap effects ("heap havoc").
// NOTE(review): this extract elides some interior lines (braces, case labels);
// the visible statements are preserved byte-for-byte below.
6848 void Compiler::optComputeLoopSideEffectsOfBlock(BasicBlock* blk)
6850 unsigned mostNestedLoop = blk->bbNatLoopNum;
6851 assert(mostNestedLoop != BasicBlock::NOT_IN_LOOP);
// The block's live-in/live-out and use/def sets apply to all containing loops.
6853 AddVariableLivenessAllContainingLoops(mostNestedLoop, blk);
6855 bool heapHavoc = false; // True ==> there's a call or a memory store that has arbitrary heap effects.
6857 // Now iterate over the remaining statements, and their trees.
6858 for (GenTreePtr stmts = blk->FirstNonPhiDef(); (stmts != nullptr); stmts = stmts->gtNext)
6860 for (GenTreePtr tree = stmts->gtStmt.gtStmtList; (tree != nullptr); tree = tree->gtNext)
6862 genTreeOps oper = tree->OperGet();
6864 // Even after we set heapHavoc we still may want to know if a loop contains calls
6867 if (oper == GT_CALL)
6869 // Record that this loop contains a call
6870 AddContainsCallAllContainingLoops(mostNestedLoop);
6873 // If we just set lpContainsCall or it was previously set
6874 if (optLoopTable[mostNestedLoop].lpContainsCall)
6876 // We can early exit after both heapHavoc and lpContainsCall are both set to true.
6880 // We are just looking for GT_CALL nodes after heapHavoc was set.
6884 // otherwise heapHavoc is not set
6887 // This body is a distillation of the heap-side effect code of value numbering.
6888 // We also do a very limited analysis of byref PtrTo values, to cover some cases
6889 // that the compiler creates.
6891 if (GenTree::OperIsAssignment(oper))
6893 GenTreePtr lhs = tree->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true);
// Indirect store: try to classify the target (array element, field, static)
// before falling back to arbitrary heap havoc.
6895 if (lhs->OperGet() == GT_IND)
6897 GenTreePtr arg = lhs->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true);
6898 FieldSeqNode* fldSeqArrElem = nullptr;
// Volatile stores are treated conservatively (arbitrary heap mutation).
6900 if ((tree->gtFlags & GTF_IND_VOLATILE) != 0)
6908 if (arg->TypeGet() == TYP_BYREF && arg->OperGet() == GT_LCL_VAR)
6910 // If it's a local byref for which we recorded a value number, use that...
6911 GenTreeLclVar* argLcl = arg->AsLclVar();
6912 if (!fgExcludeFromSsa(argLcl->GetLclNum()))
6915 lvaTable[argLcl->GetLclNum()].GetPerSsaData(argLcl->GetSsaNum())->m_vnPair.GetLiberal();
6917 if (argVN != ValueNumStore::NoVN && vnStore->GetVNFunc(argVN, &funcApp) &&
6918 funcApp.m_func == VNF_PtrToArrElem)
6920 assert(vnStore->IsVNHandle(funcApp.m_args[0]));
6921 CORINFO_CLASS_HANDLE elemType =
6922 CORINFO_CLASS_HANDLE(vnStore->ConstantValue<size_t>(funcApp.m_args[0]));
6923 AddModifiedElemTypeAllContainingLoops(mostNestedLoop, elemType);
6924 // Don't set heapHavoc below.
6931 // Is the LHS an array index expression?
6932 else if (lhs->ParseArrayElemForm(this, &arrInfo, &fldSeqArrElem))
6934 // We actually ignore "fldSeq" -- any modification to an S[], at any
6935 // field of "S", will lose all information about the array type.
6936 CORINFO_CLASS_HANDLE elemTypeEq = EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType)
6937 AddModifiedElemTypeAllContainingLoops(mostNestedLoop, elemTypeEq);
6941 // We are only interested in IsFieldAddr()'s fldSeq out parameter.
6943 GenTreePtr obj = nullptr; // unused
6944 GenTreePtr staticOffset = nullptr; // unused
6945 FieldSeqNode* fldSeq = nullptr;
6947 if (arg->IsFieldAddr(this, &obj, &staticOffset, &fldSeq) &&
6948 (fldSeq != FieldSeqStore::NotAField()))
6950 // Get the first (object) field from field seq. Heap[field] will yield the "field map".
6951 assert(fldSeq != nullptr);
6952 if (fldSeq->IsFirstElemFieldSeq())
6954 fldSeq = fldSeq->m_next;
6955 assert(fldSeq != nullptr);
6958 AddModifiedFieldAllContainingLoops(mostNestedLoop, fldSeq->m_fieldHnd);
// Block stores: a local definition is tracked precisely; anything else
// is assumed to have arbitrary heap effects.
6966 else if (lhs->OperIsBlk())
6968 GenTreeLclVarCommon* lclVarTree;
6970 if (!tree->DefinesLocal(this, &lclVarTree, &isEntire))
6972 // For now, assume arbitrary side effects on the heap...
6976 else if (lhs->OperGet() == GT_CLS_VAR)
6978 AddModifiedFieldAllContainingLoops(mostNestedLoop, lhs->gtClsVar.gtClsVarHnd);
6980 // Otherwise, must be local lhs form. I should assert that.
6981 else if (lhs->OperGet() == GT_LCL_VAR)
6983 GenTreeLclVar* lhsLcl = lhs->AsLclVar();
6984 GenTreePtr rhs = tree->gtOp.gtOp2;
6985 ValueNum rhsVN = rhs->gtVNPair.GetLiberal();
6986 // If we gave the RHS a value number, propagate it.
6987 if (rhsVN != ValueNumStore::NoVN)
6989 rhsVN = vnStore->VNNormVal(rhsVN);
6990 if (!fgExcludeFromSsa(lhsLcl->GetLclNum()))
6992 lvaTable[lhsLcl->GetLclNum()]
6993 .GetPerSsaData(lhsLcl->GetSsaNum())
6994 ->m_vnPair.SetLiberal(rhsVN);
6999 else // not GenTree::OperIsAssignment(oper)
7004 tree->gtVNPair = tree->gtOp.gtOp2->gtVNPair;
7008 // Is it an addr of a array index expression?
7010 GenTreePtr addrArg = tree->gtOp.gtOp1;
7011 if (addrArg->OperGet() == GT_IND)
7013 // Is the LHS an array index expression?
7014 if (addrArg->gtFlags & GTF_IND_ARR_INDEX)
7017 bool b = GetArrayInfoMap()->Lookup(addrArg, &arrInfo);
7019 CORINFO_CLASS_HANDLE elemType =
7020 EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType);
// Give the address a PtrToArrElem value number so later stores through it
// can be attributed to this element type rather than full heap havoc.
7021 tree->gtVNPair.SetBoth(
7022 vnStore->VNForFunc(TYP_BYREF, VNF_PtrToArrElem,
7023 vnStore->VNForHandle(ssize_t(elemType), GTF_ICON_CLASS_HDL),
7024 // The rest are dummy arguments.
7025 vnStore->VNForNull(), vnStore->VNForNull(),
7026 vnStore->VNForNull()));
// Interlocked operations mutate memory; treated as heap havoc.
7032 case GT_LOCKADD: // Binop
7033 case GT_XADD: // Binop
7034 case GT_XCHG: // Binop
7035 case GT_CMPXCHG: // Specialop
7043 GenTreeCall* call = tree->AsCall();
7045 // Record that this loop contains a call
7046 AddContainsCallAllContainingLoops(mostNestedLoop);
7048 if (call->gtCallType == CT_HELPER)
7050 CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
7051 if (s_helperCallProperties.MutatesHeap(helpFunc))
7055 else if (s_helperCallProperties.MayRunCctor(helpFunc))
7057 // If the call is labeled as "Hoistable", then we've checked the
7058 // class that would be constructed, and it is not precise-init, so
7059 // the cctor will not be run by this call. Otherwise, it might be,
7060 // and might have arbitrary side effects.
7061 if ((tree->gtFlags & GTF_CALL_HOISTABLE) == 0)
7075 // All other gtOper node kinds, leave 'heapHavoc' unchanged (i.e. false)
7084 // Record that all loops containing this block have heap havoc effects.
7085 unsigned lnum = mostNestedLoop;
7086 while (lnum != BasicBlock::NOT_IN_LOOP)
7088 optLoopTable[lnum].lpLoopHasHeapHavoc = true;
7089 lnum = optLoopTable[lnum].lpParent;
7094 // Marks the containsCall information to "lnum" and any parent loops.
7095 void Compiler::AddContainsCallAllContainingLoops(unsigned lnum)
7097 assert(0 <= lnum && lnum < optLoopCount);
7098 while (lnum != BasicBlock::NOT_IN_LOOP)
7100 optLoopTable[lnum].lpContainsCall = true;
7101 lnum = optLoopTable[lnum].lpParent;
7105 // Adds the variable liveness information for 'blk' to 'this' LoopDsc
7106 void Compiler::LoopDsc::AddVariableLiveness(Compiler* comp, BasicBlock* blk)
7108 VarSetOps::UnionD(comp, this->lpVarInOut, blk->bbLiveIn);
7109 VarSetOps::UnionD(comp, this->lpVarInOut, blk->bbLiveOut);
7111 VarSetOps::UnionD(comp, this->lpVarUseDef, blk->bbVarUse);
7112 VarSetOps::UnionD(comp, this->lpVarUseDef, blk->bbVarDef);
7115 // Adds the variable liveness information for 'blk' to "lnum" and any parent loops.
7116 void Compiler::AddVariableLivenessAllContainingLoops(unsigned lnum, BasicBlock* blk)
7118 assert(0 <= lnum && lnum < optLoopCount);
7119 while (lnum != BasicBlock::NOT_IN_LOOP)
7121 optLoopTable[lnum].AddVariableLiveness(this, blk);
7122 lnum = optLoopTable[lnum].lpParent;
7126 // Adds "fldHnd" to the set of modified fields of "lnum" and any parent loops.
7127 void Compiler::AddModifiedFieldAllContainingLoops(unsigned lnum, CORINFO_FIELD_HANDLE fldHnd)
7129 assert(0 <= lnum && lnum < optLoopCount);
7130 while (lnum != BasicBlock::NOT_IN_LOOP)
7132 optLoopTable[lnum].AddModifiedField(this, fldHnd);
7133 lnum = optLoopTable[lnum].lpParent;
7137 // Adds "elemType" to the set of modified array element types of "lnum" and any parent loops.
7138 void Compiler::AddModifiedElemTypeAllContainingLoops(unsigned lnum, CORINFO_CLASS_HANDLE elemClsHnd)
7140 assert(0 <= lnum && lnum < optLoopCount);
7141 while (lnum != BasicBlock::NOT_IN_LOOP)
7143 optLoopTable[lnum].AddModifiedElemType(this, elemClsHnd);
7144 lnum = optLoopTable[lnum].lpParent;
7148 /*****************************************************************************
7150 * Helper passed to Compiler::fgWalkAllTreesPre() to decrement the LclVar usage counts
7151 * The 'keepList' is either a single tree or a list of trees that are formed by
7152 * one or more GT_COMMA nodes. It is the kept side-effects as returned by the
7153 * gtExtractSideEffList method.
7157 Compiler::fgWalkResult Compiler::optRemoveTreeVisitor(GenTreePtr* pTree, fgWalkData* data)
7159 GenTreePtr tree = *pTree;
7160 Compiler* comp = data->compiler;
// The kept side-effect list is passed through the walk's callback data slot.
7161 GenTreePtr keepList = (GenTreePtr)(data->pCallbackData);
7163 // We may have a non-NULL side effect list that is being kept
// Walk down the comma spine of the keep list; any node that matches a kept
// subtree is skipped (its locals keep their ref counts).
7167 GenTreePtr keptTree = keepList;
7168 while (keptTree->OperGet() == GT_COMMA)
7170 assert(keptTree->OperKind() & GTK_SMPOP);
7171 GenTreePtr op1 = keptTree->gtOp.gtOp1;
7172 GenTreePtr op2 = keptTree->gtGetOp2();
7174 // For the GT_COMMA case the op1 is part of the orginal CSE tree
7175 // that is being kept because it contains some side-effect
7179 // This tree and all of its sub trees are being kept.
7180 return WALK_SKIP_SUBTREES;
7183 // For the GT_COMMA case the op2 are the remaining side-effects of the orginal CSE tree
7184 // which can again be another GT_COMMA or the final side-effect part
// Finally compare against the last (non-comma) kept tree.
7188 if (tree == keptTree)
7190 // This tree and all of its sub trees are being kept.
7191 return WALK_SKIP_SUBTREES;
7195 // This node is being removed from the graph of GenTreePtr
7197 // Look for any local variable references
7199 if (tree->gtOper == GT_LCL_VAR && comp->lvaLocalVarRefCounted)
7204 /* This variable ref is going away, decrease its ref counts */
7206 lclNum = tree->gtLclVarCommon.gtLclNum;
7207 assert(lclNum < comp->lvaCount);
7208 varDsc = comp->lvaTable + lclNum;
7210 // make sure it's been initialized
7211 assert(comp->compCurBB != nullptr);
7212 assert(comp->compCurBB->bbWeight <= BB_MAX_WEIGHT);
7214 /* Decrement its lvRefCnt and lvRefCntWtd */
7216 // Use getBBWeight to determine the proper block weight.
7217 // This impacts the block weights when we have IBC data.
7218 varDsc->decRefCnts(comp->compCurBB->getBBWeight(comp), comp);
7221 return WALK_CONTINUE;
7224 /*****************************************************************************
7226 * Routine called to decrement the LclVar ref counts when removing a tree
7227 * during the remove RangeCheck phase.
7228 * This method will decrement the refcounts for any LclVars used below 'deadTree',
7229 * unless the node is found in the 'keepList' (which are saved side effects)
7230 * The keepList is communicated using the walkData.pCallbackData field
7231 * Also the compCurBB must be set to the current BasicBlock which contains
7232 * 'deadTree' as we need to fetch the block weight when decrementing the ref counts.
7235 void Compiler::optRemoveTree(GenTreePtr deadTree, GenTreePtr keepList)
7237 // We communicate this value using the walkData.pCallbackData field
7239 fgWalkTreePre(&deadTree, optRemoveTreeVisitor, (void*)keepList);
7242 /*****************************************************************************
7244 * Given an array index node, mark it as not needing a range check.
// The bounds check is replaced under the GT_COMMA by its extracted side
// effects (or a NOP when there are none); 'forceRemove' bypasses the
// removability check. NOTE(review): some lines are elided in this extract.
7247 void Compiler::optRemoveRangeCheck(
7248 GenTreePtr tree, GenTreePtr stmt, bool updateCSEcounts, unsigned sideEffFlags, bool forceRemove)
7264 noway_assert(!"can't remove range checks without REARRANGE_ADDS right now");
7267 noway_assert(stmt->gtOper == GT_STMT);
7268 noway_assert(tree->gtOper == GT_COMMA);
7269 noway_assert(tree->gtOp.gtOp1->OperIsBoundsCheck());
7270 noway_assert(forceRemove || optIsRangeCheckRemovable(tree->gtOp.gtOp1));
7272 GenTreeBoundsChk* bndsChk = tree->gtOp.gtOp1->AsBoundsChk();
7277 printf("Before optRemoveRangeCheck:\n");
// Keep any side effects (e.g. faults from the index/length computation).
7282 GenTreePtr sideEffList = nullptr;
7285 gtExtractSideEffList(tree->gtOp.gtOp1, &sideEffList, sideEffFlags);
7288 // Decrement the ref counts for any LclVars that are being deleted
7290 optRemoveTree(tree->gtOp.gtOp1, sideEffList);
7292 // Just replace the bndsChk with a NOP as an operand to the GT_COMMA, if there are no side effects.
7293 tree->gtOp.gtOp1 = (sideEffList != nullptr) ? sideEffList : gtNewNothingNode();
7295 // TODO-CQ: We should also remove the GT_COMMA, but in any case we can no longer CSE the GT_COMMA.
7296 tree->gtFlags |= GTF_DONT_CSE;
7298 /* Recalculate the gtCostSz, etc... */
7299 gtSetStmtInfo(stmt);
7301 /* Re-thread the nodes if necessary */
7302 if (fgStmtListThreaded)
7310 printf("After optRemoveRangeCheck:\n");
7316 /*****************************************************************************
7317 * Return the scale in an array reference, given a pointer to the
7318 * multiplication node.
7321 ssize_t Compiler::optGetArrayRefScaleAndIndex(GenTreePtr mul, GenTreePtr* pIndex DEBUGARG(bool bRngChk))
7324 assert(mul->gtOper == GT_MUL || mul->gtOper == GT_LSH);
7325 assert(mul->gtOp.gtOp2->IsCnsIntOrI());
7327 ssize_t scale = mul->gtOp.gtOp2->gtIntConCommon.IconValue();
7329 if (mul->gtOper == GT_LSH)
7331 scale = ((ssize_t)1) << scale;
7334 GenTreePtr index = mul->gtOp.gtOp1;
7336 if (index->gtOper == GT_MUL && index->gtOp.gtOp2->IsCnsIntOrI())
7338 // case of two cascading multiplications for constant int (e.g. * 20 morphed to * 5 * 4):
7339 // When index->gtOper is GT_MUL and index->gtOp.gtOp2->gtOper is GT_CNS_INT (i.e. * 5),
7340 // we can bump up the scale from 4 to 5*4, and then change index to index->gtOp.gtOp1.
7341 // Otherwise, we cannot optimize it. We will simply keep the original scale and index.
7342 scale *= index->gtOp.gtOp2->gtIntConCommon.IconValue();
7343 index = index->gtOp.gtOp1;
7346 assert(!bRngChk || index->gtOper != GT_COMMA);
7356 /*****************************************************************************
7357 * Find the last assignment to of the local variable in the block. Return
7358 * RHS or NULL. If any local variable in the RHS has been killed in
7359 * intervening code, return NULL. If the variable being searched for is killed
7360 * in the intervening code, return NULL.
// On return, *pKilledInOut has accumulated the tracked locals assigned between
// the found definition and the end of the block; *pLhsRhsKilledAfterInit is set
// when the searched local (or something the RHS depends on) was clobbered.
7364 GenTreePtr Compiler::optFindLocalInit(BasicBlock* block,
7366 VARSET_TP* pKilledInOut,
7367 bool* pLhsRhsKilledAfterInit)
7369 assert(pKilledInOut);
7370 assert(pLhsRhsKilledAfterInit);
7372 *pLhsRhsKilledAfterInit = false;
7374 unsigned LclNum = local->gtLclVarCommon.gtLclNum;
7376 GenTreePtr list = block->bbTreeList;
7377 if (list == nullptr)
// Scan the block's statements in reverse (last assignment wins).
7382 GenTreePtr rhs = nullptr;
7383 GenTreePtr stmt = list;
7386 stmt = stmt->gtPrev;
7387 if (stmt == nullptr)
7392 GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
7393 // If we encounter an assignment to a local variable,
7394 if ((tree->OperKind() & GTK_ASGOP) && tree->gtOp.gtOp1->gtOper == GT_LCL_VAR)
7396 // And the assigned variable equals the input local,
7397 if (tree->gtOp.gtOp1->gtLclVarCommon.gtLclNum == LclNum)
7399 // If the assignment is '=' and it is not a conditional, then return rhs.
7400 if (tree->gtOper == GT_ASG && !(tree->gtFlags & GTF_COLON_COND))
7402 rhs = tree->gtOp.gtOp2;
7404 // If the assignment is 'op=' or a conditional equal, then the search ends here,
7405 // as we found a kill to the input local.
7408 *pLhsRhsKilledAfterInit = true;
7409 assert(rhs == nullptr);
// Some other local was assigned: record it as killed if it is tracked.
7415 LclVarDsc* varDsc = optIsTrackedLocal(tree->gtOp.gtOp1);
7416 if (varDsc == nullptr)
7420 VarSetOps::AddElemD(this, *pKilledInOut, varDsc->lvVarIndex);
7423 } while (stmt != list);
7430 // If any local in the RHS is killed in intervening code, or RHS has an indirection, return NULL.
7431 varRefKinds rhsRefs = VR_NONE;
7432 VARSET_TP VARSET_INIT_NOCOPY(rhsLocals, VarSetOps::UninitVal());
7433 bool b = lvaLclVarRefs(rhs, nullptr, &rhsRefs, &rhsLocals);
7434 if (!b || !VarSetOps::IsEmptyIntersection(this, rhsLocals, *pKilledInOut) || (rhsRefs != VR_NONE))
7436 // If RHS has been indirectly referenced, consider it a write and a kill.
7437 *pLhsRhsKilledAfterInit = true;
7444 /*****************************************************************************
7446 * Return true if "op1" is guaranteed to be less then or equal to "op2".
7451 bool Compiler::optIsNoMore(GenTreePtr op1, GenTreePtr op2, int add1, int add2)
7453 if (op1->gtOper == GT_CNS_INT && op2->gtOper == GT_CNS_INT)
7455 add1 += op1->gtIntCon.gtIconVal;
7456 add2 += op2->gtIntCon.gtIconVal;
7460 /* Check for +/- constant on either operand */
7462 if (op1->gtOper == GT_ADD && op1->gtOp.gtOp2->gtOper == GT_CNS_INT)
7464 add1 += op1->gtOp.gtOp2->gtIntCon.gtIconVal;
7465 op1 = op1->gtOp.gtOp1;
7468 if (op2->gtOper == GT_ADD && op2->gtOp.gtOp2->gtOper == GT_CNS_INT)
7470 add2 += op2->gtOp.gtOp2->gtIntCon.gtIconVal;
7471 op2 = op2->gtOp.gtOp1;
7474 /* We only allow local variable references */
7476 if (op1->gtOper != GT_LCL_VAR)
7478 if (op2->gtOper != GT_LCL_VAR)
7480 if (op1->gtLclVarCommon.gtLclNum != op2->gtLclVarCommon.gtLclNum)
7483 /* NOTE: Caller ensures that this variable has only one def */
7485 // printf("limit [%d]:\n", add1); gtDispTree(op1);
7486 // printf("size [%d]:\n", add2); gtDispTree(op2);
7490 return (bool)(add1 <= add2);
7495 //------------------------------------------------------------------------------
7496 // optObtainLoopCloningOpts: Identify optimization candidates and update
7497 // the "context" for array optimizations.
7500 // context - data structure where all loop cloning info is kept. The
7501 // optInfo fields of the context are updated with the
7502 // identified optimization candidates.
7504 void Compiler::optObtainLoopCloningOpts(LoopCloneContext* context)
7506 for (unsigned i = 0; i < optLoopCount; i++)
7508 JITDUMP("Considering loop %d to clone for optimizations.\n", i);
7509 if (optIsLoopClonable(i))
7511 if (!(optLoopTable[i].lpFlags & LPFLG_REMOVED))
7513 optIdentifyLoopOptInfo(i, context);
7516 JITDUMP("------------------------------------------------------------\n");
7521 //------------------------------------------------------------------------
7522 // optIdentifyLoopOptInfo: Identify loop optimization candidates an also
7523 // check if the loop is suitable for the optimizations performed.
7526 // loopNum - the current loop index for which conditions are derived.
7527 // context - data structure where all loop cloning candidates will be
// updated (optInfo entries are pushed per candidate).
7531 // If the loop is not suitable for the optimizations, return false - context
7532 // should not contain any optimization candidate for the loop if false.
7533 // Else return true.
7536 // Check if the loop is well formed for this optimization and identify the
7537 // optimization candidates and update the "context" parameter with all the
7538 // contextual information necessary to perform the optimization later.
7540 bool Compiler::optIdentifyLoopOptInfo(unsigned loopNum, LoopCloneContext* context)
7542 noway_assert(loopNum < optLoopCount);
7544 LoopDsc* pLoop = &optLoopTable[loopNum];
// The loop must have a recognized "for"-style induction (LPFLG_ITER).
7546 if (!(pLoop->lpFlags & LPFLG_ITER))
7548 JITDUMP("> No iter flag on loop %d.\n", loopNum);
7552 unsigned ivLclNum = pLoop->lpIterVar();
7553 if (lvaVarAddrExposed(ivLclNum))
7555 JITDUMP("> Rejected V%02u as iter var because is address-exposed.\n", ivLclNum);
7559 BasicBlock* head = pLoop->lpHead;
7560 BasicBlock* end = pLoop->lpBottom;
7561 BasicBlock* beg = head->bbNext;
// The bottom block must conditionally branch back to the loop's begin block.
7563 if (end->bbJumpKind != BBJ_COND)
7565 JITDUMP("> Couldn't find termination test.\n");
7569 if (end->bbJumpDest != beg)
7571 JITDUMP("> Branch at loop 'end' not looping to 'begin'.\n");
7575 // TODO-CQ: CLONE: Mark increasing or decreasing loops.
7576 if ((pLoop->lpIterOper() != GT_ASG_ADD && pLoop->lpIterOper() != GT_ADD) || (pLoop->lpIterConst() != 1))
7578 JITDUMP("> Loop iteration operator not matching\n");
7582 if ((pLoop->lpFlags & LPFLG_CONST_LIMIT) == 0 && (pLoop->lpFlags & LPFLG_VAR_LIMIT) == 0 &&
7583 (pLoop->lpFlags & LPFLG_ARRLEN_LIMIT) == 0)
7585 JITDUMP("> Loop limit is neither constant, variable or array length\n")
7589 if (!(((pLoop->lpTestOper() == GT_LT || pLoop->lpTestOper() == GT_LE) &&
7590 (pLoop->lpIterOper() == GT_ADD || pLoop->lpIterOper() == GT_ASG_ADD)) ||
7591 ((pLoop->lpTestOper() == GT_GT || pLoop->lpTestOper() == GT_GE) &&
7592 (pLoop->lpIterOper() == GT_SUB || pLoop->lpIterOper() == GT_ASG_SUB))))
7594 JITDUMP("> Loop test (%s) doesn't agree with the direction (%s) of the pLoop->\n",
7595 GenTree::NodeName(pLoop->lpTestOper()), GenTree::NodeName(pLoop->lpIterOper()));
7599 if (!(pLoop->lpTestTree->OperKind() & GTK_RELOP) || !(pLoop->lpTestTree->gtFlags & GTF_RELOP_ZTT))
7601 JITDUMP("> Loop inversion NOT present, loop test [%06u] may not protect entry from head.\n",
7602 pLoop->lpTestTree->gtTreeID);
7607 GenTreePtr op1 = pLoop->lpIterator();
7608 noway_assert((op1->gtOper == GT_LCL_VAR) && (op1->gtLclVarCommon.gtLclNum == ivLclNum));
7611 JITDUMP("Checking blocks BB%02d..BB%02d for optimization candidates\n", beg->bbNum,
7612 end->bbNext ? end->bbNext->bbNum : 0);
// Walk every tree of every block in the loop body looking for candidates.
7614 LoopCloneVisitorInfo info(context, loopNum, nullptr);
7615 for (BasicBlock* block = beg; block != end->bbNext; block = block->bbNext)
7618 for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
7621 fgWalkTreePre(&stmt->gtStmt.gtStmtExpr, optCanOptimizeByLoopCloningVisitor, &info, false, false);
7628 //---------------------------------------------------------------------------------------------------------------
7629 // optExtractArrIndex: Try to extract the array index from "tree".
7632 // tree the tree to be checked if it is the array [] operation.
7633 // result the extracted GT_INDEX information is updated in result.
7634 // lhsNum for the root level (function is recursive) callers should be BAD_VAR_NUM.
7637 // Returns true if array index can be extracted, else, return false. See assumption about
7638 // what will be extracted. The "result" variable's rank parameter is advanced for every
7639 // dimension of [] encountered.
7642 // Given a "tree" extract the GT_INDEX node in "result" as ArrIndex. In FlowGraph morph
7643 // we have converted a GT_INDEX tree into a scaled index base offset expression. We need
7644 // to reconstruct this to be able to know if this is an array access.
7647 // The method extracts only if the array base and indices are GT_LCL_VAR.
7649 // TODO-CQ: CLONE: After morph make sure this method extracts values before morph.
7651 // [000000001AF828D8] ---XG------- indir int
7652 // [000000001AF872C8] ------------ const long 16 Fseq[#FirstElem]
7653 // [000000001AF87340] ------------ + byref
7654 // [000000001AF87160] -------N---- const long 2
7655 // [000000001AF871D8] ------------ << long
7656 // [000000001AF870C0] ------------ cast long <- int
7657 // [000000001AF86F30] i----------- lclVar int V04 loc0
7658 // [000000001AF87250] ------------ + byref
7659 // [000000001AF86EB8] ------------ lclVar ref V01 arg1
7660 // [000000001AF87468] ---XG------- comma int
7661 // [000000001AF87020] ---X-------- arrBndsChk void
7662 // [000000001AF86FA8] ---X-------- arrLen int
7663 // [000000001AF827E8] ------------ lclVar ref V01 arg1
7664 // [000000001AF82860] ------------ lclVar int V04 loc0
7665 // [000000001AF829F0] -A-XG------- = int
7666 // [000000001AF82978] D------N---- lclVar int V06 tmp0
7668 bool Compiler::optExtractArrIndex(GenTreePtr tree, ArrIndex* result, unsigned lhsNum)
// Shape check 1: the tree must be COMMA(arrBndsChk, indir).
7670 if (tree->gtOper != GT_COMMA)
7674 GenTreePtr before = tree->gtGetOp1();
7675 if (before->gtOper != GT_ARR_BOUNDS_CHECK)
// Shape check 2: both the array (under arrLen) and the index of the bounds
// check must be simple local variables.
7679 GenTreeBoundsChk* arrBndsChk = before->AsBoundsChk();
7680 if (arrBndsChk->gtArrLen->gtGetOp1()->gtOper != GT_LCL_VAR)
7684 if (arrBndsChk->gtIndex->gtOper != GT_LCL_VAR)
7688 unsigned arrLcl = arrBndsChk->gtArrLen->gtGetOp1()->gtLclVarCommon.gtLclNum;
// When recursing, the array base must be the local defined by the outer comma.
7689 if (lhsNum != BAD_VAR_NUM && arrLcl != lhsNum)
7694 unsigned indLcl = arrBndsChk->gtIndex->gtLclVarCommon.gtLclNum;
7696 GenTreePtr after = tree->gtGetOp2();
7698 if (after->gtOper != GT_IND)
7702 // It used to be the case that arrBndsChks for struct types would fail the previous check because
7703 // after->gtOper was an address (for a block op). In order to avoid asmDiffs we will for now
7704 // return false if the type of 'after' is a struct type. (This was causing us to clone loops
7705 // that we were not previously cloning.)
7706 // TODO-1stClassStructs: Remove this check to enable optimization of array bounds checks for struct
7708 if (varTypeIsStruct(after))
// Shape check 3: the indirection address must be ((base + (index << scale)) + offset)
// with base/index matching the locals from the bounds check.
7713 GenTreePtr sibo = after->gtGetOp1();
7714 if (sibo->gtOper != GT_ADD)
7718 GenTreePtr sib = sibo->gtGetOp1();
7719 GenTreePtr ofs = sibo->gtGetOp2();
7720 if (ofs->gtOper != GT_CNS_INT)
7724 if (sib->gtOper != GT_ADD)
7728 GenTreePtr si = sib->gtGetOp2();
7729 GenTreePtr base = sib->gtGetOp1();
7730 if (si->gtOper != GT_LSH)
7734 if (base->OperGet() != GT_LCL_VAR || base->gtLclVarCommon.gtLclNum != arrLcl)
7738 GenTreePtr scale = si->gtGetOp2();
7739 GenTreePtr index = si->gtGetOp1();
7740 if (scale->gtOper != GT_CNS_INT)
// On 64-bit the 32-bit index appears under a widening cast; look through it.
7744 #ifdef _TARGET_AMD64_
7745 if (index->gtOper != GT_CAST)
7749 GenTreePtr indexVar = index->gtGetOp1();
7751 GenTreePtr indexVar = index;
7753 if (indexVar->gtOper != GT_LCL_VAR || indexVar->gtLclVarCommon.gtLclNum != indLcl)
// All checks passed: record the extracted dimension in "result".
7757 if (lhsNum == BAD_VAR_NUM)
7759 result->arrLcl = arrLcl;
7761 result->indLcls.Push(indLcl);
7762 result->bndsChks.Push(tree);
7763 result->useBlock = compCurBB;
7769 //---------------------------------------------------------------------------------------------------------------
7770 // optReconstructArrIndex: Reconstruct array index.
7773 // tree the tree to be checked if it is an array [][][] operation.
7774 // result the extracted GT_INDEX information.
7775 // lhsNum for the root level (function is recursive) callers should be BAD_VAR_NUM.
7778 // Returns true if array index can be extracted, else, return false. "rank" field in
7779 // "result" contains the array access depth. The "indLcls" fields contain the indices.
7782 // Recursively look for a list of array indices. In the example below, we encounter,
7783 // V03 = ((V05 = V00[V01]), (V05[V02])) which corresponds to access of V00[V01][V02]
7784 // The return value would then be:
7785 // ArrIndex result { arrLcl: V00, indLcls: [V01, V02], rank: 2 }
7787 // V00[V01][V02] would be morphed as:
7789 // [000000001B366848] ---XG------- indir int
7790 // [000000001B36BC50] ------------ V05 + (V02 << 2) + 16
7791 // [000000001B36C200] ---XG------- comma int
7792 // [000000001B36BDB8] ---X-------- arrBndsChk(V05, V02)
7793 // [000000001B36C278] -A-XG------- comma int
7794 // [000000001B366730] R--XG------- indir ref
7795 // [000000001B36C2F0] ------------ V00 + (V01 << 3) + 24
7796 // [000000001B36C818] ---XG------- comma ref
7797 // [000000001B36C458] ---X-------- arrBndsChk(V00, V01)
7798 // [000000001B36BB60] -A-XG------- = ref
7799 // [000000001B36BAE8] D------N---- lclVar ref V05 tmp2
7800 // [000000001B36A668] -A-XG------- = int
7801 // [000000001B36A5F0] D------N---- lclVar int V03 tmp0
7804 // The method extracts only if the array base and indices are GT_LCL_VAR.
7806 bool Compiler::optReconstructArrIndex(GenTreePtr tree, ArrIndex* result, unsigned lhsNum)
7808 // If we can extract "tree" (which is a top level comma) return.
7809 if (optExtractArrIndex(tree, result, lhsNum))
7813 // We have a comma (check if array base expr is computed in "before"), descend further.
7814 else if (tree->OperGet() == GT_COMMA)
7816 GenTreePtr before = tree->gtGetOp1();
7817 // "before" should evaluate an array base for the "after" indexing.
7818 if (before->OperGet() != GT_ASG)
7822 GenTreePtr lhs = before->gtGetOp1();
7823 GenTreePtr rhs = before->gtGetOp2();
7825 // "rhs" should contain an GT_INDEX
7826 if (!lhs->IsLocal() || !optReconstructArrIndex(rhs, result, lhsNum))
7830 unsigned lhsNum = lhs->gtLclVarCommon.gtLclNum;
7831 GenTreePtr after = tree->gtGetOp2();
7832 // Pass the "lhsNum", so we can verify if indeed it is used as the array base.
7833 return optExtractArrIndex(after, result, lhsNum);
7839 Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloningVisitor(GenTreePtr* pTree, Compiler::fgWalkData* data)
7841 return data->compiler->optCanOptimizeByLoopCloning(*pTree, (LoopCloneVisitorInfo*)data->pCallbackData);
7844 //-------------------------------------------------------------------------
7845 // optIsStackLocalInvariant: Is stack local invariant in loop.
7848 // loopNum The loop in which the variable is tested for invariance.
7849 // lclNum The local that is tested for invariance in the loop.
7852 // Returns true if the variable is loop invariant in loopNum.
7854 bool Compiler::optIsStackLocalInvariant(unsigned loopNum, unsigned lclNum)
7856 if (lvaVarAddrExposed(lclNum))
7860 if (optIsVarAssgLoop(loopNum, lclNum))
7867 //----------------------------------------------------------------------------------------------
7868 // optCanOptimizeByLoopCloning: Check if the tree can be optimized by loop cloning and if so,
7869 // identify as potential candidate and update the loop context.
7872 // tree The tree encountered during the tree walk.
7873 // info Supplies information about the current block or stmt in which the tree is.
7874 // Also supplies the "context" pointer for updating with loop cloning
7875 // candidates. Also supplies loopNum.
7878 // If array index can be reconstructed, check if the iter var of the loop matches the
7879 // array index var in some dim. Also ensure other index vars before the identified
7880 // dim are loop invariant.
7883 // Skip sub trees if the optimization candidate is identified or else continue walking
// NOTE(review): interior lines (braces, DEBUG dump helpers) are elided from
// this view; only the visible statements are documented here.
7885 Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTreePtr tree, LoopCloneVisitorInfo* info)
7887 ArrIndex arrIndex(getAllocator());
7889 // Check if array index can be optimized.
7890 if (optReconstructArrIndex(tree, &arrIndex, BAD_VAR_NUM))
// optReconstructArrIndex only succeeds on a comma-shaped tree.
7892 assert(tree->gtOper == GT_COMMA);
7896 JITDUMP("Found ArrIndex at tree ");
7898 printf(" which is equivalent to: ");
// The array base itself must be invariant in the loop, otherwise cloning
// conditions on it would be unsound.
7903 if (!optIsStackLocalInvariant(info->loopNum, arrIndex.arrLcl))
7905 return WALK_SKIP_SUBTREES;
7908 // Walk the dimensions and see if iterVar of the loop is used as index.
7909 for (unsigned dim = 0; dim < arrIndex.rank; ++dim)
7911 // Is index variable also used as the loop iter var.
7912 if (arrIndex.indLcls[dim] == optLoopTable[info->loopNum].lpIterVar())
7914 // Check the previous indices are all loop invariant.
7915 for (unsigned dim2 = 0; dim2 < dim; ++dim2)
7917 if (optIsVarAssgLoop(info->loopNum, arrIndex.indLcls[dim2]))
7919 JITDUMP("V%02d is assigned in loop\n", arrIndex.indLcls[dim2]);
7920 return WALK_SKIP_SUBTREES;
7926 JITDUMP("Loop %d can be cloned for ArrIndex ", info->loopNum);
7928 JITDUMP(" on dim %d\n", dim);
7931 // Update the loop context.
// Record this jagged-array access as a cloning candidate for the loop.
7932 info->context->EnsureLoopOptInfo(info->loopNum)
7933 ->Push(new (this, CMK_LoopOpt) LcJaggedArrayOptInfo(arrIndex, dim, info->stmt));
7937 JITDUMP("Induction V%02d is not used as index on dim %d\n", optLoopTable[info->loopNum].lpIterVar(),
// Candidate (or non-candidate) identified — no need to descend further.
7941 return WALK_SKIP_SUBTREES;
7943 else if (tree->gtOper == GT_ARR_ELEM)
7945 // TODO-CQ: CLONE: Implement.
7946 return WALK_SKIP_SUBTREES;
7948 return WALK_CONTINUE;
// Callback payload for the optValidRangeCheckIndex tree walk below.
// NOTE(review): the 'bValidIndex' member written by the walker is elided from
// this view.
7951 struct optRangeCheckDsc
// Back-pointer to the compiler so the static walk callback can consult lvaTable.
7953 Compiler* pCompiler;
7957 Walk to make sure that only locals and constants are contained in the index
// Tree-walk callback: clears pData->bValidIndex if the index expression
// contains anything other than locals and constants (see comment above).
7960 Compiler::fgWalkResult Compiler::optValidRangeCheckIndex(GenTreePtr* pTree, fgWalkData* data)
7962 GenTreePtr tree = *pTree;
7963 optRangeCheckDsc* pData = (optRangeCheckDsc*)data->pCallbackData;
// Any memory load in the index could be changed by another thread, so it
// invalidates the range-check removal.
7965 if (tree->gtOper == GT_IND || tree->gtOper == GT_CLS_VAR || tree->gtOper == GT_FIELD || tree->gtOper == GT_LCL_FLD)
7967 pData->bValidIndex = false;
7971 if (tree->gtOper == GT_LCL_VAR)
// An address-exposed local can be written through an alias — treat like memory.
7973 if (pData->pCompiler->lvaTable[tree->gtLclVarCommon.gtLclNum].lvAddrExposed)
7975 pData->bValidIndex = false;
7980 return WALK_CONTINUE;
7984 returns true if a range check can legally be removed (for the moment it checks
7985 that the array is a local array (not subject to race conditions) and that the
7986 index is either a constant or a local)
// NOTE(review): the "return false"/"return true" statements and DEBUG-only
// braces between the visible lines are elided from this view.
7988 bool Compiler::optIsRangeCheckRemovable(GenTreePtr tree)
7990 noway_assert(tree->gtOper == GT_ARR_BOUNDS_CHECK);
7991 GenTreeBoundsChk* bndsChk = tree->AsBoundsChk();
7992 GenTreePtr pArray = bndsChk->GetArray();
// With no array node we can only reason about the check if the length is a
// compile-time constant.
7993 if (pArray == nullptr && !bndsChk->gtArrLen->IsCnsIntOrI())
7997 GenTreePtr pIndex = bndsChk->gtIndex;
7999 // The length must be a constant (the pArray == NULL case) or the array reference must be a local.
8000 // Otherwise we can be targeted by malicious race-conditions.
8001 if (pArray != nullptr)
8003 if (pArray->gtOper != GT_LCL_VAR)
8009 printf("Can't remove range check if the array isn't referenced with a local\n");
8017 noway_assert(pArray->gtType == TYP_REF);
8018 noway_assert(pArray->gtLclVarCommon.gtLclNum < lvaCount);
8020 if (lvaTable[pArray->gtLclVarCommon.gtLclNum].lvAddrExposed)
8022 // If the array address has been taken, don't do the optimization
8023 // (this restriction can be lowered a bit, but i don't think it's worth it)
8024 CLANG_FORMAT_COMMENT_ANCHOR;
8028 printf("Can't remove range check if the array has its address taken\n");
// Walk the index expression and verify it is built only from (non-exposed)
// locals and constants — see optValidRangeCheckIndex above.
8037 optRangeCheckDsc Data;
8038 Data.pCompiler = this;
8039 Data.bValidIndex = true;
8041 fgWalkTreePre(&pIndex, optValidRangeCheckIndex, &Data);
8043 if (!Data.bValidIndex)
8048 printf("Can't remove range check with this index");
8059 /******************************************************************************
8061 * Replace x==null with (x|x)==0 if x is a GC-type.
8062 * This will stress code-gen and the emitter to make sure they support such trees.
// Stress-mode transform (see header comment above): rewrites x==null into
// (x|x)==0 (or x&x) for GC-typed comparands to exercise codegen/emitter.
// NOTE(review): early "return" statements and declarations of 'relop'/'isBool'
// between the visible lines are elided from this view.
8067 void Compiler::optOptimizeBoolsGcStress(BasicBlock* condBlock)
// Only fire under the STRESS_OPT_BOOLS_GC stress mode (20% weighting).
8069 if (!compStressCompile(STRESS_OPT_BOOLS_GC, 20))
8074 noway_assert(condBlock->bbJumpKind == BBJ_COND);
// The conditional jump is the last statement of the block.
8075 GenTreePtr condStmt = condBlock->bbTreeList->gtPrev->gtStmt.gtStmtExpr;
8077 noway_assert(condStmt->gtOper == GT_JTRUE);
8082 GenTreePtr comparand = optIsBoolCond(condStmt, &relop, &isBool);
// Only GC-typed (ref/byref) comparands are interesting for this stress.
8084 if (comparand == nullptr || !varTypeIsGC(comparand->TypeGet()))
// Duplicating the comparand is only legal if it has no side effects.
8089 if (comparand->gtFlags & (GTF_ASG | GTF_CALL | GTF_ORDER_SIDEEFF))
8094 GenTreePtr comparandClone = gtCloneExpr(comparand);
8096 // Bump up the ref-counts of any variables in 'comparandClone'
8097 compCurBB = condBlock;
8098 fgWalkTreePre(&comparandClone, Compiler::lvaIncRefCntsCB, (void*)this, true);
8100 noway_assert(relop->gtOp.gtOp1 == comparand);
// Randomly pick OR or AND (50/50 under stress) — x|x and x&x both equal x.
8101 genTreeOps oper = compStressCompile(STRESS_OPT_BOOLS_GC, 50) ? GT_OR : GT_AND;
8102 relop->gtOp.gtOp1 = gtNewOperNode(oper, TYP_I_IMPL, comparand, comparandClone);
8104 // Comparand type is already checked, and we have const int, there is no harm
8105 // morphing it into a TYP_I_IMPL.
8106 noway_assert(relop->gtOp.gtOp2->gtOper == GT_CNS_INT);
8107 relop->gtOp.gtOp2->gtType = TYP_I_IMPL;
8112 /******************************************************************************
8113 * Function used by folding of boolean conditionals
8114 * Given a GT_JTRUE node, checks that it is a boolean comparison of the form
8115 * "if (boolVal ==/!= 0/1)". This is translated into a GT_EQ node with "op1"
8116 * being a boolean lclVar and "op2" the const 0/1.
8117 * On success, the comparand (ie. boolVal) is returned. Else NULL.
8118 * compPtr returns the compare node (i.e. GT_EQ or GT_NE node)
8119 * boolPtr returns whether the comparand is a boolean value (must be 0 or 1).
8120 * When return boolPtr equal to true, if the comparison was against a 1 (i.e true)
8121 * value then we morph the tree by reversing the GT_EQ/GT_NE and change the 1 to 0.
// See the header comment above for the full contract. NOTE(review): the
// "return nullptr" failure paths, the isBool assignments, and the final
// "*boolPtr = isBool; return opr1;" style epilogue are elided from this view.
8124 GenTree* Compiler::optIsBoolCond(GenTree* condBranch, GenTree** compPtr, bool* boolPtr)
8126 bool isBool = false;
8128 noway_assert(condBranch->gtOper == GT_JTRUE);
8129 GenTree* cond = condBranch->gtOp.gtOp1;
8131 /* The condition must be "!= 0" or "== 0" */
8133 if ((cond->gtOper != GT_EQ) && (cond->gtOper != GT_NE))
8138 /* Return the compare node to the caller */
8142 /* Get hold of the comparands */
8144 GenTree* opr1 = cond->gtOp.gtOp1;
8145 GenTree* opr2 = cond->gtOp.gtOp2;
// The second operand must be the integer constant 0 or 1.
8147 if (opr2->gtOper != GT_CNS_INT)
8152 if (!opr2->IsIntegralConst(0) && !opr2->IsIntegralConst(1))
8157 ssize_t ival2 = opr2->gtIntCon.gtIconVal;
8159 /* Is the value a boolean?
8160 * We can either have a boolean expression (marked GTF_BOOLEAN) or
8161 * a local variable that is marked as being boolean (lvIsBoolean) */
8163 if (opr1->gtFlags & GTF_BOOLEAN)
// A constant 0/1 is trivially boolean-valued.
8167 else if ((opr1->gtOper == GT_CNS_INT) && (opr1->IsIntegralConst(0) || opr1->IsIntegralConst(1)))
8171 else if (opr1->gtOper == GT_LCL_VAR)
8173 /* is it a boolean local variable */
8175 unsigned lclNum = opr1->gtLclVarCommon.gtLclNum;
8176 noway_assert(lclNum < lvaCount);
8178 if (lvaTable[lclNum].lvIsBoolean)
8184 /* Was our comparison against the constant 1 (i.e. true) */
8187 // If this is a boolean expression tree we can reverse the relop
8188 // and change the true to false.
// Normalize "x == 1" into "x != 0" (and vice versa) so callers only ever
// see comparisons against 0.
8191 gtReverseCond(cond);
8192 opr2->gtIntCon.gtIconVal = 0;
// Folds pairs of adjacent BBJ_COND blocks whose conditions are boolean tests
// against 0 into a single conditional using AND/OR, then removes the second
// block and repairs the flowgraph (preds, edge weights, EH, loop table).
// NOTE(review): this chunk is an elided view — many interior lines (braces,
// 'continue'/'goto' statements, the foldOp/sameTarget assignments, DEBUG
// blocks, and the function epilogue) are missing between the visible lines.
8204 void Compiler::optOptimizeBools()
8209 printf("*************** In optOptimizeBools()\n");
8212 printf("Blocks/Trees before phase\n");
8213 fgDispBasicBlocks(true);
// Scan consecutive block pairs (b1, b2 = b1->bbNext) for foldable conditions.
8223 for (BasicBlock* b1 = fgFirstBB; b1; b1 = b1->bbNext)
8225 /* We're only interested in conditional jumps here */
8227 if (b1->bbJumpKind != BBJ_COND)
8232 /* If there is no next block, we're done */
8234 BasicBlock* b2 = b1->bbNext;
8240 /* The next block must not be marked as BBF_DONT_REMOVE */
8241 if (b2->bbFlags & BBF_DONT_REMOVE)
8246 /* The next block also needs to be a condition */
8248 if (b2->bbJumpKind != BBJ_COND)
8251 optOptimizeBoolsGcStress(b1);
8256 bool sameTarget; // Do b1 and b2 have the same bbJumpDest?
8258 if (b1->bbJumpDest == b2->bbJumpDest)
8260 /* Given the following sequence of blocks :
8264 we will try to fold it to :
8265 B1: brtrue(t1|t2, BX)
8271 else if (b1->bbJumpDest == b2->bbNext) /*b1->bbJumpDest->bbNum == n1+2*/
8273 /* Given the following sequence of blocks :
8277 we will try to fold it to :
8278 B1: brtrue((!t1)&&t2, B3)
8289 /* The second block must contain a single statement */
8291 GenTreePtr s2 = b2->bbTreeList;
8292 if (s2->gtPrev != s2)
8297 noway_assert(s2->gtOper == GT_STMT);
8298 GenTreePtr t2 = s2->gtStmt.gtStmtExpr;
8299 noway_assert(t2->gtOper == GT_JTRUE);
8301 /* Find the condition for the first block */
8303 GenTreePtr s1 = b1->bbTreeList->gtPrev;
8305 noway_assert(s1->gtOper == GT_STMT);
8306 GenTreePtr t1 = s1->gtStmt.gtStmtExpr;
8307 noway_assert(t1->gtOper == GT_JTRUE);
// b2 must be reached only via the fall-through from b1; otherwise merging
// would change behavior for the other predecessors.
8309 if (b2->countOfInEdges() > 1)
8314 /* Find the branch conditions of b1 and b2 */
// optIsBoolCond replaces t1/t2 with the compare nodes and returns the
// comparands c1/c2 (nullptr if not a boolean test against 0/1).
8318 GenTreePtr c1 = optIsBoolCond(t1, &t1, &bool1);
8324 GenTreePtr c2 = optIsBoolCond(t2, &t2, &bool2);
// NOTE(review): '&&' binds tighter than '||', so these asserts parse as
// EQ || (NE && gtOp1 == c); the gtOp1 check is skipped in the GT_EQ case.
// Looks like missing parentheses around (EQ || NE) — confirm intent.
8330 noway_assert(t1->gtOper == GT_EQ || t1->gtOper == GT_NE && t1->gtOp.gtOp1 == c1);
8331 noway_assert(t2->gtOper == GT_EQ || t2->gtOper == GT_NE && t2->gtOp.gtOp1 == c2);
8333 // Leave out floats where the bit-representation is more complicated
8334 // - there are two representations for 0.
8336 if (varTypeIsFloating(c1->TypeGet()) || varTypeIsFloating(c2->TypeGet()))
8341 // Make sure the types involved are of the same sizes
8342 if (genTypeSize(c1->TypeGet()) != genTypeSize(c2->TypeGet()))
8346 if (genTypeSize(t1->TypeGet()) != genTypeSize(t2->TypeGet()))
8350 #ifdef _TARGET_ARMARCH_
8351 // Skip the small operand which we cannot encode.
8352 if (varTypeIsSmall(c1->TypeGet()))
8355 /* The second condition must not contain side effects */
// c2 is moved across the b1/b2 boundary, so it must be re-orderable.
8357 if (c2->gtFlags & GTF_GLOB_EFFECT)
8362 /* The second condition must not be too expensive */
// Folding makes c2 evaluate unconditionally, so cap its execution cost.
8366 if (c2->gtCostEx > 12)
8373 var_types foldType = c1->TypeGet();
// GC refs are folded as raw pointers — the AND/OR result is not a GC value.
8374 if (varTypeIsGC(foldType))
8376 foldType = TYP_I_IMPL;
8381 /* Both conditions must be the same */
8383 if (t1->gtOper != t2->gtOper)
8388 if (t1->gtOper == GT_EQ)
8390 /* t1:c1==0 t2:c2==0 ==> Branch to BX if either value is 0
8391 So we will branch to BX if (c1&c2)==0 */
8398 /* t1:c1!=0 t2:c2!=0 ==> Branch to BX if either value is non-0
8399 So we will branch to BX if (c1|c2)!=0 */
8407 /* The b1 condition must be the reverse of the b2 condition */
8409 if (t1->gtOper == t2->gtOper)
8414 if (t1->gtOper == GT_EQ)
8416 /* t1:c1==0 t2:c2!=0 ==> Branch to BX if both values are non-0
8417 So we will branch to BX if (c1&c2)!=0 */
8424 /* t1:c1!=0 t2:c2==0 ==> Branch to BX if both values are 0
8425 So we will branch to BX if (c1|c2)==0 */
// AND only preserves semantics when both operands are known 0/1-valued
// (e.g. 2 & 1 == 0 even though both are "true").
8432 // Anding requires both values to be 0 or 1
8434 if ((foldOp == GT_AND) && (!bool1 || !bool2))
8440 // Now update the trees
8442 GenTreePtr cmpOp1 = gtNewOperNode(foldOp, foldType, c1, c2);
8445 /* When we 'OR'/'AND' two booleans, the result is boolean as well */
8446 cmpOp1->gtFlags |= GTF_BOOLEAN;
// Splice the folded expression into b1's compare; b2's tree is abandoned.
8450 t1->gtOp.gtOp1 = cmpOp1;
8451 t1->gtOp.gtOp2->gtType = foldType; // Could have been varTypeIsGC()
8453 #if FEATURE_SET_FLAGS
8454 // For comparisons against zero we will have the GTF_SET_FLAGS set
8455 // and this can cause an assert to fire in fgMoveOpsLeft(GenTreePtr tree)
8456 // during the CSE phase.
8458 // So make sure to clear any GTF_SET_FLAGS bit on these operations
8459 // as they are no longer feeding directly into a comparisons against zero
8461 // Make sure that the GTF_SET_FLAGS bit is cleared.
8462 // Fix 388436 ARM JitStress WP7
8463 c1->gtFlags &= ~GTF_SET_FLAGS;
8464 c2->gtFlags &= ~GTF_SET_FLAGS;
8466 // The new top level node that we just created does feed directly into
8467 // a comparison against zero, so set the GTF_SET_FLAGS bit so that
8468 // we generate an instruction that sets the flags, which allows us
8469 // to omit the cmp with zero instruction.
8471 // Request that the codegen for cmpOp1 sets the condition flags
8472 // when it generates the code for cmpOp1.
8474 cmpOp1->gtRequestSetFlags();
// Capture the flow edges before retargeting so their weights can be merged.
8477 flowList* edge1 = fgGetPredForBlock(b1->bbJumpDest, b1);
8480 /* Modify the target of the conditional jump and update bbRefs and bbPreds */
8484 edge2 = fgGetPredForBlock(b2->bbJumpDest, b2);
8488 edge2 = fgGetPredForBlock(b2->bbNext, b2);
8490 fgRemoveRefPred(b1->bbJumpDest, b1);
8492 b1->bbJumpDest = b2->bbJumpDest;
8494 fgAddRefPred(b2->bbJumpDest, b1);
8497 noway_assert(edge1 != nullptr);
8498 noway_assert(edge2 != nullptr);
// Merge the two edges' weight ranges; on overflow fall back to the widest
// possible range rather than propagating a wrapped sum.
8500 BasicBlock::weight_t edgeSumMin = edge1->flEdgeWeightMin + edge2->flEdgeWeightMin;
8501 BasicBlock::weight_t edgeSumMax = edge1->flEdgeWeightMax + edge2->flEdgeWeightMax;
8502 if ((edgeSumMax >= edge1->flEdgeWeightMax) && (edgeSumMax >= edge2->flEdgeWeightMax))
8504 edge1->flEdgeWeightMin = edgeSumMin;
8505 edge1->flEdgeWeightMax = edgeSumMax;
8509 edge1->flEdgeWeightMin = BB_ZERO_WEIGHT;
8510 edge1->flEdgeWeightMax = BB_MAX_WEIGHT;
8513 /* Get rid of the second block (which is a BBJ_COND) */
8515 noway_assert(b1->bbJumpKind == BBJ_COND);
8516 noway_assert(b2->bbJumpKind == BBJ_COND);
8517 noway_assert(b1->bbJumpDest == b2->bbJumpDest);
8518 noway_assert(b1->bbNext == b2);
8519 noway_assert(b2->bbNext);
8522 b2->bbFlags |= BBF_REMOVED;
8524 // If b2 was the last block of a try or handler, update the EH table.
8526 ehUpdateForDeletedBlock(b2);
8528 /* Update bbRefs and bbPreds */
8530 /* Replace pred 'b2' for 'b2->bbNext' with 'b1'
8531 * Remove pred 'b2' for 'b2->bbJumpDest' */
8533 fgReplacePred(b2->bbNext, b2, b1);
8535 fgRemoveRefPred(b2->bbJumpDest, b2);
8537 /* Update the block numbers and try again */
8549 // Update loop table
8550 fgUpdateLoopsAfterCompacting(b1, b2);
8555 printf("Folded %sboolean conditions of BB%02u and BB%02u to :\n", c2->OperIsLeaf() ? "" : "non-leaf ",
8556 b1->bbNum, b2->bbNum);
8565 fgDebugCheckBBlist();