1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
10 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
11 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
17 #pragma warning(disable : 4701)
20 /*****************************************************************************/
24 unsigned Compiler::optRangeChkRmv = 0;
26 unsigned Compiler::optRangeChkAll = 0;
29 /*****************************************************************************/
31 void Compiler::optInit()
33 optLoopsMarked = false;
36 /* Initialize the # of tracked loops to 0 */
38 /* Keep track of the number of calls and indirect calls made by this method */
40 optIndirectCallCount = 0;
41 optNativeCallCount = 0;
42 optAssertionCount = 0;
43 optAssertionDep = nullptr;
45 optCSECandidateTotal = 0;
46 optCSEstart = UINT_MAX;
48 #endif // FEATURE_ANYCSE
51 DataFlow::DataFlow(Compiler* pCompiler) : m_pCompiler(pCompiler)
55 /*****************************************************************************
59 void Compiler::optSetBlockWeights()
61 noway_assert(!opts.MinOpts() && !opts.compDbgCode);
62 assert(fgDomsComputed);
68 bool firstBBdomsRets = true;
72 for (block = fgFirstBB; (block != nullptr); block = block->bbNext)
74 /* Blocks that can't be reached via the first block are rarely executed */
75 if (!fgReachable(fgFirstBB, block))
77 block->bbSetRunRarely();
80 if (block->bbWeight != BB_ZERO_WEIGHT)
82 // Calculate our bbWeight:
84 // o BB_UNITY_WEIGHT if we dominate all BBJ_RETURN blocks
85 // o otherwise BB_UNITY_WEIGHT / 2
87 bool domsRets = true; // Assume that we will dominate
89 for (BasicBlockList* retBlocks = fgReturnBlocks; retBlocks != nullptr; retBlocks = retBlocks->next)
91 if (!fgDominate(block, retBlocks->block))
98 if (block == fgFirstBB)
100 firstBBdomsRets = domsRets;
103 // If we are not using profile weight then we lower the weight
104 // of blocks that do not dominate a return block
106 if (firstBBdomsRets && (fgIsUsingProfileWeights() == false) && (domsRets == false))
111 block->modifyBBWeight(block->bbWeight / 2);
112 noway_assert(block->bbWeight);
118 if (changed && verbose)
120 printf("\nAfter optSetBlockWeights:\n");
125 /* Check that the flowgraph data (bbNum, bbRefs, bbPreds) is up-to-date */
126 fgDebugCheckBBlist();
130 /*****************************************************************************
132 * Marks the blocks between 'begBlk' and 'endBlk' as part of a loop.
135 void Compiler::optMarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk, bool excludeEndBlk)
137 /* Calculate the 'loopWeight',
138 this is the amount to increase each block in the loop
139 Our heuristic is that loops are weighted eight times more
140 than straight line code.
141 Thus we increase each block by 7 times the weight of
142 the loop header block,
143 if the loops are all properly formed gives us:
144 (assuming that BB_LOOP_WEIGHT is 8)
146 1 -- non loop basic block
147 8 -- single loop nesting
148 64 -- double loop nesting
149 512 -- triple loop nesting
153 noway_assert(begBlk->bbNum <= endBlk->bbNum);
154 noway_assert(begBlk->isLoopHead());
155 noway_assert(fgReachable(begBlk, endBlk));
160 printf("\nMarking loop L%02u", begBlk->bbLoopNum);
164 noway_assert(!opts.MinOpts());
166 /* Build list of backedges for block begBlk */
167 flowList* backedgeList = nullptr;
169 for (flowList* pred = begBlk->bbPreds; pred != nullptr; pred = pred->flNext)
171 /* Is this a backedge? */
172 if (pred->flBlock->bbNum >= begBlk->bbNum)
174 flowList* flow = new (this, CMK_FlowList) flowList();
176 #if MEASURE_BLOCK_SIZE
178 genFlowNodeSize += sizeof(flowList);
179 #endif // MEASURE_BLOCK_SIZE
181 flow->flNext = backedgeList;
182 flow->flBlock = pred->flBlock;
187 /* At least one backedge must have been found (the one from endBlk) */
188 noway_assert(backedgeList);
190 BasicBlock* curBlk = begBlk;
194 noway_assert(curBlk);
196 // For curBlk to be part of a loop that starts at begBlk
197 // curBlk must be reachable from begBlk and (since this is a loop)
198 // likewise begBlk must be reachable from curBlk.
201 if (fgReachable(curBlk, begBlk) && fgReachable(begBlk, curBlk))
203 /* If this block reaches any of the backedge blocks we set reachable */
204 /* If this block dominates any of the backedge blocks we set dominates */
205 bool reachable = false;
206 bool dominates = false;
208 for (flowList* tmp = backedgeList; tmp != nullptr; tmp = tmp->flNext)
210 BasicBlock* backedge = tmp->flBlock;
212 if (!curBlk->isRunRarely())
214 reachable |= fgReachable(curBlk, backedge);
215 dominates |= fgDominate(curBlk, backedge);
217 if (dominates && reachable)
226 noway_assert(curBlk->bbWeight > BB_ZERO_WEIGHT);
230 if ((curBlk->bbFlags & BBF_PROF_WEIGHT) != 0)
232 // We have real profile weights, so we aren't going to change this blocks weight
233 weight = curBlk->bbWeight;
239 weight = curBlk->bbWeight * BB_LOOP_WEIGHT;
243 weight = curBlk->bbWeight * (BB_LOOP_WEIGHT / 2);
247 // The multiplication may have caused us to overflow
249 if (weight < curBlk->bbWeight)
251 // The multiplication caused us to overflow
252 weight = BB_MAX_WEIGHT;
255 // Set the new weight
257 curBlk->modifyBBWeight(weight);
262 printf("\n BB%02u(wt=%s)", curBlk->bbNum, refCntWtd2str(curBlk->getBBWeight(this)));
268 /* Stop if we've reached the last block in the loop */
270 if (curBlk == endBlk)
275 curBlk = curBlk->bbNext;
277 /* If we are excluding the endBlk then stop if we've reached endBlk */
279 if (excludeEndBlk && (curBlk == endBlk))
286 /*****************************************************************************
288 * Unmark the blocks between 'begBlk' and 'endBlk' as part of a loop.
291 void Compiler::optUnmarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk)
293 /* A set of blocks that were previously marked as a loop are now
294 to be unmarked, since we have decided that for some reason this
295 loop no longer exists.
296 Basically we are just reseting the blocks bbWeight to their
300 noway_assert(begBlk->bbNum <= endBlk->bbNum);
301 noway_assert(begBlk->isLoopHead());
303 noway_assert(!opts.MinOpts());
306 unsigned backEdgeCount = 0;
308 for (flowList* pred = begBlk->bbPreds; pred != nullptr; pred = pred->flNext)
310 curBlk = pred->flBlock;
312 /* is this a backward edge? (from curBlk to begBlk) */
314 if (begBlk->bbNum > curBlk->bbNum)
319 /* We only consider back-edges that are BBJ_COND or BBJ_ALWAYS for loops */
321 if ((curBlk->bbJumpKind != BBJ_COND) && (curBlk->bbJumpKind != BBJ_ALWAYS))
329 /* Only unmark the loop blocks if we have exactly one loop back edge */
330 if (backEdgeCount != 1)
335 if (backEdgeCount > 0)
337 printf("\nNot removing loop L%02u, due to an additional back edge", begBlk->bbLoopNum);
339 else if (backEdgeCount == 0)
341 printf("\nNot removing loop L%02u, due to no back edge", begBlk->bbLoopNum);
347 noway_assert(backEdgeCount == 1);
348 noway_assert(fgReachable(begBlk, endBlk));
353 printf("\nUnmarking loop L%02u", begBlk->bbLoopNum);
360 noway_assert(curBlk);
362 // For curBlk to be part of a loop that starts at begBlk
363 // curBlk must be reachable from begBlk and (since this is a loop)
364 // likewise begBlk must be reachable from curBlk.
366 if (!curBlk->isRunRarely() && fgReachable(curBlk, begBlk) && fgReachable(begBlk, curBlk))
368 unsigned weight = curBlk->bbWeight;
370 // Don't unmark blocks that are set to BB_MAX_WEIGHT
371 // Don't unmark blocks when we are using profile weights
373 if (!curBlk->isMaxBBWeight() && ((curBlk->bbFlags & BBF_PROF_WEIGHT) == 0))
375 if (!fgDominate(curBlk, endBlk))
381 /* Merging of blocks can disturb the Dominates
382 information (see RAID #46649) */
383 if (weight < BB_LOOP_WEIGHT)
389 // We can overflow here so check for it
390 if (weight < curBlk->bbWeight)
392 weight = BB_MAX_WEIGHT;
395 assert(weight >= BB_LOOP_WEIGHT);
397 curBlk->modifyBBWeight(weight / BB_LOOP_WEIGHT);
403 printf("\n BB%02u(wt=%s)", curBlk->bbNum, refCntWtd2str(curBlk->getBBWeight(this)));
407 /* Stop if we've reached the last block in the loop */
409 if (curBlk == endBlk)
414 curBlk = curBlk->bbNext;
416 /* Stop if we go past the last block in the loop, as it may have been deleted */
417 if (curBlk->bbNum > endBlk->bbNum)
424 /*****************************************************************************************************
426 * Function called to update the loop table and bbWeight before removing a block
429 void Compiler::optUpdateLoopsBeforeRemoveBlock(BasicBlock* block, bool skipUnmarkLoop)
436 noway_assert(!opts.MinOpts());
438 bool removeLoop = false;
440 /* If an unreachable block was part of a loop entry or bottom then the loop is unreachable */
441 /* Special case: the block was the head of a loop - or pointing to a loop entry */
443 for (unsigned loopNum = 0; loopNum < optLoopCount; loopNum++)
445 /* Some loops may have been already removed by
446 * loop unrolling or conditional folding */
448 if (optLoopTable[loopNum].lpFlags & LPFLG_REMOVED)
453 if (block == optLoopTable[loopNum].lpEntry || block == optLoopTable[loopNum].lpBottom)
455 optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED;
462 printf("\nUpdateLoopsBeforeRemoveBlock Before: ");
463 optPrintLoopInfo(loopNum);
467 /* If the loop is still in the table
468 * any block in the loop must be reachable !!! */
470 noway_assert(optLoopTable[loopNum].lpEntry != block);
471 noway_assert(optLoopTable[loopNum].lpBottom != block);
473 if (optLoopTable[loopNum].lpExit == block)
475 optLoopTable[loopNum].lpExit = nullptr;
476 optLoopTable[loopNum].lpFlags &= ~LPFLG_ONE_EXIT;
480 /* If this points to the actual entry in the loop
481 * then the whole loop may become unreachable */
483 switch (block->bbJumpKind)
486 BasicBlock** jumpTab;
490 if (block->bbNext == optLoopTable[loopNum].lpEntry)
495 if (block->bbJumpKind == BBJ_NONE)
503 noway_assert(block->bbJumpDest);
504 if (block->bbJumpDest == optLoopTable[loopNum].lpEntry)
511 jumpCnt = block->bbJumpSwt->bbsCount;
512 jumpTab = block->bbJumpSwt->bbsDstTab;
516 noway_assert(*jumpTab);
517 if ((*jumpTab) == optLoopTable[loopNum].lpEntry)
521 } while (++jumpTab, --jumpCnt);
530 /* Check if the entry has other predecessors outside the loop
531 * TODO: Replace this when predecessors are available */
533 BasicBlock* auxBlock;
534 for (auxBlock = fgFirstBB; auxBlock; auxBlock = auxBlock->bbNext)
536 /* Ignore blocks in the loop */
538 if (auxBlock->bbNum > optLoopTable[loopNum].lpHead->bbNum &&
539 auxBlock->bbNum <= optLoopTable[loopNum].lpBottom->bbNum)
544 switch (auxBlock->bbJumpKind)
547 BasicBlock** jumpTab;
551 if (auxBlock->bbNext == optLoopTable[loopNum].lpEntry)
556 if (auxBlock->bbJumpKind == BBJ_NONE)
564 noway_assert(auxBlock->bbJumpDest);
565 if (auxBlock->bbJumpDest == optLoopTable[loopNum].lpEntry)
572 jumpCnt = auxBlock->bbJumpSwt->bbsCount;
573 jumpTab = auxBlock->bbJumpSwt->bbsDstTab;
577 noway_assert(*jumpTab);
578 if ((*jumpTab) == optLoopTable[loopNum].lpEntry)
582 } while (++jumpTab, --jumpCnt);
592 optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED;
595 else if (optLoopTable[loopNum].lpHead == block)
597 /* The loop has a new head - Just update the loop table */
598 optLoopTable[loopNum].lpHead = block->bbPrev;
604 printf("\nUpdateLoopsBeforeRemoveBlock After: ");
605 optPrintLoopInfo(loopNum);
610 if ((skipUnmarkLoop == false) && ((block->bbJumpKind == BBJ_ALWAYS) || (block->bbJumpKind == BBJ_COND)) &&
611 (block->bbJumpDest->isLoopHead()) && (block->bbJumpDest->bbNum <= block->bbNum) && fgDomsComputed &&
612 (fgCurBBEpochSize == fgDomBBcount + 1) && fgReachable(block->bbJumpDest, block))
614 optUnmarkLoopBlocks(block->bbJumpDest, block);
620 /*****************************************************************************
622 * Given the beginBlock of the loop, return the index of this loop
626 unsigned Compiler::optFindLoopNumberFromBeginBlock(BasicBlock* begBlk)
630 for (lnum = 0; lnum < optLoopCount; lnum++)
632 if (optLoopTable[lnum].lpHead->bbNext == begBlk)
639 noway_assert(!"Loop number not found.");
644 /*****************************************************************************
646 * Print loop info in an uniform way.
649 void Compiler::optPrintLoopInfo(unsigned loopInd,
654 BasicBlock* lpBottom,
655 unsigned char lpExitCnt,
659 noway_assert(lpHead);
662 // NOTE: we take "loopInd" as an argument instead of using the one
663 // stored in begBlk->bbLoopNum because sometimes begBlk->bbLoopNum
664 // has not be set correctly. For example, in optRecordLoop().
665 // However, in most of the cases, loops should have been recorded.
666 // Therefore the correct way is to call the Compiler::optPrintLoopInfo(unsigned lnum)
667 // version of this method.
669 printf("L%02u, from BB%02u", loopInd, lpFirst->bbNum);
670 if (lpTop != lpFirst)
672 printf(" (loop top is BB%02u)", lpTop->bbNum);
675 printf(" to BB%02u (Head=BB%02u, Entry=BB%02u, ExitCnt=%d", lpBottom->bbNum, lpHead->bbNum, lpEntry->bbNum,
680 printf(" at BB%02u", lpExit->bbNum);
683 if (parentLoop != BasicBlock::NOT_IN_LOOP)
685 printf(", parent loop = L%02u", parentLoop);
690 /*****************************************************************************
692 * Print loop information given the index of the loop in the loop table.
695 void Compiler::optPrintLoopInfo(unsigned lnum)
697 noway_assert(lnum < optLoopCount);
699 LoopDsc* ldsc = &optLoopTable[lnum]; // lnum is the INDEX to the loop table.
701 optPrintLoopInfo(lnum, ldsc->lpHead, ldsc->lpFirst, ldsc->lpTop, ldsc->lpEntry, ldsc->lpBottom, ldsc->lpExitCnt,
702 ldsc->lpExit, ldsc->lpParent);
707 //------------------------------------------------------------------------
708 // optPopulateInitInfo: Populate loop init info in the loop table.
711 // init - the tree that is supposed to initialize the loop iterator.
712 // iterVar - loop iteration variable.
715 // "false" if the loop table could not be populated with the loop iterVar init info.
718 // The 'init' tree is checked if its lhs is a local and rhs is either
719 // a const or a local.
721 bool Compiler::optPopulateInitInfo(unsigned loopInd, GenTreePtr init, unsigned iterVar)
723 // Operator should be =
724 if (init->gtOper != GT_ASG)
729 GenTreePtr lhs = init->gtOp.gtOp1;
730 GenTreePtr rhs = init->gtOp.gtOp2;
731 // LHS has to be local and should equal iterVar.
732 if (lhs->gtOper != GT_LCL_VAR || lhs->gtLclVarCommon.gtLclNum != iterVar)
737 // RHS can be constant or local var.
738 // TODO-CQ: CLONE: Add arr length for descending loops.
739 if (rhs->gtOper == GT_CNS_INT && rhs->TypeGet() == TYP_INT)
741 optLoopTable[loopInd].lpFlags |= LPFLG_CONST_INIT;
742 optLoopTable[loopInd].lpConstInit = (int)rhs->gtIntCon.gtIconVal;
744 else if (rhs->gtOper == GT_LCL_VAR)
746 optLoopTable[loopInd].lpFlags |= LPFLG_VAR_INIT;
747 optLoopTable[loopInd].lpVarInit = rhs->gtLclVarCommon.gtLclNum;
756 //----------------------------------------------------------------------------------
757 // optCheckIterInLoopTest: Check if iter var is used in loop test.
760 // test "jtrue" tree or an asg of the loop iter termination condition
761 // from/to blocks (beg, end) which are part of the loop.
762 // iterVar loop iteration variable.
763 // loopInd loop index.
766 // The test tree is parsed to check if "iterVar" matches the lhs of the condition
767 // and the rhs limit is extracted from the "test" tree. The limit information is
768 // added to the loop table.
771 // "false" if the loop table could not be populated with the loop test info or
772 // if the test condition doesn't involve iterVar.
774 bool Compiler::optCheckIterInLoopTest(
775 unsigned loopInd, GenTreePtr test, BasicBlock* from, BasicBlock* to, unsigned iterVar)
777 // Obtain the relop from the "test" tree.
779 if (test->gtOper == GT_JTRUE)
781 relop = test->gtGetOp1();
785 assert(test->gtOper == GT_ASG);
786 relop = test->gtGetOp2();
789 noway_assert(relop->OperKind() & GTK_RELOP);
791 GenTreePtr opr1 = relop->gtOp.gtOp1;
792 GenTreePtr opr2 = relop->gtOp.gtOp2;
797 // Make sure op1 or op2 is the iterVar.
798 if (opr1->gtOper == GT_LCL_VAR && opr1->gtLclVarCommon.gtLclNum == iterVar)
803 else if (opr2->gtOper == GT_LCL_VAR && opr2->gtLclVarCommon.gtLclNum == iterVar)
813 if (iterOp->gtType != TYP_INT)
818 // Mark the iterator node.
819 iterOp->gtFlags |= GTF_VAR_ITERATOR;
821 // Check what type of limit we have - constant, variable or arr-len.
822 if (limitOp->gtOper == GT_CNS_INT)
824 optLoopTable[loopInd].lpFlags |= LPFLG_CONST_LIMIT;
826 else if (limitOp->gtOper == GT_LCL_VAR && !optIsVarAssigned(from, to, nullptr, limitOp->gtLclVarCommon.gtLclNum))
828 optLoopTable[loopInd].lpFlags |= LPFLG_VAR_LIMIT;
830 else if (limitOp->gtOper == GT_ARR_LENGTH)
832 optLoopTable[loopInd].lpFlags |= LPFLG_ARRLEN_LIMIT;
838 // Save the type of the comparison between the iterator and the limit.
839 optLoopTable[loopInd].lpTestTree = relop;
843 //----------------------------------------------------------------------------------
844 // optIsLoopIncrTree: Check if loop is a tree of form v += 1 or v = v + 1
847 // incr The incr tree to be checked. Whether incr tree is
848 // oper-equal(+=, -=...) type nodes or v=v+1 type ASG nodes.
851 // The test tree is parsed to check if "iterVar" matches the lhs of the condition
852 // and the rhs limit is extracted from the "test" tree. The limit information is
853 // added to the loop table.
856 // iterVar local num if the iterVar is found, otherwise BAD_VAR_NUM.
858 unsigned Compiler::optIsLoopIncrTree(GenTreePtr incr)
861 genTreeOps updateOper;
862 unsigned iterVar = incr->IsLclVarUpdateTree(&incrVal, &updateOper);
863 if (iterVar != BAD_VAR_NUM)
865 // We have v = v op y type asg node.
878 // Increment should be by a const int.
879 // TODO-CQ: CLONE: allow variable increments.
880 if ((incrVal->gtOper != GT_CNS_INT) || (incrVal->TypeGet() != TYP_INT))
889 //----------------------------------------------------------------------------------
890 // optComputeIterInfo: Check tree is loop increment of a lcl that is loop-invariant.
893 // from, to - are blocks (beg, end) which are part of the loop.
894 // incr - tree that increments the loop iterator. v+=1 or v=v+1.
895 // pIterVar - see return value.
898 // Returns true if iterVar "v" can be returned in "pIterVar", otherwise returns
902 // Check if the "incr" tree is a "v=v+1 or v+=1" type tree and make sure it is not
903 // assigned in the loop.
905 bool Compiler::optComputeIterInfo(GenTreePtr incr, BasicBlock* from, BasicBlock* to, unsigned* pIterVar)
908 unsigned iterVar = optIsLoopIncrTree(incr);
909 if (iterVar == BAD_VAR_NUM)
913 if (optIsVarAssigned(from, to, incr, iterVar))
915 JITDUMP("iterVar is assigned in loop\n");
923 //----------------------------------------------------------------------------------
924 // optIsLoopTestEvalIntoTemp:
925 // Pattern match if the test tree is computed into a tmp
926 // and the "tmp" is used as jump condition for loop termination.
929 // testStmt - is the JTRUE statement that is of the form: jmpTrue (Vtmp != 0)
930 // where Vtmp contains the actual loop test result.
931 // newStmt - contains the statement that is the actual test stmt involving
932 // the loop iterator.
935 // Returns true if a new test tree can be obtained.
938 // Scan if the current stmt is a jtrue with (Vtmp != 0) as condition
939 // Then returns the rhs for def of Vtmp as the "test" node.
942 // This method just retrieves what it thinks is the "test" node,
943 // the callers are expected to verify that "iterVar" is used in the test.
945 bool Compiler::optIsLoopTestEvalIntoTemp(GenTreePtr testStmt, GenTreePtr* newTest)
947 GenTreePtr test = testStmt->gtStmt.gtStmtExpr;
949 if (test->gtOper != GT_JTRUE)
954 GenTreePtr relop = test->gtGetOp1();
955 noway_assert(relop->OperIsCompare());
957 GenTreePtr opr1 = relop->gtOp.gtOp1;
958 GenTreePtr opr2 = relop->gtOp.gtOp2;
960 // Make sure we have jtrue (vtmp != 0)
961 if ((relop->OperGet() == GT_NE) && (opr1->OperGet() == GT_LCL_VAR) && (opr2->OperGet() == GT_CNS_INT) &&
962 opr2->IsIntegralConst(0))
964 // Get the previous statement to get the def (rhs) of Vtmp to see
965 // if the "test" is evaluated into Vtmp.
966 GenTreePtr prevStmt = testStmt->gtPrev;
967 if (prevStmt == nullptr)
972 GenTreePtr tree = prevStmt->gtStmt.gtStmtExpr;
973 if (tree->OperGet() == GT_ASG)
975 GenTreePtr lhs = tree->gtOp.gtOp1;
976 GenTreePtr rhs = tree->gtOp.gtOp2;
978 // Return as the new test node.
979 if (lhs->gtOper == GT_LCL_VAR && lhs->AsLclVarCommon()->GetLclNum() == opr1->AsLclVarCommon()->GetLclNum())
981 if (rhs->OperIsCompare())
992 //----------------------------------------------------------------------------------
993 // optExtractInitTestIncr:
994 // Extract the "init", "test" and "incr" nodes of the loop.
997 // head - Loop head block
998 // bottom - Loop bottom block
999 // top - Loop top block
1000 // ppInit - The init stmt of the loop if found.
1001 // ppTest - The test stmt of the loop if found.
1002 // ppIncr - The incr stmt of the loop if found.
1005 // The results are put in "ppInit", "ppTest" and "ppIncr" if the method
1006 // returns true. Returns false if the information can't be extracted.
1009 // Check if the "test" stmt is last stmt in the loop "bottom". If found good,
1010 // "test" stmt is found. Try to find the "incr" stmt. Check previous stmt of
1011 // "test" to get the "incr" stmt. If it is not found it could be a loop of the
1014 // +-------<-----------------<-----------+
1017 // BBinit(head) -> BBcond(top) -> BBLoopBody(bottom) ---^
1019 // Check if the "incr" tree is present in the loop "top" node as the last stmt.
1020 // Also check if the "test" tree is assigned to a tmp node and the tmp is used
1021 // in the jtrue condition.
1024 // This method just retrieves what it thinks is the "test" node,
1025 // the callers are expected to verify that "iterVar" is used in the test.
1027 bool Compiler::optExtractInitTestIncr(
1028 BasicBlock* head, BasicBlock* bottom, BasicBlock* top, GenTreePtr* ppInit, GenTreePtr* ppTest, GenTreePtr* ppIncr)
1030 assert(ppInit != nullptr);
1031 assert(ppTest != nullptr);
1032 assert(ppIncr != nullptr);
1034 // Check if last two statements in the loop body are the increment of the iterator
1035 // and the loop termination test.
1036 noway_assert(bottom->bbTreeList != nullptr);
1037 GenTreePtr test = bottom->bbTreeList->gtPrev;
1038 noway_assert(test != nullptr && test->gtNext == nullptr);
1041 if (optIsLoopTestEvalIntoTemp(test, &newTest))
1046 // Check if we have the incr tree before the test tree, if we don't,
1047 // check if incr is part of the loop "top".
1048 GenTreePtr incr = test->gtPrev;
1049 if (incr == nullptr || optIsLoopIncrTree(incr->gtStmt.gtStmtExpr) == BAD_VAR_NUM)
1051 if (top == nullptr || top->bbTreeList == nullptr || top->bbTreeList->gtPrev == nullptr)
1056 // If the prev stmt to loop test is not incr, then check if we have loop test evaluated into a tmp.
1057 GenTreePtr topLast = top->bbTreeList->gtPrev;
1058 if (optIsLoopIncrTree(topLast->gtStmt.gtStmtExpr) != BAD_VAR_NUM)
1068 assert(test != incr);
1070 // Find the last statement in the loop pre-header which we expect to be the initialization of
1071 // the loop iterator.
1072 GenTreePtr phdr = head->bbTreeList;
1073 if (phdr == nullptr)
1078 GenTreePtr init = phdr->gtPrev;
1079 noway_assert(init != nullptr && (init->gtNext == nullptr));
1081 // If it is a duplicated loop condition, skip it.
1082 if (init->gtFlags & GTF_STMT_CMPADD)
1084 // Must be a duplicated loop condition.
1085 noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
1086 init = init->gtPrev;
1087 noway_assert(init != nullptr);
1090 noway_assert(init->gtOper == GT_STMT);
1091 noway_assert(test->gtOper == GT_STMT);
1092 noway_assert(incr->gtOper == GT_STMT);
1094 *ppInit = init->gtStmt.gtStmtExpr;
1095 *ppTest = test->gtStmt.gtStmtExpr;
1096 *ppIncr = incr->gtStmt.gtStmtExpr;
1101 /*****************************************************************************
1103 * Record the loop in the loop table.
1106 void Compiler::optRecordLoop(BasicBlock* head,
1112 unsigned char exitCnt)
1114 // Record this loop in the table, if there's room.
1116 assert(optLoopCount <= MAX_LOOP_NUM);
1117 if (optLoopCount == MAX_LOOP_NUM)
1120 loopOverflowThisMethod = true;
1125 // Assumed preconditions on the loop we're adding.
1126 assert(first->bbNum <= top->bbNum);
1127 assert(top->bbNum <= entry->bbNum);
1128 assert(entry->bbNum <= bottom->bbNum);
1129 assert(head->bbNum < top->bbNum || head->bbNum > bottom->bbNum);
1131 // If the new loop contains any existing ones, add it in the right place.
1132 unsigned char loopInd = optLoopCount;
1133 for (unsigned char prevPlus1 = optLoopCount; prevPlus1 > 0; prevPlus1--)
1135 unsigned char prev = prevPlus1 - 1;
1136 if (optLoopTable[prev].lpContainedBy(first, bottom))
1141 // Move up any loops if necessary.
1142 for (unsigned j = optLoopCount; j > loopInd; j--)
1144 optLoopTable[j] = optLoopTable[j - 1];
1148 for (unsigned i = loopInd + 1; i < optLoopCount; i++)
1150 // The loop is well-formed.
1151 assert(optLoopTable[i].lpWellFormed());
1152 // Check for disjoint.
1153 if (optLoopTable[i].lpDisjoint(first, bottom))
1157 // Otherwise, assert complete containment (of optLoopTable[i] in new loop).
1158 assert(optLoopTable[i].lpContainedBy(first, bottom));
1162 optLoopTable[loopInd].lpHead = head;
1163 optLoopTable[loopInd].lpFirst = first;
1164 optLoopTable[loopInd].lpTop = top;
1165 optLoopTable[loopInd].lpBottom = bottom;
1166 optLoopTable[loopInd].lpEntry = entry;
1167 optLoopTable[loopInd].lpExit = exit;
1168 optLoopTable[loopInd].lpExitCnt = exitCnt;
1170 optLoopTable[loopInd].lpParent = BasicBlock::NOT_IN_LOOP;
1171 optLoopTable[loopInd].lpChild = BasicBlock::NOT_IN_LOOP;
1172 optLoopTable[loopInd].lpSibling = BasicBlock::NOT_IN_LOOP;
1174 optLoopTable[loopInd].lpFlags = 0;
1176 // We haven't yet recorded any side effects.
1177 optLoopTable[loopInd].lpLoopHasHeapHavoc = false;
1178 optLoopTable[loopInd].lpFieldsModified = nullptr;
1179 optLoopTable[loopInd].lpArrayElemTypesModified = nullptr;
1181 // If DO-WHILE loop mark it as such.
1182 if (head->bbNext == entry)
1184 optLoopTable[loopInd].lpFlags |= LPFLG_DO_WHILE;
1187 // If single exit loop mark it as such.
1191 optLoopTable[loopInd].lpFlags |= LPFLG_ONE_EXIT;
1195 // Try to find loops that have an iterator (i.e. for-like loops) "for (init; test; incr){ ... }"
1196 // We have the following restrictions:
1197 // 1. The loop condition must be a simple one i.e. only one JTRUE node
1198 // 2. There must be a loop iterator (a local var) that is
1199 // incremented (decremented or lsh, rsh, mul) with a constant value
1200 // 3. The iterator is incremented exactly once
1201 // 4. The loop condition must use the iterator.
1203 if (bottom->bbJumpKind == BBJ_COND)
1208 if (!optExtractInitTestIncr(head, bottom, top, &init, &test, &incr))
1213 unsigned iterVar = BAD_VAR_NUM;
1214 if (!optComputeIterInfo(incr, head->bbNext, bottom, &iterVar))
1219 // Make sure the "iterVar" initialization is never skipped,
1220 // i.e. HEAD dominates the ENTRY.
1221 if (!fgDominate(head, entry))
1226 if (!optPopulateInitInfo(loopInd, init, iterVar))
1231 // Check that the iterator is used in the loop condition.
1232 if (!optCheckIterInLoopTest(loopInd, test, head->bbNext, bottom, iterVar))
1237 // We know the loop has an iterator at this point ->flag it as LPFLG_ITER
1238 // Record the iterator, the pointer to the test node
1239 // and the initial value of the iterator (constant or local var)
1240 optLoopTable[loopInd].lpFlags |= LPFLG_ITER;
1243 optLoopTable[loopInd].lpIterTree = incr;
1246 // Save the initial value of the iterator - can be lclVar or constant
1247 // Flag the loop accordingly.
1253 simpleTestLoopCount++;
1256 // Check if a constant iteration loop.
1257 if ((optLoopTable[loopInd].lpFlags & LPFLG_CONST_INIT) && (optLoopTable[loopInd].lpFlags & LPFLG_CONST_LIMIT))
1259 // This is a constant loop.
1260 optLoopTable[loopInd].lpFlags |= LPFLG_CONST;
1262 constIterLoopCount++;
1269 printf("\nConstant loop initializer:\n");
1272 printf("\nConstant loop body:\n");
1274 BasicBlock* block = head;
1277 block = block->bbNext;
1278 for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
1280 if (stmt->gtStmt.gtStmtExpr == incr)
1285 gtDispTree(stmt->gtStmt.gtStmtExpr);
1287 } while (block != bottom);
1293 DBEXEC(verbose, optPrintLoopRecording(loopInd));
1298 //------------------------------------------------------------------------
1299 // optPrintLoopRecording: Print a recording of the loop.
1302 // loopInd - loop index.
1304 void Compiler::optPrintLoopRecording(unsigned loopInd)
1306 printf("Recorded loop %s", (loopInd != optLoopCount ? "(extended) " : ""));
1307 optPrintLoopInfo(optLoopCount, // Not necessarily the loop index, but the number of loops that have been added.
1308 optLoopTable[loopInd].lpHead, optLoopTable[loopInd].lpFirst, optLoopTable[loopInd].lpTop,
1309 optLoopTable[loopInd].lpEntry, optLoopTable[loopInd].lpBottom, optLoopTable[loopInd].lpExitCnt,
1310 optLoopTable[loopInd].lpExit);
1312 // If an iterator loop print the iterator and the initialization.
1313 if (optLoopTable[loopInd].lpFlags & LPFLG_ITER)
1315 printf(" [over V%02u", optLoopTable[loopInd].lpIterVar());
1317 printf(GenTree::NodeName(optLoopTable[loopInd].lpIterOper()));
1319 printf("%d )", optLoopTable[loopInd].lpIterConst());
1321 if (optLoopTable[loopInd].lpFlags & LPFLG_CONST_INIT)
1323 printf(" from %d", optLoopTable[loopInd].lpConstInit);
1325 if (optLoopTable[loopInd].lpFlags & LPFLG_VAR_INIT)
1327 printf(" from V%02u", optLoopTable[loopInd].lpVarInit);
1330 // If a simple test condition print operator and the limits */
1331 printf(GenTree::NodeName(optLoopTable[loopInd].lpTestOper()));
1333 if (optLoopTable[loopInd].lpFlags & LPFLG_CONST_LIMIT)
1335 printf("%d ", optLoopTable[loopInd].lpConstLimit());
1338 if (optLoopTable[loopInd].lpFlags & LPFLG_VAR_LIMIT)
1340 printf("V%02u ", optLoopTable[loopInd].lpVarLimit());
1349 void Compiler::optCheckPreds()
1352 BasicBlock* blockPred;
1355 for (block = fgFirstBB; block; block = block->bbNext)
1357 for (pred = block->bbPreds; pred; pred = pred->flNext)
1359 // make sure this pred is part of the BB list
1360 for (blockPred = fgFirstBB; blockPred; blockPred = blockPred->bbNext)
1362 if (blockPred == pred->flBlock)
1367 noway_assert(blockPred);
1368 switch (blockPred->bbJumpKind)
1371 if (blockPred->bbJumpDest == block)
1377 noway_assert(blockPred->bbNext == block);
1379 case BBJ_EHFILTERRET:
1381 case BBJ_EHCATCHRET:
1382 noway_assert(blockPred->bbJumpDest == block);
1393 /*****************************************************************************
1394 * Find the natural loops, using dominators. Note that the test for
1395 * a loop is slightly different from the standard one, because we have
1396 * not done a depth first reordering of the basic blocks.
1399 void Compiler::optFindNaturalLoops()
1404 printf("*************** In optFindNaturalLoops()\n");
1410 flowList* predEntry;
1412 noway_assert(fgDomsComputed);
1416 hasMethodLoops = false;
1417 loopsThisMethod = 0;
1418 loopOverflowThisMethod = false;
1421 /* We will use the following terminology:
1422 * HEAD - the basic block that flows into the loop ENTRY block (Currently MUST be lexically before entry).
1423 Not part of the looping of the loop.
1424 * FIRST - the lexically first basic block (in bbNext order) within this loop. (May be part of a nested loop,
1425 * but not the outer loop. ???)
1426 * TOP - the target of the backward edge from BOTTOM. In most cases FIRST and TOP are the same.
1427 * BOTTOM - the lexically last block in the loop (i.e. the block from which we jump to the top)
1428 * EXIT - the loop exit or the block right after the bottom
1429 * ENTRY - the entry in the loop (not necessarly the TOP), but there must be only one entry
1431 * We (currently) require the body of a loop to be a contiguous (in bbNext order) sequence of basic blocks.
1465 unsigned char exitCount;
1467 for (head = fgFirstBB; head->bbNext; head = head->bbNext)
1473 // Blocks that are rarely run have a zero bbWeight and should
1474 // never be optimized here
1476 if (top->bbWeight == BB_ZERO_WEIGHT)
1481 for (pred = top->bbPreds; pred; pred = pred->flNext)
1483 /* Is this a loop candidate? - We look for "back edges", i.e. an edge from BOTTOM
1484 * to TOP (note that this is an abuse of notation since this is not necessarily a back edge
1485 * as the definition says, but merely an indication that we have a loop there).
1486 * Thus, we have to be very careful and after entry discovery check that it is indeed
1487 * the only place we enter the loop (especially for non-reducible flow graphs).
1490 bottom = pred->flBlock;
1493 if (top->bbNum <= bottom->bbNum) // is this a backward edge? (from BOTTOM to TOP)
1495 if ((bottom->bbJumpKind == BBJ_EHFINALLYRET) || (bottom->bbJumpKind == BBJ_EHFILTERRET) ||
1496 (bottom->bbJumpKind == BBJ_EHCATCHRET) || (bottom->bbJumpKind == BBJ_CALLFINALLY) ||
1497 (bottom->bbJumpKind == BBJ_SWITCH))
1499 /* BBJ_EHFINALLYRET, BBJ_EHFILTERRET, BBJ_EHCATCHRET, and BBJ_CALLFINALLY can never form a loop.
1500 * BBJ_SWITCH that has a backward jump appears only for labeled break. */
1504 BasicBlock* loopBlock;
1506 /* The presence of a "back edge" is an indication that a loop might be present here
1509 * 1. A collection of STRONGLY CONNECTED nodes i.e. there is a path from any
1510 * node in the loop to any other node in the loop (wholly within the loop)
1511 * 2. The loop has a unique ENTRY, i.e. there is only one way to reach a node
1512 * in the loop from outside the loop, and that is through the ENTRY
1515 /* Let's find the loop ENTRY */
1517 if (head->bbJumpKind == BBJ_ALWAYS)
1519 if (head->bbJumpDest->bbNum <= bottom->bbNum && head->bbJumpDest->bbNum >= top->bbNum)
1521 /* OK - we enter somewhere within the loop */
1522 entry = head->bbJumpDest;
1524 /* some useful asserts
1525 * Cannot enter at the top - should have being caught by redundant jumps */
1527 assert((entry != top) || (head->bbFlags & BBF_KEEP_BBJ_ALWAYS));
1531 /* special case - don't consider now */
1532 // assert (!"Loop entered in weird way!");
1536 // Can we fall through into the loop?
1537 else if (head->bbJumpKind == BBJ_NONE || head->bbJumpKind == BBJ_COND)
1539 /* The ENTRY is at the TOP (a do-while loop) */
1544 goto NO_LOOP; // head does not flow into the loop bail for now
1547 // Now we find the "first" block -- the earliest block reachable within the loop.
1548 // This is usually the same as "top", but can differ in rare cases where "top" is
1549 // the entry block of a nested loop, and that nested loop branches backwards to a
1550 // a block before "top". We find this by searching for such backwards branches
1551 // in the loop known so far.
1552 BasicBlock* first = top;
1553 BasicBlock* newFirst;
1554 bool blocksToSearch = true;
1555 BasicBlock* validatedAfter = bottom->bbNext;
1556 while (blocksToSearch)
1558 blocksToSearch = false;
1560 blocksToSearch = false;
1561 for (loopBlock = first; loopBlock != validatedAfter; loopBlock = loopBlock->bbNext)
1563 unsigned nSucc = loopBlock->NumSucc();
1564 for (unsigned j = 0; j < nSucc; j++)
1566 BasicBlock* succ = loopBlock->GetSucc(j);
1567 if ((newFirst == nullptr && succ->bbNum < first->bbNum) ||
1568 (newFirst != nullptr && succ->bbNum < newFirst->bbNum))
1574 if (newFirst != nullptr)
1576 validatedAfter = first;
1578 blocksToSearch = true;
1582 // Is "head" still before "first"? If not, we don't have a valid loop...
1583 if (head->bbNum >= first->bbNum)
1586 "Extending loop [BB%02u..BB%02u] 'first' to BB%02u captures head BB%02u. Rejecting loop.\n",
1587 top->bbNum, bottom->bbNum, first->bbNum, head->bbNum);
1591 /* Make sure ENTRY dominates all blocks in the loop
1592 * This is necessary to ensure condition 2. above
1593 * At the same time check if the loop has a single exit
1594 * point - those loops are easier to optimize */
1596 for (loopBlock = top; loopBlock != bottom->bbNext; loopBlock = loopBlock->bbNext)
1598 if (!fgDominate(entry, loopBlock))
1603 if (loopBlock == bottom)
1605 if (bottom->bbJumpKind != BBJ_ALWAYS)
1607 /* there is an exit at the bottom */
1609 noway_assert(bottom->bbJumpDest == top);
1616 BasicBlock* exitPoint;
1618 switch (loopBlock->bbJumpKind)
1621 case BBJ_CALLFINALLY:
1623 case BBJ_EHCATCHRET:
1624 assert(loopBlock->bbJumpDest);
1625 exitPoint = loopBlock->bbJumpDest;
1627 if (exitPoint->bbNum < top->bbNum || exitPoint->bbNum > bottom->bbNum)
1629 /* exit from a block other than BOTTOM */
1638 case BBJ_EHFINALLYRET:
1639 case BBJ_EHFILTERRET:
1640 /* The "try" associated with this "finally" must be in the
1641 * same loop, so the finally block will return control inside the loop */
1646 /* those are exits from the loop */
1654 jumpCnt = loopBlock->bbJumpSwt->bbsCount;
1655 BasicBlock** jumpTab;
1656 jumpTab = loopBlock->bbJumpSwt->bbsDstTab;
1660 noway_assert(*jumpTab);
1661 exitPoint = *jumpTab;
1663 if (exitPoint->bbNum < top->bbNum || exitPoint->bbNum > bottom->bbNum)
1668 } while (++jumpTab, --jumpCnt);
1672 noway_assert(!"Unexpected bbJumpKind");
1677 /* Make sure we can iterate the loop (i.e. there is a way back to ENTRY)
1678 * This is to ensure condition 1. above which prevents marking fake loops
1680 * Below is an example:
1688 * The example above is not a loop since we bail after the first iteration
1690 * The condition we have to check for is
1691 * 1. ENTRY must have at least one predecessor inside the loop. Since we know that that block is
1692 * reachable, it can only be reached through ENTRY, therefore we have a way back to ENTRY
1694 * 2. If we have a GOTO (BBJ_ALWAYS) outside of the loop and that block dominates the
1695 * loop bottom then we cannot iterate
1697 * NOTE that this doesn't entirely satisfy condition 1. since "break" statements are not
1698 * part of the loop nodes (as per definition they are loop exits executed only once),
1699 * but we have no choice but to include them because we consider all blocks within TOP-BOTTOM */
1701 for (loopBlock = top; loopBlock != bottom; loopBlock = loopBlock->bbNext)
1703 switch (loopBlock->bbJumpKind)
1708 if (fgDominate(loopBlock, bottom))
1717 bool canIterateLoop = false;
1719 for (predEntry = entry->bbPreds; predEntry; predEntry = predEntry->flNext)
1721 if (predEntry->flBlock->bbNum >= top->bbNum && predEntry->flBlock->bbNum <= bottom->bbNum)
1723 canIterateLoop = true;
1726 else if (predEntry->flBlock != head)
1728 // The entry block has multiple predecessors outside the loop; the 'head'
1729 // block isn't the only one. We only support a single 'head', so bail.
1734 if (!canIterateLoop)
1739 /* Double check - make sure that all loop blocks except ENTRY
1740 * have no predecessors outside the loop - this ensures only one loop entry and prevents
1741 * us from considering non-loops due to incorrectly assuming that we had a back edge
1744 * Loops of the form "while (a || b)" will be treated as 2 nested loops (with the same header)
1747 for (loopBlock = top; loopBlock != bottom->bbNext; loopBlock = loopBlock->bbNext)
1749 if (loopBlock == entry)
1754 for (predTop = loopBlock->bbPreds; predTop != nullptr; predTop = predTop->flNext)
1756 if (predTop->flBlock->bbNum < top->bbNum || predTop->flBlock->bbNum > bottom->bbNum)
1758 // noway_assert(!"Found loop with multiple entries");
1764 // Disqualify loops where the first block of the loop is less nested in EH than
1765 // the bottom block. That is, we don't want to handle loops where the back edge
1766 // goes from within an EH region to a first block that is outside that same EH
1767 // region. Note that we *do* handle loops where the first block is the *first*
1768 // block of a more nested EH region (since it is legal to branch to the first
1769 // block of an immediately more nested EH region). So, for example, disqualify
1776 // BB10 BBJ_COND => BB02
1780 // Here, BB10 is more nested than BB02.
1782 if (bottom->hasTryIndex() && !bbInTryRegions(bottom->getTryIndex(), first))
1784 JITDUMP("Loop 'first' BB%02u is in an outer EH region compared to loop 'bottom' BB%02u. Rejecting "
1786 first->bbNum, bottom->bbNum);
1790 #if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
1791 // Disqualify loops where the first block of the loop is a finally target.
1792 // The main problem is when multiple loops share a 'first' block that is a finally
1793 // target and we canonicalize the loops by adding a new loop head. In that case, we
1794 // need to update the blocks so the finally target bit is moved to the newly created
1795 // block, and removed from the old 'first' block. This is 'hard', so at this point
1796 // in the RyuJIT codebase (when we don't expect to keep the "old" ARM32 code generator
1797 // long-term), it's easier to disallow the loop than to update the flow graph to
1798 // support this case.
1800 if ((first->bbFlags & BBF_FINALLY_TARGET) != 0)
1802 JITDUMP("Loop 'first' BB%02u is a finally target. Rejecting loop.\n", first->bbNum);
1805 #endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
1807 /* At this point we have a loop - record it in the loop table
1808 * If we found only one exit, record it in the table too
1809 * (otherwise an exit = 0 in the loop table means multiple exits) */
1816 optRecordLoop(head, first, top, entry, bottom, exit, exitCount);
1819 if (!hasMethodLoops)
1821 /* mark the method as containing natural loops */
1823 hasMethodLoops = true;
1826 /* increment total number of loops found */
1830 /* keep track of the number of exits */
1831 loopExitCountTable.record(static_cast<unsigned>(exitCount));
1832 #endif // COUNT_LOOPS
1835 /* current predecessor not good for a loop - continue with another one, if any */
1841 loopCountTable.record(loopsThisMethod);
1842 if (maxLoopsPerMethod < loopsThisMethod)
1844 maxLoopsPerMethod = loopsThisMethod;
1846 if (loopOverflowThisMethod)
1848 totalLoopOverflows++;
1850 #endif // COUNT_LOOPS
1852 // Now the loop indices are stable. We can figure out parent/child relationships
1853 // (using table indices to name loops), and label blocks.
1854 for (unsigned char loopInd = 1; loopInd < optLoopCount; loopInd++)
1856 for (unsigned char possibleParent = loopInd; possibleParent > 0;)
1859 if (optLoopTable[possibleParent].lpContains(optLoopTable[loopInd]))
1861 optLoopTable[loopInd].lpParent = possibleParent;
1862 optLoopTable[loopInd].lpSibling = optLoopTable[possibleParent].lpChild;
1863 optLoopTable[possibleParent].lpChild = loopInd;
1869 // Now label the blocks with the innermost loop to which they belong. Since parents
1870 // precede children in the table, doing the labeling for each loop in order will achieve
1871 // this -- the innermost loop labeling will be done last.
1872 for (unsigned char loopInd = 0; loopInd < optLoopCount; loopInd++)
1874 BasicBlock* first = optLoopTable[loopInd].lpFirst;
1875 BasicBlock* bottom = optLoopTable[loopInd].lpBottom;
1876 for (BasicBlock* blk = first; blk != nullptr; blk = blk->bbNext)
1878 blk->bbNatLoopNum = loopInd;
1883 assert(blk->bbNext != nullptr); // We should never reach nullptr.
1887 // Make sure that loops are canonical: that every loop has a unique "top", by creating an empty "nop"
1888 // one, if necessary, for loops containing others that share a "top."
1890 for (unsigned char loopInd = 0; loopInd < optLoopCount; loopInd++)
1892 // Traverse the outermost loops as entries into the loop nest; so skip non-outermost.
1893 if (optLoopTable[loopInd].lpParent != BasicBlock::NOT_IN_LOOP)
1899 if (optCanonicalizeLoopNest(loopInd))
1906 fgUpdateChangedFlowGraph();
1910 if (verbose && optLoopCount > 0)
1912 printf("\nFinal natural loop table:\n");
1913 for (unsigned loopInd = 0; loopInd < optLoopCount; loopInd++)
1915 optPrintLoopInfo(loopInd);
1922 void Compiler::optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap)
1924 BasicBlock* newJumpDest = nullptr;
1925 switch (blk->bbJumpKind)
1930 case BBJ_EHFILTERRET:
1931 case BBJ_EHFINALLYRET:
1932 case BBJ_EHCATCHRET:
1933 // These have no jump destination to update.
1938 case BBJ_CALLFINALLY:
1940 // All of these have a single jump destination to update.
1941 if (redirectMap->Lookup(blk->bbJumpDest, &newJumpDest))
1943 blk->bbJumpDest = newJumpDest;
1949 bool redirected = false;
1950 for (unsigned i = 0; i < blk->bbJumpSwt->bbsCount; i++)
1952 if (redirectMap->Lookup(blk->bbJumpSwt->bbsDstTab[i], &newJumpDest))
1954 blk->bbJumpSwt->bbsDstTab[i] = newJumpDest;
1958 // If any redirections happend, invalidate the switch table map for the switch.
1961 GetSwitchDescMap()->Remove(blk);
1971 // TODO-Cleanup: This should be a static member of the BasicBlock class.
1972 void Compiler::optCopyBlkDest(BasicBlock* from, BasicBlock* to)
1974 assert(from->bbJumpKind == to->bbJumpKind); // Precondition.
1976 // copy the jump destination(s) from "from" to "to".
1977 switch (to->bbJumpKind)
1981 case BBJ_CALLFINALLY:
1983 // All of these have a single jump destination to update.
1984 to->bbJumpDest = from->bbJumpDest;
1989 to->bbJumpSwt = new (this, CMK_BasicBlock) BBswtDesc();
1990 to->bbJumpSwt->bbsCount = from->bbJumpSwt->bbsCount;
1991 to->bbJumpSwt->bbsDstTab = new (this, CMK_BasicBlock) BasicBlock*[from->bbJumpSwt->bbsCount];
1993 for (unsigned i = 0; i < from->bbJumpSwt->bbsCount; i++)
1995 to->bbJumpSwt->bbsDstTab[i] = from->bbJumpSwt->bbsDstTab[i];
2005 // Canonicalize the loop nest rooted at parent loop 'loopInd'.
2006 // Returns 'true' if the flow graph is modified.
2007 bool Compiler::optCanonicalizeLoopNest(unsigned char loopInd)
2009 bool modified = false;
2011 // Is the top of the current loop not in any nested loop?
2012 if (optLoopTable[loopInd].lpTop->bbNatLoopNum != loopInd)
2014 if (optCanonicalizeLoop(loopInd))
2020 for (unsigned char child = optLoopTable[loopInd].lpChild; child != BasicBlock::NOT_IN_LOOP;
2021 child = optLoopTable[child].lpSibling)
2023 if (optCanonicalizeLoopNest(child))
2032 bool Compiler::optCanonicalizeLoop(unsigned char loopInd)
2034 // Is the top uniquely part of the current loop?
2035 BasicBlock* t = optLoopTable[loopInd].lpTop;
2037 if (t->bbNatLoopNum == loopInd)
2042 JITDUMP("in optCanonicalizeLoop: L%02u has top BB%02u (bottom BB%02u) with natural loop number L%02u: need to "
2044 loopInd, t->bbNum, optLoopTable[loopInd].lpBottom->bbNum, t->bbNatLoopNum);
2046 // Otherwise, the top of this loop is also part of a nested loop.
2048 // Insert a new unique top for this loop. We must be careful to put this new
2049 // block in the correct EH region. Note that f->bbPrev might be in a different
2050 // EH region. For example:
2058 // In this case, first->bbPrev is BB07, which is in a different 'try' region.
2059 // On the other hand, the first block of multiple loops might be the first
2060 // block of a 'try' region that is completely contained in the multiple loops.
2065 // BB10 BBJ_ALWAYS => BB08
2067 // BB12 BBJ_ALWAYS => BB08
2069 // Here, we have two loops, both with BB08 as the "first" block. Block BB08
2070 // is a single-block "try" region. Neither loop "bottom" block is in the same
2071 // "try" region as BB08. This is legal because you can jump to the first block
2072 // of a try region. With EH normalization, no two "try" regions will share
2073 // this block. In this case, we need to insert a new block for the outer loop
2074 // in the same EH region as the branch from the "bottom":
2079 // BB10 BBJ_ALWAYS => BB08
2081 // BB12 BBJ_ALWAYS => BB30
2083 // Another possibility is that the "first" block of the loop nest can be the first block
2084 // of a "try" region that also has other predecessors than those in the loop, or even in
2085 // the "try" region (since blocks can target the first block of a "try" region). For example:
2089 // BB10 BBJ_ALWAYS => BB08
2091 // BB12 BBJ_ALWAYS => BB08
2094 // BB20 BBJ_ALWAYS => BB08
2096 // BB25 BBJ_ALWAYS => BB08
2098 // Here, BB08 has 4 flow graph predecessors: BB10, BB12, BB20, BB25. These are all potential loop
2099 // bottoms, for four possible nested loops. However, we require all the loop bottoms to be in the
2100 // same EH region. For loops BB08..BB10 and BB08..BB12, we need to add a new "top" block within
2101 // the try region, immediately before BB08. The bottom of the loop BB08..BB10 loop will target the
2102 // old BB08, and the bottom of the BB08..BB12 loop will target the new loop header. The other branches
2103 // (BB20, BB25) must target the new loop header, both for correctness, and to avoid the illegal
2104 // situation of branching to a non-first block of a 'try' region.
2106 // We can also have a loop nest where the "first" block is outside of a "try" region
2107 // and the back edges are inside a "try" region, for example:
2111 // BB09 try { BBJ_COND => BB02
2113 // BB15 BBJ_COND => BB02
2115 // BB21 } // end of "try"
2117 // In this case, both loop back edges were formed by "leave" instructions that were
2118 // imported into branches that were later made conditional. In this case, we don't
2119 // want to copy the EH region of the back edge, since that would create a block
2120 // outside of and disjoint with the "try" region of the back edge. However, to
2121 // simplify things, we disqualify this type of loop, so we should never see this here.
2123 BasicBlock* h = optLoopTable[loopInd].lpHead;
2124 BasicBlock* f = optLoopTable[loopInd].lpFirst;
2125 BasicBlock* b = optLoopTable[loopInd].lpBottom;
2127 // The loop must be entirely contained within a single handler region.
2128 assert(BasicBlock::sameHndRegion(f, b));
2130 // If the bottom block is in the same "try" region, then we extend the EH
2131 // region. Otherwise, we add the new block outside the "try" region.
2132 bool extendRegion = BasicBlock::sameTryRegion(f, b);
2133 BasicBlock* newT = fgNewBBbefore(BBJ_NONE, f, extendRegion);
2136 // We need to set the EH region manually. Set it to be the same
2137 // as the bottom block.
2138 newT->copyEHRegion(b);
2141 BlockSetOps::Assign(this, newT->bbReach, t->bbReach);
2143 // Redirect the "bottom" of the current loop to "newT".
2144 BlockToBlockMap* blockMap = new (getAllocatorLoopHoist()) BlockToBlockMap(getAllocatorLoopHoist());
2145 blockMap->Set(t, newT);
2146 optRedirectBlock(b, blockMap);
2148 // Redirect non-loop preds of "t" to also go to "newT". Inner loops that also branch to "t" should continue
2149 // to do so. However, there maybe be other predecessors from outside the loop nest that need to be updated
2150 // to point to "newT". This normally wouldn't happen, since they too would be part of the loop nest. However,
2151 // they might have been prevented from participating in the loop nest due to different EH nesting, or some
2154 // Note that optRedirectBlock doesn't update the predecessors list. So, if the same 't' block is processed
2155 // multiple times while canonicalizing multiple loop nests, we'll attempt to redirect a predecessor multiple times.
2156 // This is ok, because after the first redirection, the topPredBlock branch target will no longer match the source
2157 // edge of the blockMap, so nothing will happen.
2158 for (flowList* topPred = t->bbPreds; topPred != nullptr; topPred = topPred->flNext)
2160 BasicBlock* topPredBlock = topPred->flBlock;
2162 // Skip if topPredBlock is in the loop.
2163 // Note that this uses block number to detect membership in the loop. We are adding blocks during
2164 // canonicalization, and those block numbers will be new, and larger than previous blocks. However, we work
2165 // outside-in, so we shouldn't encounter the new blocks at the loop boundaries, or in the predecessor lists.
2166 if (t->bbNum <= topPredBlock->bbNum && topPredBlock->bbNum <= b->bbNum)
2168 JITDUMP("in optCanonicalizeLoop: 'top' predecessor BB%02u is in the range of L%02u (BB%02u..BB%02u); not "
2169 "redirecting its bottom edge\n",
2170 topPredBlock->bbNum, loopInd, t->bbNum, b->bbNum);
2174 JITDUMP("in optCanonicalizeLoop: redirect top predecessor BB%02u to BB%02u\n", topPredBlock->bbNum,
2176 optRedirectBlock(topPredBlock, blockMap);
2179 assert(newT->bbNext == f);
2182 newT->bbJumpKind = BBJ_ALWAYS;
2183 newT->bbJumpDest = t;
2184 newT->bbTreeList = nullptr;
2185 fgInsertStmtAtEnd(newT, fgNewStmtFromTree(gtNewOperNode(GT_NOP, TYP_VOID, nullptr)));
2188 // If it had been a do-while loop (top == entry), update entry, as well.
2189 BasicBlock* origE = optLoopTable[loopInd].lpEntry;
2190 if (optLoopTable[loopInd].lpTop == origE)
2192 optLoopTable[loopInd].lpEntry = newT;
2194 optLoopTable[loopInd].lpTop = newT;
2195 optLoopTable[loopInd].lpFirst = newT;
2197 newT->bbNatLoopNum = loopInd;
2199 JITDUMP("in optCanonicalizeLoop: made new block BB%02u [%p] the new unique top of loop %d.\n", newT->bbNum,
2200 dspPtr(newT), loopInd);
2202 // Make sure the head block still goes to the entry...
2203 if (h->bbJumpKind == BBJ_NONE && h->bbNext != optLoopTable[loopInd].lpEntry)
2205 h->bbJumpKind = BBJ_ALWAYS;
2206 h->bbJumpDest = optLoopTable[loopInd].lpEntry;
2208 else if (h->bbJumpKind == BBJ_COND && h->bbNext == newT && newT != optLoopTable[loopInd].lpEntry)
2210 BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, h, /*extendRegion*/ true);
2211 optLoopTable[loopInd].lpHead = h2;
2212 h2->bbJumpDest = optLoopTable[loopInd].lpEntry;
2213 h2->bbTreeList = nullptr;
2214 fgInsertStmtAtEnd(h2, fgNewStmtFromTree(gtNewOperNode(GT_NOP, TYP_VOID, nullptr)));
2217 // If any loops nested in "loopInd" have the same head and entry as "loopInd",
2218 // it must be the case that they were do-while's (since "h" fell through to the entry).
2219 // The new node "newT" becomes the head of such loops.
2220 for (unsigned char childLoop = optLoopTable[loopInd].lpChild; childLoop != BasicBlock::NOT_IN_LOOP;
2221 childLoop = optLoopTable[childLoop].lpSibling)
2223 if (optLoopTable[childLoop].lpEntry == origE && optLoopTable[childLoop].lpHead == h &&
2224 newT->bbJumpKind == BBJ_NONE && newT->bbNext == origE)
2226 optUpdateLoopHead(childLoop, h, newT);
2232 bool Compiler::optLoopContains(unsigned l1, unsigned l2)
2234 assert(l1 != BasicBlock::NOT_IN_LOOP);
2239 else if (l2 == BasicBlock::NOT_IN_LOOP)
2245 return optLoopContains(l1, optLoopTable[l2].lpParent);
2249 void Compiler::optUpdateLoopHead(unsigned loopInd, BasicBlock* from, BasicBlock* to)
2251 assert(optLoopTable[loopInd].lpHead == from);
2252 optLoopTable[loopInd].lpHead = to;
2253 for (unsigned char childLoop = optLoopTable[loopInd].lpChild; childLoop != BasicBlock::NOT_IN_LOOP;
2254 childLoop = optLoopTable[childLoop].lpSibling)
2256 if (optLoopTable[childLoop].lpHead == from)
2258 optUpdateLoopHead(childLoop, from, to);
2263 /*****************************************************************************
2264 * If the : i += const" will cause an overflow exception for the small types.
2267 bool jitIterSmallOverflow(int iterAtExit, var_types incrType)
2274 type_MAX = SCHAR_MAX;
2277 type_MAX = UCHAR_MAX;
2280 type_MAX = SHRT_MAX;
2283 type_MAX = USHRT_MAX;
2286 case TYP_UINT: // Detected by checking for 32bit ....
2288 return false; // ... overflow same as done for TYP_INT
2294 if (iterAtExit > type_MAX)
2304 /*****************************************************************************
2305 * If the "i -= const" will cause an underflow exception for the small types
2308 bool jitIterSmallUnderflow(int iterAtExit, var_types decrType)
2315 type_MIN = SCHAR_MIN;
2318 type_MIN = SHRT_MIN;
2327 case TYP_UINT: // Detected by checking for 32bit ....
2329 return false; // ... underflow same as done for TYP_INT
2335 if (iterAtExit < type_MIN)
2345 /*****************************************************************************
2347 * Helper for unroll loops - Computes the number of repetitions
2348 * in a constant loop. If it cannot prove the number is constant returns false
2351 bool Compiler::optComputeLoopRep(int constInit,
2354 genTreeOps iterOper,
2355 var_types iterOperType,
2356 genTreeOps testOper,
2359 unsigned* iterCount)
2361 noway_assert(genActualType(iterOperType) == TYP_INT);
2364 __int64 constLimitX;
2369 // Using this, we can just do a signed comparison with other 32 bit values.
2372 constLimitX = (unsigned int)constLimit;
2376 constLimitX = (signed int)constLimit;
2379 switch (iterOperType)
2381 // For small types, the iteration operator will narrow these values if big
2383 #define INIT_ITER_BY_TYPE(type) \
2384 constInitX = (type)constInit; \
2385 iterInc = (type)iterInc;
2388 INIT_ITER_BY_TYPE(signed char);
2391 INIT_ITER_BY_TYPE(unsigned char);
2394 INIT_ITER_BY_TYPE(signed short);
2397 INIT_ITER_BY_TYPE(unsigned short);
2400 // For the big types, 32 bit arithmetic is performed
2406 constInitX = (unsigned int)constInit;
2410 constInitX = (signed int)constInit;
2415 noway_assert(!"Bad type");
2419 /* If iterInc is zero we have an infinite loop */
2425 /* Set iterSign to +1 for positive iterInc and -1 for negative iterInc */
2426 iterSign = (iterInc > 0) ? +1 : -1;
2428 /* Initialize loopCount to zero */
2431 // If dupCond is true then the loop head contains a test which skips
2432 // this loop, if the constInit does not pass the loop test
2433 // Such a loop can execute zero times.
2434 // If dupCond is false then we have a true do-while loop which we
2435 // always execute the loop once before performing the loop test
2439 constInitX += iterInc;
2442 // bail if count is based on wrap-around math
2445 if (constLimitX < constInitX)
2450 else if (constLimitX > constInitX)
2455 /* Compute the number of repetitions */
2459 __int64 iterAtExitX;
2462 /* something like "for (i=init; i == lim; i++)" doesn't make any sense */
2466 /* "for (i=init; i != lim; i+=const)" - this is tricky since it may
2467 * have a constant number of iterations or loop forever -
2468 * we have to compute (lim-init) mod iterInc to see if it is zero.
2469 * If mod iterInc is not zero then the limit test will miss an a wrap will occur
2470 * which is probably not what the end user wanted, but it is legal.
2475 /* Stepping by one, i.e. Mod with 1 is always zero */
2478 if (((constLimitX - constInitX) % iterInc) != 0)
2486 noway_assert(iterInc < 0);
2487 /* Stepping by -1, i.e. Mod with 1 is always zero */
2490 if (((constInitX - constLimitX) % (-iterInc)) != 0)
2506 if (constInitX != constLimitX)
2508 loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1;
2511 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2515 iterAtExitX = (unsigned)iterAtExitX;
2518 // Check if iteration incr will cause overflow for small types
2519 if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
2524 // iterator with 32bit overflow. Bad for TYP_(U)INT
2525 if (iterAtExitX < constLimitX)
2530 *iterCount = loopCount;
2546 noway_assert(!"Unknown operator for loop iterator");
2560 if (constInitX < constLimitX)
2562 loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1;
2565 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2569 iterAtExitX = (unsigned)iterAtExitX;
2572 // Check if iteration incr will cause overflow for small types
2573 if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
2578 // iterator with 32bit overflow. Bad for TYP_(U)INT
2579 if (iterAtExitX < constLimitX)
2584 *iterCount = loopCount;
2600 noway_assert(!"Unknown operator for loop iterator");
2614 if (constInitX <= constLimitX)
2616 loopCount += (unsigned)((constLimitX - constInitX) / iterInc) + 1;
2619 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2623 iterAtExitX = (unsigned)iterAtExitX;
2626 // Check if iteration incr will cause overflow for small types
2627 if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
2632 // iterator with 32bit overflow. Bad for TYP_(U)INT
2633 if (iterAtExitX <= constLimitX)
2638 *iterCount = loopCount;
2654 noway_assert(!"Unknown operator for loop iterator");
2668 if (constInitX > constLimitX)
2670 loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1;
2673 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2677 iterAtExitX = (unsigned)iterAtExitX;
2680 // Check if small types will underflow
2681 if (jitIterSmallUnderflow((int)iterAtExitX, iterOperType))
2686 // iterator with 32bit underflow. Bad for TYP_INT and unsigneds
2687 if (iterAtExitX > constLimitX)
2692 *iterCount = loopCount;
2708 noway_assert(!"Unknown operator for loop iterator");
2722 if (constInitX >= constLimitX)
2724 loopCount += (unsigned)((constLimitX - constInitX) / iterInc) + 1;
2727 iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
2731 iterAtExitX = (unsigned)iterAtExitX;
2734 // Check if small types will underflow
2735 if (jitIterSmallUnderflow((int)iterAtExitX, iterOperType))
2740 // iterator with 32bit underflow. Bad for TYP_INT and unsigneds
2741 if (iterAtExitX >= constLimitX)
2746 *iterCount = loopCount;
2762 noway_assert(!"Unknown operator for loop iterator");
2767 noway_assert(!"Unknown operator for loop condition");
2773 /*****************************************************************************
2775 * Look for loop unrolling candidates and unroll them
2779 #pragma warning(push)
2780 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
2782 void Compiler::optUnrollLoops()
2784 if (compCodeOpt() == SMALL_CODE)
2789 if (optLoopCount == 0)
2795 if (JitConfig.JitNoUnroll())
2801 if (optCanCloneLoops())
2809 printf("*************** In optUnrollLoops()\n");
2812 /* Look for loop unrolling candidates */
2814 /* Double loop so that after unrolling an inner loop we set change to true
2815 * and we then go back over all of the loop candidates and try to unroll
2816 * the next outer loop, until we don't unroll any loops,
2817 * then change will be false and we are done.
2821 bool change = false;
2823 for (unsigned lnum = 0; lnum < optLoopCount; lnum++)
2837 int lbeg; // initial value for iterator
2838 int llim; // limit value for iterator
2839 unsigned lvar; // iterator lclVar #
2840 int iterInc; // value to increment the iterator
2841 genTreeOps iterOper; // type of iterator increment (i.e. ASG_ADD, ASG_SUB, etc.)
2842 var_types iterOperType; // type result of the oper (for overflow instrs)
2843 genTreeOps testOper; // type of loop test (i.e. GT_LE, GT_GE, etc.)
2844 bool unsTest; // Is the comparison u/int
2846 unsigned totalIter; // total number of iterations in the constant loop
2847 unsigned loopCostSz; // Cost is size of one iteration
2848 unsigned loopFlags; // actual lpFlags
2849 unsigned requiredFlags; // required lpFlags
2851 GenTree* loopList; // new stmt list of the unrolled loop
2854 static const int ITER_LIMIT[COUNT_OPT_CODE + 1] = {
2861 noway_assert(ITER_LIMIT[SMALL_CODE] == 0);
2862 noway_assert(ITER_LIMIT[COUNT_OPT_CODE] == 0);
2864 unsigned iterLimit = (unsigned)ITER_LIMIT[compCodeOpt()];
2867 if (compStressCompile(STRESS_UNROLL_LOOPS, 50))
2873 static const int UNROLL_LIMIT_SZ[COUNT_OPT_CODE + 1] = {
2880 noway_assert(UNROLL_LIMIT_SZ[SMALL_CODE] == 0);
2881 noway_assert(UNROLL_LIMIT_SZ[COUNT_OPT_CODE] == 0);
2883 int unrollLimitSz = (unsigned)UNROLL_LIMIT_SZ[compCodeOpt()];
2886 if (compStressCompile(STRESS_UNROLL_LOOPS, 50))
2888 unrollLimitSz *= 10;
2892 loopFlags = optLoopTable[lnum].lpFlags;
2893 requiredFlags = LPFLG_DO_WHILE | LPFLG_ONE_EXIT | LPFLG_CONST;
2895 /* Ignore the loop if we don't have a do-while with a single exit
2896 that has a constant number of iterations */
2898 if ((loopFlags & requiredFlags) != requiredFlags)
2903 /* ignore if removed or marked as not unrollable */
2905 if (optLoopTable[lnum].lpFlags & (LPFLG_DONT_UNROLL | LPFLG_REMOVED))
2910 head = optLoopTable[lnum].lpHead;
2912 bottom = optLoopTable[lnum].lpBottom;
2913 noway_assert(bottom);
2915 /* The single exit must be at the bottom of the loop */
2916 noway_assert(optLoopTable[lnum].lpExit);
2917 if (optLoopTable[lnum].lpExit != bottom)
2922 /* Unrolling loops with jumps in them is not worth the headache
2923 * Later we might consider unrolling loops after un-switching */
2928 block = block->bbNext;
2929 noway_assert(block);
2931 if (block->bbJumpKind != BBJ_NONE)
2933 if (block != bottom)
2938 } while (block != bottom);
2940 /* Get the loop data:
2944 - iterator increment
2945 - increment operation type (i.e. ASG_ADD, ASG_SUB, etc...)
2946 - loop test type (i.e. GT_GE, GT_LT, etc...)
2949 lbeg = optLoopTable[lnum].lpConstInit;
2950 llim = optLoopTable[lnum].lpConstLimit();
2951 testOper = optLoopTable[lnum].lpTestOper();
2953 lvar = optLoopTable[lnum].lpIterVar();
2954 iterInc = optLoopTable[lnum].lpIterConst();
2955 iterOper = optLoopTable[lnum].lpIterOper();
2957 iterOperType = optLoopTable[lnum].lpIterOperType();
2958 unsTest = (optLoopTable[lnum].lpTestTree->gtFlags & GTF_UNSIGNED) != 0;
2960 if (lvaTable[lvar].lvAddrExposed)
2961 { // If the loop iteration variable is address-exposed then bail
2964 if (lvaTable[lvar].lvIsStructField)
2965 { // If the loop iteration variable is a promoted field from a struct then
2970 /* Locate the pre-header and initialization and increment/test statements */
2972 phdr = head->bbTreeList;
2974 loop = bottom->bbTreeList;
2977 init = head->lastStmt();
2978 noway_assert(init && (init->gtNext == nullptr));
2979 test = bottom->lastStmt();
2980 noway_assert(test && (test->gtNext == nullptr));
2981 incr = test->gtPrev;
2984 if (init->gtFlags & GTF_STMT_CMPADD)
2986 /* Must be a duplicated loop condition */
2987 noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
2990 init = init->gtPrev;
2998 /* Find the number of iterations - the function returns false if not a constant number */
3000 if (!optComputeLoopRep(lbeg, llim, iterInc, iterOper, iterOperType, testOper, unsTest, dupCond, &totalIter))
3005 /* Forget it if there are too many repetitions or not a constant loop */
3007 if (totalIter > iterLimit)
3012 noway_assert(init->gtOper == GT_STMT);
3013 init = init->gtStmt.gtStmtExpr;
3014 noway_assert(test->gtOper == GT_STMT);
3015 test = test->gtStmt.gtStmtExpr;
3016 noway_assert(incr->gtOper == GT_STMT);
3017 incr = incr->gtStmt.gtStmtExpr;
3019 // Don't unroll loops we don't understand.
3020 if (incr->gtOper == GT_ASG)
3025 /* Make sure everything looks ok */
3026 if ((init->gtOper != GT_ASG) || (init->gtOp.gtOp1->gtOper != GT_LCL_VAR) ||
3027 (init->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) || (init->gtOp.gtOp2->gtOper != GT_CNS_INT) ||
3028 (init->gtOp.gtOp2->gtIntCon.gtIconVal != lbeg) ||
3030 !((incr->gtOper == GT_ASG_ADD) || (incr->gtOper == GT_ASG_SUB)) ||
3031 (incr->gtOp.gtOp1->gtOper != GT_LCL_VAR) || (incr->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) ||
3032 (incr->gtOp.gtOp2->gtOper != GT_CNS_INT) || (incr->gtOp.gtOp2->gtIntCon.gtIconVal != iterInc) ||
3034 (test->gtOper != GT_JTRUE))
3036 noway_assert(!"Bad precondition in Compiler::optUnrollLoops()");
3040 /* heuristic - Estimated cost in code size of the unrolled loop */
3048 block = block->bbNext;
3050 /* Visit all the statements in the block */
3052 for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
3054 /* Get the expression and stop if end reached */
3056 GenTreePtr expr = stmt->gtStmtExpr;
3062 /* Calculate gtCostSz */
3063 gtSetStmtInfo(stmt);
3065 /* Update loopCostSz */
3066 loopCostSz += stmt->gtCostSz;
3068 } while (block != bottom);
3070 /* Compute the estimated increase in code size for the unrolled loop */
3072 unsigned int fixedLoopCostSz;
3073 fixedLoopCostSz = 8;
3076 unrollCostSz = (loopCostSz * totalIter) - (loopCostSz + fixedLoopCostSz);
3078 /* Don't unroll if too much code duplication would result. */
3080 if (unrollCostSz > unrollLimitSz)
3082 /* prevent this loop from being revisited */
3083 optLoopTable[lnum].lpFlags |= LPFLG_DONT_UNROLL;
3087 /* Looks like a good idea to unroll this loop, let's do it! */
3088 CLANG_FORMAT_COMMENT_ANCHOR;
3093 printf("\nUnrolling loop BB%02u", head->bbNext->bbNum);
3094 if (head->bbNext->bbNum != bottom->bbNum)
3096 printf("..BB%02u", bottom->bbNum);
3098 printf(" over V%02u from %u to %u", lvar, lbeg, llim);
3099 printf(" unrollCostSz = %d\n", unrollCostSz);
3104 /* Create the unrolled loop statement list */
3106 loopList = loopLast = nullptr;
3108 for (lval = lbeg; totalIter; totalIter--)
3117 block = block->bbNext;
3118 noway_assert(block);
3120 /* Visit all the statements in the block */
3122 for (stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
3124 /* Stop if we've reached the end of the loop */
3126 if (stmt->gtStmtExpr == incr)
3131 /* Clone/substitute the expression */
3133 expr = gtCloneExpr(stmt, 0, lvar, lval);
3135 // cloneExpr doesn't handle everything
3139 optLoopTable[lnum].lpFlags |= LPFLG_DONT_UNROLL;
3143 /* Append the expression to our list */
3147 loopLast->gtNext = expr;
3154 expr->gtPrev = loopLast;
3157 } while (block != bottom);
3159 /* update the new value for the unrolled iterator */
3173 noway_assert(!"Unrolling not implemented for this loop iterator");
3177 noway_assert(!"Unknown operator for constant loop iterator");
3182 /* Finish the linked list */
3186 loopList->gtPrev = loopLast;
3187 loopLast->gtNext = nullptr;
3190 /* Replace the body with the unrolled one */
3196 block = block->bbNext;
3197 noway_assert(block);
3198 block->bbTreeList = nullptr;
3199 block->bbJumpKind = BBJ_NONE;
3200 block->bbFlags &= ~BBF_NEEDS_GCPOLL;
3201 } while (block != bottom);
3203 bottom->bbJumpKind = BBJ_NONE;
3204 bottom->bbTreeList = loopList;
3205 bottom->bbFlags &= ~BBF_NEEDS_GCPOLL;
3206 bottom->modifyBBWeight(bottom->bbWeight / BB_LOOP_WEIGHT);
3210 fgMorphStmts(bottom, &dummy, &dummy, &dummy);
3212 /* Update bbRefs and bbPreds */
3213 /* Here head->bbNext is bottom !!! - Replace it */
3215 fgRemoveRefPred(head->bbNext, bottom);
3217 /* Now change the initialization statement in the HEAD to "lvar = lval;"
3218 * (the last value of the iterator in the loop)
3219 * and drop the jump condition since the unrolled loop will always execute */
3221 init->gtOp.gtOp2->gtIntCon.gtIconVal = lval;
3223 /* if the HEAD is a BBJ_COND drop the condition (and make HEAD a BBJ_NONE block) */
3225 if (head->bbJumpKind == BBJ_COND)
3227 phdr = head->bbTreeList;
3229 test = phdr->gtPrev;
3231 noway_assert(test && (test->gtNext == nullptr));
3232 noway_assert(test->gtOper == GT_STMT);
3233 noway_assert(test->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
3235 init = test->gtPrev;
3236 noway_assert(init && (init->gtNext == test));
3237 noway_assert(init->gtOper == GT_STMT);
3239 init->gtNext = nullptr;
3240 phdr->gtPrev = init;
3241 head->bbJumpKind = BBJ_NONE;
3242 head->bbFlags &= ~BBF_NEEDS_GCPOLL;
3244 /* Update bbRefs and bbPreds */
3246 fgRemoveRefPred(head->bbJumpDest, head);
3250 /* the loop must execute */
3251 noway_assert(head->bbJumpKind == BBJ_NONE);
3257 printf("Whole unrolled loop:\n");
3259 GenTreePtr s = loopList;
3263 noway_assert(s->gtOper == GT_STMT);
3274 /* Remember that something has changed */
3278 /* Make sure to update loop table */
3280 /* Use the LPFLG_REMOVED flag and update the bbLoopMask acordingly
3281 * (also make head and bottom NULL - to hit an assert or GPF) */
3283 optLoopTable[lnum].lpFlags |= LPFLG_REMOVED;
3284 optLoopTable[lnum].lpHead = optLoopTable[lnum].lpBottom = nullptr;
3296 fgDebugCheckBBlist();
3300 #pragma warning(pop)
3303 /*****************************************************************************
3305 * Return non-zero if there is a code path from 'topBB' to 'botBB' that will
3306 * not execute a method call.
3309 bool Compiler::optReachWithoutCall(BasicBlock* topBB, BasicBlock* botBB)
3311 // TODO-Cleanup: Currently BBF_GC_SAFE_POINT is not set for helper calls,
3312 // as some helper calls are neither interruptible nor hijackable.
3313 // When we can determine this, then we can set BBF_GC_SAFE_POINT for
3314 // those helpers too.
3316 noway_assert(topBB->bbNum <= botBB->bbNum);
3318 // We can always check topBB and botBB for any gc safe points and early out
3320 if ((topBB->bbFlags | botBB->bbFlags) & BBF_GC_SAFE_POINT)
3325 // Otherwise we will need to rely upon the dominator sets
3327 if (!fgDomsComputed)
3329 // return a conservative answer of true when we don't have the dominator sets
3333 BasicBlock* curBB = topBB;
3336 noway_assert(curBB);
3338 // If we added a loop pre-header block then we will
3339 // have a bbNum greater than fgLastBB, and we won't have
3340 // any dominator information about this block, so skip it.
3342 if (curBB->bbNum <= fgLastBB->bbNum)
3344 noway_assert(curBB->bbNum <= botBB->bbNum);
3346 // Does this block contain a gc safe point?
3348 if (curBB->bbFlags & BBF_GC_SAFE_POINT)
3350 // Will this block always execute on the way to botBB ?
3352 // Since we are checking every block in [topBB .. botBB] and we are using
3353 // a lexical definition of a loop.
3354 // (all that we know is that is that botBB is a back-edge to topBB)
3355 // Thus while walking blocks in this range we may encounter some blocks
3356 // that are not really part of the loop, and so we need to perform
3357 // some additional checks:
3359 // We will check that the current 'curBB' is reachable from 'topBB'
3360 // and that it dominates the block containing the back-edge 'botBB'
3361 // When both of these are true then we know that the gcsafe point in 'curBB'
3362 // will be encountered in the loop and we can return false
3364 if (fgDominate(curBB, botBB) && fgReachable(topBB, curBB))
3371 // If we've reached the destination block, then we're done
3380 curBB = curBB->bbNext;
3383 // If we didn't find any blocks that contained a gc safe point and
3384 // also met the fgDominate and fgReachable criteria then we must return true
3389 /*****************************************************************************
3391 * Find the loop termination test at the bottom of the loop
3394 static GenTreePtr optFindLoopTermTest(BasicBlock* bottom)
3396 GenTreePtr testt = bottom->bbTreeList;
3398 assert(testt && testt->gtOper == GT_STMT);
3400 GenTreePtr result = testt->gtPrev;
3403 while (testt->gtNext)
3405 testt = testt->gtNext;
3408 assert(testt == result);
3414 /*****************************************************************************
3415 * Optimize "jmp C; do{} C:while(cond);" loops to "if (cond){ do{}while(cond}; }"
3418 void Compiler::fgOptWhileLoop(BasicBlock* block)
3420 noway_assert(!opts.MinOpts() && !opts.compDbgCode);
3421 noway_assert(compCodeOpt() != SMALL_CODE);
3424 Optimize while loops into do { } while loop
3425 Our loop hoisting logic requires do { } while loops.
3426 Specifically, we're looking for the following case:
3437 If we find this, and the condition is simple enough, we change
3438 the loop to the following:
3443 // else fall-through
3454 /* Does the BB end with an unconditional jump? */
3456 if (block->bbJumpKind != BBJ_ALWAYS || (block->bbFlags & BBF_KEEP_BBJ_ALWAYS))
3457 { // It can't be one of the ones we use for our exception magic
3461 // It has to be a forward jump
3462 // TODO-CQ: Check if we can also optimize the backwards jump as well.
3464 if (fgIsForwardBranch(block) == false)
3469 // Get hold of the jump target
3470 BasicBlock* bTest = block->bbJumpDest;
3472 // Does the block consist of 'jtrue(cond) block' ?
3473 if (bTest->bbJumpKind != BBJ_COND)
3478 // bTest must be a backwards jump to block->bbNext
3479 if (bTest->bbJumpDest != block->bbNext)
3484 // Since test is a BBJ_COND it will have a bbNext
3485 noway_assert(bTest->bbNext);
3487 // 'block' must be in the same try region as the condition, since we're going to insert
3488 // a duplicated condition in 'block', and the condition might include exception throwing code.
3489 if (!BasicBlock::sameTryRegion(block, bTest))
3494 // We're going to change 'block' to branch to bTest->bbNext, so that also better be in the
3495 // same try region (or no try region) to avoid generating illegal flow.
3496 BasicBlock* bTestNext = bTest->bbNext;
3497 if (bTestNext->hasTryIndex() && !BasicBlock::sameTryRegion(block, bTestNext))
3502 GenTreePtr condStmt = optFindLoopTermTest(bTest);
3504 // bTest must only contain only a jtrue with no other stmts, we will only clone
3505 // the conditional, so any other statements will not get cloned
3506 // TODO-CQ: consider cloning the whole bTest block as inserting it after block.
3508 if (bTest->bbTreeList != condStmt)
3513 /* Get to the condition node from the statement tree */
3515 noway_assert(condStmt->gtOper == GT_STMT);
3517 GenTreePtr condTree = condStmt->gtStmt.gtStmtExpr;
3518 noway_assert(condTree->gtOper == GT_JTRUE);
3520 condTree = condTree->gtOp.gtOp1;
3522 // The condTree has to be a RelOp comparison
3523 // TODO-CQ: Check if we can also optimize the backwards jump as well.
3525 if (condTree->OperIsCompare() == false)
3530 /* We call gtPrepareCost to measure the cost of duplicating this tree */
3532 gtPrepareCost(condTree);
3533 unsigned estDupCostSz = condTree->gtCostSz;
3535 double loopIterations = (double)BB_LOOP_WEIGHT;
3537 bool allProfileWeightsAreValid = false;
3538 BasicBlock::weight_t weightBlock = block->bbWeight;
3539 BasicBlock::weight_t weightTest = bTest->bbWeight;
3540 BasicBlock::weight_t weightNext = block->bbNext->bbWeight;
3542 // If we have profile data then we calculate the number of time
3543 // the loop will iterate into loopIterations
3544 if (fgIsUsingProfileWeights())
3546 // Only rely upon the profile weight when all three of these blocks
3547 // have good profile weights
3548 if ((block->bbFlags & BBF_PROF_WEIGHT) && (bTest->bbFlags & BBF_PROF_WEIGHT) &&
3549 (block->bbNext->bbFlags & BBF_PROF_WEIGHT))
3551 allProfileWeightsAreValid = true;
3553 // If this while loop never iterates then don't bother transforming
3554 if (weightNext == 0)
3559 // with (weighNext > 0) we should also have (weightTest >= weightBlock)
3560 // if the profile weights are all valid.
3562 // weightNext is the number of time this loop iterates
3563 // weightBlock is the number of times that we enter the while loop
3564 // loopIterations is the average number of times that this loop iterates
3566 if (weightTest >= weightBlock)
3568 loopIterations = (double)block->bbNext->bbWeight / (double)block->bbWeight;
3573 unsigned maxDupCostSz = 32;
3575 // optFastCodeOrBlendedLoop(bTest->bbWeight) does not work here as we have not
3576 // set loop weights yet
3577 if ((compCodeOpt() == FAST_CODE) || compStressCompile(STRESS_DO_WHILE_LOOPS, 30))
3582 // If this loop iterates a lot then raise the maxDupCost
3583 if (loopIterations >= 12.0)
3587 if (loopIterations >= 96.0)
3592 // If the loop condition has a shared static helper, we really want this loop converted
3593 // as not converting the loop will disable loop hoisting, meaning the shared helper will
3594 // be executed on every loop iteration.
3595 int countOfHelpers = 0;
3596 fgWalkTreePre(&condTree, CountSharedStaticHelper, &countOfHelpers);
3598 if (countOfHelpers > 0 && compCodeOpt() != SMALL_CODE)
3600 maxDupCostSz += 24 * min(countOfHelpers, (int)(loopIterations + 1.5));
3603 // If the compare has too high cost then we don't want to dup
3605 bool costIsTooHigh = (estDupCostSz > maxDupCostSz);
3610 printf("\nDuplication of loop condition [%06u] is %s, because the cost of duplication (%i) is %s than %i,"
3611 "\n loopIterations = %7.3f, countOfHelpers = %d, validProfileWeights = %s\n",
3612 condTree->gtTreeID, costIsTooHigh ? "not done" : "performed", estDupCostSz,
3613 costIsTooHigh ? "greater" : "less or equal", maxDupCostSz, loopIterations, countOfHelpers,
3614 allProfileWeightsAreValid ? "true" : "false");
3623 /* Looks good - duplicate the condition test */
3625 condTree->gtFlags |= GTF_RELOP_ZTT;
3627 condTree = gtCloneExpr(condTree);
3628 gtReverseCond(condTree);
3630 // Make sure clone expr copied the flag
3631 assert(condTree->gtFlags & GTF_RELOP_ZTT);
3633 condTree = gtNewOperNode(GT_JTRUE, TYP_VOID, condTree);
3635 /* Create a statement entry out of the condition and
3636 append the condition test at the end of 'block' */
3638 GenTreePtr copyOfCondStmt = fgInsertStmtAtEnd(block, condTree);
3640 copyOfCondStmt->gtFlags |= GTF_STMT_CMPADD;
3642 #ifdef DEBUGGING_SUPPORT
3643 if (opts.compDbgInfo)
3645 copyOfCondStmt->gtStmt.gtStmtILoffsx = condStmt->gtStmt.gtStmtILoffsx;
3649 // Flag the block that received the copy as potentially having an array/vtable
3650 // reference if the block copied from did; this is a conservative guess.
3651 if (auto copyFlags = bTest->bbFlags & (BBF_HAS_VTABREF | BBF_HAS_IDX_LEN))
3653 block->bbFlags |= copyFlags;
3656 // If we have profile data for all blocks and we know that we are cloning the
3657 // bTest block into block and thus changing the control flow from block so
3658 // that it no longer goes directly to bTest anymore, we have to adjust the
3659 // weight of bTest by subtracting out the weight of block.
3661 if (allProfileWeightsAreValid)
3664 // Some additional sanity checks before adjusting the weight of bTest
3666 if ((weightNext > 0) && (weightTest >= weightBlock) && (weightTest != BB_MAX_WEIGHT))
3668 // Get the two edge that flow out of bTest
3669 flowList* edgeToNext = fgGetPredForBlock(bTest->bbNext, bTest);
3670 flowList* edgeToJump = fgGetPredForBlock(bTest->bbJumpDest, bTest);
3672 // Calculate the new weight for block bTest
3674 BasicBlock::weight_t newWeightTest =
3675 (weightTest > weightBlock) ? (weightTest - weightBlock) : BB_ZERO_WEIGHT;
3676 bTest->bbWeight = newWeightTest;
3678 if (newWeightTest == BB_ZERO_WEIGHT)
3680 bTest->bbFlags |= BBF_RUN_RARELY;
3681 // All out edge weights are set to zero
3682 edgeToNext->flEdgeWeightMin = BB_ZERO_WEIGHT;
3683 edgeToNext->flEdgeWeightMax = BB_ZERO_WEIGHT;
3684 edgeToJump->flEdgeWeightMin = BB_ZERO_WEIGHT;
3685 edgeToJump->flEdgeWeightMax = BB_ZERO_WEIGHT;
3689 // Update the our edge weights
3690 edgeToNext->flEdgeWeightMin = BB_ZERO_WEIGHT;
3691 edgeToNext->flEdgeWeightMax = min(edgeToNext->flEdgeWeightMax, newWeightTest);
3692 edgeToJump->flEdgeWeightMin = BB_ZERO_WEIGHT;
3693 edgeToJump->flEdgeWeightMax = min(edgeToJump->flEdgeWeightMax, newWeightTest);
3698 /* Change the block to end with a conditional jump */
3700 block->bbJumpKind = BBJ_COND;
3701 block->bbJumpDest = bTest->bbNext;
3703 /* Mark the jump dest block as being a jump target */
3704 block->bbJumpDest->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
3706 /* Update bbRefs and bbPreds for 'block->bbNext' 'bTest' and 'bTest->bbNext' */
3708 fgAddRefPred(block->bbNext, block);
3710 fgRemoveRefPred(bTest, block);
3711 fgAddRefPred(bTest->bbNext, block);
3716 printf("\nDuplicating loop condition in BB%02u for loop (BB%02u - BB%02u)", block->bbNum, block->bbNext->bbNum,
3718 printf("\nEstimated code size expansion is %d\n ", estDupCostSz);
3720 gtDispTree(copyOfCondStmt);
3726 /*****************************************************************************
3728 * Optimize the BasicBlock layout of the method
3731 void Compiler::optOptimizeLayout()
3733 noway_assert(!opts.MinOpts() && !opts.compDbgCode);
3738 printf("*************** In optOptimizeLayout()\n");
3742 /* Check that the flowgraph data (bbNum, bbRefs, bbPreds) is up-to-date */
3743 fgDebugCheckBBlist();
3746 noway_assert(fgModified == false);
3748 for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
3750 /* Make sure the appropriate fields are initialized */
3752 if (block->bbWeight == BB_ZERO_WEIGHT)
3754 /* Zero weighted block can't have a LOOP_HEAD flag */
3755 noway_assert(block->isLoopHead() == false);
3759 assert(block->bbLoopNum == 0);
3761 if (compCodeOpt() != SMALL_CODE)
3763 /* Optimize "while(cond){}" loops to "cond; do{}while(cond);" */
3765 fgOptWhileLoop(block);
3771 // Recompute the edge weight if we have modified the flow graph in fgOptWhileLoop
3772 fgComputeEdgeWeights();
3775 fgUpdateFlowGraph(true);
3777 fgUpdateFlowGraph();
3780 /*****************************************************************************
3782 * Perform loop inversion, find and classify natural loops
3785 void Compiler::optOptimizeLoops()
3787 noway_assert(!opts.MinOpts() && !opts.compDbgCode);
3792 printf("*************** In optOptimizeLoops()\n");
3796 optSetBlockWeights();
3798 /* Were there any loops in the flow graph? */
3802 /* now that we have dominator information we can find loops */
3804 optFindNaturalLoops();
3806 unsigned loopNum = 0;
3808 /* Iterate over the flow graph, marking all loops */
3810 /* We will use the following terminology:
3811 * top - the first basic block in the loop (i.e. the head of the backward edge)
3812 * bottom - the last block in the loop (i.e. the block from which we jump to the top)
3813 * lastBottom - used when we have multiple back-edges to the same top
3820 for (top = fgFirstBB; top; top = top->bbNext)
3822 BasicBlock* foundBottom = nullptr;
3824 for (pred = top->bbPreds; pred; pred = pred->flNext)
3826 /* Is this a loop candidate? - We look for "back edges" */
3828 BasicBlock* bottom = pred->flBlock;
3830 /* is this a backward edge? (from BOTTOM to TOP) */
3832 if (top->bbNum > bottom->bbNum)
3837 /* 'top' also must have the BBF_LOOP_HEAD flag set */
3839 if (top->isLoopHead() == false)
3844 /* We only consider back-edges that are BBJ_COND or BBJ_ALWAYS for loops */
3846 if ((bottom->bbJumpKind != BBJ_COND) && (bottom->bbJumpKind != BBJ_ALWAYS))
3851 /* the top block must be able to reach the bottom block */
3852 if (!fgReachable(top, bottom))
3857 /* Found a new loop, record the longest backedge in foundBottom */
3859 if ((foundBottom == nullptr) || (bottom->bbNum > foundBottom->bbNum))
3861 foundBottom = bottom;
3869 /* Mark the loop header as such */
3870 assert(FitsIn<unsigned char>(loopNum));
3871 top->bbLoopNum = (unsigned char)loopNum;
3874 /* Mark all blocks between 'top' and 'bottom' */
3876 optMarkLoopBlocks(top, foundBottom, false);
3879 // We track at most 255 loops
3883 totalUnnatLoopOverflows++;
3890 totalUnnatLoopCount += loopNum;
3898 printf("\nFound a total of %d loops.", loopNum);
3899 printf("\nAfter loop weight marking:\n");
3900 fgDispBasicBlocks();
3905 optLoopsMarked = true;
3909 //------------------------------------------------------------------------
3910 // optDeriveLoopCloningConditions: Derive loop cloning conditions.
3913 // loopNum - the current loop index for which conditions are derived.
3914 // context - data structure where all loop cloning info is kept.
3917 // "false" if conditions cannot be obtained. "true" otherwise.
3918 // The cloning conditions are updated in the "conditions"[loopNum] field
3919 // of the "context" parameter.
3922 // Inspect the loop cloning optimization candidates and populate the conditions necessary
3923 // for each optimization candidate. Checks if the loop stride is "> 0" if the loop
3924 // condition is "less than". If the initializer is "var" init then adds condition
3925 // "var >= 0", and if the loop is var limit then, "var >= 0" and "var <= a.len"
3926 // are added to "context". These conditions are checked in the pre-header block
3927 // and the cloning choice is made.
3930 // Callers should assume AND operation is used i.e., if all conditions are
3931 // true, then take the fast path.
3933 bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext* context)
3935 JITDUMP("------------------------------------------------------------\n");
3936 JITDUMP("Deriving cloning conditions for L%02u\n", loopNum);
3938 LoopDsc* loop = &optLoopTable[loopNum];
3939 ExpandArrayStack<LcOptInfo*>* optInfos = context->GetLoopOptInfo(loopNum);
3941 if (loop->lpTestOper() == GT_LT)
3943 // Stride conditions
3944 if (loop->lpIterConst() <= 0)
3946 JITDUMP("> Stride %d is invalid\n", loop->lpIterConst());
3951 if (loop->lpFlags & LPFLG_CONST_INIT)
3953 // Only allowing const init at this time.
3954 if (loop->lpConstInit < 0)
3956 JITDUMP("> Init %d is invalid\n", loop->lpConstInit);
3960 else if (loop->lpFlags & LPFLG_VAR_INIT)
3963 LC_Condition geZero(GT_GE, LC_Expr(LC_Ident(loop->lpVarInit, LC_Ident::Var)),
3964 LC_Expr(LC_Ident(0, LC_Ident::Const)));
3965 context->EnsureConditions(loopNum)->Push(geZero);
3969 JITDUMP("> Not variable init\n");
3975 if (loop->lpFlags & LPFLG_CONST_LIMIT)
3977 int limit = loop->lpConstLimit();
3980 JITDUMP("> limit %d is invalid\n", limit);
3983 ident = LC_Ident(limit, LC_Ident::Const);
3985 else if (loop->lpFlags & LPFLG_VAR_LIMIT)
3987 unsigned limitLcl = loop->lpVarLimit();
3988 ident = LC_Ident(limitLcl, LC_Ident::Var);
3990 LC_Condition geZero(GT_GE, LC_Expr(ident), LC_Expr(LC_Ident(0, LC_Ident::Const)));
3992 context->EnsureConditions(loopNum)->Push(geZero);
3994 else if (loop->lpFlags & LPFLG_ARRLEN_LIMIT)
3996 ArrIndex* index = new (getAllocator()) ArrIndex(getAllocator());
3997 if (!loop->lpArrLenLimit(this, index))
3999 JITDUMP("> ArrLen not matching");
4002 ident = LC_Ident(LC_Array(LC_Array::Jagged, index, LC_Array::ArrLen));
4004 // Ensure that this array must be dereference-able, before executing the actual condition.
4005 LC_Array array(LC_Array::Jagged, index, LC_Array::None);
4006 context->EnsureDerefs(loopNum)->Push(array);
4010 JITDUMP("> Undetected limit\n");
4014 for (unsigned i = 0; i < optInfos->Size(); ++i)
4016 LcOptInfo* optInfo = optInfos->GetRef(i);
4017 switch (optInfo->GetOptType())
4019 case LcOptInfo::LcJaggedArray:
4022 LcJaggedArrayOptInfo* arrIndexInfo = optInfo->AsLcJaggedArrayOptInfo();
4023 LC_Array arrLen(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::ArrLen);
4024 LC_Ident arrLenIdent = LC_Ident(arrLen);
4026 LC_Condition cond(GT_LE, LC_Expr(ident), LC_Expr(arrLenIdent));
4027 context->EnsureConditions(loopNum)->Push(cond);
4029 // Ensure that this array must be dereference-able, before executing the actual condition.
4030 LC_Array array(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::None);
4031 context->EnsureDerefs(loopNum)->Push(array);
4034 case LcOptInfo::LcMdArray:
4036 // limit <= mdArrLen
4037 LcMdArrayOptInfo* mdArrInfo = optInfo->AsLcMdArrayOptInfo();
4038 LC_Condition cond(GT_LE, LC_Expr(ident),
4039 LC_Expr(LC_Ident(LC_Array(LC_Array::MdArray,
4040 mdArrInfo->GetArrIndexForDim(getAllocator()),
4041 mdArrInfo->dim, LC_Array::None))));
4042 context->EnsureConditions(loopNum)->Push(cond);
4047 JITDUMP("Unknown opt\n");
4051 JITDUMP("Conditions: (");
4052 DBEXEC(verbose, context->PrintConditions(loopNum));
4059 //------------------------------------------------------------------------------------
4060 // optComputeDerefConditions: Derive loop cloning conditions for dereferencing arrays.
4063 // loopNum - the current loop index for which conditions are derived.
4064 // context - data structure where all loop cloning info is kept.
4067 // "false" if conditions cannot be obtained. "true" otherwise.
4068 // The deref conditions are updated in the "derefConditions"[loopNum] field
4069 // of the "context" parameter.
4071 // Definition of Deref Conditions:
4072 // To be able to check for the loop cloning condition that (limitVar <= a.len)
4073 // we should first be able to dereference "a". i.e., "a" is non-null.
4079 // for (k in 0..n) // Inner most loop is being cloned. Cloning needs to check if
4080 // // (n <= a[i][j].len) and other safer conditions to take the fast path
4083 // Now, we want to deref a[i][j] to invoke length operator on it to perform the cloning fast path check.
4084 // This involves deref of (a), (a[i]), (a[i][j]), therefore, the following should first
4085 // be true to do the deref.
4087 // (a != null) && (i < a.len) && (a[i] != null) && (j < a[i].len) && (a[i][j] != null) --> (1)
4089 // Note the short circuiting AND. Implication: these conditions should be performed in separate
4090 // blocks each of which will branch to slow path if the condition evaluates to false.
4092 // Now, imagine a situation where we have
4093 // a[x][y][k] = 20 and a[i][j][k] = 0
4094 // also in the inner most loop where x, y are parameters, then our conditions will have
4098 // in addition to the above conditions (1) to get rid of bounds check on index 'k'
4100 // But these conditions can be checked together with conditions
4101 // (i < a.len) without a need for a separate block. In summary, the conditions will be:
4104 // ((i < a.len) & (x < a.len)) && <-- Note the bitwise AND here.
4105 // (a[i] != null & a[x] != null) && <-- Note the bitwise AND here.
4106 // (j < a[i].len & y < a[x].len) && <-- Note the bitwise AND here.
4107 // (a[i][j] != null & a[x][y] != null) <-- Note the bitwise AND here.
4109 // This naturally yields a tree style pattern, where the nodes of the tree are
4110 // the array and indices respectively.
4126 // Notice that the variables in the same levels can have their conditions combined in the
4127 // same block with a bitwise AND. Whereas, the conditions in consecutive levels will be
4128 // combined with a short-circuiting AND (i.e., different basic blocks).
4131 // Construct a tree of array indices and the array which will generate the optimal
4132 // conditions for loop cloning.
4134 // a[i][j][k], b[i] and a[i][y][k] are the occurrences in the loop. Then, the tree should be:
4149 // In this method, we will construct such a tree by descending depth first into the array
4150 // index operation and forming a tree structure as we encounter the array or the index variables.
4152 // This tree structure will then be used to generate conditions like below:
4153 // (a != null) & (b != null) && // from the first level of the tree.
4155 // (i < a.len) & (i < b.len) && // from the second level of the tree. Levels can be combined.
4156 // (a[i] != null) & (b[i] != null) && // from the second level of the tree.
4158 // (j < a[i].len) & (y < a[i].len) && // from the third level.
4159 // (a[i][j] != null) & (a[i][y] != null) && // from the third level.
4164 bool Compiler::optComputeDerefConditions(unsigned loopNum, LoopCloneContext* context)
4166 ExpandArrayStack<LC_Deref*> nodes(getAllocator());
4169 // Get the dereference-able arrays.
4170 ExpandArrayStack<LC_Array>* deref = context->EnsureDerefs(loopNum);
4172 // For each array in the dereference list, construct a tree,
4173 // where the nodes are array and index variables and an edge 'u-v'
4174 // exists if a node 'v' indexes node 'u' directly as in u[v] or an edge
4175 // 'u-v-w' transitively if u[v][w] occurs.
4176 for (unsigned i = 0; i < deref->Size(); ++i)
4178 LC_Array& array = (*deref)[i];
4180 // First populate the array base variable.
4181 LC_Deref* node = LC_Deref::Find(&nodes, array.arrIndex->arrLcl);
4182 if (node == nullptr)
4184 node = new (getAllocator()) LC_Deref(array, 0 /*level*/);
4188 // For each dimension (level) for the array, populate the tree with the variable
4189 // from that dimension.
4190 unsigned rank = (unsigned)array.GetDimRank();
4191 for (unsigned i = 0; i < rank; ++i)
4193 node->EnsureChildren(getAllocator());
4194 LC_Deref* tmp = node->Find(array.arrIndex->indLcls[i]);
4197 tmp = new (getAllocator()) LC_Deref(array, node->level + 1);
4198 node->children->Push(tmp);
4201 // Descend one level down.
4205 // Keep the maxRank of all array dereferences.
4206 maxRank = max((int)rank, maxRank);
4212 for (unsigned i = 0; i < nodes.Size(); ++i)
4229 // First level will always yield the null-check, since it is made of the array base variables.
4230 // All other levels (dimensions) will yield two conditions ex: (i < a.length && a[i] != null)
4231 // So add 1 after rank * 2.
4232 unsigned condBlocks = (unsigned)maxRank * 2 + 1;
4234 // Heuristic to not create too many blocks;
4240 // Derive conditions into an 'array of level x array of conditions' i.e., levelCond[levels][conds]
4241 ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* levelCond = context->EnsureBlockConditions(loopNum, condBlocks);
4242 for (unsigned i = 0; i < nodes.Size(); ++i)
4244 nodes[i]->DeriveLevelConditions(levelCond);
4247 DBEXEC(verbose, context->PrintBlockConditions(loopNum));
4252 //----------------------------------------------------------------------------
4253 // optDebugLogLoopCloning: Insert a call to jithelper that prints a message.
4256 // block - the block in which the helper call needs to be inserted.
4257 // insertBefore - the tree before which the helper call will be inserted.
4259 void Compiler::optDebugLogLoopCloning(BasicBlock* block, GenTreePtr insertBefore)
4261 if (JitConfig.JitDebugLogLoopCloning() == 0)
4265 GenTreePtr logCall = gtNewHelperCallNode(CORINFO_HELP_DEBUG_LOG_LOOP_CLONING, TYP_VOID);
4266 GenTreePtr stmt = fgNewStmtFromTree(logCall);
4267 fgInsertStmtBefore(block, insertBefore, stmt);
4268 fgMorphBlockStmt(block, stmt DEBUGARG("Debug log loop cloning"));
4272 //------------------------------------------------------------------------
4273 // optPerformStaticOptimizations: Perform the optimizations for the optimization
4274 // candidates gathered during the cloning phase.
4277 // loopNum - the current loop index for which the optimizations are performed.
4278 // context - data structure where all loop cloning info is kept.
4279 // dynamicPath - If true, the optimization is performed in the fast path among the
4280 // cloned loops. If false, it means this is the only path (i.e.,
4281 // there is no slow path.)
4284 // Perform the optimizations on the fast path i.e., the path in which the
4285 // optimization candidates were collected at the time of identifying them.
4286 // The candidates store all the information necessary (the tree/stmt/block
4287 // they are from) to perform the optimization.
4290 // The unoptimized path is either already cloned when this method is called or
4291 // there is no unoptimized path (got eliminated statically.) So this method
4292 // performs the optimizations assuming that the path in which the candidates
4293 // were collected is the fast path in which the optimizations will be performed.
4295 void Compiler::optPerformStaticOptimizations(unsigned loopNum, LoopCloneContext* context DEBUGARG(bool dynamicPath))
4297 ExpandArrayStack<LcOptInfo*>* optInfos = context->GetLoopOptInfo(loopNum);
4298 for (unsigned i = 0; i < optInfos->Size(); ++i)
4300 LcOptInfo* optInfo = optInfos->GetRef(i);
4301 switch (optInfo->GetOptType())
4303 case LcOptInfo::LcJaggedArray:
4305 LcJaggedArrayOptInfo* arrIndexInfo = optInfo->AsLcJaggedArrayOptInfo();
4306 compCurBB = arrIndexInfo->arrIndex.useBlock;
4307 optRemoveRangeCheck(arrIndexInfo->arrIndex.bndsChks[arrIndexInfo->dim], arrIndexInfo->stmt, true,
4309 DBEXEC(dynamicPath, optDebugLogLoopCloning(arrIndexInfo->arrIndex.useBlock, arrIndexInfo->stmt));
4312 case LcOptInfo::LcMdArray:
4313 // TODO-CQ: CLONE: Implement.
4321 //----------------------------------------------------------------------------
4322 // optCanCloneLoops: Use the environment flag to determine whether loop
4323 // cloning is allowed to be performed.
4326 // Returns true in debug builds if COMPlus_JitCloneLoops flag is set.
4327 // Disabled for retail for now.
4329 bool Compiler::optCanCloneLoops()
4331 // Enabled for retail builds now.
4332 unsigned cloneLoopsFlag = 1;
4334 cloneLoopsFlag = JitConfig.JitCloneLoops();
4336 return (cloneLoopsFlag != 0);
4339 //----------------------------------------------------------------------------
4340 // optIsLoopClonable: Determine whether this loop can be cloned.
4343 // loopInd loop index which needs to be checked if it can be cloned.
4346 // Returns true if the loop can be cloned. If it returns false
4347 // prints a message in debug as why the loop can't be cloned.
4349 bool Compiler::optIsLoopClonable(unsigned loopInd)
4351 // First, for now, make sure the loop doesn't have any embedded exception handling -- I don't want to tackle
4352 // inserting new EH regions in the exception table yet.
4353 BasicBlock* stopAt = optLoopTable[loopInd].lpBottom->bbNext;
4354 unsigned loopRetCount = 0;
4355 for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != stopAt; blk = blk->bbNext)
4357 if (blk->bbJumpKind == BBJ_RETURN)
4361 if (bbIsTryBeg(blk))
4363 JITDUMP("Loop cloning: rejecting loop %d in %s, because it has a try begin.\n", loopInd, info.compFullName);
4368 // Is the entry block a handler or filter start? If so, then if we cloned, we could create a jump
4369 // into the middle of a handler (to go to the cloned copy.) Reject.
4370 if (bbIsHandlerBeg(optLoopTable[loopInd].lpEntry))
4372 JITDUMP("Loop cloning: rejecting loop because entry block is a handler start.\n");
4376 // If the head and entry are in different EH regions, reject.
4377 if (!BasicBlock::sameEHRegion(optLoopTable[loopInd].lpHead, optLoopTable[loopInd].lpEntry))
4379 JITDUMP("Loop cloning: rejecting loop because head and entry blocks are in different EH regions.\n");
4383 // Is the first block after the last block of the loop a handler or filter start?
4384 // Usually, we create a dummy block after the orginal loop, to skip over the loop clone
4385 // and go to where the original loop did. That raises problems when we don't actually go to
4386 // that block; this is one of those cases. This could be fixed fairly easily; for example,
4387 // we could add a dummy nop block after the (cloned) loop bottom, in the same handler scope as the
4388 // loop. This is just a corner to cut to get this working faster.
4389 BasicBlock* bbAfterLoop = optLoopTable[loopInd].lpBottom->bbNext;
4390 if (bbAfterLoop != nullptr && bbIsHandlerBeg(bbAfterLoop))
4392 JITDUMP("Loop cloning: rejecting loop because next block after bottom is a handler start.\n");
4396 // We've previously made a decision whether to have separate return epilogs, or branch to one.
4397 // There's a GCInfo limitation in the x86 case, so that there can be no more than 4 separate epilogs.
4398 // (I thought this was x86-specific, but it's not if-d. On other architectures, the decision should be made as a
4399 // heuristic tradeoff; perhaps we're just choosing to live with 4 as the limit.)
4400 if (fgReturnCount + loopRetCount > 4)
4402 JITDUMP("Loop cloning: rejecting loop because it has %d returns; if added to previously-existing %d returns, "
4403 "would exceed the limit of 4.\n",
4404 loopRetCount, fgReturnCount);
4408 // Otherwise, we're going to add those return blocks.
4409 fgReturnCount += loopRetCount;
4414 /*****************************************************************************
4416 * Identify loop cloning opportunities, derive loop cloning conditions,
4417 * perform loop cloning, use the derived conditions to choose which
4420 void Compiler::optCloneLoops()
4422 JITDUMP("\n*************** In optCloneLoops()\n");
4423 if (optLoopCount == 0 || !optCanCloneLoops())
4431 printf("Blocks/Trees at start of phase\n");
4432 fgDispBasicBlocks(true);
4436 LoopCloneContext context(optLoopCount, getAllocator());
4438 // Obtain array optimization candidates in the context.
4439 optObtainLoopCloningOpts(&context);
4441 // For each loop, derive cloning conditions for the optimization candidates.
4442 for (unsigned i = 0; i < optLoopCount; ++i)
4444 ExpandArrayStack<LcOptInfo*>* optInfos = context.GetLoopOptInfo(i);
4445 if (optInfos == nullptr)
4450 if (!optDeriveLoopCloningConditions(i, &context) || !optComputeDerefConditions(i, &context))
4452 JITDUMP("> Conditions could not be obtained\n");
4453 context.CancelLoopOptInfo(i);
4457 bool allTrue = false;
4458 bool anyFalse = false;
4459 context.EvaluateConditions(i, &allTrue, &anyFalse DEBUGARG(verbose));
4462 context.CancelLoopOptInfo(i);
4466 // Perform static optimizations on the fast path since we always
4467 // have to take the cloned path.
4468 optPerformStaticOptimizations(i, &context DEBUGARG(false));
4470 // No need to clone.
4471 context.CancelLoopOptInfo(i);
4477 // The code in this #if has been useful in debugging loop cloning issues, by
4478 // enabling selective enablement of the loop cloning optimization according to
4481 unsigned methHash = info.compMethodHash();
4482 char* lostr = getenv("loopclonehashlo");
4483 unsigned methHashLo = 0;
4486 sscanf_s(lostr, "%x", &methHashLo);
4487 // methHashLo = (unsigned(atoi(lostr)) << 2); // So we don't have to use negative numbers.
4489 char* histr = getenv("loopclonehashhi");
4490 unsigned methHashHi = UINT32_MAX;
4493 sscanf_s(histr, "%x", &methHashHi);
4494 // methHashHi = (unsigned(atoi(histr)) << 2); // So we don't have to use negative numbers.
4496 if (methHash < methHashLo || methHash > methHashHi)
4501 for (unsigned i = 0; i < optLoopCount; ++i)
4503 if (context.GetLoopOptInfo(i) != nullptr)
4506 context.OptimizeConditions(i DEBUGARG(verbose));
4507 context.OptimizeBlockConditions(i DEBUGARG(verbose));
4508 optCloneLoop(i, &context);
4515 printf("\nAfter loop cloning:\n");
4516 fgDispBasicBlocks(/*dumpTrees*/ true);
4521 void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context)
4523 assert(loopInd < optLoopCount);
4525 JITDUMP("\nCloning loop %d: [h: %d, f: %d, t: %d, e: %d, b: %d].\n", loopInd, optLoopTable[loopInd].lpHead->bbNum,
4526 optLoopTable[loopInd].lpFirst->bbNum, optLoopTable[loopInd].lpTop->bbNum,
4527 optLoopTable[loopInd].lpEntry->bbNum, optLoopTable[loopInd].lpBottom->bbNum);
4529 // Determine the depth of the loop, so we can properly weight blocks added (outside the cloned loop blocks).
4530 unsigned depth = optLoopDepth(loopInd);
4531 unsigned ambientWeight = 1;
4532 for (unsigned j = 0; j < depth; j++)
4534 unsigned lastWeight = ambientWeight;
4535 ambientWeight *= BB_LOOP_WEIGHT;
4536 // If the multiplication overflowed, stick at max.
4537 // (Strictly speaking, a multiplication could overflow and still have a result
4538 // that is >= lastWeight...but if so, the original weight must be pretty large,
4539 // and it got bigger, so that's OK.)
4540 if (ambientWeight < lastWeight)
4542 ambientWeight = BB_MAX_WEIGHT;
4547 // If we're in a non-natural loop, the ambient weight might be higher than we computed above.
4548 // Be safe by taking the max with the head block's weight.
4549 ambientWeight = max(ambientWeight, optLoopTable[loopInd].lpHead->bbWeight);
4551 // This is the containing loop, if any -- to label any blocks we create that are outside
4552 // the loop being cloned.
4553 unsigned char ambientLoop = optLoopTable[loopInd].lpParent;
4555 // First, make sure that the loop has a unique header block, creating an empty one if necessary.
4556 optEnsureUniqueHead(loopInd, ambientWeight);
4558 // We're going to make
4570 // H2--> E (Optional; if E == T == F, let H fall through to F/T/E)
4582 BasicBlock* h = optLoopTable[loopInd].lpHead;
4583 if (h->bbJumpKind != BBJ_NONE && h->bbJumpKind != BBJ_ALWAYS)
4585 // Make a new block to be the unique entry to the loop.
4586 assert(h->bbJumpKind == BBJ_COND && h->bbNext == optLoopTable[loopInd].lpEntry);
4587 BasicBlock* newH = fgNewBBafter(BBJ_NONE, h,
4588 /*extendRegion*/ true);
4589 newH->bbWeight = (newH->isRunRarely() ? 0 : ambientWeight);
4590 BlockSetOps::Assign(this, newH->bbReach, h->bbReach);
4591 // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
4592 newH->bbNatLoopNum = ambientLoop;
4594 optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h);
4597 // First, make X2 after B, if necessary. (Not necessary if b is a BBJ_ALWAYS.)
4598 // "newPred" will be the predecessor of the blocks of the cloned loop.
4599 BasicBlock* b = optLoopTable[loopInd].lpBottom;
4600 BasicBlock* newPred = b;
4601 if (b->bbJumpKind != BBJ_ALWAYS)
4603 BasicBlock* x = b->bbNext;
4606 BasicBlock* x2 = fgNewBBafter(BBJ_ALWAYS, b, /*extendRegion*/ true);
4607 x2->bbWeight = (x2->isRunRarely() ? 0 : ambientWeight);
4609 // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
4610 x2->bbNatLoopNum = ambientLoop;
4613 BlockSetOps::Assign(this, x2->bbReach, h->bbReach);
4618 // Now we'll make "h2", after "h" to go to "e" -- unless the loop is a do-while,
4619 // so that "h" already falls through to "e" (e == t == f).
4620 BasicBlock* h2 = nullptr;
4621 if (optLoopTable[loopInd].lpHead->bbNext != optLoopTable[loopInd].lpEntry)
4623 BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, optLoopTable[loopInd].lpHead,
4624 /*extendRegion*/ true);
4625 h2->bbWeight = (h2->isRunRarely() ? 0 : ambientWeight);
4627 // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
4628 h2->bbNatLoopNum = ambientLoop;
4630 h2->bbJumpDest = optLoopTable[loopInd].lpEntry;
4631 optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h2);
4634 // Now we'll clone the blocks of the loop body.
4635 BasicBlock* newFirst = nullptr;
4636 BasicBlock* newBot = nullptr;
4638 BlockToBlockMap* blockMap = new (getAllocator()) BlockToBlockMap(getAllocator());
4639 for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != optLoopTable[loopInd].lpBottom->bbNext;
4642 BasicBlock* newBlk = fgNewBBafter(blk->bbJumpKind, newPred,
4643 /*extendRegion*/ true);
4645 BasicBlock::CloneBlockState(this, newBlk, blk);
4646 // TODO-Cleanup: The above clones the bbNatLoopNum, which is incorrect. Eventually, we should probably insert
4647 // the cloned loop in the loop table. For now, however, we'll just make these blocks be part of the surrounding
4648 // loop, if one exists -- the parent of the loop we're cloning.
4649 newBlk->bbNatLoopNum = optLoopTable[loopInd].lpParent;
4651 if (newFirst == nullptr)
4655 newBot = newBlk; // Continually overwrite to make sure we get the last one.
4657 blockMap->Set(blk, newBlk);
4660 // Perform the static optimizations on the fast path.
4661 optPerformStaticOptimizations(loopInd, context DEBUGARG(true));
4663 // Now go through the new blocks, remapping their jump targets within the loop.
4664 for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != optLoopTable[loopInd].lpBottom->bbNext;
4668 BasicBlock* newblk = nullptr;
4669 bool b = blockMap->Lookup(blk, &newblk);
4670 assert(b && newblk != nullptr);
4672 assert(blk->bbJumpKind == newblk->bbJumpKind);
4674 // First copy the jump destination(s) from "blk".
4675 optCopyBlkDest(blk, newblk);
4677 // Now redirect the new block according to "blockMap".
4678 optRedirectBlock(newblk, blockMap);
4681 assert((h->bbJumpKind == BBJ_NONE && (h->bbNext == h2 || h->bbNext == optLoopTable[loopInd].lpEntry)) ||
4682 (h->bbJumpKind == BBJ_ALWAYS));
4684 // If all the conditions are true, go to E2.
4685 BasicBlock* e2 = nullptr;
4686 bool foundIt = blockMap->Lookup(optLoopTable[loopInd].lpEntry, &e2);
4688 h->bbJumpKind = BBJ_COND;
4690 // We will create the following structure
4692 // cond0 (in h) -?> cond1
4693 // slow --> e2 (slow) always
4700 // We should always have block conditions, at the minimum, the array should be deref-able
4701 assert(context->HasBlockConditions(loopInd));
4703 // Create a unique header for the slow path.
4704 BasicBlock* slowHead = fgNewBBafter(BBJ_ALWAYS, h, true);
4705 slowHead->bbWeight = (h->isRunRarely() ? 0 : ambientWeight);
4706 slowHead->bbNatLoopNum = ambientLoop;
4707 slowHead->bbJumpDest = e2;
4709 BasicBlock* condLast = optInsertLoopChoiceConditions(context, loopInd, h, slowHead);
4710 condLast->bbJumpDest = slowHead;
4712 // If h2 is present it is already the head or replace 'h' by 'condLast'.
4715 optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, condLast);
4717 assert(foundIt && e2 != nullptr);
4719 fgUpdateChangedFlowGraph();
4722 //--------------------------------------------------------------------------------------------------
4723 // optInsertLoopChoiceConditions - Insert the loop conditions for a loop between loop head and entry
4726 // context loop cloning context variable
4727 // loopNum the loop index
4728 // head loop head for "loopNum"
4729 // slowHead the slow path loop head
4735 // Create the following structure.
4737 // Note below that the cond0 is inverted in head i.e., if true jump to cond1. This is because
4738 // condn cannot jtrue to loop head h2. It has to be from a direct pred block.
4740 // cond0 (in h) -?> cond1
4741 // slowHead --> e2 (slowHead) always
4742 // !cond1 -?> slowHead
4743 // !cond2 -?> slowHead
4745 // !condn -?> slowHead
4748 // Insert condition 0 in 'h' and create other condition blocks and insert conditions in them.
4750 BasicBlock* Compiler::optInsertLoopChoiceConditions(LoopCloneContext* context,
4753 BasicBlock* slowHead)
4755 JITDUMP("Inserting loop cloning conditions\n");
4756 assert(context->HasBlockConditions(loopNum));
4758 BasicBlock* curCond = head;
4759 ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* levelCond = context->GetBlockConditions(loopNum);
4760 for (unsigned i = 0; i < levelCond->Size(); ++i)
4762 bool isHeaderBlock = (curCond == head);
4764 // Flip the condition if header block.
4765 context->CondToStmtInBlock(this, *((*levelCond)[i]), curCond, isHeaderBlock);
4767 // Create each condition block ensuring wiring between them.
4768 BasicBlock* tmp = fgNewBBafter(BBJ_COND, isHeaderBlock ? slowHead : curCond, true);
4769 curCond->bbJumpDest = isHeaderBlock ? tmp : slowHead;
4772 curCond->inheritWeight(head);
4773 curCond->bbNatLoopNum = head->bbNatLoopNum;
4774 JITDUMP("Created new block %02d for new level\n", curCond->bbNum);
4777 // Finally insert cloning conditions after all deref conditions have been inserted.
4778 context->CondToStmtInBlock(this, *(context->GetConditions(loopNum)), curCond, false);
4782 void Compiler::optEnsureUniqueHead(unsigned loopInd, unsigned ambientWeight)
4784 BasicBlock* h = optLoopTable[loopInd].lpHead;
4785 BasicBlock* t = optLoopTable[loopInd].lpTop;
4786 BasicBlock* e = optLoopTable[loopInd].lpEntry;
4787 BasicBlock* b = optLoopTable[loopInd].lpBottom;
4789 // If "h" dominates the entry block, then it is the unique header.
4790 if (fgDominate(h, e))
4795 // Otherwise, create a new empty header block, make it the pred of the entry block,
4796 // and redirect the preds of the entry block to go to this.
4798 BasicBlock* beforeTop = t->bbPrev;
4799 // Make sure that the new block is in the same region as the loop.
4800 // (We will only create loops that are entirely within a region.)
4801 BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, beforeTop, true);
4802 // This is in the containing loop.
4803 h2->bbNatLoopNum = optLoopTable[loopInd].lpParent;
4804 h2->bbWeight = (h2->isRunRarely() ? 0 : ambientWeight);
4806 // We don't care where it was put; splice it between beforeTop and top.
4807 if (beforeTop->bbNext != h2)
4809 h2->bbPrev->setNext(h2->bbNext); // Splice h2 out.
4810 beforeTop->setNext(h2); // Splice h2 in, between beforeTop and t.
4814 if (h2->bbNext != e)
4816 h2->bbJumpKind = BBJ_ALWAYS;
4819 BlockSetOps::Assign(this, h2->bbReach, e->bbReach);
4821 // Redirect paths from preds of "e" to go to "h2" instead of "e".
4822 BlockToBlockMap* blockMap = new (getAllocator()) BlockToBlockMap(getAllocator());
4823 blockMap->Set(e, h2);
4825 for (flowList* predEntry = e->bbPreds; predEntry; predEntry = predEntry->flNext)
4827 BasicBlock* predBlock = predEntry->flBlock;
4829 // Skip if predBlock is in the loop.
4830 if (t->bbNum <= predBlock->bbNum && predBlock->bbNum <= b->bbNum)
4834 optRedirectBlock(predBlock, blockMap);
4837 optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h2);
4840 /*****************************************************************************
4842 * Determine the kind of interference for the call.
4845 /* static */ inline Compiler::callInterf Compiler::optCallInterf(GenTreePtr call)
4847 assert(call->gtOper == GT_CALL);
4849 // if not a helper, kills everything
4850 if (call->gtCall.gtCallType != CT_HELPER)
4855 // setfield and array address store kill all indirections
4856 switch (eeGetHelperNum(call->gtCall.gtCallMethHnd))
4858 case CORINFO_HELP_ASSIGN_REF: // Not strictly needed as we don't make a GT_CALL with this
4859 case CORINFO_HELP_CHECKED_ASSIGN_REF: // Not strictly needed as we don't make a GT_CALL with this
4860 case CORINFO_HELP_ASSIGN_BYREF: // Not strictly needed as we don't make a GT_CALL with this
4861 case CORINFO_HELP_SETFIELDOBJ:
4862 case CORINFO_HELP_ARRADDR_ST:
4864 return CALLINT_REF_INDIRS;
4866 case CORINFO_HELP_SETFIELDFLOAT:
4867 case CORINFO_HELP_SETFIELDDOUBLE:
4868 case CORINFO_HELP_SETFIELD8:
4869 case CORINFO_HELP_SETFIELD16:
4870 case CORINFO_HELP_SETFIELD32:
4871 case CORINFO_HELP_SETFIELD64:
4873 return CALLINT_SCL_INDIRS;
4875 case CORINFO_HELP_ASSIGN_STRUCT: // Not strictly needed as we don't use this in Jit32
4876 case CORINFO_HELP_MEMSET: // Not strictly needed as we don't make a GT_CALL with this
4877 case CORINFO_HELP_MEMCPY: // Not strictly needed as we don't make a GT_CALL with this
4878 case CORINFO_HELP_SETFIELDSTRUCT:
4880 return CALLINT_ALL_INDIRS;
4886 // other helpers kill nothing
4887 return CALLINT_NONE;
4890 /*****************************************************************************
4892 * See if the given tree can be computed in the given precision (which must
4893 * be smaller than the type of the tree for this to make sense). If 'doit'
4894 * is false, we merely check to see whether narrowing is possible; if we
4895 * get called with 'doit' being true, we actually perform the narrowing.
4898 bool Compiler::optNarrowTree(GenTreePtr tree, var_types srct, var_types dstt, ValueNumPair vnpNarrow, bool doit)
4904 noway_assert(genActualType(tree->gtType) == genActualType(srct));
4906 /* Assume we're only handling integer types */
4907 noway_assert(varTypeIsIntegral(srct));
4908 noway_assert(varTypeIsIntegral(dstt));
4910 unsigned srcSize = genTypeSize(srct);
4911 unsigned dstSize = genTypeSize(dstt);
4913 /* dstt must be smaller than srct to narrow */
4914 if (dstSize >= srcSize)
4919 /* Figure out what kind of a node we have */
4920 oper = tree->OperGet();
4921 kind = tree->OperKind();
4923 if (kind & GTK_ASGOP)
4925 noway_assert(doit == false);
4929 ValueNumPair NoVNPair = ValueNumPair();
4931 if (kind & GTK_LEAF)
4935 /* Constants can usually be narrowed by changing their value */
4936 CLANG_FORMAT_COMMENT_ANCHOR;
4938 #ifndef _TARGET_64BIT_
4943 lval = tree->gtIntConCommon.LngValue();
4972 if ((lval & lmask) != lval)
4977 tree->ChangeOperConst(GT_CNS_INT);
4978 tree->gtType = TYP_INT;
4979 tree->gtIntCon.gtIconVal = (int)lval;
4980 if (vnStore != nullptr)
4982 fgValueNumberTreeConst(tree);
4992 ival = tree->gtIntCon.gtIconVal;
5011 #ifdef _TARGET_64BIT_
5018 #endif // _TARGET_64BIT_
5023 if ((ival & imask) != ival)
5028 #ifdef _TARGET_64BIT_
5031 tree->gtType = TYP_INT;
5032 tree->gtIntCon.gtIconVal = (int)ival;
5033 if (vnStore != nullptr)
5035 fgValueNumberTreeConst(tree);
5038 #endif // _TARGET_64BIT_
5042 /* Operands that are in memory can usually be narrowed
5043 simply by changing their gtType */
5046 /* We only allow narrowing long -> int for a GT_LCL_VAR */
5047 if (dstSize == sizeof(int))
5060 noway_assert(doit == false);
5064 if (kind & (GTK_BINOP | GTK_UNOP))
5067 op1 = tree->gtOp.gtOp1;
5069 op2 = tree->gtOp.gtOp2;
5071 switch (tree->gtOper)
5074 noway_assert(genActualType(tree->gtType) == genActualType(op2->gtType));
5076 // Is op2 a small constant than can be narrowed into dstt?
5077 // if so the result of the GT_AND will also fit into 'dstt' and can be narrowed
5078 if ((op2->gtOper == GT_CNS_INT) && optNarrowTree(op2, srct, dstt, NoVNPair, false))
5080 // We will change the type of the tree and narrow op2
5084 tree->gtType = genActualType(dstt);
5085 tree->SetVNs(vnpNarrow);
5087 optNarrowTree(op2, srct, dstt, NoVNPair, true);
5088 // We may also need to cast away the upper bits of op1
5091 assert(tree->gtType == TYP_INT);
5092 op1 = gtNewCastNode(TYP_INT, op1, TYP_INT);
5094 op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
5096 tree->gtOp.gtOp1 = op1;
5107 if (tree->gtOverflow() || varTypeIsSmall(dstt))
5109 noway_assert(doit == false);
5117 noway_assert(genActualType(tree->gtType) == genActualType(op1->gtType));
5118 noway_assert(genActualType(tree->gtType) == genActualType(op2->gtType));
5120 if (gtIsActiveCSE_Candidate(op1) || gtIsActiveCSE_Candidate(op2) ||
5121 !optNarrowTree(op1, srct, dstt, NoVNPair, doit) || !optNarrowTree(op2, srct, dstt, NoVNPair, doit))
5123 noway_assert(doit == false);
5127 /* Simply change the type of the tree */
5131 if (tree->gtOper == GT_MUL && (tree->gtFlags & GTF_MUL_64RSLT))
5133 tree->gtFlags &= ~GTF_MUL_64RSLT;
5136 tree->gtType = genActualType(dstt);
5137 tree->SetVNs(vnpNarrow);
5145 /* Simply change the type of the tree */
5147 if (doit && (dstSize <= genTypeSize(tree->gtType)))
5149 tree->gtType = genSignedType(dstt);
5150 tree->SetVNs(vnpNarrow);
5152 /* Make sure we don't mess up the variable type */
5153 if ((oper == GT_LCL_VAR) || (oper == GT_LCL_FLD))
5155 tree->gtFlags |= GTF_VAR_CAST;
5168 /* These can always be narrowed since they only represent 0 or 1 */
5173 var_types cast = tree->CastToType();
5174 var_types oprt = op1->TypeGet();
5175 unsigned oprSize = genTypeSize(oprt);
5182 if (varTypeIsIntegralOrI(dstt) != varTypeIsIntegralOrI(oprt))
5187 if (tree->gtOverflow())
5192 /* Is this a cast from the type we're narrowing to or a smaller one? */
5194 if (oprSize <= dstSize)
5196 /* Bash the target type of the cast */
5200 dstt = genSignedType(dstt);
5202 if (oprSize == dstSize)
5204 // Same size: change the CAST into a NOP
5205 tree->ChangeOper(GT_NOP);
5206 tree->gtType = dstt;
5207 tree->gtOp.gtOp2 = nullptr;
5208 tree->gtVNPair = op1->gtVNPair; // Set to op1's ValueNumber
5212 // oprSize is smaller
5213 assert(oprSize < dstSize);
5215 // Change the CastToType in the GT_CAST node
5216 tree->CastToType() = dstt;
5218 // The result type of a GT_CAST is never a small type.
5219 // Use genActualType to widen dstt when it is a small types.
5220 tree->gtType = genActualType(dstt);
5221 tree->SetVNs(vnpNarrow);
5231 if (!gtIsActiveCSE_Candidate(op2) && optNarrowTree(op2, srct, dstt, vnpNarrow, doit))
5233 /* Simply change the type of the tree */
5237 tree->gtType = genActualType(dstt);
5238 tree->SetVNs(vnpNarrow);
5245 noway_assert(doit == false);
5253 /*****************************************************************************
5255 * The following logic figures out whether the given variable is assigned
5256 * somewhere in a list of basic blocks (or in an entire loop).
5259 Compiler::fgWalkResult Compiler::optIsVarAssgCB(GenTreePtr* pTree, fgWalkData* data)
5261 GenTreePtr tree = *pTree;
5263 if (tree->OperKind() & GTK_ASGOP)
5265 GenTreePtr dest = tree->gtOp.gtOp1;
5266 genTreeOps destOper = dest->OperGet();
5268 isVarAssgDsc* desc = (isVarAssgDsc*)data->pCallbackData;
5269 assert(desc && desc->ivaSelf == desc);
5271 if (destOper == GT_LCL_VAR)
5273 unsigned tvar = dest->gtLclVarCommon.gtLclNum;
5274 if (tvar < lclMAX_ALLSET_TRACKED)
5276 AllVarSetOps::AddElemD(data->compiler, desc->ivaMaskVal, tvar);
5280 desc->ivaMaskIncomplete = true;
5283 if (tvar == desc->ivaVar)
5285 if (tree != desc->ivaSkip)
5291 else if (destOper == GT_LCL_FLD)
5293 /* We can't track every field of every var. Moreover, indirections
5294 may access different parts of the var as different (but
5295 overlapping) fields. So just treat them as indirect accesses */
5297 // unsigned lclNum = dest->gtLclFld.gtLclNum;
5298 // noway_assert(lvaTable[lclNum].lvAddrTaken);
5300 varRefKinds refs = varTypeIsGC(tree->TypeGet()) ? VR_IND_REF : VR_IND_SCL;
5301 desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | refs);
5303 else if (destOper == GT_CLS_VAR)
5305 desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | VR_GLB_VAR);
5307 else if (destOper == GT_IND)
5309 /* Set the proper indirection bits */
5311 varRefKinds refs = varTypeIsGC(tree->TypeGet()) ? VR_IND_REF : VR_IND_SCL;
5312 desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | refs);
5315 else if (tree->gtOper == GT_CALL)
5317 isVarAssgDsc* desc = (isVarAssgDsc*)data->pCallbackData;
5318 assert(desc && desc->ivaSelf == desc);
5320 desc->ivaMaskCall = optCallInterf(tree);
5323 return WALK_CONTINUE;
5326 /*****************************************************************************/
5328 bool Compiler::optIsVarAssigned(BasicBlock* beg, BasicBlock* end, GenTreePtr skip, unsigned var)
5333 desc.ivaSkip = skip;
5335 desc.ivaSelf = &desc;
5338 desc.ivaMaskCall = CALLINT_NONE;
5339 AllVarSetOps::AssignNoCopy(this, desc.ivaMaskVal, AllVarSetOps::MakeEmpty(this));
5345 for (GenTreeStmt* stmt = beg->firstStmt(); stmt; stmt = stmt->gtNextStmt)
5347 noway_assert(stmt->gtOper == GT_STMT);
5348 if (fgWalkTreePre(&stmt->gtStmtExpr, optIsVarAssgCB, &desc))
5370 /*****************************************************************************/
5371 int Compiler::optIsSetAssgLoop(unsigned lnum, ALLVARSET_VALARG_TP vars, varRefKinds inds)
5375 /* Get hold of the loop descriptor */
5377 noway_assert(lnum < optLoopCount);
5378 loop = optLoopTable + lnum;
5380 /* Do we already know what variables are assigned within this loop? */
5382 if (!(loop->lpFlags & LPFLG_ASGVARS_YES))
5389 /* Prepare the descriptor used by the tree walker call-back */
5391 desc.ivaVar = (unsigned)-1;
5392 desc.ivaSkip = nullptr;
5394 desc.ivaSelf = &desc;
5396 AllVarSetOps::AssignNoCopy(this, desc.ivaMaskVal, AllVarSetOps::MakeEmpty(this));
5397 desc.ivaMaskInd = VR_NONE;
5398 desc.ivaMaskCall = CALLINT_NONE;
5399 desc.ivaMaskIncomplete = false;
5401 /* Now walk all the statements of the loop */
5403 beg = loop->lpHead->bbNext;
5404 end = loop->lpBottom;
5406 for (/**/; /**/; beg = beg->bbNext)
5410 for (GenTreeStmt* stmt = beg->FirstNonPhiDef(); stmt; stmt = stmt->gtNextStmt)
5412 noway_assert(stmt->gtOper == GT_STMT);
5413 fgWalkTreePre(&stmt->gtStmtExpr, optIsVarAssgCB, &desc);
5415 if (desc.ivaMaskIncomplete)
5417 loop->lpFlags |= LPFLG_ASGVARS_INC;
5427 AllVarSetOps::Assign(this, loop->lpAsgVars, desc.ivaMaskVal);
5428 loop->lpAsgInds = desc.ivaMaskInd;
5429 loop->lpAsgCall = desc.ivaMaskCall;
5431 /* Now we know what variables are assigned in the loop */
5433 loop->lpFlags |= LPFLG_ASGVARS_YES;
5436 /* Now we can finally test the caller's mask against the loop's */
5437 if (!AllVarSetOps::IsEmptyIntersection(this, loop->lpAsgVars, vars) || (loop->lpAsgInds & inds))
5442 switch (loop->lpAsgCall)
5446 /* Can't hoist if the call might have side effect on an indirection. */
5448 if (loop->lpAsgInds != VR_NONE)
5455 case CALLINT_REF_INDIRS:
5457 /* Can't hoist if the call might have side effect on an ref indirection. */
5459 if (loop->lpAsgInds & VR_IND_REF)
5466 case CALLINT_SCL_INDIRS:
5468 /* Can't hoist if the call might have side effect on an non-ref indirection. */
5470 if (loop->lpAsgInds & VR_IND_SCL)
5477 case CALLINT_ALL_INDIRS:
5479 /* Can't hoist if the call might have side effect on any indirection. */
5481 if (loop->lpAsgInds & (VR_IND_REF | VR_IND_SCL))
5490 /* Other helpers kill nothing */
5495 noway_assert(!"Unexpected lpAsgCall value");
5501 void Compiler::optPerformHoistExpr(GenTreePtr origExpr, unsigned lnum)
5506 printf("\nHoisting a copy of ");
5507 printTreeID(origExpr);
5508 printf(" into PreHeader for loop L%02u <BB%02u..BB%02u>:\n", lnum, optLoopTable[lnum].lpFirst->bbNum,
5509 optLoopTable[lnum].lpBottom->bbNum);
5510 gtDispTree(origExpr);
5515 // This loop has to be in a form that is approved for hoisting.
5516 assert(optLoopTable[lnum].lpFlags & LPFLG_HOISTABLE);
5518 // Create a copy of the expression and mark it for CSE's.
5519 GenTreePtr hoistExpr = gtCloneExpr(origExpr, GTF_MAKE_CSE);
5521 // At this point we should have a cloned expression, marked with the GTF_MAKE_CSE flag
5522 assert(hoistExpr != origExpr);
5523 assert(hoistExpr->gtFlags & GTF_MAKE_CSE);
5525 GenTreePtr hoist = hoistExpr;
5526 // The value of the expression isn't used (unless it's an assignment).
5527 if (hoistExpr->OperGet() != GT_ASG)
5529 hoist = gtUnusedValNode(hoistExpr);
5532 /* Put the statement in the preheader */
5534 fgCreateLoopPreHeader(lnum);
5536 BasicBlock* preHead = optLoopTable[lnum].lpHead;
5537 assert(preHead->bbJumpKind == BBJ_NONE);
5539 // fgMorphTree and lvaRecursiveIncRefCounts requires that compCurBB be the block that contains
5540 // (or in this case, will contain) the expression.
5541 compCurBB = preHead;
5543 // Increment the ref counts of any local vars appearing in "hoist".
5544 // Note that we need to do this before fgMorphTree() as fgMorph() could constant
5545 // fold away some of the lcl vars referenced by "hoist".
5546 lvaRecursiveIncRefCounts(hoist);
5548 hoist = fgMorphTree(hoist);
5550 GenTreePtr hoistStmt = gtNewStmt(hoist);
5551 hoistStmt->gtFlags |= GTF_STMT_CMPADD;
5553 /* simply append the statement at the end of the preHead's list */
5555 GenTreePtr treeList = preHead->bbTreeList;
5559 /* append after last statement */
5561 GenTreePtr last = treeList->gtPrev;
5562 assert(last->gtNext == nullptr);
5564 last->gtNext = hoistStmt;
5565 hoistStmt->gtPrev = last;
5566 treeList->gtPrev = hoistStmt;
5570 /* Empty pre-header - store the single statement in the block */
5572 preHead->bbTreeList = hoistStmt;
5573 hoistStmt->gtPrev = hoistStmt;
5576 hoistStmt->gtNext = nullptr;
5581 printf("This hoisted copy placed in PreHeader (BB%02u):\n", preHead->bbNum);
5586 if (fgStmtListThreaded)
5588 gtSetStmtInfo(hoistStmt);
5589 fgSetStmtSeq(hoistStmt);
5593 if (m_nodeTestData != nullptr)
5596 // What is the depth of the loop "lnum"?
5598 unsigned lnumIter = lnum;
5599 while (optLoopTable[lnumIter].lpParent != BasicBlock::NOT_IN_LOOP)
5602 lnumIter = optLoopTable[lnumIter].lpParent;
5605 NodeToTestDataMap* testData = GetNodeTestData();
5607 TestLabelAndNum tlAndN;
5608 if (testData->Lookup(origExpr, &tlAndN) && tlAndN.m_tl == TL_LoopHoist)
5610 if (tlAndN.m_num == -1)
5613 printTreeID(origExpr);
5614 printf(" was declared 'do not hoist', but is being hoisted.\n");
5617 else if (tlAndN.m_num != depth)
5620 printTreeID(origExpr);
5621 printf(" was declared as hoistable from loop at nesting depth %d; actually hoisted from loop at depth "
5623 tlAndN.m_num, depth);
5628 // We've correctly hoisted this, so remove the annotation. Later, we'll check for any remaining "must
5629 // hoist" annotations.
5630 testData->Remove(origExpr);
5631 // Now we insert an annotation to make sure that "hoistExpr" is actually CSE'd.
5632 tlAndN.m_tl = TL_CSE_Def;
5633 tlAndN.m_num = m_loopHoistCSEClass++;
5634 testData->Set(hoistExpr, tlAndN);
5640 #if LOOP_HOIST_STATS
5641 if (!m_curLoopHasHoistedExpression)
5643 m_loopsWithHoistedExpressions++;
5644 m_curLoopHasHoistedExpression = true;
5646 m_totalHoistedExpressions++;
5647 #endif // LOOP_HOIST_STATS
5650 void Compiler::optHoistLoopCode()
5652 // If we don't have any loops in the method then take an early out now.
5653 if (optLoopCount == 0)
5659 unsigned jitNoHoist = JitConfig.JitNoHoist();
5667 // The code in this #if has been useful in debugging loop cloning issues, by
5668 // enabling selective enablement of the loop cloning optimization according to
5671 unsigned methHash = info.compMethodHash();
5672 char* lostr = getenv("loophoisthashlo");
5673 unsigned methHashLo = 0;
5676 sscanf_s(lostr, "%x", &methHashLo);
5677 // methHashLo = (unsigned(atoi(lostr)) << 2); // So we don't have to use negative numbers.
5679 char* histr = getenv("loophoisthashhi");
5680 unsigned methHashHi = UINT32_MAX;
5683 sscanf_s(histr, "%x", &methHashHi);
5684 // methHashHi = (unsigned(atoi(histr)) << 2); // So we don't have to use negative numbers.
5686 if (methHash < methHashLo || methHash > methHashHi)
5688 printf("Doing loop hoisting in %s (0x%x).\n", info.compFullName, methHash);
5690 #endif // 0 -- debugging loop cloning issues
5695 printf("\n*************** In optHoistLoopCode()\n");
5696 printf("Blocks/Trees before phase\n");
5697 fgDispBasicBlocks(true);
5702 // Consider all the loop nests, in outer-to-inner order (thus hoisting expressions outside the largest loop in which
5703 // they are invariant.)
5704 LoopHoistContext hoistCtxt(this);
5705 for (unsigned lnum = 0; lnum < optLoopCount; lnum++)
5707 if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED)
5712 if (optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP)
5714 optHoistLoopNest(lnum, &hoistCtxt);
5723 printf("Blocks/Trees after optHoistLoopCode() modified flowgraph\n");
5724 fgDispBasicBlocks(true);
5728 // Make sure that the predecessor lists are accurate
5729 fgDebugCheckBBlist();
5734 // Test Data stuff..
5735 // If we have no test data, early out.
5736 if (m_nodeTestData == nullptr)
5740 NodeToTestDataMap* testData = GetNodeTestData();
5741 for (NodeToTestDataMap::KeyIterator ki = testData->Begin(); !ki.Equal(testData->End()); ++ki)
5743 TestLabelAndNum tlAndN;
5744 GenTreePtr node = ki.Get();
5745 bool b = testData->Lookup(node, &tlAndN);
5747 if (tlAndN.m_tl != TL_LoopHoist)
5751 // Otherwise, it is a loop hoist annotation.
5752 assert(tlAndN.m_num < 100); // >= 100 indicates nested static field address, should already have been moved.
5753 if (tlAndN.m_num >= 0)
5757 printf(" was declared 'must hoist', but has not been hoisted.\n");
5764 void Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt)
5766 // Do this loop, then recursively do all nested loops.
5767 CLANG_FORMAT_COMMENT_ANCHOR;
5769 #if LOOP_HOIST_STATS
5771 m_curLoopHasHoistedExpression = false;
5772 m_loopsConsidered++;
5773 #endif // LOOP_HOIST_STATS
5775 optHoistThisLoop(lnum, hoistCtxt);
5777 VNSet* hoistedInCurLoop = hoistCtxt->ExtractHoistedInCurLoop();
5779 if (optLoopTable[lnum].lpChild != BasicBlock::NOT_IN_LOOP)
5781 // Add the ones hoisted in "lnum" to "hoistedInParents" for any nested loops.
5782 // TODO-Cleanup: we should have a set abstraction for loops.
5783 if (hoistedInCurLoop != nullptr)
5785 for (VNSet::KeyIterator keys = hoistedInCurLoop->Begin(); !keys.Equal(hoistedInCurLoop->End()); ++keys)
5789 assert(!hoistCtxt->m_hoistedInParentLoops.Lookup(keys.Get(), &b));
5791 hoistCtxt->m_hoistedInParentLoops.Set(keys.Get(), true);
5795 for (unsigned child = optLoopTable[lnum].lpChild; child != BasicBlock::NOT_IN_LOOP;
5796 child = optLoopTable[child].lpSibling)
5798 optHoistLoopNest(child, hoistCtxt);
5802 // TODO-Cleanup: we should have a set abstraction for loops.
5803 if (hoistedInCurLoop != nullptr)
5805 for (VNSet::KeyIterator keys = hoistedInCurLoop->Begin(); !keys.Equal(hoistedInCurLoop->End()); ++keys)
5807 // Note that we asserted when we added these that they hadn't been members, so removing is appropriate.
5808 hoistCtxt->m_hoistedInParentLoops.Remove(keys.Get());
5814 void Compiler::optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt)
5816 LoopDsc* pLoopDsc = &optLoopTable[lnum];
5818 /* If loop was removed continue */
5820 if (pLoopDsc->lpFlags & LPFLG_REMOVED)
5825 /* Get the head and tail of the loop */
5827 BasicBlock* head = pLoopDsc->lpHead;
5828 BasicBlock* tail = pLoopDsc->lpBottom;
5829 BasicBlock* lbeg = pLoopDsc->lpEntry;
5832 // We must have a do-while loop
5833 if ((pLoopDsc->lpFlags & LPFLG_DO_WHILE) == 0)
5838 // The loop-head must dominate the loop-entry.
5839 // TODO-CQ: Couldn't we make this true if it's not?
5840 if (!fgDominate(head, lbeg))
5845 // if lbeg is the start of a new try block then we won't be able to hoist
5846 if (!BasicBlock::sameTryRegion(head, lbeg))
5851 // We don't bother hoisting when inside of a catch block
5852 if ((lbeg->bbCatchTyp != BBCT_NONE) && (lbeg->bbCatchTyp != BBCT_FINALLY))
5857 pLoopDsc->lpFlags |= LPFLG_HOISTABLE;
5859 unsigned begn = lbeg->bbNum;
5860 unsigned endn = tail->bbNum;
5862 // Ensure the per-loop sets/tables are empty.
5863 hoistCtxt->m_curLoopVnInvariantCache.RemoveAll();
5868 printf("optHoistLoopCode for loop L%02u <BB%02u..BB%02u>:\n", lnum, begn, endn);
5869 printf(" Loop body %s a call\n", pLoopDsc->lpContainsCall ? "contains" : "does not contain");
5873 VARSET_TP VARSET_INIT_NOCOPY(loopVars, VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, pLoopDsc->lpVarUseDef));
5875 pLoopDsc->lpVarInOutCount = VarSetOps::Count(this, pLoopDsc->lpVarInOut);
5876 pLoopDsc->lpLoopVarCount = VarSetOps::Count(this, loopVars);
5877 pLoopDsc->lpHoistedExprCount = 0;
5879 #ifndef _TARGET_64BIT_
5880 unsigned longVarsCount = VarSetOps::Count(this, lvaLongVars);
5882 if (longVarsCount > 0)
5884 // Since 64-bit variables take up two registers on 32-bit targets, we increase
5885 // the Counts such that each TYP_LONG variable counts twice.
5887 VARSET_TP VARSET_INIT_NOCOPY(loopLongVars, VarSetOps::Intersection(this, loopVars, lvaLongVars));
5888 VARSET_TP VARSET_INIT_NOCOPY(inOutLongVars, VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, lvaLongVars));
5893 printf("\n LONGVARS(%d)=", VarSetOps::Count(this, lvaLongVars));
5894 lvaDispVarSet(lvaLongVars);
5897 pLoopDsc->lpLoopVarCount += VarSetOps::Count(this, loopLongVars);
5898 pLoopDsc->lpVarInOutCount += VarSetOps::Count(this, inOutLongVars);
5900 #endif // !_TARGET_64BIT_
5905 printf("\n USEDEF (%d)=", VarSetOps::Count(this, pLoopDsc->lpVarUseDef));
5906 lvaDispVarSet(pLoopDsc->lpVarUseDef);
5908 printf("\n INOUT (%d)=", pLoopDsc->lpVarInOutCount);
5909 lvaDispVarSet(pLoopDsc->lpVarInOut);
5911 printf("\n LOOPVARS(%d)=", pLoopDsc->lpLoopVarCount);
5912 lvaDispVarSet(loopVars);
5917 unsigned floatVarsCount = VarSetOps::Count(this, lvaFloatVars);
5919 if (floatVarsCount > 0)
5921 VARSET_TP VARSET_INIT_NOCOPY(loopFPVars, VarSetOps::Intersection(this, loopVars, lvaFloatVars));
5922 VARSET_TP VARSET_INIT_NOCOPY(inOutFPVars, VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, lvaFloatVars));
5924 pLoopDsc->lpLoopVarFPCount = VarSetOps::Count(this, loopFPVars);
5925 pLoopDsc->lpVarInOutFPCount = VarSetOps::Count(this, inOutFPVars);
5926 pLoopDsc->lpHoistedFPExprCount = 0;
5928 pLoopDsc->lpLoopVarCount -= pLoopDsc->lpLoopVarFPCount;
5929 pLoopDsc->lpVarInOutCount -= pLoopDsc->lpVarInOutFPCount;
5934 printf(" INOUT-FP(%d)=", pLoopDsc->lpVarInOutFPCount);
5935 lvaDispVarSet(inOutFPVars);
5937 printf("\n LOOPV-FP(%d)=", pLoopDsc->lpLoopVarFPCount);
5938 lvaDispVarSet(loopFPVars);
5942 else // (floatVarsCount == 0)
5944 pLoopDsc->lpLoopVarFPCount = 0;
5945 pLoopDsc->lpVarInOutFPCount = 0;
5946 pLoopDsc->lpHoistedFPExprCount = 0;
5949 // Find the set of definitely-executed blocks.
5950 // Ideally, the definitely-executed blocks are the ones that post-dominate the entry block.
5951 // Until we have post-dominators, we'll special-case for single-exit blocks.
5952 ExpandArrayStack<BasicBlock*> defExec(getAllocatorLoopHoist());
5953 if (pLoopDsc->lpFlags & LPFLG_ONE_EXIT)
5955 assert(pLoopDsc->lpExit != nullptr);
5956 BasicBlock* cur = pLoopDsc->lpExit;
5957 // Push dominators, until we reach "entry" or exit the loop.
5958 while (cur != nullptr && pLoopDsc->lpContains(cur) && cur != pLoopDsc->lpEntry)
5963 // If we didn't reach the entry block, give up and *just* push the entry block.
5964 if (cur != pLoopDsc->lpEntry)
5968 defExec.Push(pLoopDsc->lpEntry);
5970 else // More than one exit
5972 // We'll assume that only the entry block is definitely executed.
5973 // We could in the future do better.
5974 defExec.Push(pLoopDsc->lpEntry);
5977 while (defExec.Size() > 0)
5979 // Consider in reverse order: dominator before dominatee.
5980 BasicBlock* blk = defExec.Pop();
5981 optHoistLoopExprsForBlock(blk, lnum, hoistCtxt);
5985 // Hoist any expressions in "blk" that are invariant in loop "lnum" outside of "blk" and into a PreHead for loop "lnum".
5986 void Compiler::optHoistLoopExprsForBlock(BasicBlock* blk, unsigned lnum, LoopHoistContext* hoistCtxt)
5988 LoopDsc* pLoopDsc = &optLoopTable[lnum];
5989 bool firstBlockAndBeforeSideEffect = (blk == pLoopDsc->lpEntry);
5990 unsigned blkWeight = blk->getBBWeight(this);
5995 printf(" optHoistLoopExprsForBlock BB%02u (weight=%6s) of loop L%02u <BB%02u..BB%02u>, firstBlock is %s\n",
5996 blk->bbNum, refCntWtd2str(blkWeight), lnum, pLoopDsc->lpFirst->bbNum, pLoopDsc->lpBottom->bbNum,
5997 firstBlockAndBeforeSideEffect ? "true" : "false");
5998 if (blkWeight < (BB_UNITY_WEIGHT / 10))
6000 printf(" block weight is too small to perform hoisting.\n");
6005 if (blkWeight < (BB_UNITY_WEIGHT / 10))
6007 // Block weight is too small to perform hoisting.
6011 for (GenTreeStmt* stmt = blk->FirstNonPhiDef(); stmt; stmt = stmt->gtNextStmt)
6013 GenTreePtr stmtTree = stmt->gtStmtExpr;
6015 (void)optHoistLoopExprsForTree(stmtTree, lnum, hoistCtxt, &firstBlockAndBeforeSideEffect, &hoistable);
6018 // we will try to hoist the top-level stmtTree
6019 optHoistCandidate(stmtTree, lnum, hoistCtxt);
6024 bool Compiler::optIsProfitableToHoistableTree(GenTreePtr tree, unsigned lnum)
6026 LoopDsc* pLoopDsc = &optLoopTable[lnum];
6028 bool loopContainsCall = pLoopDsc->lpContainsCall;
6031 int hoistedExprCount;
6035 if (varTypeIsFloating(tree->TypeGet()))
6037 hoistedExprCount = pLoopDsc->lpHoistedFPExprCount;
6038 loopVarCount = pLoopDsc->lpLoopVarFPCount;
6039 varInOutCount = pLoopDsc->lpVarInOutFPCount;
6041 availRegCount = CNT_CALLEE_SAVED_FLOAT;
6042 if (!loopContainsCall)
6044 availRegCount += CNT_CALLEE_TRASH_FLOAT - 1;
6047 // For ARM each double takes two FP registers
6048 // For now on ARM we won't track singles/doubles
6049 // and instead just assume that we always have doubles.
6056 hoistedExprCount = pLoopDsc->lpHoistedExprCount;
6057 loopVarCount = pLoopDsc->lpLoopVarCount;
6058 varInOutCount = pLoopDsc->lpVarInOutCount;
6060 availRegCount = CNT_CALLEE_SAVED - 1;
6061 if (!loopContainsCall)
6063 availRegCount += CNT_CALLEE_TRASH - 1;
6065 #ifndef _TARGET_64BIT_
6066 // For our 32-bit targets Long types take two registers.
6067 if (varTypeIsLong(tree->TypeGet()))
6069 availRegCount = (availRegCount + 1) / 2;
6074 // decrement the availRegCount by the count of expression that we have already hoisted.
6075 availRegCount -= hoistedExprCount;
6077 // the variables that are read/written inside the loop should
6078 // always be a subset of the InOut variables for the loop
6079 assert(loopVarCount <= varInOutCount);
6081 // When loopVarCount >= availRegCount we believe that all of the
6082 // available registers will get used to hold LclVars inside the loop.
6083 // This pessimistically assumes that each loopVar has a conflicting
6084 // lifetime with every other loopVar.
6085 // For this case we will hoist the expression only if is profitable
6086 // to place it in a stack home location (gtCostEx >= 2*IND_COST_EX)
6087 // as we believe it will be placed in the stack or one of the other
6088 // loopVars will be spilled into the stack
6090 if (loopVarCount >= availRegCount)
6092 // Don't hoist expressions that are not heavy: tree->gtCostEx < (2*IND_COST_EX)
6093 if (tree->gtCostEx < (2 * IND_COST_EX))
6099 // When varInOutCount < availRegCount we are know that there are
6100 // some available register(s) when we enter the loop body.
6101 // When varInOutCount == availRegCount there often will be a register
6102 // available when we enter the loop body, since a loop often defines a
6103 // LclVar on exit or there is often at least one LclVar that is worth
6104 // spilling to the stack to make way for this hoisted expression.
6105 // So we are willing hoist an expression with gtCostEx == MIN_CSE_COST
6107 if (varInOutCount > availRegCount)
6109 // Don't hoist expressions that barely meet CSE cost requirements: tree->gtCostEx == MIN_CSE_COST
6110 if (tree->gtCostEx <= MIN_CSE_COST + 1)
6120 // This function returns true if 'tree' is a loop invariant expression.
6121 // It also sets '*pHoistable' to true if 'tree' can be hoisted into a loop PreHeader block
6123 bool Compiler::optHoistLoopExprsForTree(
6124 GenTreePtr tree, unsigned lnum, LoopHoistContext* hoistCtxt, bool* pFirstBlockAndBeforeSideEffect, bool* pHoistable)
6126 // First do the children.
6127 // We must keep track of whether each child node was hoistable or not
6129 unsigned nChildren = tree->NumChildren();
6130 bool childrenHoistable[GenTree::MAX_CHILDREN];
6132 // Initialize the array elements for childrenHoistable[] to false
6133 for (unsigned i = 0; i < nChildren; i++)
6135 childrenHoistable[i] = false;
6138 bool treeIsInvariant = true;
6139 for (unsigned childNum = 0; childNum < nChildren; childNum++)
6141 if (!optHoistLoopExprsForTree(tree->GetChild(childNum), lnum, hoistCtxt, pFirstBlockAndBeforeSideEffect,
6142 &childrenHoistable[childNum]))
6144 treeIsInvariant = false;
6148 // If all the children of "tree" are hoistable, then "tree" itself can be hoisted
6150 bool treeIsHoistable = treeIsInvariant;
6152 // But we must see if anything else prevents "tree" from being hoisted.
6154 if (treeIsInvariant)
6156 // Tree must be a suitable CSE candidate for us to be able to hoist it.
6157 treeIsHoistable = optIsCSEcandidate(tree);
6159 // If it's a call, it must be a helper call, and be pure.
6160 // Further, if it may run a cctor, it must be labeled as "Hoistable"
6161 // (meaning it won't run a cctor because the class is not precise-init).
6162 if (treeIsHoistable && tree->OperGet() == GT_CALL)
6164 GenTreeCall* call = tree->AsCall();
6165 if (call->gtCallType != CT_HELPER)
6167 treeIsHoistable = false;
6171 CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
6172 if (!s_helperCallProperties.IsPure(helpFunc))
6174 treeIsHoistable = false;
6176 else if (s_helperCallProperties.MayRunCctor(helpFunc) && (call->gtFlags & GTF_CALL_HOISTABLE) == 0)
6178 treeIsHoistable = false;
6183 if (treeIsHoistable)
6185 if (!(*pFirstBlockAndBeforeSideEffect))
6187 // For now, we give up on an expression that might raise an exception if it is after the
6188 // first possible global side effect (and we assume we're after that if we're not in the first block).
6189 // TODO-CQ: this is when we might do loop cloning.
6191 if ((tree->gtFlags & GTF_EXCEPT) != 0)
6193 treeIsHoistable = false;
6196 // Currently we must give up on reads from static variables (even if we are in the first block).
6198 if (tree->OperGet() == GT_CLS_VAR)
6200 // TODO-CQ: test that fails if we hoist GT_CLS_VAR: JIT\Directed\Languages\ComponentPascal\pi_r.exe
6202 treeIsHoistable = false;
6206 // Is the value of the whole tree loop invariant?
6208 optVNIsLoopInvariant(tree->gtVNPair.GetLiberal(), lnum, &hoistCtxt->m_curLoopVnInvariantCache);
6210 // Is the value of the whole tree loop invariant?
6211 if (!treeIsInvariant)
6213 treeIsHoistable = false;
6217 // Check if we need to set '*pFirstBlockAndBeforeSideEffect' to false.
6218 // If we encounter a tree with a call in it
6219 // or if we see an assignment to global we set it to false.
6221 // If we are already set to false then we can skip these checks
6223 if (*pFirstBlockAndBeforeSideEffect)
6225 // For this purpose, we only care about memory side effects. We assume that expressions will
6226 // be hoisted so that they are evaluated in the same order as they would have been in the loop,
6227 // and therefore throw exceptions in the same order. (So we don't use GTF_GLOBALLY_VISIBLE_SIDE_EFFECTS
6228 // here, since that includes exceptions.)
6229 if (tree->gtFlags & GTF_CALL)
6231 *pFirstBlockAndBeforeSideEffect = false;
6233 else if (tree->OperIsAssignment())
6235 // If the LHS of the assignment has a global reference, then assume it's a global side effect.
6236 GenTreePtr lhs = tree->gtOp.gtOp1;
6237 if (lhs->gtFlags & GTF_GLOB_REF)
6239 *pFirstBlockAndBeforeSideEffect = false;
6242 else if (tree->OperIsCopyBlkOp())
6244 GenTreePtr args = tree->gtOp.gtOp1;
6245 assert(args->OperGet() == GT_LIST);
6246 if (args->gtOp.gtOp1->gtFlags & GTF_GLOB_REF)
6248 *pFirstBlockAndBeforeSideEffect = false;
6253 // If this 'tree' is hoistable then we return and the caller will
6254 // decide to hoist it as part of larger hoistable expression.
6256 if (!treeIsHoistable)
6258 // We are not hoistable so we will now hoist any hoistable children.
6260 for (unsigned childNum = 0; childNum < nChildren; childNum++)
6262 if (childrenHoistable[childNum])
6264 // We can't hoist the LHS of an assignment, isn't a real use.
6265 if (childNum == 0 && (tree->OperIsAssignment()))
6270 GenTreePtr child = tree->GetChild(childNum);
6272 // We try to hoist this 'child' tree
6273 optHoistCandidate(child, lnum, hoistCtxt);
6278 *pHoistable = treeIsHoistable;
6279 return treeIsInvariant;
6282 void Compiler::optHoistCandidate(GenTreePtr tree, unsigned lnum, LoopHoistContext* hoistCtxt)
6284 if (lnum == BasicBlock::NOT_IN_LOOP)
6286 // The hoisted expression isn't valid at any loop head so don't hoist this expression.
6290 // The outer loop also must be suitable for hoisting...
6291 if ((optLoopTable[lnum].lpFlags & LPFLG_HOISTABLE) == 0)
6296 // If the hoisted expression isn't valid at this loop head then break
6297 if (!optTreeIsValidAtLoopHead(tree, lnum))
6302 // It must pass the hoistable profitablity tests for this loop level
6303 if (!optIsProfitableToHoistableTree(tree, lnum))
6309 if (hoistCtxt->m_hoistedInParentLoops.Lookup(tree->gtVNPair.GetLiberal(), &b))
6311 // already hoisted in a parent loop, so don't hoist this expression.
6315 if (hoistCtxt->GetHoistedInCurLoop(this)->Lookup(tree->gtVNPair.GetLiberal(), &b))
6317 // already hoisted this expression in the current loop, so don't hoist this expression.
6321 // Expression can be hoisted
6322 optPerformHoistExpr(tree, lnum);
6324 // Increment lpHoistedExprCount or lpHoistedFPExprCount
6325 if (!varTypeIsFloating(tree->TypeGet()))
6327 optLoopTable[lnum].lpHoistedExprCount++;
6328 #ifndef _TARGET_64BIT_
6329 // For our 32-bit targets Long types take two registers.
6330 if (varTypeIsLong(tree->TypeGet()))
6332 optLoopTable[lnum].lpHoistedExprCount++;
6336 else // Floating point expr hoisted
6338 optLoopTable[lnum].lpHoistedFPExprCount++;
6341 // Record the hoisted expression in hoistCtxt
6342 hoistCtxt->GetHoistedInCurLoop(this)->Set(tree->gtVNPair.GetLiberal(), true);
6345 bool Compiler::optVNIsLoopInvariant(ValueNum vn, unsigned lnum, VNToBoolMap* loopVnInvariantCache)
6347 // If it is not a VN, is not loop-invariant.
6348 if (vn == ValueNumStore::NoVN)
6353 // We'll always short-circuit constants.
6354 if (vnStore->IsVNConstant(vn) || vn == vnStore->VNForVoid())
6359 // If we've done this query previously, don't repeat.
6360 bool previousRes = false;
6361 if (loopVnInvariantCache->Lookup(vn, &previousRes))
6368 if (vnStore->GetVNFunc(vn, &funcApp))
6370 if (funcApp.m_func == VNF_PhiDef)
6372 // First, make sure it's a "proper" phi -- the definition is a Phi application.
6373 VNFuncApp phiDefValFuncApp;
6374 if (!vnStore->GetVNFunc(funcApp.m_args[2], &phiDefValFuncApp) || phiDefValFuncApp.m_func != VNF_Phi)
6376 // It's not *really* a definition, rather a pass-through of some other VN.
6377 // (This could occur, say if both sides of an if-then-else diamond made the
6378 // same assignment to a variable.)
6379 res = optVNIsLoopInvariant(funcApp.m_args[2], lnum, loopVnInvariantCache);
6383 // Is the definition within the loop? If so, is not loop-invariant.
6384 unsigned lclNum = funcApp.m_args[0];
6385 unsigned ssaNum = funcApp.m_args[1];
6386 LclSsaVarDsc* ssaDef = lvaTable[lclNum].GetPerSsaData(ssaNum);
6387 res = !optLoopContains(lnum, ssaDef->m_defLoc.m_blk->bbNatLoopNum);
6390 else if (funcApp.m_func == VNF_PhiHeapDef)
6392 BasicBlock* defnBlk = reinterpret_cast<BasicBlock*>(vnStore->ConstantValue<ssize_t>(funcApp.m_args[0]));
6393 res = !optLoopContains(lnum, defnBlk->bbNatLoopNum);
6397 for (unsigned i = 0; i < funcApp.m_arity; i++)
6399 // TODO-CQ: We need to either make sure that *all* VN functions
6400 // always take VN args, or else have a list of arg positions to exempt, as implicitly
6402 if (!optVNIsLoopInvariant(funcApp.m_args[i], lnum, loopVnInvariantCache))
6412 // Non-function "new, unique" VN's may be annotated with the loop nest where
6413 // their definition occurs.
6414 BasicBlock::loopNumber vnLoopNum = vnStore->LoopOfVN(vn);
6416 if (vnLoopNum == MAX_LOOP_NUM)
6422 res = !optLoopContains(lnum, vnLoopNum);
6426 loopVnInvariantCache->Set(vn, res);
6430 bool Compiler::optTreeIsValidAtLoopHead(GenTreePtr tree, unsigned lnum)
6432 if (tree->OperIsLocal())
6434 GenTreeLclVarCommon* lclVar = tree->AsLclVarCommon();
6435 unsigned lclNum = lclVar->gtLclNum;
6437 // The lvlVar must be have an Ssa tracked lifetime
6438 if (fgExcludeFromSsa(lclNum))
6443 // If the loop does not contains the SSA def we can hoist it.
6444 if (!optLoopTable[lnum].lpContains(lvaTable[lclNum].GetPerSsaData(lclVar->GetSsaNum())->m_defLoc.m_blk))
6449 else if (tree->OperIsConst())
6453 else // If every one of the children nodes are valid at this Loop's Head.
6455 unsigned nChildren = tree->NumChildren();
6456 for (unsigned childNum = 0; childNum < nChildren; childNum++)
6458 if (!optTreeIsValidAtLoopHead(tree->GetChild(childNum), lnum))
6468 /*****************************************************************************
6470 * Creates a pre-header block for the given loop - a preheader is a BBJ_NONE
6471 * header. The pre-header will replace the current lpHead in the loop table.
6472 * The loop has to be a do-while loop. Thus, all blocks dominated by lpHead
6473 * will also be dominated by the loop-top, lpHead->bbNext.
6477 void Compiler::fgCreateLoopPreHeader(unsigned lnum)
6479 LoopDsc* pLoopDsc = &optLoopTable[lnum];
6481 /* This loop has to be a "do-while" loop */
6483 assert(pLoopDsc->lpFlags & LPFLG_DO_WHILE);
6485 /* Have we already created a loop-preheader block? */
6487 if (pLoopDsc->lpFlags & LPFLG_HAS_PREHEAD)
6492 BasicBlock* head = pLoopDsc->lpHead;
6493 BasicBlock* top = pLoopDsc->lpTop;
6494 BasicBlock* entry = pLoopDsc->lpEntry;
6496 // if 'entry' and 'head' are in different try regions then we won't be able to hoist
6497 if (!BasicBlock::sameTryRegion(head, entry))
6502 // Ensure that lpHead always dominates lpEntry
6504 noway_assert(fgDominate(head, entry));
6506 /* Get hold of the first block of the loop body */
6508 assert(top == entry);
6510 /* Allocate a new basic block */
6512 BasicBlock* preHead = bbNewBasicBlock(BBJ_NONE);
6513 preHead->bbFlags |= BBF_INTERNAL | BBF_LOOP_PREHEADER;
6515 // Must set IL code offset
6516 preHead->bbCodeOffs = top->bbCodeOffs;
6518 // Set the default value of the preHead weight in case we don't have
6519 // valid profile data and since this blocks weight is just an estimate
6520 // we clear any BBF_PROF_WEIGHT flag that we may have picked up from head.
6522 preHead->inheritWeight(head);
6523 preHead->bbFlags &= ~BBF_PROF_WEIGHT;
6528 printf("\nCreated PreHeader (BB%02u) for loop L%02u (BB%02u - BB%02u), with weight = %s\n", preHead->bbNum,
6529 lnum, top->bbNum, pLoopDsc->lpBottom->bbNum, refCntWtd2str(preHead->getBBWeight(this)));
6533 // The preheader block is part of the containing loop (if any).
6534 preHead->bbNatLoopNum = pLoopDsc->lpParent;
6536 if (fgIsUsingProfileWeights() && (head->bbJumpKind == BBJ_COND))
6538 if ((head->bbWeight == 0) || (head->bbNext->bbWeight == 0))
6540 preHead->bbWeight = 0;
6541 preHead->bbFlags |= BBF_RUN_RARELY;
6545 bool allValidProfileWeights = ((head->bbFlags & BBF_PROF_WEIGHT) != 0) &&
6546 ((head->bbJumpDest->bbFlags & BBF_PROF_WEIGHT) != 0) &&
6547 ((head->bbNext->bbFlags & BBF_PROF_WEIGHT) != 0);
6549 if (allValidProfileWeights)
6551 double loopEnteredCount;
6552 double loopSkippedCount;
6554 if (fgHaveValidEdgeWeights)
6556 flowList* edgeToNext = fgGetPredForBlock(head->bbNext, head);
6557 flowList* edgeToJump = fgGetPredForBlock(head->bbJumpDest, head);
6558 noway_assert(edgeToNext != nullptr);
6559 noway_assert(edgeToJump != nullptr);
6562 ((double)edgeToNext->flEdgeWeightMin + (double)edgeToNext->flEdgeWeightMax) / 2.0;
6564 ((double)edgeToJump->flEdgeWeightMin + (double)edgeToJump->flEdgeWeightMax) / 2.0;
6568 loopEnteredCount = (double)head->bbNext->bbWeight;
6569 loopSkippedCount = (double)head->bbJumpDest->bbWeight;
6572 double loopTakenRatio = loopEnteredCount / (loopEnteredCount + loopSkippedCount);
6574 // Calculate a good approximation of the preHead's block weight
6575 unsigned preHeadWeight = (unsigned)(((double)head->bbWeight * loopTakenRatio) + 0.5);
6576 preHead->setBBWeight(max(preHeadWeight, 1));
6577 noway_assert(!preHead->isRunRarely());
6582 // Link in the preHead block.
6583 fgInsertBBbefore(top, preHead);
6585 // Ideally we would re-run SSA and VN if we optimized by doing loop hoisting.
6586 // However, that is too expensive at this point. Instead, we update the phi
6587 // node block references, if we created pre-header block due to hoisting.
6588 // This is sufficient because any definition participating in SSA that flowed
6589 // into the phi via the loop header block will now flow through the preheader
6590 // block from the header block.
6592 for (GenTreePtr stmt = top->bbTreeList; stmt; stmt = stmt->gtNext)
6594 GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
6595 if (tree->OperGet() != GT_ASG)
6599 GenTreePtr op2 = tree->gtGetOp2();
6600 if (op2->OperGet() != GT_PHI)
6604 GenTreeArgList* args = op2->gtGetOp1()->AsArgList();
6605 while (args != nullptr)
6607 GenTreePhiArg* phiArg = args->Current()->AsPhiArg();
6608 if (phiArg->gtPredBB == head)
6610 phiArg->gtPredBB = preHead;
6612 args = args->Rest();
6616 // The handler can't begin at the top of the loop. If it did, it would be incorrect
6617 // to set the handler index on the pre header without updating the exception table.
6618 noway_assert(!top->hasHndIndex() || fgFirstBlockOfHandler(top) != top);
6620 // Update the EH table to make the hoisted block part of the loop's EH block.
6621 fgExtendEHRegionBefore(top);
6623 // TODO-CQ: set dominators for this block, to allow loop optimizations requiring them
6624 // (e.g: hoisting expression in a loop with the same 'head' as this one)
6626 /* Update the loop entry */
6628 pLoopDsc->lpHead = preHead;
6629 pLoopDsc->lpFlags |= LPFLG_HAS_PREHEAD;
6631 /* The new block becomes the 'head' of the loop - update bbRefs and bbPreds
6632 All predecessors of 'beg', (which is the entry in the loop)
6633 now have to jump to 'preHead', unless they are dominated by 'head' */
6635 preHead->bbRefs = 0;
6636 fgAddRefPred(preHead, head);
6637 bool checkNestedLoops = false;
6639 for (flowList* pred = top->bbPreds; pred; pred = pred->flNext)
6641 BasicBlock* predBlock = pred->flBlock;
6643 if (fgDominate(top, predBlock))
6645 // note: if 'top' dominates predBlock, 'head' dominates predBlock too
6646 // (we know that 'head' dominates 'top'), but using 'top' instead of
6647 // 'head' in the test allows us to not enter here if 'predBlock == head'
6649 if (predBlock != pLoopDsc->lpBottom)
6651 noway_assert(predBlock != head);
6652 checkNestedLoops = true;
6657 switch (predBlock->bbJumpKind)
6660 noway_assert(predBlock == head);
6664 if (predBlock == head)
6666 noway_assert(predBlock->bbJumpDest != top);
6672 case BBJ_EHCATCHRET:
6673 noway_assert(predBlock->bbJumpDest == top);
6674 predBlock->bbJumpDest = preHead;
6675 preHead->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
6677 if (predBlock == head)
6679 // This is essentially the same case of predBlock being a BBJ_NONE. We may not be
6680 // able to make this a BBJ_NONE if it's an internal block (for example, a leave).
6681 // Just break, pred will be removed after switch.
6685 fgRemoveRefPred(top, predBlock);
6686 fgAddRefPred(preHead, predBlock);
6692 jumpCnt = predBlock->bbJumpSwt->bbsCount;
6693 BasicBlock** jumpTab;
6694 jumpTab = predBlock->bbJumpSwt->bbsDstTab;
6699 if ((*jumpTab) == top)
6701 (*jumpTab) = preHead;
6703 fgRemoveRefPred(top, predBlock);
6704 fgAddRefPred(preHead, predBlock);
6705 preHead->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
6707 } while (++jumpTab, --jumpCnt);
6710 noway_assert(!"Unexpected bbJumpKind");
6715 noway_assert(!fgGetPredForBlock(top, preHead));
6716 fgRemoveRefPred(top, head);
6717 fgAddRefPred(top, preHead);
6720 If we found at least one back-edge in the flowgraph pointing to the top/entry of the loop
6721 (other than the back-edge of the loop we are considering) then we likely have nested
6722 do-while loops with the same entry block and inserting the preheader block changes the head
6723 of all the nested loops. Now we will update this piece of information in the loop table, and
6724 mark all nested loops as having a preheader (the preheader block can be shared among all nested
6725 do-while loops with the same entry block).
6727 if (checkNestedLoops)
6729 for (unsigned l = 0; l < optLoopCount; l++)
6731 if (optLoopTable[l].lpHead == head)
6733 noway_assert(l != lnum); // pLoopDsc->lpHead was already changed from 'head' to 'preHead'
6734 noway_assert(optLoopTable[l].lpEntry == top);
6735 optUpdateLoopHead(l, optLoopTable[l].lpHead, preHead);
6736 optLoopTable[l].lpFlags |= LPFLG_HAS_PREHEAD;
6740 printf("Same PreHeader (BB%02u) can be used for loop L%02u (BB%02u - BB%02u)\n\n", preHead->bbNum,
6741 l, top->bbNum, optLoopTable[l].lpBottom->bbNum);
6749 bool Compiler::optBlockIsLoopEntry(BasicBlock* blk, unsigned* pLnum)
6751 unsigned lnum = blk->bbNatLoopNum;
6752 while (lnum != BasicBlock::NOT_IN_LOOP)
6754 if (optLoopTable[lnum].lpEntry == blk)
6759 lnum = optLoopTable[lnum].lpParent;
6764 void Compiler::optComputeLoopSideEffects()
6767 for (lnum = 0; lnum < optLoopCount; lnum++)
6769 VarSetOps::AssignNoCopy(this, optLoopTable[lnum].lpVarInOut, VarSetOps::MakeEmpty(this));
6770 VarSetOps::AssignNoCopy(this, optLoopTable[lnum].lpVarUseDef, VarSetOps::MakeEmpty(this));
6771 optLoopTable[lnum].lpContainsCall = false;
6774 for (lnum = 0; lnum < optLoopCount; lnum++)
6776 if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED)
6781 if (optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP)
6782 { // Is outermost...
6783 optComputeLoopNestSideEffects(lnum);
6787 VarSetOps::AssignNoCopy(this, lvaFloatVars, VarSetOps::MakeEmpty(this));
6788 #ifndef _TARGET_64BIT_
6789 VarSetOps::AssignNoCopy(this, lvaLongVars, VarSetOps::MakeEmpty(this));
6792 for (unsigned i = 0; i < lvaCount; i++)
6794 LclVarDsc* varDsc = &lvaTable[i];
6795 if (varDsc->lvTracked)
6797 if (varTypeIsFloating(varDsc->lvType))
6799 VarSetOps::AddElemD(this, lvaFloatVars, varDsc->lvVarIndex);
6801 #ifndef _TARGET_64BIT_
6802 else if (varTypeIsLong(varDsc->lvType))
6804 VarSetOps::AddElemD(this, lvaLongVars, varDsc->lvVarIndex);
6811 void Compiler::optComputeLoopNestSideEffects(unsigned lnum)
6813 assert(optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP); // Requires: lnum is outermost.
6814 BasicBlock* botNext = optLoopTable[lnum].lpBottom->bbNext;
6815 for (BasicBlock* bbInLoop = optLoopTable[lnum].lpFirst; bbInLoop != botNext; bbInLoop = bbInLoop->bbNext)
6817 optComputeLoopSideEffectsOfBlock(bbInLoop);
6821 void Compiler::optComputeLoopSideEffectsOfBlock(BasicBlock* blk)
6823 unsigned mostNestedLoop = blk->bbNatLoopNum;
6824 assert(mostNestedLoop != BasicBlock::NOT_IN_LOOP);
6826 AddVariableLivenessAllContainingLoops(mostNestedLoop, blk);
6828 bool heapHavoc = false; // True ==> there's a call or a memory store that has arbitrary heap effects.
6830 // Now iterate over the remaining statements, and their trees.
6831 for (GenTreePtr stmts = blk->FirstNonPhiDef(); (stmts != nullptr); stmts = stmts->gtNext)
6833 for (GenTreePtr tree = stmts->gtStmt.gtStmtList; (tree != nullptr); tree = tree->gtNext)
6835 genTreeOps oper = tree->OperGet();
6837 // Even after we set heapHavoc we still may want to know if a loop contains calls
6840 if (oper == GT_CALL)
6842 // Record that this loop contains a call
6843 AddContainsCallAllContainingLoops(mostNestedLoop);
6846 // If we just set lpContainsCall or it was previously set
6847 if (optLoopTable[mostNestedLoop].lpContainsCall)
6849 // We can early exit after both heapHavoc and lpContainsCall are both set to true.
6853 // We are just looking for GT_CALL nodes after heapHavoc was set.
6857 // otherwise heapHavoc is not set
6860 // This body is a distillation of the heap-side effect code of value numbering.
6861 // We also do a very limited analysis if byref PtrTo values, to cover some cases
6862 // that the compiler creates.
6864 if (GenTree::OperIsAssignment(oper))
6866 GenTreePtr lhs = tree->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true);
6868 if (lhs->OperGet() == GT_IND)
6870 GenTreePtr arg = lhs->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true);
6871 FieldSeqNode* fldSeqArrElem = nullptr;
6873 if ((tree->gtFlags & GTF_IND_VOLATILE) != 0)
6881 if (arg->TypeGet() == TYP_BYREF && arg->OperGet() == GT_LCL_VAR)
6883 // If it's a local byref for which we recorded a value number, use that...
6884 GenTreeLclVar* argLcl = arg->AsLclVar();
6885 if (!fgExcludeFromSsa(argLcl->GetLclNum()))
6888 lvaTable[argLcl->GetLclNum()].GetPerSsaData(argLcl->GetSsaNum())->m_vnPair.GetLiberal();
6890 if (argVN != ValueNumStore::NoVN && vnStore->GetVNFunc(argVN, &funcApp) &&
6891 funcApp.m_func == VNF_PtrToArrElem)
6893 assert(vnStore->IsVNHandle(funcApp.m_args[0]));
6894 CORINFO_CLASS_HANDLE elemType =
6895 CORINFO_CLASS_HANDLE(vnStore->ConstantValue<size_t>(funcApp.m_args[0]));
6896 AddModifiedElemTypeAllContainingLoops(mostNestedLoop, elemType);
6897 // Don't set heapHavoc below.
6904 // Is the LHS an array index expression?
6905 else if (lhs->ParseArrayElemForm(this, &arrInfo, &fldSeqArrElem))
6907 // We actually ignore "fldSeq" -- any modification to an S[], at any
6908 // field of "S", will lose all information about the array type.
6909 CORINFO_CLASS_HANDLE elemTypeEq = EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType);
6910 AddModifiedElemTypeAllContainingLoops(mostNestedLoop, elemTypeEq);
6914 // We are only interested in IsFieldAddr()'s fldSeq out parameter.
6916 GenTreePtr obj = nullptr; // unused
6917 GenTreePtr staticOffset = nullptr; // unused
6918 FieldSeqNode* fldSeq = nullptr;
6920 if (arg->IsFieldAddr(this, &obj, &staticOffset, &fldSeq) &&
6921 (fldSeq != FieldSeqStore::NotAField()))
6923 // Get the first (object) field from field seq. Heap[field] will yield the "field map".
6924 assert(fldSeq != nullptr);
6925 if (fldSeq->IsFirstElemFieldSeq())
6927 fldSeq = fldSeq->m_next;
6928 assert(fldSeq != nullptr);
6931 AddModifiedFieldAllContainingLoops(mostNestedLoop, fldSeq->m_fieldHnd);
6939 else if (lhs->OperIsBlk())
6941 GenTreeLclVarCommon* lclVarTree;
6943 if (!tree->DefinesLocal(this, &lclVarTree, &isEntire))
6945 // For now, assume arbitrary side effects on the heap...
6949 else if (lhs->OperGet() == GT_CLS_VAR)
6951 AddModifiedFieldAllContainingLoops(mostNestedLoop, lhs->gtClsVar.gtClsVarHnd);
6953 // Otherwise, must be local lhs form. I should assert that.
6954 else if (lhs->OperGet() == GT_LCL_VAR)
6956 GenTreeLclVar* lhsLcl = lhs->AsLclVar();
6957 GenTreePtr rhs = tree->gtOp.gtOp2;
6958 ValueNum rhsVN = rhs->gtVNPair.GetLiberal();
6959 // If we gave the RHS a value number, propagate it.
6960 if (rhsVN != ValueNumStore::NoVN)
6962 rhsVN = vnStore->VNNormVal(rhsVN);
6963 if (!fgExcludeFromSsa(lhsLcl->GetLclNum()))
6965 lvaTable[lhsLcl->GetLclNum()]
6966 .GetPerSsaData(lhsLcl->GetSsaNum())
6967 ->m_vnPair.SetLiberal(rhsVN);
6972 else // not GenTree::OperIsAssignment(oper)
6977 tree->gtVNPair = tree->gtOp.gtOp2->gtVNPair;
6981 // Is it an addr of a array index expression?
6983 GenTreePtr addrArg = tree->gtOp.gtOp1;
6984 if (addrArg->OperGet() == GT_IND)
6986 // Is the LHS an array index expression?
6987 if (addrArg->gtFlags & GTF_IND_ARR_INDEX)
6990 bool b = GetArrayInfoMap()->Lookup(addrArg, &arrInfo);
6992 CORINFO_CLASS_HANDLE elemType =
6993 EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType);
6994 tree->gtVNPair.SetBoth(
6995 vnStore->VNForFunc(TYP_BYREF, VNF_PtrToArrElem,
6996 vnStore->VNForHandle(ssize_t(elemType), GTF_ICON_CLASS_HDL),
6997 // The rest are dummy arguments.
6998 vnStore->VNForNull(), vnStore->VNForNull(),
6999 vnStore->VNForNull()));
7005 case GT_LOCKADD: // Binop
7006 case GT_XADD: // Binop
7007 case GT_XCHG: // Binop
7008 case GT_CMPXCHG: // Specialop
7016 GenTreeCall* call = tree->AsCall();
7018 // Record that this loop contains a call
7019 AddContainsCallAllContainingLoops(mostNestedLoop);
7021 if (call->gtCallType == CT_HELPER)
7023 CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
7024 if (s_helperCallProperties.MutatesHeap(helpFunc))
7028 else if (s_helperCallProperties.MayRunCctor(helpFunc))
7030 // If the call is labeled as "Hoistable", then we've checked the
7031 // class that would be constructed, and it is not precise-init, so
7032 // the cctor will not be run by this call. Otherwise, it might be,
7033 // and might have arbitrary side effects.
7034 if ((tree->gtFlags & GTF_CALL_HOISTABLE) == 0)
7048 // All other gtOper node kinds, leave 'heapHavoc' unchanged (i.e. false)
7057 // Record that all loops containing this block have heap havoc effects.
7058 unsigned lnum = mostNestedLoop;
7059 while (lnum != BasicBlock::NOT_IN_LOOP)
7061 optLoopTable[lnum].lpLoopHasHeapHavoc = true;
7062 lnum = optLoopTable[lnum].lpParent;
7067 // Marks the containsCall information to "lnum" and any parent loops.
7068 void Compiler::AddContainsCallAllContainingLoops(unsigned lnum)
7070 assert(0 <= lnum && lnum < optLoopCount);
7071 while (lnum != BasicBlock::NOT_IN_LOOP)
7073 optLoopTable[lnum].lpContainsCall = true;
7074 lnum = optLoopTable[lnum].lpParent;
7078 // Adds the variable liveness information for 'blk' to 'this' LoopDsc
7079 void Compiler::LoopDsc::AddVariableLiveness(Compiler* comp, BasicBlock* blk)
7081 VarSetOps::UnionD(comp, this->lpVarInOut, blk->bbLiveIn);
7082 VarSetOps::UnionD(comp, this->lpVarInOut, blk->bbLiveOut);
7084 VarSetOps::UnionD(comp, this->lpVarUseDef, blk->bbVarUse);
7085 VarSetOps::UnionD(comp, this->lpVarUseDef, blk->bbVarDef);
7088 // Adds the variable liveness information for 'blk' to "lnum" and any parent loops.
7089 void Compiler::AddVariableLivenessAllContainingLoops(unsigned lnum, BasicBlock* blk)
7091 assert(0 <= lnum && lnum < optLoopCount);
7092 while (lnum != BasicBlock::NOT_IN_LOOP)
7094 optLoopTable[lnum].AddVariableLiveness(this, blk);
7095 lnum = optLoopTable[lnum].lpParent;
7099 // Adds "fldHnd" to the set of modified fields of "lnum" and any parent loops.
7100 void Compiler::AddModifiedFieldAllContainingLoops(unsigned lnum, CORINFO_FIELD_HANDLE fldHnd)
7102 assert(0 <= lnum && lnum < optLoopCount);
7103 while (lnum != BasicBlock::NOT_IN_LOOP)
7105 optLoopTable[lnum].AddModifiedField(this, fldHnd);
7106 lnum = optLoopTable[lnum].lpParent;
7110 // Adds "elemType" to the set of modified array element types of "lnum" and any parent loops.
7111 void Compiler::AddModifiedElemTypeAllContainingLoops(unsigned lnum, CORINFO_CLASS_HANDLE elemClsHnd)
7113 assert(0 <= lnum && lnum < optLoopCount);
7114 while (lnum != BasicBlock::NOT_IN_LOOP)
7116 optLoopTable[lnum].AddModifiedElemType(this, elemClsHnd);
7117 lnum = optLoopTable[lnum].lpParent;
7121 /*****************************************************************************
7123 * Helper passed to Compiler::fgWalkAllTreesPre() to decrement the LclVar usage counts
7124 * The 'keepList'is either a single tree or a list of trees that are formed by
7125 * one or more GT_COMMA nodes. It is the kept side-effects as returned by the
7126 * gtExtractSideEffList method.
7130 Compiler::fgWalkResult Compiler::optRemoveTreeVisitor(GenTreePtr* pTree, fgWalkData* data)
7132 GenTreePtr tree = *pTree;
7133 Compiler* comp = data->compiler;
7134 GenTreePtr keepList = (GenTreePtr)(data->pCallbackData);
7136 // We may have a non-NULL side effect list that is being kept
7140 GenTreePtr keptTree = keepList;
7141 while (keptTree->OperGet() == GT_COMMA)
7143 assert(keptTree->OperKind() & GTK_SMPOP);
7144 GenTreePtr op1 = keptTree->gtOp.gtOp1;
7145 GenTreePtr op2 = keptTree->gtGetOp2();
7147 // For the GT_COMMA case the op1 is part of the orginal CSE tree
7148 // that is being kept because it contains some side-effect
7152 // This tree and all of its sub trees are being kept.
7153 return WALK_SKIP_SUBTREES;
7156 // For the GT_COMMA case the op2 are the remaining side-effects of the orginal CSE tree
7157 // which can again be another GT_COMMA or the final side-effect part
7161 if (tree == keptTree)
7163 // This tree and all of its sub trees are being kept.
7164 return WALK_SKIP_SUBTREES;
7168 // This node is being removed from the graph of GenTreePtr
7170 // Look for any local variable references
7172 if (tree->gtOper == GT_LCL_VAR && comp->lvaLocalVarRefCounted)
7177 /* This variable ref is going away, decrease its ref counts */
7179 lclNum = tree->gtLclVarCommon.gtLclNum;
7180 assert(lclNum < comp->lvaCount);
7181 varDsc = comp->lvaTable + lclNum;
7183 // make sure it's been initialized
7184 assert(comp->compCurBB != nullptr);
7185 assert(comp->compCurBB->bbWeight <= BB_MAX_WEIGHT);
7187 /* Decrement its lvRefCnt and lvRefCntWtd */
7189 // Use getBBWeight to determine the proper block weight.
7190 // This impacts the block weights when we have IBC data.
7191 varDsc->decRefCnts(comp->compCurBB->getBBWeight(comp), comp);
7194 return WALK_CONTINUE;
7197 /*****************************************************************************
7199 * Routine called to decrement the LclVar ref counts when removing a tree
7200 * during the remove RangeCheck phase.
7201 * This method will decrement the refcounts for any LclVars used below 'deadTree',
7202 * unless the node is found in the 'keepList' (which are saved side effects)
7203 * The keepList is communicated using the walkData.pCallbackData field
7204 * Also the compCurBB must be set to the current BasicBlock which contains
7205 * 'deadTree' as we need to fetch the block weight when decrementing the ref counts.
7208 void Compiler::optRemoveTree(GenTreePtr deadTree, GenTreePtr keepList)
7210 // We communicate this value using the walkData.pCallbackData field
7212 fgWalkTreePre(&deadTree, optRemoveTreeVisitor, (void*)keepList);
7215 /*****************************************************************************
7217 * Given an array index node, mark it as not needing a range check.
7220 void Compiler::optRemoveRangeCheck(
7221 GenTreePtr tree, GenTreePtr stmt, bool updateCSEcounts, unsigned sideEffFlags, bool forceRemove)
7237 noway_assert(!"can't remove range checks without REARRANGE_ADDS right now");
7240 noway_assert(stmt->gtOper == GT_STMT);
7241 noway_assert(tree->gtOper == GT_COMMA);
7242 noway_assert(tree->gtOp.gtOp1->gtOper == GT_ARR_BOUNDS_CHECK);
7243 noway_assert(forceRemove || optIsRangeCheckRemovable(tree->gtOp.gtOp1));
7245 GenTreeBoundsChk* bndsChk = tree->gtOp.gtOp1->AsBoundsChk();
7250 printf("Before optRemoveRangeCheck:\n");
7255 GenTreePtr sideEffList = nullptr;
7258 gtExtractSideEffList(tree->gtOp.gtOp1, &sideEffList, sideEffFlags);
7261 // Decrement the ref counts for any LclVars that are being deleted
7263 optRemoveTree(tree->gtOp.gtOp1, sideEffList);
7265 // Just replace the bndsChk with a NOP as an operand to the GT_COMMA, if there are no side effects.
7266 tree->gtOp.gtOp1 = (sideEffList != nullptr) ? sideEffList : gtNewNothingNode();
7268 // TODO-CQ: We should also remove the GT_COMMA, but in any case we can no longer CSE the GT_COMMA.
7269 tree->gtFlags |= GTF_DONT_CSE;
7271 /* Recalculate the gtCostSz, etc... */
7272 gtSetStmtInfo(stmt);
7274 /* Re-thread the nodes if necessary */
7275 if (fgStmtListThreaded)
7283 printf("After optRemoveRangeCheck:\n");
7289 /*****************************************************************************
7290 * Return the scale in an array reference, given a pointer to the
7291 * multiplication node.
7294 ssize_t Compiler::optGetArrayRefScaleAndIndex(GenTreePtr mul, GenTreePtr* pIndex DEBUGARG(bool bRngChk))
7297 assert(mul->gtOper == GT_MUL || mul->gtOper == GT_LSH);
7298 assert(mul->gtOp.gtOp2->IsCnsIntOrI());
7300 ssize_t scale = mul->gtOp.gtOp2->gtIntConCommon.IconValue();
7302 if (mul->gtOper == GT_LSH)
7304 scale = ((ssize_t)1) << scale;
7307 GenTreePtr index = mul->gtOp.gtOp1;
7309 if (index->gtOper == GT_MUL && index->gtOp.gtOp2->IsCnsIntOrI())
7311 // case of two cascading multiplications for constant int (e.g. * 20 morphed to * 5 * 4):
7312 // When index->gtOper is GT_MUL and index->gtOp.gtOp2->gtOper is GT_CNS_INT (i.e. * 5),
7313 // we can bump up the scale from 4 to 5*4, and then change index to index->gtOp.gtOp1.
7314 // Otherwise, we cannot optimize it. We will simply keep the original scale and index.
7315 scale *= index->gtOp.gtOp2->gtIntConCommon.IconValue();
7316 index = index->gtOp.gtOp1;
7319 assert(!bRngChk || index->gtOper != GT_COMMA);
7329 /*****************************************************************************
7330 * Find the last assignment to of the local variable in the block. Return
7331 * RHS or NULL. If any local variable in the RHS has been killed in
7332 * intervening code, return NULL. If the variable being searched for is killed
7333 * in the intervening code, return NULL.
7337 GenTreePtr Compiler::optFindLocalInit(BasicBlock* block,
7339 VARSET_TP* pKilledInOut,
7340 bool* pLhsRhsKilledAfterInit)
7342 assert(pKilledInOut);
7343 assert(pLhsRhsKilledAfterInit);
7345 *pLhsRhsKilledAfterInit = false;
7347 unsigned LclNum = local->gtLclVarCommon.gtLclNum;
7349 GenTreePtr list = block->bbTreeList;
7350 if (list == nullptr)
7355 GenTreePtr rhs = nullptr;
7356 GenTreePtr stmt = list;
7359 stmt = stmt->gtPrev;
7360 if (stmt == nullptr)
7365 GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
7366 // If we encounter an assignment to a local variable,
7367 if ((tree->OperKind() & GTK_ASGOP) && tree->gtOp.gtOp1->gtOper == GT_LCL_VAR)
7369 // And the assigned variable equals the input local,
7370 if (tree->gtOp.gtOp1->gtLclVarCommon.gtLclNum == LclNum)
7372 // If the assignment is '=' and it is not a conditional, then return rhs.
7373 if (tree->gtOper == GT_ASG && !(tree->gtFlags & GTF_COLON_COND))
7375 rhs = tree->gtOp.gtOp2;
7377 // If the assignment is 'op=' or a conditional equal, then the search ends here,
7378 // as we found a kill to the input local.
7381 *pLhsRhsKilledAfterInit = true;
7382 assert(rhs == nullptr);
7388 LclVarDsc* varDsc = optIsTrackedLocal(tree->gtOp.gtOp1);
7389 if (varDsc == nullptr)
7393 VarSetOps::AddElemD(this, *pKilledInOut, varDsc->lvVarIndex);
7396 } while (stmt != list);
7403 // If any local in the RHS is killed in intervening code, or RHS has an indirection, return NULL.
7404 varRefKinds rhsRefs = VR_NONE;
7405 VARSET_TP VARSET_INIT_NOCOPY(rhsLocals, VarSetOps::UninitVal());
7406 bool b = lvaLclVarRefs(rhs, nullptr, &rhsRefs, &rhsLocals);
7407 if (!b || !VarSetOps::IsEmptyIntersection(this, rhsLocals, *pKilledInOut) || (rhsRefs != VR_NONE))
7409 // If RHS has been indirectly referenced, consider it a write and a kill.
7410 *pLhsRhsKilledAfterInit = true;
7417 /*****************************************************************************
7419 * Return true if "op1" is guaranteed to be less then or equal to "op2".
7424 bool Compiler::optIsNoMore(GenTreePtr op1, GenTreePtr op2, int add1, int add2)
7426 if (op1->gtOper == GT_CNS_INT && op2->gtOper == GT_CNS_INT)
7428 add1 += op1->gtIntCon.gtIconVal;
7429 add2 += op2->gtIntCon.gtIconVal;
7433 /* Check for +/- constant on either operand */
7435 if (op1->gtOper == GT_ADD && op1->gtOp.gtOp2->gtOper == GT_CNS_INT)
7437 add1 += op1->gtOp.gtOp2->gtIntCon.gtIconVal;
7438 op1 = op1->gtOp.gtOp1;
7441 if (op2->gtOper == GT_ADD && op2->gtOp.gtOp2->gtOper == GT_CNS_INT)
7443 add2 += op2->gtOp.gtOp2->gtIntCon.gtIconVal;
7444 op2 = op2->gtOp.gtOp1;
7447 /* We only allow local variable references */
7449 if (op1->gtOper != GT_LCL_VAR)
7451 if (op2->gtOper != GT_LCL_VAR)
7453 if (op1->gtLclVarCommon.gtLclNum != op2->gtLclVarCommon.gtLclNum)
7456 /* NOTE: Caller ensures that this variable has only one def */
7458 // printf("limit [%d]:\n", add1); gtDispTree(op1);
7459 // printf("size [%d]:\n", add2); gtDispTree(op2);
7463 return (bool)(add1 <= add2);
7468 //------------------------------------------------------------------------------
7469 // optObtainLoopCloningOpts: Identify optimization candidates and update
7470 // the "context" for array optimizations.
7473 // context - data structure where all loop cloning info is kept. The
7474 // optInfo fields of the context are updated with the
7475 // identified optimization candidates.
7477 void Compiler::optObtainLoopCloningOpts(LoopCloneContext* context)
7479 for (unsigned i = 0; i < optLoopCount; i++)
7481 JITDUMP("Considering loop %d to clone for optimizations.\n", i);
7482 if (optIsLoopClonable(i))
7484 if (!(optLoopTable[i].lpFlags & LPFLG_REMOVED))
7486 optIdentifyLoopOptInfo(i, context);
7489 JITDUMP("------------------------------------------------------------\n");
7494 //------------------------------------------------------------------------
7495 // optIdentifyLoopOptInfo: Identify loop optimization candidates an also
7496 // check if the loop is suitable for the optimizations performed.
7499 // loopNum - the current loop index for which conditions are derived.
7500 // context - data structure where all loop cloning candidates will be
7504 // If the loop is not suitable for the optimizations, return false - context
7505 // should not contain any optimization candidate for the loop if false.
7506 // Else return true.
7509 // Check if the loop is well formed for this optimization and identify the
7510 // optimization candidates and update the "context" parameter with all the
7511 // contextual information necessary to perform the optimization later.
7513 bool Compiler::optIdentifyLoopOptInfo(unsigned loopNum, LoopCloneContext* context)
7515 noway_assert(loopNum < optLoopCount);
7517 LoopDsc* pLoop = &optLoopTable[loopNum];
7519 if (!(pLoop->lpFlags & LPFLG_ITER))
7521 JITDUMP("> No iter flag on loop %d.\n", loopNum);
7525 unsigned ivLclNum = pLoop->lpIterVar();
7526 if (lvaVarAddrExposed(ivLclNum))
7528 JITDUMP("> Rejected V%02u as iter var because is address-exposed.\n", ivLclNum);
7532 BasicBlock* head = pLoop->lpHead;
7533 BasicBlock* end = pLoop->lpBottom;
7534 BasicBlock* beg = head->bbNext;
7536 if (end->bbJumpKind != BBJ_COND)
7538 JITDUMP("> Couldn't find termination test.\n");
7542 if (end->bbJumpDest != beg)
7544 JITDUMP("> Branch at loop 'end' not looping to 'begin'.\n");
7548 // TODO-CQ: CLONE: Mark increasing or decreasing loops.
7549 if ((pLoop->lpIterOper() != GT_ASG_ADD && pLoop->lpIterOper() != GT_ADD) || (pLoop->lpIterConst() != 1))
7551 JITDUMP("> Loop iteration operator not matching\n");
7555 if ((pLoop->lpFlags & LPFLG_CONST_LIMIT) == 0 && (pLoop->lpFlags & LPFLG_VAR_LIMIT) == 0 &&
7556 (pLoop->lpFlags & LPFLG_ARRLEN_LIMIT) == 0)
7558 JITDUMP("> Loop limit is neither constant, variable or array length\n");
7562 if (!(((pLoop->lpTestOper() == GT_LT || pLoop->lpTestOper() == GT_LE) &&
7563 (pLoop->lpIterOper() == GT_ADD || pLoop->lpIterOper() == GT_ASG_ADD)) ||
7564 ((pLoop->lpTestOper() == GT_GT || pLoop->lpTestOper() == GT_GE) &&
7565 (pLoop->lpIterOper() == GT_SUB || pLoop->lpIterOper() == GT_ASG_SUB))))
7567 JITDUMP("> Loop test (%s) doesn't agree with the direction (%s) of the pLoop->\n",
7568 GenTree::NodeName(pLoop->lpTestOper()), GenTree::NodeName(pLoop->lpIterOper()));
7572 if (!(pLoop->lpTestTree->OperKind() & GTK_RELOP) || !(pLoop->lpTestTree->gtFlags & GTF_RELOP_ZTT))
7574 JITDUMP("> Loop inversion NOT present, loop test [%06u] may not protect entry from head.\n",
7575 pLoop->lpTestTree->gtTreeID);
7580 GenTreePtr op1 = pLoop->lpIterator();
7581 noway_assert((op1->gtOper == GT_LCL_VAR) && (op1->gtLclVarCommon.gtLclNum == ivLclNum));
7584 JITDUMP("Checking blocks BB%02d..BB%02d for optimization candidates\n", beg->bbNum,
7585 end->bbNext ? end->bbNext->bbNum : 0);
7587 LoopCloneVisitorInfo info(context, loopNum, nullptr);
7588 for (BasicBlock* block = beg; block != end->bbNext; block = block->bbNext)
7591 for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
7594 fgWalkTreePre(&stmt->gtStmt.gtStmtExpr, optCanOptimizeByLoopCloningVisitor, &info, false, false);
7601 //---------------------------------------------------------------------------------------------------------------
7602 // optExtractArrIndex: Try to extract the array index from "tree".
7605 // tree the tree to be checked if it is the array [] operation.
7606 // result the extracted GT_INDEX information is updated in result.
7607 // lhsNum for the root level (function is recursive) callers should be BAD_VAR_NUM.
7610 // Returns true if array index can be extracted, else, return false. See assumption about
7611 // what will be extracted. The "result" variable's rank parameter is advanced for every
7612 // dimension of [] encountered.
7615 // Given a "tree" extract the GT_INDEX node in "result" as ArrIndex. In FlowGraph morph
7616 // we have converted a GT_INDEX tree into a scaled index base offset expression. We need
7617 // to reconstruct this to be able to know if this is an array access.
7620 // The method extracts only if the array base and indices are GT_LCL_VAR.
7622 // TODO-CQ: CLONE: After morph make sure this method extracts values before morph.
7624 // [000000001AF828D8] ---XG------- indir int
7625 // [000000001AF872C8] ------------ const long 16 Fseq[#FirstElem]
7626 // [000000001AF87340] ------------ + byref
7627 // [000000001AF87160] -------N---- const long 2
7628 // [000000001AF871D8] ------------ << long
7629 // [000000001AF870C0] ------------ cast long <- int
7630 // [000000001AF86F30] i----------- lclVar int V04 loc0
7631 // [000000001AF87250] ------------ + byref
7632 // [000000001AF86EB8] ------------ lclVar ref V01 arg1
7633 // [000000001AF87468] ---XG------- comma int
7634 // [000000001AF87020] ---X-------- arrBndsChk void
7635 // [000000001AF86FA8] ---X-------- arrLen int
7636 // [000000001AF827E8] ------------ lclVar ref V01 arg1
7637 // [000000001AF82860] ------------ lclVar int V04 loc0
7638 // [000000001AF829F0] -A-XG------- = int
7639 // [000000001AF82978] D------N---- lclVar int V06 tmp0
7641 bool Compiler::optExtractArrIndex(GenTreePtr tree, ArrIndex* result, unsigned lhsNum)
7643 if (tree->gtOper != GT_COMMA)
7647 GenTreePtr before = tree->gtGetOp1();
7648 if (before->gtOper != GT_ARR_BOUNDS_CHECK)
7652 GenTreeBoundsChk* arrBndsChk = before->AsBoundsChk();
7653 if (arrBndsChk->gtArrLen->gtGetOp1()->gtOper != GT_LCL_VAR)
7657 if (arrBndsChk->gtIndex->gtOper != GT_LCL_VAR)
7661 unsigned arrLcl = arrBndsChk->gtArrLen->gtGetOp1()->gtLclVarCommon.gtLclNum;
7662 if (lhsNum != BAD_VAR_NUM && arrLcl != lhsNum)
7667 unsigned indLcl = arrBndsChk->gtIndex->gtLclVarCommon.gtLclNum;
7669 GenTreePtr after = tree->gtGetOp2();
7671 if (after->gtOper != GT_IND)
7675 // It used to be the case that arrBndsChks for struct types would fail the previous check because
7676 // after->gtOper was an address (for a block op). In order to avoid asmDiffs we will for now
7677 // return false if the type of 'after' is a struct type. (This was causing us to clone loops
7678 // that we were not previously cloning.)
7679 // TODO-1stClassStructs: Remove this check to enable optimization of array bounds checks for struct
7681 if (varTypeIsStruct(after))
7686 GenTreePtr sibo = after->gtGetOp1();
7687 if (sibo->gtOper != GT_ADD)
7691 GenTreePtr sib = sibo->gtGetOp1();
7692 GenTreePtr ofs = sibo->gtGetOp2();
7693 if (ofs->gtOper != GT_CNS_INT)
7697 if (sib->gtOper != GT_ADD)
7701 GenTreePtr si = sib->gtGetOp2();
7702 GenTreePtr base = sib->gtGetOp1();
7703 if (si->gtOper != GT_LSH)
7707 if (base->OperGet() != GT_LCL_VAR || base->gtLclVarCommon.gtLclNum != arrLcl)
7711 GenTreePtr scale = si->gtGetOp2();
7712 GenTreePtr index = si->gtGetOp1();
7713 if (scale->gtOper != GT_CNS_INT)
7717 #ifdef _TARGET_AMD64_
7718 if (index->gtOper != GT_CAST)
7722 GenTreePtr indexVar = index->gtGetOp1();
7724 GenTreePtr indexVar = index;
7726 if (indexVar->gtOper != GT_LCL_VAR || indexVar->gtLclVarCommon.gtLclNum != indLcl)
7730 if (lhsNum == BAD_VAR_NUM)
7732 result->arrLcl = arrLcl;
7734 result->indLcls.Push(indLcl);
7735 result->bndsChks.Push(tree);
7736 result->useBlock = compCurBB;
7742 //---------------------------------------------------------------------------------------------------------------
7743 // optReconstructArrIndex: Reconstruct array index.
7746 // tree the tree to be checked if it is an array [][][] operation.
7747 // result the extracted GT_INDEX information.
7748 // lhsNum for the root level (function is recursive) callers should be BAD_VAR_NUM.
7751 // Returns true if array index can be extracted, else, return false. "rank" field in
7752 // "result" contains the array access depth. The "indLcls" fields contain the indices.
7755 // Recursively look for a list of array indices. In the example below, we encounter,
7756 // V03 = ((V05 = V00[V01]), (V05[V02])) which corresponds to access of V00[V01][V02]
7757 // The return value would then be:
7758 // ArrIndex result { arrLcl: V00, indLcls: [V01, V02], rank: 2 }
7760 // V00[V01][V02] would be morphed as:
7762 // [000000001B366848] ---XG------- indir int
7763 // [000000001B36BC50] ------------ V05 + (V02 << 2) + 16
7764 // [000000001B36C200] ---XG------- comma int
7765 // [000000001B36BDB8] ---X-------- arrBndsChk(V05, V02)
7766 // [000000001B36C278] -A-XG------- comma int
7767 // [000000001B366730] R--XG------- indir ref
7768 // [000000001B36C2F0] ------------ V00 + (V01 << 3) + 24
7769 // [000000001B36C818] ---XG------- comma ref
7770 // [000000001B36C458] ---X-------- arrBndsChk(V00, V01)
7771 // [000000001B36BB60] -A-XG------- = ref
7772 // [000000001B36BAE8] D------N---- lclVar ref V05 tmp2
7773 // [000000001B36A668] -A-XG------- = int
7774 // [000000001B36A5F0] D------N---- lclVar int V03 tmp0
7777 // The method extracts only if the array base and indices are GT_LCL_VAR.
7779 bool Compiler::optReconstructArrIndex(GenTreePtr tree, ArrIndex* result, unsigned lhsNum)
7781 // If we can extract "tree" (which is a top level comma) return.
7782 if (optExtractArrIndex(tree, result, lhsNum))
7786 // We have a comma (check if array base expr is computed in "before"), descend further.
7787 else if (tree->OperGet() == GT_COMMA)
7789 GenTreePtr before = tree->gtGetOp1();
7790 // "before" should evaluate an array base for the "after" indexing.
7791 if (before->OperGet() != GT_ASG)
7795 GenTreePtr lhs = before->gtGetOp1();
7796 GenTreePtr rhs = before->gtGetOp2();
7798 // "rhs" should contain an GT_INDEX
7799 if (!lhs->IsLocal() || !optReconstructArrIndex(rhs, result, lhsNum))
7803 unsigned lhsNum = lhs->gtLclVarCommon.gtLclNum;
7804 GenTreePtr after = tree->gtGetOp2();
7805 // Pass the "lhsNum", so we can verify if indeed it is used as the array base.
7806 return optExtractArrIndex(after, result, lhsNum);
7812 Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloningVisitor(GenTreePtr* pTree, Compiler::fgWalkData* data)
7814 return data->compiler->optCanOptimizeByLoopCloning(*pTree, (LoopCloneVisitorInfo*)data->pCallbackData);
7817 //-------------------------------------------------------------------------
7818 // optIsStackLocalInvariant: Is stack local invariant in loop.
7821 // loopNum The loop in which the variable is tested for invariance.
7822 // lclNum The local that is tested for invariance in the loop.
7825 // Returns true if the variable is loop invariant in loopNum.
7827 bool Compiler::optIsStackLocalInvariant(unsigned loopNum, unsigned lclNum)
7829 if (lvaVarAddrExposed(lclNum))
7833 if (optIsVarAssgLoop(loopNum, lclNum))
7840 //----------------------------------------------------------------------------------------------
7841 // optCanOptimizeByLoopCloning: Check if the tree can be optimized by loop cloning and if so,
7842 // identify as potential candidate and update the loop context.
7845 // tree The tree encountered during the tree walk.
7846 // info Supplies information about the current block or stmt in which the tree is.
7847 // Also supplies the "context" pointer for updating with loop cloning
7848 // candidates. Also supplies loopNum.
7851 // If array index can be reconstructed, check if the iter var of the loop matches the
7852 // array index var in some dim. Also ensure other index vars before the identified
7853 // dim are loop invariant.
7856 // Skip sub trees if the optimization candidate is identified or else continue walking
7858 Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTreePtr tree, LoopCloneVisitorInfo* info)
7860 ArrIndex arrIndex(getAllocator());
7862 // Check if array index can be optimized.
7863 if (optReconstructArrIndex(tree, &arrIndex, BAD_VAR_NUM))
7865 assert(tree->gtOper == GT_COMMA);
7869 JITDUMP("Found ArrIndex at tree ");
7871 printf(" which is equivalent to: ");
7876 if (!optIsStackLocalInvariant(info->loopNum, arrIndex.arrLcl))
7878 return WALK_SKIP_SUBTREES;
7881 // Walk the dimensions and see if iterVar of the loop is used as index.
7882 for (unsigned dim = 0; dim < arrIndex.rank; ++dim)
7884 // Is index variable also used as the loop iter var.
7885 if (arrIndex.indLcls[dim] == optLoopTable[info->loopNum].lpIterVar())
7887 // Check the previous indices are all loop invariant.
7888 for (unsigned dim2 = 0; dim2 < dim; ++dim2)
7890 if (optIsVarAssgLoop(info->loopNum, arrIndex.indLcls[dim2]))
7892 JITDUMP("V%02d is assigned in loop\n", arrIndex.indLcls[dim2]);
7893 return WALK_SKIP_SUBTREES;
7899 JITDUMP("Loop %d can be cloned for ArrIndex ", info->loopNum);
7901 JITDUMP(" on dim %d\n", dim);
7904 // Update the loop context.
7905 info->context->EnsureLoopOptInfo(info->loopNum)
7906 ->Push(new (this, CMK_LoopOpt) LcJaggedArrayOptInfo(arrIndex, dim, info->stmt));
7910 JITDUMP("Induction V%02d is not used as index on dim %d\n", optLoopTable[info->loopNum].lpIterVar(),
7914 return WALK_SKIP_SUBTREES;
7916 else if (tree->gtOper == GT_ARR_ELEM)
7918 // TODO-CQ: CLONE: Implement.
7919 return WALK_SKIP_SUBTREES;
7921 return WALK_CONTINUE;
7924 struct optRangeCheckDsc
7926 Compiler* pCompiler;
7930 Walk to make sure that only locals and constants are contained in the index
7933 Compiler::fgWalkResult Compiler::optValidRangeCheckIndex(GenTreePtr* pTree, fgWalkData* data)
7935 GenTreePtr tree = *pTree;
7936 optRangeCheckDsc* pData = (optRangeCheckDsc*)data->pCallbackData;
7938 if (tree->gtOper == GT_IND || tree->gtOper == GT_CLS_VAR || tree->gtOper == GT_FIELD || tree->gtOper == GT_LCL_FLD)
7940 pData->bValidIndex = false;
7944 if (tree->gtOper == GT_LCL_VAR)
7946 if (pData->pCompiler->lvaTable[tree->gtLclVarCommon.gtLclNum].lvAddrExposed)
7948 pData->bValidIndex = false;
7953 return WALK_CONTINUE;
7957 returns true if a range check can legally be removed (for the moment it checks
7958 that the array is a local array (non subject to racing conditions) and that the
7959 index is either a constant or a local
7961 bool Compiler::optIsRangeCheckRemovable(GenTreePtr tree)
7963 noway_assert(tree->gtOper == GT_ARR_BOUNDS_CHECK);
7964 GenTreeBoundsChk* bndsChk = tree->AsBoundsChk();
7965 GenTreePtr pArray = bndsChk->GetArray();
7966 if (pArray == nullptr && !bndsChk->gtArrLen->IsCnsIntOrI())
7970 GenTreePtr pIndex = bndsChk->gtIndex;
7972 // The length must be a constant (the pArray == NULL case) or the array reference must be a local.
7973 // Otherwise we can be targeted by malicious race-conditions.
7974 if (pArray != nullptr)
7976 if (pArray->gtOper != GT_LCL_VAR)
7982 printf("Can't remove range check if the array isn't referenced with a local\n");
7990 noway_assert(pArray->gtType == TYP_REF);
7991 noway_assert(pArray->gtLclVarCommon.gtLclNum < lvaCount);
7993 if (lvaTable[pArray->gtLclVarCommon.gtLclNum].lvAddrExposed)
7995 // If the array address has been taken, don't do the optimization
7996 // (this restriction can be lowered a bit, but i don't think it's worth it)
7997 CLANG_FORMAT_COMMENT_ANCHOR;
8001 printf("Can't remove range check if the array has its address taken\n");
8010 optRangeCheckDsc Data;
8011 Data.pCompiler = this;
8012 Data.bValidIndex = true;
8014 fgWalkTreePre(&pIndex, optValidRangeCheckIndex, &Data);
8016 if (!Data.bValidIndex)
8021 printf("Can't remove range check with this index");
8032 /******************************************************************************
8034 * Replace x==null with (x|x)==0 if x is a GC-type.
8035 * This will stress code-gen and the emitter to make sure they support such trees.
8040 void Compiler::optOptimizeBoolsGcStress(BasicBlock* condBlock)
8042 if (!compStressCompile(STRESS_OPT_BOOLS_GC, 20))
8047 noway_assert(condBlock->bbJumpKind == BBJ_COND);
8048 GenTreePtr condStmt = condBlock->bbTreeList->gtPrev->gtStmt.gtStmtExpr;
8050 noway_assert(condStmt->gtOper == GT_JTRUE);
8055 GenTreePtr comparand = optIsBoolCond(condStmt, &relop, &isBool);
8057 if (comparand == nullptr || !varTypeIsGC(comparand->TypeGet()))
8062 if (comparand->gtFlags & (GTF_ASG | GTF_CALL | GTF_ORDER_SIDEEFF))
8067 GenTreePtr comparandClone = gtCloneExpr(comparand);
8069 // Bump up the ref-counts of any variables in 'comparandClone'
8070 compCurBB = condBlock;
8071 fgWalkTreePre(&comparandClone, Compiler::lvaIncRefCntsCB, (void*)this, true);
8073 noway_assert(relop->gtOp.gtOp1 == comparand);
8074 genTreeOps oper = compStressCompile(STRESS_OPT_BOOLS_GC, 50) ? GT_OR : GT_AND;
8075 relop->gtOp.gtOp1 = gtNewOperNode(oper, TYP_I_IMPL, comparand, comparandClone);
8077 // Comparand type is already checked, and we have const int, there is no harm
8078 // morphing it into a TYP_I_IMPL.
8079 noway_assert(relop->gtOp.gtOp2->gtOper == GT_CNS_INT);
8080 relop->gtOp.gtOp2->gtType = TYP_I_IMPL;
8085 /******************************************************************************
8086 * Function used by folding of boolean conditionals
8087 * Given a GT_JTRUE node, checks that it is a boolean comparison of the form
8088 * "if (boolVal ==/!= 0/1)". This is translated into a GT_EQ node with "op1"
8089 * being a boolean lclVar and "op2" the const 0/1.
8090 * On success, the comparand (ie. boolVal) is returned. Else NULL.
8091 * compPtr returns the compare node (i.e. GT_EQ or GT_NE node)
8092 * boolPtr returns whether the comparand is a boolean value (must be 0 or 1).
8093 * When return boolPtr equal to true, if the comparison was against a 1 (i.e true)
8094 * value then we morph the tree by reversing the GT_EQ/GT_NE and change the 1 to 0.
8097 GenTree* Compiler::optIsBoolCond(GenTree* condBranch, GenTree** compPtr, bool* boolPtr)
8099 bool isBool = false;
8101 noway_assert(condBranch->gtOper == GT_JTRUE);
8102 GenTree* cond = condBranch->gtOp.gtOp1;
8104 /* The condition must be "!= 0" or "== 0" */
8106 if ((cond->gtOper != GT_EQ) && (cond->gtOper != GT_NE))
8111 /* Return the compare node to the caller */
8115 /* Get hold of the comparands */
8117 GenTree* opr1 = cond->gtOp.gtOp1;
8118 GenTree* opr2 = cond->gtOp.gtOp2;
8120 if (opr2->gtOper != GT_CNS_INT)
8125 if (!opr2->IsIntegralConst(0) && !opr2->IsIntegralConst(1))
8130 ssize_t ival2 = opr2->gtIntCon.gtIconVal;
8132 /* Is the value a boolean?
8133 * We can either have a boolean expression (marked GTF_BOOLEAN) or
8134 * a local variable that is marked as being boolean (lvIsBoolean) */
8136 if (opr1->gtFlags & GTF_BOOLEAN)
8140 else if ((opr1->gtOper == GT_CNS_INT) && (opr1->IsIntegralConst(0) || opr1->IsIntegralConst(1)))
8144 else if (opr1->gtOper == GT_LCL_VAR)
8146 /* is it a boolean local variable */
8148 unsigned lclNum = opr1->gtLclVarCommon.gtLclNum;
8149 noway_assert(lclNum < lvaCount);
8151 if (lvaTable[lclNum].lvIsBoolean)
8157 /* Was our comparison against the constant 1 (i.e. true) */
8160 // If this is a boolean expression tree we can reverse the relop
8161 // and change the true to false.
8164 gtReverseCond(cond);
8165 opr2->gtIntCon.gtIconVal = 0;
8177 void Compiler::optOptimizeBools()
8182 printf("*************** In optOptimizeBools()\n");
8185 printf("Blocks/Trees before phase\n");
8186 fgDispBasicBlocks(true);
8196 for (BasicBlock* b1 = fgFirstBB; b1; b1 = b1->bbNext)
8198 /* We're only interested in conditional jumps here */
8200 if (b1->bbJumpKind != BBJ_COND)
8205 /* If there is no next block, we're done */
8207 BasicBlock* b2 = b1->bbNext;
8213 /* The next block must not be marked as BBF_DONT_REMOVE */
8214 if (b2->bbFlags & BBF_DONT_REMOVE)
8219 /* The next block also needs to be a condition */
8221 if (b2->bbJumpKind != BBJ_COND)
8224 optOptimizeBoolsGcStress(b1);
8229 bool sameTarget; // Do b1 and b2 have the same bbJumpDest?
8231 if (b1->bbJumpDest == b2->bbJumpDest)
8233 /* Given the following sequence of blocks :
8237 we wil try to fold it to :
8238 B1: brtrue(t1|t2, BX)
8244 else if (b1->bbJumpDest == b2->bbNext) /*b1->bbJumpDest->bbNum == n1+2*/
8246 /* Given the following sequence of blocks :
8250 we will try to fold it to :
8251 B1: brtrue((!t1)&&t2, B3)
8262 /* The second block must contain a single statement */
8264 GenTreePtr s2 = b2->bbTreeList;
8265 if (s2->gtPrev != s2)
8270 noway_assert(s2->gtOper == GT_STMT);
8271 GenTreePtr t2 = s2->gtStmt.gtStmtExpr;
8272 noway_assert(t2->gtOper == GT_JTRUE);
8274 /* Find the condition for the first block */
8276 GenTreePtr s1 = b1->bbTreeList->gtPrev;
8278 noway_assert(s1->gtOper == GT_STMT);
8279 GenTreePtr t1 = s1->gtStmt.gtStmtExpr;
8280 noway_assert(t1->gtOper == GT_JTRUE);
8282 if (b2->countOfInEdges() > 1)
8287 /* Find the branch conditions of b1 and b2 */
8291 GenTreePtr c1 = optIsBoolCond(t1, &t1, &bool1);
8297 GenTreePtr c2 = optIsBoolCond(t2, &t2, &bool2);
8303 noway_assert(t1->gtOper == GT_EQ || t1->gtOper == GT_NE && t1->gtOp.gtOp1 == c1);
8304 noway_assert(t2->gtOper == GT_EQ || t2->gtOper == GT_NE && t2->gtOp.gtOp1 == c2);
8306 // Leave out floats where the bit-representation is more complicated
8307 // - there are two representations for 0.
8309 if (varTypeIsFloating(c1->TypeGet()) || varTypeIsFloating(c2->TypeGet()))
8314 // Make sure the types involved are of the same sizes
8315 if (genTypeSize(c1->TypeGet()) != genTypeSize(c2->TypeGet()))
8319 if (genTypeSize(t1->TypeGet()) != genTypeSize(t2->TypeGet()))
8323 #ifdef _TARGET_ARMARCH_
8324 // Skip the small operand which we cannot encode.
8325 if (varTypeIsSmall(c1->TypeGet()))
8328 /* The second condition must not contain side effects */
8330 if (c2->gtFlags & GTF_GLOB_EFFECT)
8335 /* The second condition must not be too expensive */
8339 if (c2->gtCostEx > 12)
8346 var_types foldType = c1->TypeGet();
8347 if (varTypeIsGC(foldType))
8349 foldType = TYP_I_IMPL;
8354 /* Both conditions must be the same */
8356 if (t1->gtOper != t2->gtOper)
8361 if (t1->gtOper == GT_EQ)
8363 /* t1:c1==0 t2:c2==0 ==> Branch to BX if either value is 0
8364 So we will branch to BX if (c1&c2)==0 */
8371 /* t1:c1!=0 t2:c2!=0 ==> Branch to BX if either value is non-0
8372 So we will branch to BX if (c1|c2)!=0 */
8380 /* The b1 condition must be the reverse of the b2 condition */
8382 if (t1->gtOper == t2->gtOper)
8387 if (t1->gtOper == GT_EQ)
8389 /* t1:c1==0 t2:c2!=0 ==> Branch to BX if both values are non-0
8390 So we will branch to BX if (c1&c2)!=0 */
8397 /* t1:c1!=0 t2:c2==0 ==> Branch to BX if both values are 0
8398 So we will branch to BX if (c1|c2)==0 */
8405 // Anding requires both values to be 0 or 1
8407 if ((foldOp == GT_AND) && (!bool1 || !bool2))
8413 // Now update the trees
8415 GenTreePtr cmpOp1 = gtNewOperNode(foldOp, foldType, c1, c2);
8418 /* When we 'OR'/'AND' two booleans, the result is boolean as well */
8419 cmpOp1->gtFlags |= GTF_BOOLEAN;
8423 t1->gtOp.gtOp1 = cmpOp1;
8424 t1->gtOp.gtOp2->gtType = foldType; // Could have been varTypeIsGC()
8426 #if FEATURE_SET_FLAGS
8427 // For comparisons against zero we will have the GTF_SET_FLAGS set
8428 // and this can cause an assert to fire in fgMoveOpsLeft(GenTreePtr tree)
8429 // during the CSE phase.
8431 // So make sure to clear any GTF_SET_FLAGS bit on these operations
8432 // as they are no longer feeding directly into a comparisons against zero
8434 // Make sure that the GTF_SET_FLAGS bit is cleared.
8435 // Fix 388436 ARM JitStress WP7
8436 c1->gtFlags &= ~GTF_SET_FLAGS;
8437 c2->gtFlags &= ~GTF_SET_FLAGS;
8439 // The new top level node that we just created does feed directly into
8440 // a comparison against zero, so set the GTF_SET_FLAGS bit so that
8441 // we generate an instuction that sets the flags, which allows us
8442 // to omit the cmp with zero instruction.
8444 // Request that the codegen for cmpOp1 sets the condition flags
8445 // when it generates the code for cmpOp1.
8447 cmpOp1->gtRequestSetFlags();
8450 flowList* edge1 = fgGetPredForBlock(b1->bbJumpDest, b1);
8453 /* Modify the target of the conditional jump and update bbRefs and bbPreds */
8457 edge2 = fgGetPredForBlock(b2->bbJumpDest, b2);
8461 edge2 = fgGetPredForBlock(b2->bbNext, b2);
8463 fgRemoveRefPred(b1->bbJumpDest, b1);
8465 b1->bbJumpDest = b2->bbJumpDest;
8467 fgAddRefPred(b2->bbJumpDest, b1);
8470 noway_assert(edge1 != nullptr);
8471 noway_assert(edge2 != nullptr);
8473 BasicBlock::weight_t edgeSumMin = edge1->flEdgeWeightMin + edge2->flEdgeWeightMin;
8474 BasicBlock::weight_t edgeSumMax = edge1->flEdgeWeightMax + edge2->flEdgeWeightMax;
8475 if ((edgeSumMax >= edge1->flEdgeWeightMax) && (edgeSumMax >= edge2->flEdgeWeightMax))
8477 edge1->flEdgeWeightMin = edgeSumMin;
8478 edge1->flEdgeWeightMax = edgeSumMax;
8482 edge1->flEdgeWeightMin = BB_ZERO_WEIGHT;
8483 edge1->flEdgeWeightMax = BB_MAX_WEIGHT;
8486 /* Get rid of the second block (which is a BBJ_COND) */
8488 noway_assert(b1->bbJumpKind == BBJ_COND);
8489 noway_assert(b2->bbJumpKind == BBJ_COND);
8490 noway_assert(b1->bbJumpDest == b2->bbJumpDest);
8491 noway_assert(b1->bbNext == b2);
8492 noway_assert(b2->bbNext);
8495 b2->bbFlags |= BBF_REMOVED;
8497 // If b2 was the last block of a try or handler, update the EH table.
8499 ehUpdateForDeletedBlock(b2);
8501 /* Update bbRefs and bbPreds */
8503 /* Replace pred 'b2' for 'b2->bbNext' with 'b1'
8504 * Remove pred 'b2' for 'b2->bbJumpDest' */
8506 fgReplacePred(b2->bbNext, b2, b1);
8508 fgRemoveRefPred(b2->bbJumpDest, b2);
8510 /* Update the block numbers and try again */
8522 // Update loop table
8523 fgUpdateLoopsAfterCompacting(b1, b2);
8528 printf("Folded %sboolean conditions of BB%02u and BB%02u to :\n", c2->OperIsLeaf() ? "" : "non-leaf ",
8529 b1->bbNum, b2->bbNum);
8538 fgDebugCheckBBlist();