src/jit/optimizer.cpp

   1 // Licensed to the .NET Foundation under one or more agreements.
   2 // The .NET Foundation licenses this file to you under the MIT license.
   3 // See the LICENSE file in the project root for more information.
   4
   5 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
   6 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
   7 XX                                                                           XX
   8 XX                              Optimizer                                    XX
   9 XX                                                                           XX
  10 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
  11 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
  12 */
  13
  14 #include "jitpch.h"
  15 #ifdef _MSC_VER
  16 #pragma hdrstop
  17 #pragma warning(disable : 4701)
  18 #endif
  19
  20 /*****************************************************************************/
  21
  22 #if COUNT_RANGECHECKS
  23 /* static */
  24 unsigned Compiler::optRangeChkRmv = 0;
  25 /* static */
  26 unsigned Compiler::optRangeChkAll = 0;
  27 #endif
  28
  29 /*****************************************************************************/
  30
  31 void Compiler::optInit()
  32 {
  33     optLoopsMarked = false;
  34     fgHasLoops     = false;
  35
  36     /* Initialize the # of tracked loops to 0 */
  37     optLoopCount = 0;
  38     /* Keep track of the number of calls and indirect calls made by this method */
  39     optCallCount         = 0;
  40     optIndirectCallCount = 0;
  41     optNativeCallCount   = 0;
  42     optAssertionCount    = 0;
  43     optAssertionDep      = nullptr;
  44 #if FEATURE_ANYCSE
  45     optCSECandidateTotal = 0;
  46     optCSEstart          = UINT_MAX;
  47     optCSEcount          = 0;
  48 #endif // FEATURE_ANYCSE
  49 }
  50
  51 DataFlow::DataFlow(Compiler* pCompiler) : m_pCompiler(pCompiler)
  52 {
  53 }
  54
  55 /*****************************************************************************
  56  *
  57  */
  58
  59 void Compiler::optSetBlockWeights()
  60 {
  61     noway_assert(!opts.MinOpts() && !opts.compDbgCode);
  62     assert(fgDomsComputed);
  63
  64 #ifdef DEBUG
  65     bool changed = false;
  66 #endif
  67
  68     bool firstBBdomsRets = true;
  69
  70     BasicBlock* block;
  71
  72     for (block = fgFirstBB; (block != nullptr); block = block->bbNext)
  73     {
  74         /* Blocks that can't be reached via the first block are rarely executed */
  75         if (!fgReachable(fgFirstBB, block))
  76         {
  77             block->bbSetRunRarely();
  78         }
  79
  80         if (block->bbWeight != BB_ZERO_WEIGHT)
  81         {
  82             // Calculate our bbWeight:
  83             //
  84             //  o BB_UNITY_WEIGHT if we dominate all BBJ_RETURN blocks
  85             //  o otherwise BB_UNITY_WEIGHT / 2
  86             //
  87             bool domsRets = true; // Assume that we will dominate
  88
  89             for (BasicBlockList* retBlocks = fgReturnBlocks; retBlocks != nullptr; retBlocks = retBlocks->next)
  90             {
  91                 if (!fgDominate(block, retBlocks->block))
  92                 {
  93                     domsRets = false;
  94                     break;
  95                 }
  96             }
  97
  98             if (block == fgFirstBB)
  99             {
 100                 firstBBdomsRets = domsRets;
 101             }
 102
 103             // If we are not using profile weight then we lower the weight
 104             // of blocks that do not dominate a return block
 105             //
 106             if (firstBBdomsRets && (fgIsUsingProfileWeights() == false) && (domsRets == false))
 107             {
 108 #if DEBUG
 109                 changed = true;
 110 #endif
 111                 block->modifyBBWeight(block->bbWeight / 2);
 112                 noway_assert(block->bbWeight);
 113             }
 114         }
 115     }
 116
 117 #if DEBUG
 118     if (changed && verbose)
 119     {
 120         printf("\nAfter optSetBlockWeights:\n");
 121         fgDispBasicBlocks();
 122         printf("\n");
 123     }
 124
 125     /* Check that the flowgraph data (bbNum, bbRefs, bbPreds) is up-to-date */
 126     fgDebugCheckBBlist();
 127 #endif
 128 }
 129
 130 /*****************************************************************************
 131  *
 132  *  Marks the blocks between 'begBlk' and 'endBlk' as part of a loop.
 133  */
 134
 135 void Compiler::optMarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk, bool excludeEndBlk)
 136 {
 137     /* Calculate the 'loopWeight',
 138        this is the amount to increase each block in the loop
 139        Our heuristic is that loops are weighted eight times more
 140        than straight line code.
 141        Thus we increase each block by 7 times the weight of
 142        the loop header block,
 143        if the loops are all properly formed gives us:
 144        (assuming that BB_LOOP_WEIGHT is 8)
 145
 146           1 -- non loop basic block
 147           8 -- single loop nesting
 148          64 -- double loop nesting
 149         512 -- triple loop nesting
 150
 151     */
 152
 153     noway_assert(begBlk->bbNum <= endBlk->bbNum);
 154     noway_assert(begBlk->isLoopHead());
 155     noway_assert(fgReachable(begBlk, endBlk));
 156
 157 #ifdef DEBUG
 158     if (verbose)
 159     {
 160         printf("\nMarking loop L%02u", begBlk->bbLoopNum);
 161     }
 162 #endif
 163
 164     noway_assert(!opts.MinOpts());
 165
 166     /* Build list of backedges for block begBlk */
 167     flowList* backedgeList = nullptr;
 168
 169     for (flowList* pred = begBlk->bbPreds; pred != nullptr; pred = pred->flNext)
 170     {
 171         /* Is this a backedge? */
 172         if (pred->flBlock->bbNum >= begBlk->bbNum)
 173         {
 174             flowList* flow = new (this, CMK_FlowList) flowList();
 175
 176 #if MEASURE_BLOCK_SIZE
 177             genFlowNodeCnt += 1;
 178             genFlowNodeSize += sizeof(flowList);
 179 #endif // MEASURE_BLOCK_SIZE
 180
 181             flow->flNext  = backedgeList;
 182             flow->flBlock = pred->flBlock;
 183             backedgeList  = flow;
 184         }
 185     }
 186
 187     /* At least one backedge must have been found (the one from endBlk) */
 188     noway_assert(backedgeList);
 189
 190     BasicBlock* curBlk = begBlk;
 191
 192     while (true)
 193     {
 194         noway_assert(curBlk);
 195
 196         // For curBlk to be part of a loop that starts at begBlk
 197         // curBlk must be reachable from begBlk and (since this is a loop)
 198         // likewise begBlk must be reachable from curBlk.
 199         //
 200
 201         if (fgReachable(curBlk, begBlk) && fgReachable(begBlk, curBlk))
 202         {
 203             /* If this block reaches any of the backedge blocks we set reachable   */
 204             /* If this block dominates any of the backedge blocks we set dominates */
 205             bool reachable = false;
 206             bool dominates = false;
 207
 208             for (flowList* tmp = backedgeList; tmp != nullptr; tmp = tmp->flNext)
 209             {
 210                 BasicBlock* backedge = tmp->flBlock;
 211
 212                 if (!curBlk->isRunRarely())
 213                 {
 214                     reachable |= fgReachable(curBlk, backedge);
 215                     dominates |= fgDominate(curBlk, backedge);
 216
 217                     if (dominates && reachable)
 218                     {
 219                         break;
 220                     }
 221                 }
 222             }
 223
 224             if (reachable)
 225             {
 226                 noway_assert(curBlk->bbWeight > BB_ZERO_WEIGHT);
 227
 228                 unsigned weight;
 229
 230                 if (curBlk->hasProfileWeight())
 231                 {
 232                     // We have real profile weights, so we aren't going to change this blocks weight
 233                     weight = curBlk->bbWeight;
 234                 }
 235                 else
 236                 {
 237                     if (dominates)
 238                     {
 239                         weight = curBlk->bbWeight * BB_LOOP_WEIGHT;
 240                     }
 241                     else
 242                     {
 243                         weight = curBlk->bbWeight * (BB_LOOP_WEIGHT / 2);
 244                     }
 245
 246                     //
 247                     // The multiplication may have caused us to overflow
 248                     //
 249                     if (weight < curBlk->bbWeight)
 250                     {
 251                         // The multiplication caused us to overflow
 252                         weight = BB_MAX_WEIGHT;
 253                     }
 254                     //
 255                     //  Set the new weight
 256                     //
 257                     curBlk->modifyBBWeight(weight);
 258                 }
 259 #ifdef DEBUG
 260                 if (verbose)
 261                 {
 262                     printf("\n    BB%02u(wt=%s)", curBlk->bbNum, refCntWtd2str(curBlk->getBBWeight(this)));
 263                 }
 264 #endif
 265             }
 266         }
 267
 268         /* Stop if we've reached the last block in the loop */
 269
 270         if (curBlk == endBlk)
 271         {
 272             break;
 273         }
 274
 275         curBlk = curBlk->bbNext;
 276
 277         /* If we are excluding the endBlk then stop if we've reached endBlk */
 278
 279         if (excludeEndBlk && (curBlk == endBlk))
 280         {
 281             break;
 282         }
 283     }
 284 }
 285
 286 /*****************************************************************************
 287  *
 288  *   Unmark the blocks between 'begBlk' and 'endBlk' as part of a loop.
 289  */
 290
 291 void Compiler::optUnmarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk)
 292 {
 293     /* A set of blocks that were previously marked as a loop are now
 294        to be unmarked, since we have decided that for some reason this
 295        loop no longer exists.
 296        Basically we are just reseting the blocks bbWeight to their
 297        previous values.
 298     */
 299
 300     noway_assert(begBlk->bbNum <= endBlk->bbNum);
 301     noway_assert(begBlk->isLoopHead());
 302
 303     noway_assert(!opts.MinOpts());
 304
 305     BasicBlock* curBlk;
 306     unsigned    backEdgeCount = 0;
 307
 308     for (flowList* pred = begBlk->bbPreds; pred != nullptr; pred = pred->flNext)
 309     {
 310         curBlk = pred->flBlock;
 311
 312         /* is this a backward edge? (from curBlk to begBlk) */
 313
 314         if (begBlk->bbNum > curBlk->bbNum)
 315         {
 316             continue;
 317         }
 318
 319         /* We only consider back-edges that are BBJ_COND or BBJ_ALWAYS for loops */
 320
 321         if ((curBlk->bbJumpKind != BBJ_COND) && (curBlk->bbJumpKind != BBJ_ALWAYS))
 322         {
 323             continue;
 324         }
 325
 326         backEdgeCount++;
 327     }
 328
 329     /* Only unmark the loop blocks if we have exactly one loop back edge */
 330     if (backEdgeCount != 1)
 331     {
 332 #ifdef DEBUG
 333         if (verbose)
 334         {
 335             if (backEdgeCount > 0)
 336             {
 337                 printf("\nNot removing loop L%02u, due to an additional back edge", begBlk->bbLoopNum);
 338             }
 339             else if (backEdgeCount == 0)
 340             {
 341                 printf("\nNot removing loop L%02u, due to no back edge", begBlk->bbLoopNum);
 342             }
 343         }
 344 #endif
 345         return;
 346     }
 347     noway_assert(backEdgeCount == 1);
 348     noway_assert(fgReachable(begBlk, endBlk));
 349
 350 #ifdef DEBUG
 351     if (verbose)
 352     {
 353         printf("\nUnmarking loop L%02u", begBlk->bbLoopNum);
 354     }
 355 #endif
 356
 357     curBlk = begBlk;
 358     while (true)
 359     {
 360         noway_assert(curBlk);
 361
 362         // For curBlk to be part of a loop that starts at begBlk
 363         // curBlk must be reachable from begBlk and (since this is a loop)
 364         // likewise begBlk must be reachable from curBlk.
 365         //
 366         if (!curBlk->isRunRarely() && fgReachable(curBlk, begBlk) && fgReachable(begBlk, curBlk))
 367         {
 368             unsigned weight = curBlk->bbWeight;
 369
 370             // Don't unmark blocks that are set to BB_MAX_WEIGHT
 371             // Don't unmark blocks when we are using profile weights
 372             //
 373             if (!curBlk->isMaxBBWeight() && !curBlk->hasProfileWeight())
 374             {
 375                 if (!fgDominate(curBlk, endBlk))
 376                 {
 377                     weight *= 2;
 378                 }
 379                 else
 380                 {
 381                     /* Merging of blocks can disturb the Dominates
 382                        information (see RAID #46649) */
 383                     if (weight < BB_LOOP_WEIGHT)
 384                     {
 385                         weight *= 2;
 386                     }
 387                 }
 388
 389                 // We can overflow here so check for it
 390                 if (weight < curBlk->bbWeight)
 391                 {
 392                     weight = BB_MAX_WEIGHT;
 393                 }
 394
 395                 assert(weight >= BB_LOOP_WEIGHT);
 396
 397                 curBlk->modifyBBWeight(weight / BB_LOOP_WEIGHT);
 398             }
 399
 400 #ifdef DEBUG
 401             if (verbose)
 402             {
 403                 printf("\n    BB%02u(wt=%s)", curBlk->bbNum, refCntWtd2str(curBlk->getBBWeight(this)));
 404             }
 405 #endif
 406         }
 407         /* Stop if we've reached the last block in the loop */
 408
 409         if (curBlk == endBlk)
 410         {
 411             break;
 412         }
 413
 414         curBlk = curBlk->bbNext;
 415
 416         /* Stop if we go past the last block in the loop, as it may have been deleted */
 417         if (curBlk->bbNum > endBlk->bbNum)
 418         {
 419             break;
 420         }
 421     }
 422 }
 423
 424 /*****************************************************************************************************
 425  *
 426  *  Function called to update the loop table and bbWeight before removing a block
 427  */
 428
 429 void Compiler::optUpdateLoopsBeforeRemoveBlock(BasicBlock* block, bool skipUnmarkLoop)
 430 {
 431     if (!optLoopsMarked)
 432     {
 433         return;
 434     }
 435
 436     noway_assert(!opts.MinOpts());
 437
 438     bool removeLoop = false;
 439
 440     /* If an unreachable block was part of a loop entry or bottom then the loop is unreachable */
 441     /* Special case: the block was the head of a loop - or pointing to a loop entry */
 442
 443     for (unsigned loopNum = 0; loopNum < optLoopCount; loopNum++)
 444     {
 445         /* Some loops may have been already removed by
 446          * loop unrolling or conditional folding */
 447
 448         if (optLoopTable[loopNum].lpFlags & LPFLG_REMOVED)
 449         {
 450             continue;
 451         }
 452
 453         if (block == optLoopTable[loopNum].lpEntry || block == optLoopTable[loopNum].lpBottom)
 454         {
 455             optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED;
 456             continue;
 457         }
 458
 459 #ifdef DEBUG
 460         if (verbose)
 461         {
 462             printf("\nUpdateLoopsBeforeRemoveBlock Before: ");
 463             optPrintLoopInfo(loopNum);
 464         }
 465 #endif
 466
 467         /* If the loop is still in the table
 468          * any block in the loop must be reachable !!! */
 469
 470         noway_assert(optLoopTable[loopNum].lpEntry != block);
 471         noway_assert(optLoopTable[loopNum].lpBottom != block);
 472
 473         if (optLoopTable[loopNum].lpExit == block)
 474         {
 475             optLoopTable[loopNum].lpExit = nullptr;
 476             optLoopTable[loopNum].lpFlags &= ~LPFLG_ONE_EXIT;
 477             ;
 478         }
 479
 480         /* If this points to the actual entry in the loop
 481          * then the whole loop may become unreachable */
 482
 483         switch (block->bbJumpKind)
 484         {
 485             unsigned     jumpCnt;
 486             BasicBlock** jumpTab;
 487
 488             case BBJ_NONE:
 489             case BBJ_COND:
 490                 if (block->bbNext == optLoopTable[loopNum].lpEntry)
 491                 {
 492                     removeLoop = true;
 493                     break;
 494                 }
 495                 if (block->bbJumpKind == BBJ_NONE)
 496                 {
 497                     break;
 498                 }
 499
 500                 __fallthrough;
 501
 502             case BBJ_ALWAYS:
 503                 noway_assert(block->bbJumpDest);
 504                 if (block->bbJumpDest == optLoopTable[loopNum].lpEntry)
 505                 {
 506                     removeLoop = true;
 507                 }
 508                 break;
 509
 510             case BBJ_SWITCH:
 511                 jumpCnt = block->bbJumpSwt->bbsCount;
 512                 jumpTab = block->bbJumpSwt->bbsDstTab;
 513
 514                 do
 515                 {
 516                     noway_assert(*jumpTab);
 517                     if ((*jumpTab) == optLoopTable[loopNum].lpEntry)
 518                     {
 519                         removeLoop = true;
 520                     }
 521                 } while (++jumpTab, --jumpCnt);
 522                 break;
 523
 524             default:
 525                 break;
 526         }
 527
 528         if (removeLoop)
 529         {
 530             /* Check if the entry has other predecessors outside the loop
 531              * TODO: Replace this when predecessors are available */
 532
 533             BasicBlock* auxBlock;
 534             for (auxBlock = fgFirstBB; auxBlock; auxBlock = auxBlock->bbNext)
 535             {
 536                 /* Ignore blocks in the loop */
 537
 538                 if (auxBlock->bbNum > optLoopTable[loopNum].lpHead->bbNum &&
 539                     auxBlock->bbNum <= optLoopTable[loopNum].lpBottom->bbNum)
 540                 {
 541                     continue;
 542                 }
 543
 544                 switch (auxBlock->bbJumpKind)
 545                 {
 546                     unsigned     jumpCnt;
 547                     BasicBlock** jumpTab;
 548
 549                     case BBJ_NONE:
 550                     case BBJ_COND:
 551                         if (auxBlock->bbNext == optLoopTable[loopNum].lpEntry)
 552                         {
 553                             removeLoop = false;
 554                             break;
 555                         }
 556                         if (auxBlock->bbJumpKind == BBJ_NONE)
 557                         {
 558                             break;
 559                         }
 560
 561                         __fallthrough;
 562
 563                     case BBJ_ALWAYS:
 564                         noway_assert(auxBlock->bbJumpDest);
 565                         if (auxBlock->bbJumpDest == optLoopTable[loopNum].lpEntry)
 566                         {
 567                             removeLoop = false;
 568                         }
 569                         break;
 570
 571                     case BBJ_SWITCH:
 572                         jumpCnt = auxBlock->bbJumpSwt->bbsCount;
 573                         jumpTab = auxBlock->bbJumpSwt->bbsDstTab;
 574
 575                         do
 576                         {
 577                             noway_assert(*jumpTab);
 578                             if ((*jumpTab) == optLoopTable[loopNum].lpEntry)
 579                             {
 580                                 removeLoop = false;
 581                             }
 582                         } while (++jumpTab, --jumpCnt);
 583                         break;
 584
 585                     default:
 586                         break;
 587                 }
 588             }
 589
 590             if (removeLoop)
 591             {
 592                 optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED;
 593             }
 594         }
 595         else if (optLoopTable[loopNum].lpHead == block)
 596         {
 597             /* The loop has a new head - Just update the loop table */
 598             optLoopTable[loopNum].lpHead = block->bbPrev;
 599         }
 600
 601 #ifdef DEBUG
 602         if (verbose)
 603         {
 604             printf("\nUpdateLoopsBeforeRemoveBlock After: ");
 605             optPrintLoopInfo(loopNum);
 606         }
 607 #endif
 608     }
 609
 610     if ((skipUnmarkLoop == false) && ((block->bbJumpKind == BBJ_ALWAYS) || (block->bbJumpKind == BBJ_COND)) &&
 611         (block->bbJumpDest->isLoopHead()) && (block->bbJumpDest->bbNum <= block->bbNum) && fgDomsComputed &&
 612         (fgCurBBEpochSize == fgDomBBcount + 1) && fgReachable(block->bbJumpDest, block))
 613     {
 614         optUnmarkLoopBlocks(block->bbJumpDest, block);
 615     }
 616 }
 617
 618 #ifdef DEBUG
 619
 620 /*****************************************************************************
 621  *
 622  *  Given the beginBlock of the loop, return the index of this loop
 623  *  to the loop table.
 624  */
 625
 626 unsigned Compiler::optFindLoopNumberFromBeginBlock(BasicBlock* begBlk)
 627 {
 628     unsigned lnum = 0;
 629
 630     for (lnum = 0; lnum < optLoopCount; lnum++)
 631     {
 632         if (optLoopTable[lnum].lpHead->bbNext == begBlk)
 633         {
 634             // Found the loop.
 635             return lnum;
 636         }
 637     }
 638
 639     noway_assert(!"Loop number not found.");
 640
 641     return optLoopCount;
 642 }
 643
 644 /*****************************************************************************
 645  *
 646  *  Print loop info in an uniform way.
 647  */
 648
 649 void Compiler::optPrintLoopInfo(unsigned      loopInd,
 650                                 BasicBlock*   lpHead,
 651                                 BasicBlock*   lpFirst,
 652                                 BasicBlock*   lpTop,
 653                                 BasicBlock*   lpEntry,
 654                                 BasicBlock*   lpBottom,
 655                                 unsigned char lpExitCnt,
 656                                 BasicBlock*   lpExit,
 657                                 unsigned      parentLoop)
 658 {
 659     noway_assert(lpHead);
 660
 661     //
 662     // NOTE: we take "loopInd" as an argument instead of using the one
 663     //       stored in begBlk->bbLoopNum because sometimes begBlk->bbLoopNum
 664     //       has not be set correctly. For example, in optRecordLoop().
 665     //       However, in most of the cases, loops should have been recorded.
 666     //       Therefore the correct way is to call the Compiler::optPrintLoopInfo(unsigned lnum)
 667     //       version of this method.
 668     //
 669     printf("L%02u, from BB%02u", loopInd, lpFirst->bbNum);
 670     if (lpTop != lpFirst)
 671     {
 672         printf(" (loop top is BB%02u)", lpTop->bbNum);
 673     }
 674
 675     printf(" to BB%02u (Head=BB%02u, Entry=BB%02u, ExitCnt=%d", lpBottom->bbNum, lpHead->bbNum, lpEntry->bbNum,
 676            lpExitCnt);
 677
 678     if (lpExitCnt == 1)
 679     {
 680         printf(" at BB%02u", lpExit->bbNum);
 681     }
 682
 683     if (parentLoop != BasicBlock::NOT_IN_LOOP)
 684     {
 685         printf(", parent loop = L%02u", parentLoop);
 686     }
 687     printf(")");
 688 }
 689
 690 /*****************************************************************************
 691  *
 692  *  Print loop information given the index of the loop in the loop table.
 693  */
 694
 695 void Compiler::optPrintLoopInfo(unsigned lnum)
 696 {
 697     noway_assert(lnum < optLoopCount);
 698
 699     LoopDsc* ldsc = &optLoopTable[lnum]; // lnum is the INDEX to the loop table.
 700
 701     optPrintLoopInfo(lnum, ldsc->lpHead, ldsc->lpFirst, ldsc->lpTop, ldsc->lpEntry, ldsc->lpBottom, ldsc->lpExitCnt,
 702                      ldsc->lpExit, ldsc->lpParent);
 703 }
 704
 705 #endif
 706
 707 //------------------------------------------------------------------------
 708 // optPopulateInitInfo: Populate loop init info in the loop table.
 709 //
 710 // Arguments:
 711 //     init     -  the tree that is supposed to initialize the loop iterator.
 712 //     iterVar  -  loop iteration variable.
 713 //
 714 // Return Value:
 715 //     "false" if the loop table could not be populated with the loop iterVar init info.
 716 //
 717 // Operation:
 718 //     The 'init' tree is checked if its lhs is a local and rhs is either
 719 //     a const or a local.
 720 //
 721 bool Compiler::optPopulateInitInfo(unsigned loopInd, GenTree* init, unsigned iterVar)
 722 {
 723     // Operator should be =
 724     if (init->gtOper != GT_ASG)
 725     {
 726         return false;
 727     }
 728
 729     GenTree* lhs = init->gtOp.gtOp1;
 730     GenTree* rhs = init->gtOp.gtOp2;
 731     // LHS has to be local and should equal iterVar.
 732     if (lhs->gtOper != GT_LCL_VAR || lhs->gtLclVarCommon.gtLclNum != iterVar)
 733     {
 734         return false;
 735     }
 736
 737     // RHS can be constant or local var.
 738     // TODO-CQ: CLONE: Add arr length for descending loops.
 739     if (rhs->gtOper == GT_CNS_INT && rhs->TypeGet() == TYP_INT)
 740     {
 741         optLoopTable[loopInd].lpFlags |= LPFLG_CONST_INIT;
 742         optLoopTable[loopInd].lpConstInit = (int)rhs->gtIntCon.gtIconVal;
 743     }
 744     else if (rhs->gtOper == GT_LCL_VAR)
 745     {
 746         optLoopTable[loopInd].lpFlags |= LPFLG_VAR_INIT;
 747         optLoopTable[loopInd].lpVarInit = rhs->gtLclVarCommon.gtLclNum;
 748     }
 749     else
 750     {
 751         return false;
 752     }
 753     return true;
 754 }
 755
 756 //----------------------------------------------------------------------------------
 757 // optCheckIterInLoopTest: Check if iter var is used in loop test.
 758 //
 759 // Arguments:
 760 //      test          "jtrue" tree or an asg of the loop iter termination condition
 761 //      from/to       blocks (beg, end) which are part of the loop.
 762 //      iterVar       loop iteration variable.
 763 //      loopInd       loop index.
 764 //
 765 //  Operation:
 766 //      The test tree is parsed to check if "iterVar" matches the lhs of the condition
 767 //      and the rhs limit is extracted from the "test" tree. The limit information is
 768 //      added to the loop table.
 769 //
 770 //  Return Value:
 771 //      "false" if the loop table could not be populated with the loop test info or
 772 //      if the test condition doesn't involve iterVar.
 773 //
 774 bool Compiler::optCheckIterInLoopTest(
 775     unsigned loopInd, GenTree* test, BasicBlock* from, BasicBlock* to, unsigned iterVar)
 776 {
 777     // Obtain the relop from the "test" tree.
 778     GenTree* relop;
 779     if (test->gtOper == GT_JTRUE)
 780     {
 781         relop = test->gtGetOp1();
 782     }
 783     else
 784     {
 785         assert(test->gtOper == GT_ASG);
 786         relop = test->gtGetOp2();
 787     }
 788
 789     noway_assert(relop->OperKind() & GTK_RELOP);
 790
 791     GenTree* opr1 = relop->gtOp.gtOp1;
 792     GenTree* opr2 = relop->gtOp.gtOp2;
 793
 794     GenTree* iterOp;
 795     GenTree* limitOp;
 796
 797     // Make sure op1 or op2 is the iterVar.
 798     if (opr1->gtOper == GT_LCL_VAR && opr1->gtLclVarCommon.gtLclNum == iterVar)
 799     {
 800         iterOp  = opr1;
 801         limitOp = opr2;
 802     }
 803     else if (opr2->gtOper == GT_LCL_VAR && opr2->gtLclVarCommon.gtLclNum == iterVar)
 804     {
 805         iterOp  = opr2;
 806         limitOp = opr1;
 807     }
 808     else
 809     {
 810         return false;
 811     }
 812
 813     if (iterOp->gtType != TYP_INT)
 814     {
 815         return false;
 816     }
 817
 818     // Mark the iterator node.
 819     iterOp->gtFlags |= GTF_VAR_ITERATOR;
 820
 821     // Check what type of limit we have - constant, variable or arr-len.
 822     if (limitOp->gtOper == GT_CNS_INT)
 823     {
 824         optLoopTable[loopInd].lpFlags |= LPFLG_CONST_LIMIT;
 825         if ((limitOp->gtFlags & GTF_ICON_SIMD_COUNT) != 0)
 826         {
 827             optLoopTable[loopInd].lpFlags |= LPFLG_SIMD_LIMIT;
 828         }
 829     }
 830     else if (limitOp->gtOper == GT_LCL_VAR && !optIsVarAssigned(from, to, nullptr, limitOp->gtLclVarCommon.gtLclNum))
 831     {
 832         optLoopTable[loopInd].lpFlags |= LPFLG_VAR_LIMIT;
 833     }
 834     else if (limitOp->gtOper == GT_ARR_LENGTH)
 835     {
 836         optLoopTable[loopInd].lpFlags |= LPFLG_ARRLEN_LIMIT;
 837     }
 838     else
 839     {
 840         return false;
 841     }
 842     // Save the type of the comparison between the iterator and the limit.
 843     optLoopTable[loopInd].lpTestTree = relop;
 844     return true;
 845 }
 846
 847 //----------------------------------------------------------------------------------
 848 // optIsLoopIncrTree: Check if loop is a tree of form v += 1 or v = v + 1
 849 //
 850 // Arguments:
 851 //      incr        The incr tree to be checked. Whether incr tree is
 852 //                  oper-equal(+=, -=...) type nodes or v=v+1 type ASG nodes.
 853 //
 854 //  Operation:
 855 //      The test tree is parsed to check if "iterVar" matches the lhs of the condition
 856 //      and the rhs limit is extracted from the "test" tree. The limit information is
 857 //      added to the loop table.
 858 //
 859 //  Return Value:
 860 //      iterVar local num if the iterVar is found, otherwise BAD_VAR_NUM.
 861 //
 862 unsigned Compiler::optIsLoopIncrTree(GenTree* incr)
 863 {
 864     GenTree*   incrVal;
 865     genTreeOps updateOper;
 866     unsigned   iterVar = incr->IsLclVarUpdateTree(&incrVal, &updateOper);
 867     if (iterVar != BAD_VAR_NUM)
 868     {
 869         // We have v = v op y type asg node.
 870         switch (updateOper)
 871         {
 872             case GT_ADD:
 873             case GT_SUB:
 874             case GT_MUL:
 875             case GT_RSH:
 876             case GT_LSH:
 877                 break;
 878             default:
 879                 return BAD_VAR_NUM;
 880         }
 881
 882         // Increment should be by a const int.
 883         // TODO-CQ: CLONE: allow variable increments.
 884         if ((incrVal->gtOper != GT_CNS_INT) || (incrVal->TypeGet() != TYP_INT))
 885         {
 886             return BAD_VAR_NUM;
 887         }
 888     }
 889
 890     return iterVar;
 891 }
 892
 893 //----------------------------------------------------------------------------------
 894 // optComputeIterInfo: Check tree is loop increment of a lcl that is loop-invariant.
 895 //
 896 // Arguments:
 897 //      from, to    - are blocks (beg, end) which are part of the loop.
 898 //      incr        - tree that increments the loop iterator. v+=1 or v=v+1.
 899 //      pIterVar    - see return value.
 900 //
 901 //  Return Value:
 902 //      Returns true if iterVar "v" can be returned in "pIterVar", otherwise returns
 903 //      false.
 904 //
 905 //  Operation:
 906 //      Check if the "incr" tree is a "v=v+1 or v+=1" type tree and make sure it is not
 907 //      assigned in the loop.
 908 //
 909 bool Compiler::optComputeIterInfo(GenTree* incr, BasicBlock* from, BasicBlock* to, unsigned* pIterVar)
 910 {
 911
 912     unsigned iterVar = optIsLoopIncrTree(incr);
 913     if (iterVar == BAD_VAR_NUM)
 914     {
 915         return false;
 916     }
 917     if (optIsVarAssigned(from, to, incr, iterVar))
 918     {
 919         JITDUMP("iterVar is assigned in loop\n");
 920         return false;
 921     }
 922
 923     *pIterVar = iterVar;
 924     return true;
 925 }
 926
 927 //----------------------------------------------------------------------------------
 928 // optIsLoopTestEvalIntoTemp:
 929 //      Pattern match if the test tree is computed into a tmp
 930 //      and the "tmp" is used as jump condition for loop termination.
 931 //
 932 // Arguments:
 933 //      testStmt    - is the JTRUE statement that is of the form: jmpTrue (Vtmp != 0)
 934 //                    where Vtmp contains the actual loop test result.
 935 //      newStmt     - contains the statement that is the actual test stmt involving
 936 //                    the loop iterator.
 937 //
 938 //  Return Value:
 939 //      Returns true if a new test tree can be obtained.
 940 //
 941 //  Operation:
 942 //      Scan if the current stmt is a jtrue with (Vtmp != 0) as condition
 943 //      Then returns the rhs for def of Vtmp as the "test" node.
 944 //
 945 //  Note:
 946 //      This method just retrieves what it thinks is the "test" node,
 947 //      the callers are expected to verify that "iterVar" is used in the test.
 948 //
 949 bool Compiler::optIsLoopTestEvalIntoTemp(GenTree* testStmt, GenTree** newTest)
 950 {
 951     GenTree* test = testStmt->gtStmt.gtStmtExpr;
 952
 953     if (test->gtOper != GT_JTRUE)
 954     {
 955         return false;
 956     }
 957
 958     GenTree* relop = test->gtGetOp1();
 959     noway_assert(relop->OperIsCompare());
 960
 961     GenTree* opr1 = relop->gtOp.gtOp1;
 962     GenTree* opr2 = relop->gtOp.gtOp2;
 963
 964     // Make sure we have jtrue (vtmp != 0)
 965     if ((relop->OperGet() == GT_NE) && (opr1->OperGet() == GT_LCL_VAR) && (opr2->OperGet() == GT_CNS_INT) &&
 966         opr2->IsIntegralConst(0))
 967     {
 968         // Get the previous statement to get the def (rhs) of Vtmp to see
 969         // if the "test" is evaluated into Vtmp.
 970         GenTree* prevStmt = testStmt->gtPrev;
 971         if (prevStmt == nullptr)
 972         {
 973             return false;
 974         }
 975
 976         GenTree* tree = prevStmt->gtStmt.gtStmtExpr;
 977         if (tree->OperGet() == GT_ASG)
 978         {
 979             GenTree* lhs = tree->gtOp.gtOp1;
 980             GenTree* rhs = tree->gtOp.gtOp2;
 981
 982             // Return as the new test node.
 983             if (lhs->gtOper == GT_LCL_VAR && lhs->AsLclVarCommon()->GetLclNum() == opr1->AsLclVarCommon()->GetLclNum())
 984             {
 985                 if (rhs->OperIsCompare())
 986                 {
 987                     *newTest = prevStmt;
 988                     return true;
 989                 }
 990             }
 991         }
 992     }
 993     return false;
 994 }
 995
 996 //----------------------------------------------------------------------------------
 997 // optExtractInitTestIncr:
 998 //      Extract the "init", "test" and "incr" nodes of the loop.
 999 //
1000 // Arguments:
1001 //      head    - Loop head block
1002 //      bottom  - Loop bottom block
1003 //      top     - Loop top block
1004 //      ppInit  - The init stmt of the loop if found.
1005 //      ppTest  - The test stmt of the loop if found.
1006 //      ppIncr  - The incr stmt of the loop if found.
1007 //
1008 //  Return Value:
1009 //      The results are put in "ppInit", "ppTest" and "ppIncr" if the method
1010 //      returns true. Returns false if the information can't be extracted.
1011 //
1012 //  Operation:
1013 //      Check if the "test" stmt is last stmt in the loop "bottom". If found good,
1014 //      "test" stmt is found. Try to find the "incr" stmt. Check previous stmt of
1015 //      "test" to get the "incr" stmt. If it is not found it could be a loop of the
1016 //      below form.
1017 //
1018 //                     +-------<-----------------<-----------+
1019 //                     |                                     |
1020 //                     v                                     |
1021 //      BBinit(head) -> BBcond(top) -> BBLoopBody(bottom) ---^
1022 //
1023 //      Check if the "incr" tree is present in the loop "top" node as the last stmt.
1024 //      Also check if the "test" tree is assigned to a tmp node and the tmp is used
1025 //      in the jtrue condition.
1026 //
1027 //  Note:
1028 //      This method just retrieves what it thinks is the "test" node,
1029 //      the callers are expected to verify that "iterVar" is used in the test.
1030 //
1031 bool Compiler::optExtractInitTestIncr(
1032     BasicBlock* head, BasicBlock* bottom, BasicBlock* top, GenTree** ppInit, GenTree** ppTest, GenTree** ppIncr)
1033 {
1034     assert(ppInit != nullptr);
1035     assert(ppTest != nullptr);
1036     assert(ppIncr != nullptr);
1037
1038     // Check if last two statements in the loop body are the increment of the iterator
1039     // and the loop termination test.
1040     noway_assert(bottom->bbTreeList != nullptr);
1041     GenTree* test = bottom->bbTreeList->gtPrev;
1042     noway_assert(test != nullptr && test->gtNext == nullptr);
1043
1044     GenTree* newTest;
1045     if (optIsLoopTestEvalIntoTemp(test, &newTest))
1046     {
1047         test = newTest;
1048     }
1049
1050     // Check if we have the incr tree before the test tree, if we don't,
1051     // check if incr is part of the loop "top".
1052     GenTree* incr = test->gtPrev;
1053     if (incr == nullptr || optIsLoopIncrTree(incr->gtStmt.gtStmtExpr) == BAD_VAR_NUM)
1054     {
1055         if (top == nullptr || top->bbTreeList == nullptr || top->bbTreeList->gtPrev == nullptr)
1056         {
1057             return false;
1058         }
1059
1060         // If the prev stmt to loop test is not incr, then check if we have loop test evaluated into a tmp.
1061         GenTree* topLast = top->bbTreeList->gtPrev;
1062         if (optIsLoopIncrTree(topLast->gtStmt.gtStmtExpr) != BAD_VAR_NUM)
1063         {
1064             incr = topLast;
1065         }
1066         else
1067         {
1068             return false;
1069         }
1070     }
1071
1072     assert(test != incr);
1073
1074     // Find the last statement in the loop pre-header which we expect to be the initialization of
1075     // the loop iterator.
1076     GenTree* phdr = head->bbTreeList;
1077     if (phdr == nullptr)
1078     {
1079         return false;
1080     }
1081
1082     GenTree* init = phdr->gtPrev;
1083     noway_assert(init != nullptr && (init->gtNext == nullptr));
1084
1085     // If it is a duplicated loop condition, skip it.
1086     if (init->gtFlags & GTF_STMT_CMPADD)
1087     {
1088         bool doGetPrev = true;
1089 #ifdef DEBUG
1090         if (opts.optRepeat)
1091         {
1092             // Previous optimization passes may have inserted compiler-generated
1093             // statements other than duplicated loop conditions.
1094             doGetPrev = (init->gtPrev != nullptr);
1095         }
1096         else
1097         {
1098             // Must be a duplicated loop condition.
1099             noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
1100         }
1101 #endif // DEBUG
1102         if (doGetPrev)
1103         {
1104             init = init->gtPrev;
1105         }
1106         noway_assert(init != nullptr);
1107     }
1108
1109     noway_assert(init->gtOper == GT_STMT);
1110     noway_assert(test->gtOper == GT_STMT);
1111     noway_assert(incr->gtOper == GT_STMT);
1112
1113     *ppInit = init->gtStmt.gtStmtExpr;
1114     *ppTest = test->gtStmt.gtStmtExpr;
1115     *ppIncr = incr->gtStmt.gtStmtExpr;
1116
1117     return true;
1118 }
1119
1120 /*****************************************************************************
1121  *
1122  *  Record the loop in the loop table.  Return true if successful, false if
1123  *  out of entries in loop table.
1124  */
1125
1126 bool Compiler::optRecordLoop(BasicBlock*   head,
1127                              BasicBlock*   first,
1128                              BasicBlock*   top,
1129                              BasicBlock*   entry,
1130                              BasicBlock*   bottom,
1131                              BasicBlock*   exit,
1132                              unsigned char exitCnt)
1133 {
1134     // Record this loop in the table, if there's room.
1135
1136     assert(optLoopCount <= MAX_LOOP_NUM);
1137     if (optLoopCount == MAX_LOOP_NUM)
1138     {
1139 #if COUNT_LOOPS
1140         loopOverflowThisMethod = true;
1141 #endif
1142         return false;
1143     }
1144
1145     // Assumed preconditions on the loop we're adding.
1146     assert(first->bbNum <= top->bbNum);
1147     assert(top->bbNum <= entry->bbNum);
1148     assert(entry->bbNum <= bottom->bbNum);
1149     assert(head->bbNum < top->bbNum || head->bbNum > bottom->bbNum);
1150
1151     // If the new loop contains any existing ones, add it in the right place.
1152     unsigned char loopInd = optLoopCount;
1153     for (unsigned char prevPlus1 = optLoopCount; prevPlus1 > 0; prevPlus1--)
1154     {
1155         unsigned char prev = prevPlus1 - 1;
1156         if (optLoopTable[prev].lpContainedBy(first, bottom))
1157         {
1158             loopInd = prev;
1159         }
1160     }
1161     // Move up any loops if necessary.
1162     for (unsigned j = optLoopCount; j > loopInd; j--)
1163     {
1164         optLoopTable[j] = optLoopTable[j - 1];
1165     }
1166
1167 #ifdef DEBUG
1168     for (unsigned i = loopInd + 1; i < optLoopCount; i++)
1169     {
1170         // The loop is well-formed.
1171         assert(optLoopTable[i].lpWellFormed());
1172         // Check for disjoint.
1173         if (optLoopTable[i].lpDisjoint(first, bottom))
1174         {
1175             continue;
1176         }
1177         // Otherwise, assert complete containment (of optLoopTable[i] in new loop).
1178         assert(optLoopTable[i].lpContainedBy(first, bottom));
1179     }
1180 #endif // DEBUG
1181
1182     optLoopTable[loopInd].lpHead    = head;
1183     optLoopTable[loopInd].lpFirst   = first;
1184     optLoopTable[loopInd].lpTop     = top;
1185     optLoopTable[loopInd].lpBottom  = bottom;
1186     optLoopTable[loopInd].lpEntry   = entry;
1187     optLoopTable[loopInd].lpExit    = exit;
1188     optLoopTable[loopInd].lpExitCnt = exitCnt;
1189
1190     optLoopTable[loopInd].lpParent  = BasicBlock::NOT_IN_LOOP;
1191     optLoopTable[loopInd].lpChild   = BasicBlock::NOT_IN_LOOP;
1192     optLoopTable[loopInd].lpSibling = BasicBlock::NOT_IN_LOOP;
1193
1194     optLoopTable[loopInd].lpFlags = 0;
1195
1196     // We haven't yet recorded any side effects.
1197     for (MemoryKind memoryKind : allMemoryKinds())
1198     {
1199         optLoopTable[loopInd].lpLoopHasMemoryHavoc[memoryKind] = false;
1200     }
1201     optLoopTable[loopInd].lpFieldsModified         = nullptr;
1202     optLoopTable[loopInd].lpArrayElemTypesModified = nullptr;
1203
1204     // If DO-WHILE loop mark it as such.
1205     if (head->bbNext == entry)
1206     {
1207         optLoopTable[loopInd].lpFlags |= LPFLG_DO_WHILE;
1208     }
1209
1210     // If single exit loop mark it as such.
1211     if (exitCnt == 1)
1212     {
1213         noway_assert(exit);
1214         optLoopTable[loopInd].lpFlags |= LPFLG_ONE_EXIT;
1215     }
1216
1217     //
1218     // Try to find loops that have an iterator (i.e. for-like loops) "for (init; test; incr){ ... }"
1219     // We have the following restrictions:
1220     //     1. The loop condition must be a simple one i.e. only one JTRUE node
1221     //     2. There must be a loop iterator (a local var) that is
1222     //        incremented (decremented or lsh, rsh, mul) with a constant value
1223     //     3. The iterator is incremented exactly once
1224     //     4. The loop condition must use the iterator.
1225     //
1226     if (bottom->bbJumpKind == BBJ_COND)
1227     {
1228         GenTree* init;
1229         GenTree* test;
1230         GenTree* incr;
1231         if (!optExtractInitTestIncr(head, bottom, top, &init, &test, &incr))
1232         {
1233             goto DONE_LOOP;
1234         }
1235
1236         unsigned iterVar = BAD_VAR_NUM;
1237         if (!optComputeIterInfo(incr, head->bbNext, bottom, &iterVar))
1238         {
1239             goto DONE_LOOP;
1240         }
1241
1242         // Make sure the "iterVar" initialization is never skipped,
1243         // i.e. every pred of ENTRY other than HEAD is in the loop.
1244         for (flowList* predEdge = entry->bbPreds; predEdge; predEdge = predEdge->flNext)
1245         {
1246             BasicBlock* predBlock = predEdge->flBlock;
1247             if ((predBlock != head) && !optLoopTable[loopInd].lpContains(predBlock))
1248             {
1249                 goto DONE_LOOP;
1250             }
1251         }
1252
1253         if (!optPopulateInitInfo(loopInd, init, iterVar))
1254         {
1255             goto DONE_LOOP;
1256         }
1257
1258         // Check that the iterator is used in the loop condition.
1259         if (!optCheckIterInLoopTest(loopInd, test, head->bbNext, bottom, iterVar))
1260         {
1261             goto DONE_LOOP;
1262         }
1263
1264         // We know the loop has an iterator at this point ->flag it as LPFLG_ITER
1265         // Record the iterator, the pointer to the test node
1266         // and the initial value of the iterator (constant or local var)
1267         optLoopTable[loopInd].lpFlags |= LPFLG_ITER;
1268
1269         // Record iterator.
1270         optLoopTable[loopInd].lpIterTree = incr;
1271
1272 #if COUNT_LOOPS
1273         // Save the initial value of the iterator - can be lclVar or constant
1274         // Flag the loop accordingly.
1275
1276         iterLoopCount++;
1277 #endif
1278
1279 #if COUNT_LOOPS
1280         simpleTestLoopCount++;
1281 #endif
1282
1283         // Check if a constant iteration loop.
1284         if ((optLoopTable[loopInd].lpFlags & LPFLG_CONST_INIT) && (optLoopTable[loopInd].lpFlags & LPFLG_CONST_LIMIT))
1285         {
1286             // This is a constant loop.
1287             optLoopTable[loopInd].lpFlags |= LPFLG_CONST;
1288 #if COUNT_LOOPS
1289             constIterLoopCount++;
1290 #endif
1291         }
1292
1293 #ifdef DEBUG
1294         if (verbose && 0)
1295         {
1296             printf("\nConstant loop initializer:\n");
1297             gtDispTree(init);
1298
1299             printf("\nConstant loop body:\n");
1300
1301             BasicBlock* block = head;
1302             do
1303             {
1304                 block = block->bbNext;
1305                 for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
1306                 {
1307                     if (stmt->gtStmt.gtStmtExpr == incr)
1308                     {
1309                         break;
1310                     }
1311                     printf("\n");
1312                     gtDispTree(stmt->gtStmt.gtStmtExpr);
1313                 }
1314             } while (block != bottom);
1315         }
1316 #endif // DEBUG
1317     }
1318
1319 DONE_LOOP:
1320     DBEXEC(verbose, optPrintLoopRecording(loopInd));
1321     optLoopCount++;
1322     return true;
1323 }
1324
1325 #ifdef DEBUG
1326 //------------------------------------------------------------------------
1327 // optPrintLoopRecording: Print a recording of the loop.
1328 //
1329 // Arguments:
1330 //      loopInd     - loop index.
1331 //
1332 void Compiler::optPrintLoopRecording(unsigned loopInd)
1333 {
1334     printf("Recorded loop %s", (loopInd != optLoopCount ? "(extended) " : ""));
1335     optPrintLoopInfo(optLoopCount, // Not necessarily the loop index, but the number of loops that have been added.
1336                      optLoopTable[loopInd].lpHead, optLoopTable[loopInd].lpFirst, optLoopTable[loopInd].lpTop,
1337                      optLoopTable[loopInd].lpEntry, optLoopTable[loopInd].lpBottom, optLoopTable[loopInd].lpExitCnt,
1338                      optLoopTable[loopInd].lpExit);
1339
1340     // If an iterator loop print the iterator and the initialization.
1341     if (optLoopTable[loopInd].lpFlags & LPFLG_ITER)
1342     {
1343         printf(" [over V%02u", optLoopTable[loopInd].lpIterVar());
1344         printf(" (");
1345         printf(GenTree::OpName(optLoopTable[loopInd].lpIterOper()));
1346         printf(" ");
1347         printf("%d )", optLoopTable[loopInd].lpIterConst());
1348
1349         if (optLoopTable[loopInd].lpFlags & LPFLG_CONST_INIT)
1350         {
1351             printf(" from %d", optLoopTable[loopInd].lpConstInit);
1352         }
1353         if (optLoopTable[loopInd].lpFlags & LPFLG_VAR_INIT)
1354         {
1355             printf(" from V%02u", optLoopTable[loopInd].lpVarInit);
1356         }
1357
1358         // If a simple test condition print operator and the limits */
1359         printf(GenTree::OpName(optLoopTable[loopInd].lpTestOper()));
1360
1361         if (optLoopTable[loopInd].lpFlags & LPFLG_CONST_LIMIT)
1362         {
1363             printf("%d ", optLoopTable[loopInd].lpConstLimit());
1364         }
1365
1366         if (optLoopTable[loopInd].lpFlags & LPFLG_VAR_LIMIT)
1367         {
1368             printf("V%02u ", optLoopTable[loopInd].lpVarLimit());
1369         }
1370
1371         printf("]");
1372     }
1373
1374     printf("\n");
1375 }
1376
1377 void Compiler::optCheckPreds()
1378 {
1379     BasicBlock* block;
1380     BasicBlock* blockPred;
1381     flowList*   pred;
1382
1383     for (block = fgFirstBB; block; block = block->bbNext)
1384     {
1385         for (pred = block->bbPreds; pred; pred = pred->flNext)
1386         {
1387             // make sure this pred is part of the BB list
1388             for (blockPred = fgFirstBB; blockPred; blockPred = blockPred->bbNext)
1389             {
1390                 if (blockPred == pred->flBlock)
1391                 {
1392                     break;
1393                 }
1394             }
1395             noway_assert(blockPred);
1396             switch (blockPred->bbJumpKind)
1397             {
1398                 case BBJ_COND:
1399                     if (blockPred->bbJumpDest == block)
1400                     {
1401                         break;
1402                     }
1403                     __fallthrough;
1404                 case BBJ_NONE:
1405                     noway_assert(blockPred->bbNext == block);
1406                     break;
1407                 case BBJ_EHFILTERRET:
1408                 case BBJ_ALWAYS:
1409                 case BBJ_EHCATCHRET:
1410                     noway_assert(blockPred->bbJumpDest == block);
1411                     break;
1412                 default:
1413                     break;
1414             }
1415         }
1416     }
1417 }
1418
1419 #endif // DEBUG
1420
1421 namespace
1422 {
1423 //------------------------------------------------------------------------
1424 // LoopSearch: Class that handles scanning a range of blocks to detect a loop,
1425 //             moving blocks to make the loop body contiguous, and recording
1426 //             the loop.
1427 //
1428 // We will use the following terminology:
1429 //   HEAD    - the basic block that flows into the loop ENTRY block (Currently MUST be lexically before entry).
1430 //             Not part of the looping of the loop.
1431 //   FIRST   - the lexically first basic block (in bbNext order) within this loop.
1432 //   TOP     - the target of the backward edge from BOTTOM. In most cases FIRST and TOP are the same.
1433 //   BOTTOM  - the lexically last block in the loop (i.e. the block from which we jump to the top)
1434 //   EXIT    - the predecessor of loop's unique exit edge, if it has a unique exit edge; else nullptr
1435 //   ENTRY   - the entry in the loop (not necessarly the TOP), but there must be only one entry
1436 //
1437 //   We (currently) require the body of a loop to be a contiguous (in bbNext order) sequence of basic blocks.
1438 //   When the loop is identified, blocks will be moved out to make it a compact contiguous region if possible,
1439 //   and in cases where compaction is not possible, we'll subsequently treat all blocks in the lexical range
1440 //   between TOP and BOTTOM as part of the loop even if they aren't part of the SCC.
1441 //   Regarding nesting:  Since a given block can only have one back-edge (we only detect loops with back-edges
1442 //   from BBJ_COND or BBJ_ALWAYS blocks), no two loops will share the same BOTTOM.  Two loops may share the
1443 //   same FIRST/TOP/ENTRY as reported by LoopSearch, and optCanonicalizeLoopNest will subsequently re-write
1444 //   the CFG so that no two loops share the same FIRST/TOP/ENTRY anymore.
1445 //
1446 //        |
1447 //        v
1448 //      head
1449 //        |
1450 //        |  top/first <--+
1451 //        |       |       |
1452 //        |      ...      |
1453 //        |       |       |
1454 //        |       v       |
1455 //        +---> entry     |
1456 //                |       |
1457 //               ...      |
1458 //                |       |
1459 //                v       |
1460 //         +-- exit/tail  |
1461 //         |      |       |
1462 //         |     ...      |
1463 //         |      |       |
1464 //         |      v       |
1465 //         |    bottom ---+
1466 //         |
1467 //         +------+
1468 //                |
1469 //                v
1470 //
1471 class LoopSearch
1472 {
1473
1474     // Keeping track of which blocks are in the loop requires two block sets since we may add blocks
1475     // as we go but the BlockSet type's max ID doesn't increase to accomodate them.  Define a helper
1476     // struct to make the ensuing code more readable.
1477     struct LoopBlockSet
1478     {
1479     private:
1480         // Keep track of blocks with bbNum <= oldBlockMaxNum in a regular BlockSet, since
1481         // it can hold all of them.
1482         BlockSet oldBlocksInLoop; // Blocks with bbNum <= oldBlockMaxNum
1483
1484         // Keep track of blocks with bbNum > oldBlockMaxNum in a separate BlockSet, but
1485         // indexing them by (blockNum - oldBlockMaxNum); since we won't generate more than
1486         // one new block per old block, this must be sufficient to track any new blocks.
1487         BlockSet newBlocksInLoop; // Blocks with bbNum > oldBlockMaxNum
1488
1489         Compiler*    comp;
1490         unsigned int oldBlockMaxNum;
1491
1492     public:
1493         LoopBlockSet(Compiler* comp)
1494             : oldBlocksInLoop(BlockSetOps::UninitVal())
1495             , newBlocksInLoop(BlockSetOps::UninitVal())
1496             , comp(comp)
1497             , oldBlockMaxNum(comp->fgBBNumMax)
1498         {
1499         }
1500
1501         void Reset(unsigned int seedBlockNum)
1502         {
1503             if (BlockSetOps::MayBeUninit(oldBlocksInLoop))
1504             {
1505                 // Either the block sets are uninitialized (and long), so we need to initialize
1506                 // them (and allocate their backing storage), or they are short and empty, so
1507                 // assigning MakeEmpty to them is as cheap as ClearD.
1508                 oldBlocksInLoop = BlockSetOps::MakeEmpty(comp);
1509                 newBlocksInLoop = BlockSetOps::MakeEmpty(comp);
1510             }
1511             else
1512             {
1513                 // We know the backing storage is already allocated, so just clear it.
1514                 BlockSetOps::ClearD(comp, oldBlocksInLoop);
1515                 BlockSetOps::ClearD(comp, newBlocksInLoop);
1516             }
1517             assert(seedBlockNum <= oldBlockMaxNum);
1518             BlockSetOps::AddElemD(comp, oldBlocksInLoop, seedBlockNum);
1519         }
1520
1521         bool CanRepresent(unsigned int blockNum)
1522         {
1523             // We can represent old blocks up to oldBlockMaxNum, and
1524             // new blocks up to 2 * oldBlockMaxNum.
1525             return (blockNum <= 2 * oldBlockMaxNum);
1526         }
1527
1528         bool IsMember(unsigned int blockNum)
1529         {
1530             if (blockNum > oldBlockMaxNum)
1531             {
1532                 return BlockSetOps::IsMember(comp, newBlocksInLoop, blockNum - oldBlockMaxNum);
1533             }
1534             return BlockSetOps::IsMember(comp, oldBlocksInLoop, blockNum);
1535         }
1536
1537         void Insert(unsigned int blockNum)
1538         {
1539             if (blockNum > oldBlockMaxNum)
1540             {
1541                 BlockSetOps::AddElemD(comp, newBlocksInLoop, blockNum - oldBlockMaxNum);
1542             }
1543             else
1544             {
1545                 BlockSetOps::AddElemD(comp, oldBlocksInLoop, blockNum);
1546             }
1547         }
1548
1549         bool TestAndInsert(unsigned int blockNum)
1550         {
1551             if (blockNum > oldBlockMaxNum)
1552             {
1553                 unsigned int shiftedNum = blockNum - oldBlockMaxNum;
1554                 if (!BlockSetOps::IsMember(comp, newBlocksInLoop, shiftedNum))
1555                 {
1556                     BlockSetOps::AddElemD(comp, newBlocksInLoop, shiftedNum);
1557                     return false;
1558                 }
1559             }
1560             else
1561             {
1562                 if (!BlockSetOps::IsMember(comp, oldBlocksInLoop, blockNum))
1563                 {
1564                     BlockSetOps::AddElemD(comp, oldBlocksInLoop, blockNum);
1565                     return false;
1566                 }
1567             }
1568             return true;
1569         }
1570     };
1571
1572     LoopBlockSet loopBlocks; // Set of blocks identified as part of the loop
1573     Compiler*    comp;
1574
1575     // See LoopSearch class comment header for a diagram relating these fields:
1576     BasicBlock* head;   // Predecessor of unique entry edge
1577     BasicBlock* first;  // Lexically first in-loop block
1578     BasicBlock* top;    // Successor of back-edge from BOTTOM
1579     BasicBlock* bottom; // Predecessor of back-edge to TOP, also lexically last in-loop block
1580     BasicBlock* entry;  // Successor of unique entry edge
1581
1582     BasicBlock*   lastExit;       // Most recently discovered exit block
1583     unsigned char exitCount;      // Number of discovered exit edges
1584     unsigned int  oldBlockMaxNum; // Used to identify new blocks created during compaction
1585     BlockSet      bottomBlocks;   // BOTTOM blocks of already-recorded loops
1586 #ifdef DEBUG
1587     bool forgotExit = false; // Flags a rare case where lastExit gets nulled out, for assertions
1588 #endif
1589     bool changedFlowGraph = false; // Signals that loop compaction has modified the flow graph
1590
1591 public:
1592     LoopSearch(Compiler* comp)
1593         : loopBlocks(comp), comp(comp), oldBlockMaxNum(comp->fgBBNumMax), bottomBlocks(BlockSetOps::MakeEmpty(comp))
1594     {
1595         // Make sure we've renumbered such that the bitsets can hold all the bits
1596         assert(comp->fgBBNumMax <= comp->fgCurBBEpochSize);
1597     }
1598
1599     //------------------------------------------------------------------------
1600     // RecordLoop: Notify the Compiler that a loop has been found.
1601     //
1602     // Return Value:
1603     //    true  - Loop successfully recorded.
1604     //    false - Compiler has run out of loop descriptors; loop not recorded.
1605     //
1606     bool RecordLoop()
1607     {
1608         /* At this point we have a compact loop - record it in the loop table
1609         * If we found only one exit, record it in the table too
1610         * (otherwise an exit = nullptr in the loop table means multiple exits) */
1611
1612         BasicBlock* onlyExit = (exitCount == 1 ? lastExit : nullptr);
1613         if (comp->optRecordLoop(head, first, top, entry, bottom, onlyExit, exitCount))
1614         {
1615             // Record the BOTTOM block for future reference before returning.
1616             assert(bottom->bbNum <= oldBlockMaxNum);
1617             BlockSetOps::AddElemD(comp, bottomBlocks, bottom->bbNum);
1618             return true;
1619         }
1620
1621         // Unable to record this loop because the loop descriptor table overflowed.
1622         return false;
1623     }
1624
1625     //------------------------------------------------------------------------
1626     // ChangedFlowGraph: Determine whether loop compaction has modified the flow graph.
1627     //
1628     // Return Value:
1629     //    true  - The flow graph has been modified; fgUpdateChangedFlowGraph should
1630     //            be called (which is the caller's responsibility).
1631     //    false - The flow graph has not been modified by this LoopSearch.
1632     //
1633     bool ChangedFlowGraph()
1634     {
1635         return changedFlowGraph;
1636     }
1637
1638     //------------------------------------------------------------------------
1639     // FindLoop: Search for a loop with the given HEAD block and back-edge.
1640     //
1641     // Arguments:
1642     //    head - Block to be the HEAD of any loop identified
1643     //    top - Block to be the TOP of any loop identified
1644     //    bottom - Block to be the BOTTOM of any loop identified
1645     //
1646     // Return Value:
1647     //    true  - Found a valid loop.
1648     //    false - Did not find a valid loop.
1649     //
1650     // Notes:
1651     //    May modify flow graph to make loop compact before returning.
1652     //    Will set instance fields to track loop's extent and exits if a valid
1653     //    loop is found, and potentially trash them otherwise.
1654     //
1655     bool FindLoop(BasicBlock* head, BasicBlock* top, BasicBlock* bottom)
1656     {
1657         /* Is this a loop candidate? - We look for "back edges", i.e. an edge from BOTTOM
1658         * to TOP (note that this is an abuse of notation since this is not necessarily a back edge
1659         * as the definition says, but merely an indication that we have a loop there).
1660         * Thus, we have to be very careful and after entry discovery check that it is indeed
1661         * the only place we enter the loop (especially for non-reducible flow graphs).
1662         */
1663
1664         if (top->bbNum > bottom->bbNum) // is this a backward edge? (from BOTTOM to TOP)
1665         {
1666             // Edge from BOTTOM to TOP is not a backward edge
1667             return false;
1668         }
1669
1670         if (bottom->bbNum > oldBlockMaxNum)
1671         {
1672             // Not a true back-edge; bottom is a block added to reconnect fall-through during
1673             // loop processing, so its block number does not reflect its position.
1674             return false;
1675         }
1676
1677         if ((bottom->bbJumpKind == BBJ_EHFINALLYRET) || (bottom->bbJumpKind == BBJ_EHFILTERRET) ||
1678             (bottom->bbJumpKind == BBJ_EHCATCHRET) || (bottom->bbJumpKind == BBJ_CALLFINALLY) ||
1679             (bottom->bbJumpKind == BBJ_SWITCH))
1680         {
1681             /* BBJ_EHFINALLYRET, BBJ_EHFILTERRET, BBJ_EHCATCHRET, and BBJ_CALLFINALLY can never form a loop.
1682             * BBJ_SWITCH that has a backward jump appears only for labeled break. */
1683             return false;
1684         }
1685
1686         /* The presence of a "back edge" is an indication that a loop might be present here
1687         *
1688         * LOOP:
1689         *        1. A collection of STRONGLY CONNECTED nodes i.e. there is a path from any
1690         *           node in the loop to any other node in the loop (wholly within the loop)
1691         *        2. The loop has a unique ENTRY, i.e. there is only one way to reach a node
1692         *           in the loop from outside the loop, and that is through the ENTRY
1693         */
1694
1695         /* Let's find the loop ENTRY */
1696         BasicBlock* entry = FindEntry(head, top, bottom);
1697
1698         if (entry == nullptr)
1699         {
1700             // For now, we only recognize loops where HEAD has some successor ENTRY in the loop.
1701             return false;
1702         }
1703
1704         // Passed the basic checks; initialize instance state for this back-edge.
1705         this->head      = head;
1706         this->top       = top;
1707         this->entry     = entry;
1708         this->bottom    = bottom;
1709         this->lastExit  = nullptr;
1710         this->exitCount = 0;
1711
1712         // Now we find the "first" block -- the earliest block reachable within the loop.
1713         // With our current algorithm, this is always the same as "top".
1714         this->first = top;
1715
1716         if (!HasSingleEntryCycle())
1717         {
1718             // There isn't actually a loop between TOP and BOTTOM
1719             return false;
1720         }
1721
1722         if (!loopBlocks.IsMember(top->bbNum))
1723         {
1724             // The "back-edge" we identified isn't actually part of the flow cycle containing ENTRY
1725             return false;
1726         }
1727
1728         // Disqualify loops where the first block of the loop is less nested in EH than
1729         // the bottom block. That is, we don't want to handle loops where the back edge
1730         // goes from within an EH region to a first block that is outside that same EH
1731         // region. Note that we *do* handle loops where the first block is the *first*
1732         // block of a more nested EH region (since it is legal to branch to the first
1733         // block of an immediately more nested EH region). So, for example, disqualify
1734         // this:
1735         //
1736         // BB02
1737         // ...
1738         // try {
1739         // ...
1740         // BB10 BBJ_COND => BB02
1741         // ...
1742         // }
1743         //
1744         // Here, BB10 is more nested than BB02.
1745
1746         if (bottom->hasTryIndex() && !comp->bbInTryRegions(bottom->getTryIndex(), first))
1747         {
1748             JITDUMP("Loop 'first' BB%02u is in an outer EH region compared to loop 'bottom' BB%02u. Rejecting "
1749                     "loop.\n",
1750                     first->bbNum, bottom->bbNum);
1751             return false;
1752         }
1753
1754 #if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
1755         // Disqualify loops where the first block of the loop is a finally target.
1756         // The main problem is when multiple loops share a 'first' block that is a finally
1757         // target and we canonicalize the loops by adding a new loop head. In that case, we
1758         // need to update the blocks so the finally target bit is moved to the newly created
1759         // block, and removed from the old 'first' block. This is 'hard', so at this point
1760         // in the RyuJIT codebase (when we don't expect to keep the "old" ARM32 code generator
1761         // long-term), it's easier to disallow the loop than to update the flow graph to
1762         // support this case.
1763
1764         if ((first->bbFlags & BBF_FINALLY_TARGET) != 0)
1765         {
1766             JITDUMP("Loop 'first' BB%02u is a finally target. Rejecting loop.\n", first->bbNum);
1767             return false;
1768         }
1769 #endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
1770
1771         // Compact the loop (sweep through it and move out any blocks that aren't part of the
1772         // flow cycle), and find the exits.
1773         if (!MakeCompactAndFindExits())
1774         {
1775             // Unable to preserve well-formed loop during compaction.
1776             return false;
1777         }
1778
1779         // We have a valid loop.
1780         return true;
1781     }
1782
1783 private:
1784     //------------------------------------------------------------------------
1785     // FindEntry: See if given HEAD flows to valid ENTRY between given TOP and BOTTOM
1786     //
1787     // Arguments:
1788     //    head - Block to be the HEAD of any loop identified
1789     //    top - Block to be the TOP of any loop identified
1790     //    bottom - Block to be the BOTTOM of any loop identified
1791     //
1792     // Return Value:
1793     //    Block to be the ENTRY of any loop identified, or nullptr if no
1794     //    such entry meeting our criteria can be found.
1795     //
1796     // Notes:
1797     //    Returns main entry if one is found, does not check for side-entries.
1798     //
1799     BasicBlock* FindEntry(BasicBlock* head, BasicBlock* top, BasicBlock* bottom)
1800     {
1801         if (head->bbJumpKind == BBJ_ALWAYS)
1802         {
1803             if (head->bbJumpDest->bbNum <= bottom->bbNum && head->bbJumpDest->bbNum >= top->bbNum)
1804             {
1805                 /* OK - we enter somewhere within the loop */
1806
1807                 /* some useful asserts
1808                 * Cannot enter at the top - should have being caught by redundant jumps */
1809
1810                 assert((head->bbJumpDest != top) || (head->bbFlags & BBF_KEEP_BBJ_ALWAYS));
1811
1812                 return head->bbJumpDest;
1813             }
1814             else
1815             {
1816                 /* special case - don't consider now */
1817                 // assert (!"Loop entered in weird way!");
1818                 return nullptr;
1819             }
1820         }
1821         // Can we fall through into the loop?
1822         else if (head->bbJumpKind == BBJ_NONE || head->bbJumpKind == BBJ_COND)
1823         {
1824             /* The ENTRY is at the TOP (a do-while loop) */
1825             return top;
1826         }
1827         else
1828         {
1829             return nullptr; // head does not flow into the loop bail for now
1830         }
1831     }
1832
1833     //------------------------------------------------------------------------
1834     // HasSingleEntryCycle: Perform a reverse flow walk from ENTRY, visiting
1835     //    only blocks between TOP and BOTTOM, to determine if such a cycle
1836     //    exists and if it has a single entry.
1837     //
1838     // Return Value:
1839     //    true  - Found a single-entry cycle.
1840     //    false - Did not find a single-entry cycle.
1841     //
1842     // Notes:
1843     //    Will mark (in `loopBlocks`) all blocks found to participate in the
1844     //    cycle.
1845     //
1846     bool HasSingleEntryCycle()
1847     {
1848         // Now do a backwards flow walk from entry to see if we have a single-entry loop
1849         bool foundCycle = false;
1850
1851         // Seed the loop block set and worklist with the entry block.
1852         loopBlocks.Reset(entry->bbNum);
1853         jitstd::list<BasicBlock*> worklist(comp->getAllocator());
1854         worklist.push_back(entry);
1855
1856         while (!worklist.empty())
1857         {
1858             BasicBlock* block = worklist.back();
1859             worklist.pop_back();
1860
1861             /* Make sure ENTRY dominates all blocks in the loop
1862             * This is necessary to ensure condition 2. above
1863             */
1864             if (block->bbNum > oldBlockMaxNum)
1865             {
1866                 // This is a new block we added to connect fall-through, so the
1867                 // recorded dominator information doesn't cover it.  Just continue,
1868                 // and when we process its unique predecessor we'll abort if ENTRY
1869                 // doesn't dominate that.
1870             }
1871             else if (!comp->fgDominate(entry, block))
1872             {
1873                 return false;
1874             }
1875
1876             // Add preds to the worklist, checking for side-entries.
1877             for (flowList* predIter = block->bbPreds; predIter != nullptr; predIter = predIter->flNext)
1878             {
1879                 BasicBlock* pred = predIter->flBlock;
1880
1881                 unsigned int testNum = PositionNum(pred);
1882
1883                 if ((testNum < top->bbNum) || (testNum > bottom->bbNum))
1884                 {
1885                     // Pred is out of loop range
1886                     if (block == entry)
1887                     {
1888                         if (pred == head)
1889                         {
1890                             // This is the single entry we expect.
1891                             continue;
1892                         }
1893                         // ENTRY has some pred other than head outside the loop.  If ENTRY does not
1894                         // dominate this pred, we'll consider this a side-entry and skip this loop;
1895                         // otherwise the loop is still valid and this may be a (flow-wise) back-edge
1896                         // of an outer loop.  For the dominance test, if `pred` is a new block, use
1897                         // its unique predecessor since the dominator tree has info for that.
1898                         BasicBlock* effectivePred = (pred->bbNum > oldBlockMaxNum ? pred->bbPrev : pred);
1899                         if (comp->fgDominate(entry, effectivePred))
1900                         {
1901                             // Outer loop back-edge
1902                             continue;
1903                         }
1904                     }
1905
1906                     // There are multiple entries to this loop, don't consider it.
1907                     return false;
1908                 }
1909
1910                 bool isFirstVisit;
1911                 if (pred == entry)
1912                 {
1913                     // We have indeed found a cycle in the flow graph.
1914                     isFirstVisit = !foundCycle;
1915                     foundCycle   = true;
1916                     assert(loopBlocks.IsMember(pred->bbNum));
1917                 }
1918                 else if (loopBlocks.TestAndInsert(pred->bbNum))
1919                 {
1920                     // Already visited this pred
1921                     isFirstVisit = false;
1922                 }
1923                 else
1924                 {
1925                     // Add this pred to the worklist
1926                     worklist.push_back(pred);
1927                     isFirstVisit = true;
1928                 }
1929
1930                 if (isFirstVisit && (pred->bbNext != nullptr) && (PositionNum(pred->bbNext) == pred->bbNum))
1931                 {
1932                     // We've created a new block immediately after `pred` to
1933                     // reconnect what was fall-through.  Mark it as in-loop also;
1934                     // it needs to stay with `prev` and if it exits the loop we'd
1935                     // just need to re-create it if we tried to move it out.
1936                     loopBlocks.Insert(pred->bbNext->bbNum);
1937                 }
1938             }
1939         }
1940
1941         return foundCycle;
1942     }
1943
1944     //------------------------------------------------------------------------
1945     // PositionNum: Get the number identifying a block's position per the
1946     //    lexical ordering that existed before searching for (and compacting)
1947     //    loops.
1948     //
1949     // Arguments:
1950     //    block - Block whose position is desired.
1951     //
1952     // Return Value:
1953     //    A number indicating that block's position relative to others.
1954     //
1955     // Notes:
1956     //    When the given block is a new one created during loop compaction,
1957     //    the number of its unique predecessor is returned.
1958     //
1959     unsigned int PositionNum(BasicBlock* block)
1960     {
1961         if (block->bbNum > oldBlockMaxNum)
1962         {
1963             // This must be a block we inserted to connect fall-through after moving blocks.
1964             // To determine if it's in the loop or not, use the number of its unique predecessor
1965             // block.
1966             assert(block->bbPreds->flBlock == block->bbPrev);
1967             assert(block->bbPreds->flNext == nullptr);
1968             return block->bbPrev->bbNum;
1969         }
1970         return block->bbNum;
1971     }
1972
1973     //------------------------------------------------------------------------
1974     // MakeCompactAndFindExits: Compact the loop (sweep through it and move out
1975     //   any blocks that aren't part of the flow cycle), and find the exits (set
1976     //   lastExit and exitCount).
1977     //
1978     // Return Value:
1979     //    true  - Loop successfully compacted (or `loopBlocks` expanded to
1980     //            include all blocks in the lexical range), exits enumerated.
1981     //    false - Loop cannot be made compact and remain well-formed.
1982     //
1983     bool MakeCompactAndFindExits()
1984     {
1985         // Compaction (if it needs to happen) will require an insertion point.
1986         BasicBlock* moveAfter = nullptr;
1987
1988         for (BasicBlock* previous = top->bbPrev; previous != bottom;)
1989         {
1990             BasicBlock* block = previous->bbNext;
1991
1992             if (loopBlocks.IsMember(block->bbNum))
1993             {
1994                 // This block is a member of the loop.  Check to see if it may exit the loop.
1995                 CheckForExit(block);
1996
1997                 // Done processing this block; move on to the next.
1998                 previous = block;
1999                 continue;
2000             }
2001
2002             // This blocks is lexically between TOP and BOTTOM, but it does not
2003             // participate in the flow cycle.  Check for a run of consecutive
2004             // such blocks.
2005             BasicBlock* lastNonLoopBlock = block;
2006             BasicBlock* nextLoopBlock    = block->bbNext;
2007             while (!loopBlocks.IsMember(nextLoopBlock->bbNum))
2008             {
2009                 lastNonLoopBlock = nextLoopBlock;
2010                 nextLoopBlock    = nextLoopBlock->bbNext;
2011                 // This loop must terminate because we know BOTTOM is in loopBlocks.
2012             }
2013
2014             // Choose an insertion point for non-loop blocks if we haven't yet done so.
2015             if (moveAfter == nullptr)
2016             {
2017                 moveAfter = FindInsertionPoint();
2018             }
2019
2020             if (!BasicBlock::sameEHRegion(previous, nextLoopBlock) || !BasicBlock::sameEHRegion(previous, moveAfter))
2021             {
2022                 // EH regions would be ill-formed if we moved these blocks out.
2023                 // See if we can consider them loop blocks without introducing
2024                 // a side-entry.
2025                 if (CanTreatAsLoopBlocks(block, lastNonLoopBlock))
2026                 {
2027                     // The call to `canTreatAsLoop` marked these blocks as part of the loop;
2028                     // iterate without updating `previous` so that we'll analyze them as part
2029                     // of the loop.
2030                     continue;
2031                 }
2032                 else
2033                 {
2034                     // We can't move these out of the loop or leave them in, so just give
2035                     // up on this loop.
2036                     return false;
2037                 }
2038             }
2039
2040             // Now physically move the blocks.
2041             BasicBlock* moveBefore = moveAfter->bbNext;
2042
2043             comp->fgUnlinkRange(block, lastNonLoopBlock);
2044             comp->fgMoveBlocksAfter(block, lastNonLoopBlock, moveAfter);
2045             comp->ehUpdateLastBlocks(moveAfter, lastNonLoopBlock);
2046
2047             // Apply any adjustments needed for fallthrough at the boundaries of the moved region.
2048             FixupFallThrough(moveAfter, moveBefore, block);
2049             FixupFallThrough(lastNonLoopBlock, nextLoopBlock, moveBefore);
2050             // Also apply any adjustments needed where the blocks were snipped out of the loop.
2051             BasicBlock* newBlock = FixupFallThrough(previous, block, nextLoopBlock);
2052             if (newBlock != nullptr)
2053             {
2054                 // This new block is in the loop and is a loop exit.
2055                 loopBlocks.Insert(newBlock->bbNum);
2056                 lastExit = newBlock;
2057                 ++exitCount;
2058             }
2059
2060             // Update moveAfter for the next insertion.
2061             moveAfter = lastNonLoopBlock;
2062
2063             // Note that we've changed the flow graph, and continue without updating
2064             // `previous` so that we'll process nextLoopBlock.
2065             changedFlowGraph = true;
2066         }
2067
2068         if ((exitCount == 1) && (lastExit == nullptr))
2069         {
2070             // If we happen to have a loop with two exits, one of which goes to an
2071             // infinite loop that's lexically nested inside it, where the inner loop
2072             // can't be moved out,  we can end up in this situation (because
2073             // CanTreatAsLoopBlocks will have decremented the count expecting to find
2074             // another exit later).  Bump the exit count to 2, since downstream code
2075             // will not be prepared for null lastExit with exitCount of 1.
2076             assert(forgotExit);
2077             exitCount = 2;
2078         }
2079
2080         // Loop compaction was successful
2081         return true;
2082     }
2083
2084     //------------------------------------------------------------------------
2085     // FindInsertionPoint: Find an appropriate spot to which blocks that are
2086     //    lexically between TOP and BOTTOM but not part of the flow cycle
2087     //    can be moved.
2088     //
2089     // Return Value:
2090     //    Block after which to insert moved blocks.
2091     //
2092     BasicBlock* FindInsertionPoint()
2093     {
2094         // Find an insertion point for blocks we're going to move.  Move them down
2095         // out of the loop, and if possible find a spot that won't break up fall-through.
2096         BasicBlock* moveAfter = bottom;
2097         while (moveAfter->bbFallsThrough())
2098         {
2099             // Keep looking for a better insertion point if we can.
2100             BasicBlock* newMoveAfter = TryAdvanceInsertionPoint(moveAfter);
2101
2102             if (newMoveAfter == nullptr)
2103             {
2104                 // Ran out of candidate insertion points, so just split up the fall-through.
2105                 return moveAfter;
2106             }
2107
2108             moveAfter = newMoveAfter;
2109         }
2110
2111         return moveAfter;
2112     }
2113
2114     //------------------------------------------------------------------------
2115     // TryAdvanceInsertionPoint: Find the next legal insertion point after
2116     //    the given one, if one exists.
2117     //
2118     // Arguments:
2119     //    oldMoveAfter - Prior insertion point; find the next after this.
2120     //
2121     // Return Value:
2122     //    The next block after `oldMoveAfter` that is a legal insertion point
2123     //    (i.e. blocks being swept out of the loop can be moved immediately
2124     //    after it), if one exists, else nullptr.
2125     //
2126     BasicBlock* TryAdvanceInsertionPoint(BasicBlock* oldMoveAfter)
2127     {
2128         BasicBlock* newMoveAfter = oldMoveAfter->bbNext;
2129
2130         if (!BasicBlock::sameEHRegion(oldMoveAfter, newMoveAfter))
2131         {
2132             // Don't cross an EH region boundary.
2133             return nullptr;
2134         }
2135
2136         if ((newMoveAfter->bbJumpKind == BBJ_ALWAYS) || (newMoveAfter->bbJumpKind == BBJ_COND))
2137         {
2138             unsigned int destNum = newMoveAfter->bbJumpDest->bbNum;
2139             if ((destNum >= top->bbNum) && (destNum <= bottom->bbNum) && !loopBlocks.IsMember(destNum))
2140             {
2141                 // Reversing this branch out of block `newMoveAfter` could confuse this algorithm
2142                 // (in particular, the edge would still be numerically backwards but no longer be
2143                 // lexically backwards, so a lexical forward walk from TOP would not find BOTTOM),
2144                 // so don't do that.
2145                 // We're checking for BBJ_ALWAYS and BBJ_COND only here -- we don't need to
2146                 // check for BBJ_SWITCH because we'd never consider it a loop back-edge.
2147                 return nullptr;
2148             }
2149         }
2150
2151         // Similarly check to see if advancing to `newMoveAfter` would reverse the lexical order
2152         // of an edge from the run of blocks being moved to `newMoveAfter` -- doing so would
2153         // introduce a new lexical back-edge, which could (maybe?) confuse the loop search
2154         // algorithm, and isn't desirable layout anyway.
2155         for (flowList* predIter = newMoveAfter->bbPreds; predIter != nullptr; predIter = predIter->flNext)
2156         {
2157             unsigned int predNum = predIter->flBlock->bbNum;
2158
2159             if ((predNum >= top->bbNum) && (predNum <= bottom->bbNum) && !loopBlocks.IsMember(predNum))
2160             {
2161                 // Don't make this forward edge a backwards edge.
2162                 return nullptr;
2163             }
2164         }
2165
2166         if (IsRecordedBottom(newMoveAfter))
2167         {
2168             // This is the BOTTOM of another loop; don't move any blocks past it, to avoid moving them
2169             // out of that loop (we should have already done so when processing that loop if it were legal).
2170             return nullptr;
2171         }
2172
2173         // Advancing the insertion point is ok, except that we can't split up any CallFinally/BBJ_ALWAYS
2174         // pair, so if we've got such a pair recurse to see if we can move past the whole thing.
2175         return (newMoveAfter->isBBCallAlwaysPair() ? TryAdvanceInsertionPoint(newMoveAfter) : newMoveAfter);
2176     }
2177
2178     //------------------------------------------------------------------------
2179     // isOuterBottom: Determine if the given block is the BOTTOM of a previously
2180     //    recorded loop.
2181     //
2182     // Arguments:
2183     //    block - Block to check for BOTTOM-ness.
2184     //
2185     // Return Value:
2186     //    true - The blocks was recorded as `bottom` of some earlier-processed loop.
2187     //    false - No loops yet recorded have this block as their `bottom`.
2188     //
2189     bool IsRecordedBottom(BasicBlock* block)
2190     {
2191         if (block->bbNum > oldBlockMaxNum)
2192         {
2193             // This is a new block, which can't be an outer bottom block because we only allow old blocks
2194             // as BOTTOM.
2195             return false;
2196         }
2197         return BlockSetOps::IsMember(comp, bottomBlocks, block->bbNum);
2198     }
2199
2200     //------------------------------------------------------------------------
2201     // CanTreatAsLoopBlocks: If the given range of blocks can be treated as
2202     //    loop blocks, add them to loopBlockSet and return true.  Otherwise,
2203     //    return false.
2204     //
2205     // Arguments:
2206     //    firstNonLoopBlock - First block in the run to be subsumed.
2207     //    lastNonLoopBlock - Last block in the run to be subsumed.
2208     //
2209     // Return Value:
2210     //    true - The blocks from `fistNonLoopBlock` to `lastNonLoopBlock` were
2211     //           successfully added to `loopBlocks`.
2212     //    false - Treating the blocks from `fistNonLoopBlock` to `lastNonLoopBlock`
2213     //            would not be legal (it would induce a side-entry).
2214     //
2215     // Notes:
2216     //    `loopBlocks` may be modified even if `false` is returned.
2217     //    `exitCount` and `lastExit` may be modified if this process identifies
2218     //    in-loop edges that were previously counted as exits.
2219     //
2220     bool CanTreatAsLoopBlocks(BasicBlock* firstNonLoopBlock, BasicBlock* lastNonLoopBlock)
2221     {
2222         BasicBlock* nextLoopBlock = lastNonLoopBlock->bbNext;
2223         for (BasicBlock* testBlock = firstNonLoopBlock; testBlock != nextLoopBlock; testBlock = testBlock->bbNext)
2224         {
2225             for (flowList* predIter = testBlock->bbPreds; predIter != nullptr; predIter = predIter->flNext)
2226             {
2227                 BasicBlock*  testPred           = predIter->flBlock;
2228                 unsigned int predPosNum         = PositionNum(testPred);
2229                 unsigned int firstNonLoopPosNum = PositionNum(firstNonLoopBlock);
2230                 unsigned int lastNonLoopPosNum  = PositionNum(lastNonLoopBlock);
2231
2232                 if (loopBlocks.IsMember(predPosNum) ||
2233                     ((predPosNum >= firstNonLoopPosNum) && (predPosNum <= lastNonLoopPosNum)))
2234                 {
2235                     // This pred is in the loop (or what will be the loop if we determine this
2236                     // run of exit blocks doesn't include a side-entry).
2237
2238                     if (predPosNum < firstNonLoopPosNum)
2239                     {
2240                         // We've already counted this block as an exit, so decrement the count.
2241                         --exitCount;
2242                         if (lastExit == testPred)
2243                         {
2244                             // Erase this now-bogus `lastExit` entry.
2245                             lastExit = nullptr;
2246                             INDEBUG(forgotExit = true);
2247                         }
2248                     }
2249                 }
2250                 else
2251                 {
2252                     // This pred is not in the loop, so this constitutes a side-entry.
2253                     return false;
2254                 }
2255             }
2256
2257             // Either we're going to abort the loop on a subsequent testBlock, or this
2258             // testBlock is part of the loop.
2259             loopBlocks.Insert(testBlock->bbNum);
2260         }
2261
2262         // All blocks were ok to leave in the loop.
2263         return true;
2264     }
2265
2266     //------------------------------------------------------------------------
2267     // FixupFallThrough: Re-establish any broken control flow connectivity
2268     //    and eliminate any "goto-next"s that were created by changing the
2269     //    given block's lexical follower.
2270     //
2271     // Arguments:
2272     //    block - Block whose `bbNext` has changed.
2273     //    oldNext - Previous value of `block->bbNext`.
2274     //    newNext - New value of `block->bbNext`.
2275     //
2276     // Return Value:
2277     //    If a new block is created to reconnect flow, the new block is
2278     //    returned; otherwise, nullptr.
2279     //
2280     BasicBlock* FixupFallThrough(BasicBlock* block, BasicBlock* oldNext, BasicBlock* newNext)
2281     {
2282         // If we create a new block, that will be our return value.
2283         BasicBlock* newBlock = nullptr;
2284
2285         if (block->bbFallsThrough())
2286         {
2287             // Need to reconnect the flow from `block` to `oldNext`.
2288
2289             if ((block->bbJumpKind == BBJ_COND) && (block->bbJumpDest == newNext))
2290             {
2291                 /* Reverse the jump condition */
2292                 GenTree* test = block->lastNode();
2293                 noway_assert(test->OperIsConditionalJump());
2294
2295                 if (test->OperGet() == GT_JTRUE)
2296                 {
2297                     GenTree* cond = comp->gtReverseCond(test->gtOp.gtOp1);
2298                     assert(cond == test->gtOp.gtOp1); // Ensure `gtReverseCond` did not create a new node.
2299                     test->gtOp.gtOp1 = cond;
2300                 }
2301                 else
2302                 {
2303                     comp->gtReverseCond(test);
2304                 }
2305
2306                 // Redirect the Conditional JUMP to go to `oldNext`
2307                 block->bbJumpDest = oldNext;
2308             }
2309             else
2310             {
2311                 // Insert an unconditional jump to `oldNext` just after `block`.
2312                 newBlock = comp->fgConnectFallThrough(block, oldNext);
2313                 noway_assert((newBlock == nullptr) || loopBlocks.CanRepresent(newBlock->bbNum));
2314             }
2315         }
2316         else if ((block->bbJumpKind == BBJ_ALWAYS) && (block->bbJumpDest == newNext))
2317         {
2318             // We've made `block`'s jump target its bbNext, so remove the jump.
2319             if (!comp->fgOptimizeBranchToNext(block, newNext, block->bbPrev))
2320             {
2321                 // If optimizing away the goto-next failed for some reason, mark it KEEP_BBJ_ALWAYS to
2322                 // prevent assertions from complaining about it.
2323                 block->bbFlags |= BBF_KEEP_BBJ_ALWAYS;
2324             }
2325         }
2326
2327         // Make sure we don't leave around a goto-next unless it's marked KEEP_BBJ_ALWAYS.
2328         assert((block->bbJumpKind != BBJ_COND) || (block->bbJumpKind != BBJ_ALWAYS) || (block->bbJumpDest != newNext) ||
2329                ((block->bbFlags & BBF_KEEP_BBJ_ALWAYS) != 0));
2330         return newBlock;
2331     }
2332
2333     //------------------------------------------------------------------------
2334     // CheckForExit: Check if the given block has any successor edges that are
2335     //    loop exits, and update `lastExit` and `exitCount` if so.
2336     //
2337     // Arguments:
2338     //    block - Block whose successor edges are to be checked.
2339     //
2340     // Notes:
2341     //    If one block has multiple exiting successor edges, those are counted
2342     //    as multiple exits in `exitCount`.
2343     //
2344     void CheckForExit(BasicBlock* block)
2345     {
2346         BasicBlock* exitPoint;
2347
2348         switch (block->bbJumpKind)
2349         {
2350             case BBJ_COND:
2351             case BBJ_CALLFINALLY:
2352             case BBJ_ALWAYS:
2353             case BBJ_EHCATCHRET:
2354                 assert(block->bbJumpDest);
2355                 exitPoint = block->bbJumpDest;
2356
2357                 if (!loopBlocks.IsMember(exitPoint->bbNum))
2358                 {
2359                     /* exit from a block other than BOTTOM */
2360                     lastExit = block;
2361                     exitCount++;
2362                 }
2363                 break;
2364
2365             case BBJ_NONE:
2366                 break;
2367
2368             case BBJ_EHFINALLYRET:
2369             case BBJ_EHFILTERRET:
2370                 /* The "try" associated with this "finally" must be in the
2371                 * same loop, so the finally block will return control inside the loop */
2372                 break;
2373
2374             case BBJ_THROW:
2375             case BBJ_RETURN:
2376                 /* those are exits from the loop */
2377                 lastExit = block;
2378                 exitCount++;
2379                 break;
2380
2381             case BBJ_SWITCH:
2382
2383                 unsigned jumpCnt;
2384                 jumpCnt = block->bbJumpSwt->bbsCount;
2385                 BasicBlock** jumpTab;
2386                 jumpTab = block->bbJumpSwt->bbsDstTab;
2387
2388                 do
2389                 {
2390                     noway_assert(*jumpTab);
2391                     exitPoint = *jumpTab;
2392
2393                     if (!loopBlocks.IsMember(exitPoint->bbNum))
2394                     {
2395                         lastExit = block;
2396                         exitCount++;
2397                     }
2398                 } while (++jumpTab, --jumpCnt);
2399                 break;
2400
2401             default:
2402                 noway_assert(!"Unexpected bbJumpKind");
2403                 break;
2404         }
2405
2406         if (block->bbFallsThrough() && !loopBlocks.IsMember(block->bbNext->bbNum))
2407         {
2408             // Found a fall-through exit.
2409             lastExit = block;
2410             exitCount++;
2411         }
2412     }
2413 };
2414 }
2415
2416 /*****************************************************************************
2417  * Find the natural loops, using dominators. Note that the test for
2418  * a loop is slightly different from the standard one, because we have
2419  * not done a depth first reordering of the basic blocks.
2420  */
2421
2422 void Compiler::optFindNaturalLoops()
2423 {
2424 #ifdef DEBUG
2425     if (verbose)
2426     {
2427         printf("*************** In optFindNaturalLoops()\n");
2428     }
2429 #endif // DEBUG
2430
2431     noway_assert(fgDomsComputed);
2432     assert(fgHasLoops);
2433
2434 #if COUNT_LOOPS
2435     hasMethodLoops         = false;
2436     loopsThisMethod        = 0;
2437     loopOverflowThisMethod = false;
2438 #endif
2439
2440     LoopSearch search(this);
2441
2442     for (BasicBlock* head = fgFirstBB; head->bbNext; head = head->bbNext)
2443     {
2444         BasicBlock* top = head->bbNext;
2445
2446         //  Blocks that are rarely run have a zero bbWeight and should
2447         //  never be optimized here
2448
2449         if (top->bbWeight == BB_ZERO_WEIGHT)
2450         {
2451             continue;
2452         }
2453
2454         for (flowList* pred = top->bbPreds; pred; pred = pred->flNext)
2455         {
2456             if (search.FindLoop(head, top, pred->flBlock))
2457             {
2458                 // Found a loop; record it and see if we've hit the limit.
2459                 bool recordedLoop = search.RecordLoop();
2460
2461                 (void)recordedLoop; // avoid unusued variable warnings in COUNT_LOOPS and !DEBUG
2462
2463 #if COUNT_LOOPS
2464                 if (!hasMethodLoops)
2465                 {
2466                     /* mark the method as containing natural loops */
2467                     totalLoopMethods++;
2468                     hasMethodLoops = true;
2469                 }
2470
2471                 /* increment total number of loops found */
2472                 totalLoopCount++;
2473                 loopsThisMethod++;
2474
2475                 /* keep track of the number of exits */
2476                 loopExitCountTable.record(static_cast<unsigned>(exitCount));
2477 #else  // COUNT_LOOPS
2478                 assert(recordedLoop);
2479                 if (optLoopCount == MAX_LOOP_NUM)
2480                 {
2481                     // We won't be able to record any more loops, so stop looking.
2482                     goto NO_MORE_LOOPS;
2483                 }
2484 #endif // COUNT_LOOPS
2485
2486                 // Continue searching preds of `top` to see if any other are
2487                 // back-edges (this can happen for nested loops).  The iteration
2488                 // is safe because the compaction we do only modifies predecessor
2489                 // lists of blocks that gain or lose fall-through from their
2490                 // `bbPrev`, but since the motion is from within the loop to below
2491                 // it, we know we're not altering the relationship between `top`
2492                 // and its `bbPrev`.
2493             }
2494         }
2495     }
2496 NO_MORE_LOOPS:
2497
2498 #if COUNT_LOOPS
2499     loopCountTable.record(loopsThisMethod);
2500     if (maxLoopsPerMethod < loopsThisMethod)
2501     {
2502         maxLoopsPerMethod = loopsThisMethod;
2503     }
2504     if (loopOverflowThisMethod)
2505     {
2506         totalLoopOverflows++;
2507     }
2508 #endif // COUNT_LOOPS
2509
2510     bool mod = search.ChangedFlowGraph();
2511
2512     if (mod)
2513     {
2514         // Need to renumber blocks now since loop canonicalization
2515         // depends on it; can defer the rest of fgUpdateChangedFlowGraph()
2516         // until after canonicalizing loops.  Dominator information is
2517         // recorded in terms of block numbers, so flag it invalid.
2518         fgDomsComputed = false;
2519         fgRenumberBlocks();
2520     }
2521
2522     // Now the loop indices are stable.  We can figure out parent/child relationships
2523     // (using table indices to name loops), and label blocks.
2524     for (unsigned char loopInd = 1; loopInd < optLoopCount; loopInd++)
2525     {
2526         for (unsigned char possibleParent = loopInd; possibleParent > 0;)
2527         {
2528             possibleParent--;
2529             if (optLoopTable[possibleParent].lpContains(optLoopTable[loopInd]))
2530             {
2531                 optLoopTable[loopInd].lpParent       = possibleParent;
2532                 optLoopTable[loopInd].lpSibling      = optLoopTable[possibleParent].lpChild;
2533                 optLoopTable[possibleParent].lpChild = loopInd;
2534                 break;
2535             }
2536         }
2537     }
2538
2539     // Now label the blocks with the innermost loop to which they belong.  Since parents
2540     // precede children in the table, doing the labeling for each loop in order will achieve
2541     // this -- the innermost loop labeling will be done last.
2542     for (unsigned char loopInd = 0; loopInd < optLoopCount; loopInd++)
2543     {
2544         BasicBlock* first  = optLoopTable[loopInd].lpFirst;
2545         BasicBlock* bottom = optLoopTable[loopInd].lpBottom;
2546         for (BasicBlock* blk = first; blk != nullptr; blk = blk->bbNext)
2547         {
2548             blk->bbNatLoopNum = loopInd;
2549             if (blk == bottom)
2550             {
2551                 break;
2552             }
2553             assert(blk->bbNext != nullptr); // We should never reach nullptr.
2554         }
2555     }
2556
2557     // Make sure that loops are canonical: that every loop has a unique "top", by creating an empty "nop"
2558     // one, if necessary, for loops containing others that share a "top."
2559     for (unsigned char loopInd = 0; loopInd < optLoopCount; loopInd++)
2560     {
2561         // Traverse the outermost loops as entries into the loop nest; so skip non-outermost.
2562         if (optLoopTable[loopInd].lpParent != BasicBlock::NOT_IN_LOOP)
2563         {
2564             continue;
2565         }
2566
2567         // Otherwise...
2568         if (optCanonicalizeLoopNest(loopInd))
2569         {
2570             mod = true;
2571         }
2572     }
2573     if (mod)
2574     {
2575         fgUpdateChangedFlowGraph();
2576     }
2577
2578 #ifdef DEBUG
2579     if (verbose && optLoopCount > 0)
2580     {
2581         printf("\nFinal natural loop table:\n");
2582         for (unsigned loopInd = 0; loopInd < optLoopCount; loopInd++)
2583         {
2584             optPrintLoopInfo(loopInd);
2585             printf("\n");
2586         }
2587     }
2588 #endif // DEBUG
2589 }
2590
2591 void Compiler::optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap)
2592 {
2593     BasicBlock* newJumpDest = nullptr;
2594     switch (blk->bbJumpKind)
2595     {
2596         case BBJ_THROW:
2597         case BBJ_RETURN:
2598         case BBJ_NONE:
2599         case BBJ_EHFILTERRET:
2600         case BBJ_EHFINALLYRET:
2601         case BBJ_EHCATCHRET:
2602             // These have no jump destination to update.
2603             break;
2604
2605         case BBJ_ALWAYS:
2606         case BBJ_LEAVE:
2607         case BBJ_CALLFINALLY:
2608         case BBJ_COND:
2609             // All of these have a single jump destination to update.
2610             if (redirectMap->Lookup(blk->bbJumpDest, &newJumpDest))
2611             {
2612                 blk->bbJumpDest = newJumpDest;
2613             }
2614             break;
2615
2616         case BBJ_SWITCH:
2617         {
2618             bool redirected = false;
2619             for (unsigned i = 0; i < blk->bbJumpSwt->bbsCount; i++)
2620             {
2621                 if (redirectMap->Lookup(blk->bbJumpSwt->bbsDstTab[i], &newJumpDest))
2622                 {
2623                     blk->bbJumpSwt->bbsDstTab[i] = newJumpDest;
2624                     redirected                   = true;
2625                 }
2626             }
2627             // If any redirections happend, invalidate the switch table map for the switch.
2628             if (redirected)
2629             {
2630                 // Don't create a new map just to try to remove an entry.
2631                 BlockToSwitchDescMap* switchMap = GetSwitchDescMap(/* createIfNull */ false);
2632                 if (switchMap != nullptr)
2633                 {
2634                     switchMap->Remove(blk);
2635                 }
2636             }
2637         }
2638         break;
2639
2640         default:
2641             unreached();
2642     }
2643 }
2644
2645 // TODO-Cleanup: This should be a static member of the BasicBlock class.
2646 void Compiler::optCopyBlkDest(BasicBlock* from, BasicBlock* to)
2647 {
2648     assert(from->bbJumpKind == to->bbJumpKind); // Precondition.
2649
2650     // copy the jump destination(s) from "from" to "to".
2651     switch (to->bbJumpKind)
2652     {
2653         case BBJ_ALWAYS:
2654         case BBJ_LEAVE:
2655         case BBJ_CALLFINALLY:
2656         case BBJ_COND:
2657             // All of these have a single jump destination to update.
2658             to->bbJumpDest = from->bbJumpDest;
2659             break;
2660
2661         case BBJ_SWITCH:
2662         {
2663             to->bbJumpSwt            = new (this, CMK_BasicBlock) BBswtDesc();
2664             to->bbJumpSwt->bbsCount  = from->bbJumpSwt->bbsCount;
2665             to->bbJumpSwt->bbsDstTab = new (this, CMK_BasicBlock) BasicBlock*[from->bbJumpSwt->bbsCount];
2666
2667             for (unsigned i = 0; i < from->bbJumpSwt->bbsCount; i++)
2668             {
2669                 to->bbJumpSwt->bbsDstTab[i] = from->bbJumpSwt->bbsDstTab[i];
2670             }
2671         }
2672         break;
2673
2674         default:
2675             break;
2676     }
2677 }
2678
2679 // Canonicalize the loop nest rooted at parent loop 'loopInd'.
2680 // Returns 'true' if the flow graph is modified.
2681 bool Compiler::optCanonicalizeLoopNest(unsigned char loopInd)
2682 {
2683     bool modified = false;
2684
2685     // Is the top of the current loop not in any nested loop?
2686     if (optLoopTable[loopInd].lpTop->bbNatLoopNum != loopInd)
2687     {
2688         if (optCanonicalizeLoop(loopInd))
2689         {
2690             modified = true;
2691         }
2692     }
2693
2694     for (unsigned char child = optLoopTable[loopInd].lpChild; child != BasicBlock::NOT_IN_LOOP;
2695          child               = optLoopTable[child].lpSibling)
2696     {
2697         if (optCanonicalizeLoopNest(child))
2698         {
2699             modified = true;
2700         }
2701     }
2702
2703     return modified;
2704 }
2705
2706 bool Compiler::optCanonicalizeLoop(unsigned char loopInd)
2707 {
2708     // Is the top uniquely part of the current loop?
2709     BasicBlock* t = optLoopTable[loopInd].lpTop;
2710
2711     if (t->bbNatLoopNum == loopInd)
2712     {
2713         return false;
2714     }
2715
2716     JITDUMP("in optCanonicalizeLoop: L%02u has top BB%02u (bottom BB%02u) with natural loop number L%02u: need to "
2717             "canonicalize\n",
2718             loopInd, t->bbNum, optLoopTable[loopInd].lpBottom->bbNum, t->bbNatLoopNum);
2719
2720     // Otherwise, the top of this loop is also part of a nested loop.
2721     //
2722     // Insert a new unique top for this loop. We must be careful to put this new
2723     // block in the correct EH region. Note that f->bbPrev might be in a different
2724     // EH region. For example:
2725     //
2726     // try {
2727     //      ...
2728     //      BB07
2729     // }
2730     // BB08 // "first"
2731     //
2732     // In this case, first->bbPrev is BB07, which is in a different 'try' region.
2733     // On the other hand, the first block of multiple loops might be the first
2734     // block of a 'try' region that is completely contained in the multiple loops.
2735     // for example:
2736     //
2737     // BB08 try { }
2738     // ...
2739     // BB10 BBJ_ALWAYS => BB08
2740     // ...
2741     // BB12 BBJ_ALWAYS => BB08
2742     //
2743     // Here, we have two loops, both with BB08 as the "first" block. Block BB08
2744     // is a single-block "try" region. Neither loop "bottom" block is in the same
2745     // "try" region as BB08. This is legal because you can jump to the first block
2746     // of a try region. With EH normalization, no two "try" regions will share
2747     // this block. In this case, we need to insert a new block for the outer loop
2748     // in the same EH region as the branch from the "bottom":
2749     //
2750     // BB30 BBJ_NONE
2751     // BB08 try { }
2752     // ...
2753     // BB10 BBJ_ALWAYS => BB08
2754     // ...
2755     // BB12 BBJ_ALWAYS => BB30
2756     //
2757     // Another possibility is that the "first" block of the loop nest can be the first block
2758     // of a "try" region that also has other predecessors than those in the loop, or even in
2759     // the "try" region (since blocks can target the first block of a "try" region). For example:
2760     //
2761     // BB08 try {
2762     // ...
2763     // BB10 BBJ_ALWAYS => BB08
2764     // ...
2765     // BB12 BBJ_ALWAYS => BB08
2766     // BB13 }
2767     // ...
2768     // BB20 BBJ_ALWAYS => BB08
2769     // ...
2770     // BB25 BBJ_ALWAYS => BB08
2771     //
2772     // Here, BB08 has 4 flow graph predecessors: BB10, BB12, BB20, BB25. These are all potential loop
2773     // bottoms, for four possible nested loops. However, we require all the loop bottoms to be in the
2774     // same EH region. For loops BB08..BB10 and BB08..BB12, we need to add a new "top" block within
2775     // the try region, immediately before BB08. The bottom of the loop BB08..BB10 loop will target the
2776     // old BB08, and the bottom of the BB08..BB12 loop will target the new loop header. The other branches
2777     // (BB20, BB25) must target the new loop header, both for correctness, and to avoid the illegal
2778     // situation of branching to a non-first block of a 'try' region.
2779     //
2780     // We can also have a loop nest where the "first" block is outside of a "try" region
2781     // and the back edges are inside a "try" region, for example:
2782     //
2783     // BB02 // "first"
2784     // ...
2785     // BB09 try { BBJ_COND => BB02
2786     // ...
2787     // BB15 BBJ_COND => BB02
2788     // ...
2789     // BB21 } // end of "try"
2790     //
2791     // In this case, both loop back edges were formed by "leave" instructions that were
2792     // imported into branches that were later made conditional. In this case, we don't
2793     // want to copy the EH region of the back edge, since that would create a block
2794     // outside of and disjoint with the "try" region of the back edge. However, to
2795     // simplify things, we disqualify this type of loop, so we should never see this here.
2796
2797     BasicBlock* h = optLoopTable[loopInd].lpHead;
2798     BasicBlock* f = optLoopTable[loopInd].lpFirst;
2799     BasicBlock* b = optLoopTable[loopInd].lpBottom;
2800
2801     // The loop must be entirely contained within a single handler region.
2802     assert(BasicBlock::sameHndRegion(f, b));
2803
2804     // If the bottom block is in the same "try" region, then we extend the EH
2805     // region. Otherwise, we add the new block outside the "try" region.
2806     bool        extendRegion = BasicBlock::sameTryRegion(f, b);
2807     BasicBlock* newT         = fgNewBBbefore(BBJ_NONE, f, extendRegion);
2808     if (!extendRegion)
2809     {
2810         // We need to set the EH region manually. Set it to be the same
2811         // as the bottom block.
2812         newT->copyEHRegion(b);
2813     }
2814
2815     // The new block can reach the same set of blocks as the old one, but don't try to reflect
2816     // that in its reachability set here -- creating the new block may have changed the BlockSet
2817     // representation from short to long, and canonicalizing loops is immediately followed by
2818     // a call to fgUpdateChangedFlowGraph which will recompute the reachability sets anyway.
2819
2820     // Redirect the "bottom" of the current loop to "newT".
2821     BlockToBlockMap* blockMap = new (getAllocatorLoopHoist()) BlockToBlockMap(getAllocatorLoopHoist());
2822     blockMap->Set(t, newT);
2823     optRedirectBlock(b, blockMap);
2824
2825     // Redirect non-loop preds of "t" to also go to "newT". Inner loops that also branch to "t" should continue
2826     // to do so. However, there maybe be other predecessors from outside the loop nest that need to be updated
2827     // to point to "newT". This normally wouldn't happen, since they too would be part of the loop nest. However,
2828     // they might have been prevented from participating in the loop nest due to different EH nesting, or some
2829     // other reason.
2830     //
2831     // Note that optRedirectBlock doesn't update the predecessors list. So, if the same 't' block is processed
2832     // multiple times while canonicalizing multiple loop nests, we'll attempt to redirect a predecessor multiple times.
2833     // This is ok, because after the first redirection, the topPredBlock branch target will no longer match the source
2834     // edge of the blockMap, so nothing will happen.
2835     bool firstPred = true;
2836     for (flowList* topPred = t->bbPreds; topPred != nullptr; topPred = topPred->flNext)
2837     {
2838         BasicBlock* topPredBlock = topPred->flBlock;
2839
2840         // Skip if topPredBlock is in the loop.
2841         // Note that this uses block number to detect membership in the loop. We are adding blocks during
2842         // canonicalization, and those block numbers will be new, and larger than previous blocks. However, we work
2843         // outside-in, so we shouldn't encounter the new blocks at the loop boundaries, or in the predecessor lists.
2844         if (t->bbNum <= topPredBlock->bbNum && topPredBlock->bbNum <= b->bbNum)
2845         {
2846             JITDUMP("in optCanonicalizeLoop: 'top' predecessor BB%02u is in the range of L%02u (BB%02u..BB%02u); not "
2847                     "redirecting its bottom edge\n",
2848                     topPredBlock->bbNum, loopInd, t->bbNum, b->bbNum);
2849             continue;
2850         }
2851
2852         JITDUMP("in optCanonicalizeLoop: redirect top predecessor BB%02u to BB%02u\n", topPredBlock->bbNum,
2853                 newT->bbNum);
2854         optRedirectBlock(topPredBlock, blockMap);
2855
2856         // When we have profile data then the 'newT' block will inherit topPredBlock profile weight
2857         if (topPredBlock->hasProfileWeight())
2858         {
2859             // This corrects an issue when the topPredBlock has a profile based weight
2860             //
2861             if (firstPred)
2862             {
2863                 JITDUMP("in optCanonicalizeLoop: block BB%02u will inheritWeight from BB%02u\n", newT->bbNum,
2864                         topPredBlock->bbNum);
2865
2866                 newT->inheritWeight(topPredBlock);
2867                 firstPred = false;
2868             }
2869             else
2870             {
2871                 JITDUMP("in optCanonicalizeLoop: block BB%02u will also contribute to the weight of BB%02u\n",
2872                         newT->bbNum, topPredBlock->bbNum);
2873
2874                 BasicBlock::weight_t newWeight = newT->getBBWeight(this) + topPredBlock->getBBWeight(this);
2875                 newT->setBBWeight(newWeight);
2876             }
2877         }
2878     }
2879
2880     assert(newT->bbNext == f);
2881     if (f != t)
2882     {
2883         newT->bbJumpKind = BBJ_ALWAYS;
2884         newT->bbJumpDest = t;
2885         newT->bbTreeList = nullptr;
2886         fgInsertStmtAtEnd(newT, fgNewStmtFromTree(gtNewOperNode(GT_NOP, TYP_VOID, nullptr)));
2887     }
2888
2889     // If it had been a do-while loop (top == entry), update entry, as well.
2890     BasicBlock* origE = optLoopTable[loopInd].lpEntry;
2891     if (optLoopTable[loopInd].lpTop == origE)
2892     {
2893         optLoopTable[loopInd].lpEntry = newT;
2894     }
2895     optLoopTable[loopInd].lpTop   = newT;
2896     optLoopTable[loopInd].lpFirst = newT;
2897
2898     newT->bbNatLoopNum = loopInd;
2899
2900     JITDUMP("in optCanonicalizeLoop: made new block BB%02u [%p] the new unique top of loop %d.\n", newT->bbNum,
2901             dspPtr(newT), loopInd);
2902
2903     // Make sure the head block still goes to the entry...
2904     if (h->bbJumpKind == BBJ_NONE && h->bbNext != optLoopTable[loopInd].lpEntry)
2905     {
2906         h->bbJumpKind = BBJ_ALWAYS;
2907         h->bbJumpDest = optLoopTable[loopInd].lpEntry;
2908     }
2909     else if (h->bbJumpKind == BBJ_COND && h->bbNext == newT && newT != optLoopTable[loopInd].lpEntry)
2910     {
2911         BasicBlock* h2               = fgNewBBafter(BBJ_ALWAYS, h, /*extendRegion*/ true);
2912         optLoopTable[loopInd].lpHead = h2;
2913         h2->bbJumpDest               = optLoopTable[loopInd].lpEntry;
2914         h2->bbTreeList               = nullptr;
2915         fgInsertStmtAtEnd(h2, fgNewStmtFromTree(gtNewOperNode(GT_NOP, TYP_VOID, nullptr)));
2916     }
2917
2918     // If any loops nested in "loopInd" have the same head and entry as "loopInd",
2919     // it must be the case that they were do-while's (since "h" fell through to the entry).
2920     // The new node "newT" becomes the head of such loops.
2921     for (unsigned char childLoop = optLoopTable[loopInd].lpChild; childLoop != BasicBlock::NOT_IN_LOOP;
2922          childLoop               = optLoopTable[childLoop].lpSibling)
2923     {
2924         if (optLoopTable[childLoop].lpEntry == origE && optLoopTable[childLoop].lpHead == h &&
2925             newT->bbJumpKind == BBJ_NONE && newT->bbNext == origE)
2926         {
2927             optUpdateLoopHead(childLoop, h, newT);
2928         }
2929     }
2930     return true;
2931 }
2932
2933 bool Compiler::optLoopContains(unsigned l1, unsigned l2)
2934 {
2935     assert(l1 != BasicBlock::NOT_IN_LOOP);
2936     if (l1 == l2)
2937     {
2938         return true;
2939     }
2940     else if (l2 == BasicBlock::NOT_IN_LOOP)
2941     {
2942         return false;
2943     }
2944     else
2945     {
2946         return optLoopContains(l1, optLoopTable[l2].lpParent);
2947     }
2948 }
2949
2950 void Compiler::optUpdateLoopHead(unsigned loopInd, BasicBlock* from, BasicBlock* to)
2951 {
2952     assert(optLoopTable[loopInd].lpHead == from);
2953     optLoopTable[loopInd].lpHead = to;
2954     for (unsigned char childLoop = optLoopTable[loopInd].lpChild; childLoop != BasicBlock::NOT_IN_LOOP;
2955          childLoop               = optLoopTable[childLoop].lpSibling)
2956     {
2957         if (optLoopTable[childLoop].lpHead == from)
2958         {
2959             optUpdateLoopHead(childLoop, from, to);
2960         }
2961     }
2962 }
2963
2964 /*****************************************************************************
2965  * If the : i += const" will cause an overflow exception for the small types.
2966  */
2967
2968 bool jitIterSmallOverflow(int iterAtExit, var_types incrType)
2969 {
2970     int type_MAX;
2971
2972     switch (incrType)
2973     {
2974         case TYP_BYTE:
2975             type_MAX = SCHAR_MAX;
2976             break;
2977         case TYP_UBYTE:
2978             type_MAX = UCHAR_MAX;
2979             break;
2980         case TYP_SHORT:
2981             type_MAX = SHRT_MAX;
2982             break;
2983         case TYP_USHORT:
2984             type_MAX = USHRT_MAX;
2985             break;
2986
2987         case TYP_UINT: // Detected by checking for 32bit ....
2988         case TYP_INT:
2989             return false; // ... overflow same as done for TYP_INT
2990
2991         default:
2992             NO_WAY("Bad type");
2993     }
2994
2995     if (iterAtExit > type_MAX)
2996     {
2997         return true;
2998     }
2999     else
3000     {
3001         return false;
3002     }
3003 }
3004
3005 /*****************************************************************************
3006  * If the "i -= const" will cause an underflow exception for the small types
3007  */
3008
3009 bool jitIterSmallUnderflow(int iterAtExit, var_types decrType)
3010 {
3011     int type_MIN;
3012
3013     switch (decrType)
3014     {
3015         case TYP_BYTE:
3016             type_MIN = SCHAR_MIN;
3017             break;
3018         case TYP_SHORT:
3019             type_MIN = SHRT_MIN;
3020             break;
3021         case TYP_UBYTE:
3022             type_MIN = 0;
3023             break;
3024         case TYP_USHORT:
3025             type_MIN = 0;
3026             break;
3027
3028         case TYP_UINT: // Detected by checking for 32bit ....
3029         case TYP_INT:
3030             return false; // ... underflow same as done for TYP_INT
3031
3032         default:
3033             NO_WAY("Bad type");
3034     }
3035
3036     if (iterAtExit < type_MIN)
3037     {
3038         return true;
3039     }
3040     else
3041     {
3042         return false;
3043     }
3044 }
3045
3046 /*****************************************************************************
3047  *
3048  *  Helper for unroll loops - Computes the number of repetitions
3049  *  in a constant loop. If it cannot prove the number is constant returns false
3050  */
3051
3052 bool Compiler::optComputeLoopRep(int        constInit,
3053                                  int        constLimit,
3054                                  int        iterInc,
3055                                  genTreeOps iterOper,
3056                                  var_types  iterOperType,
3057                                  genTreeOps testOper,
3058                                  bool       unsTest,
3059                                  bool       dupCond,
3060                                  unsigned*  iterCount)
3061 {
3062     noway_assert(genActualType(iterOperType) == TYP_INT);
3063
3064     __int64 constInitX;
3065     __int64 constLimitX;
3066
3067     unsigned loopCount;
3068     int      iterSign;
3069
3070     // Using this, we can just do a signed comparison with other 32 bit values.
3071     if (unsTest)
3072     {
3073         constLimitX = (unsigned int)constLimit;
3074     }
3075     else
3076     {
3077         constLimitX = (signed int)constLimit;
3078     }
3079
3080     switch (iterOperType)
3081     {
3082 // For small types, the iteration operator will narrow these values if big
3083
3084 #define INIT_ITER_BY_TYPE(type)                                                                                        \
3085     constInitX = (type)constInit;                                                                                      \
3086     iterInc    = (type)iterInc;
3087
3088         case TYP_BYTE:
3089             INIT_ITER_BY_TYPE(signed char);
3090             break;
3091         case TYP_UBYTE:
3092             INIT_ITER_BY_TYPE(unsigned char);
3093             break;
3094         case TYP_SHORT:
3095             INIT_ITER_BY_TYPE(signed short);
3096             break;
3097         case TYP_USHORT:
3098             INIT_ITER_BY_TYPE(unsigned short);
3099             break;
3100
3101         // For the big types, 32 bit arithmetic is performed
3102
3103         case TYP_INT:
3104         case TYP_UINT:
3105             if (unsTest)
3106             {
3107                 constInitX = (unsigned int)constInit;
3108             }
3109             else
3110             {
3111                 constInitX = (signed int)constInit;
3112             }
3113             break;
3114
3115         default:
3116             noway_assert(!"Bad type");
3117             NO_WAY("Bad type");
3118     }
3119
3120     /* If iterInc is zero we have an infinite loop */
3121     if (iterInc == 0)
3122     {
3123         return false;
3124     }
3125
3126     /* Set iterSign to +1 for positive iterInc and -1 for negative iterInc */
3127     iterSign = (iterInc > 0) ? +1 : -1;
3128
3129     /* Initialize loopCount to zero */
3130     loopCount = 0;
3131
3132     // If dupCond is true then the loop head contains a test which skips
3133     // this loop, if the constInit does not pass the loop test
3134     // Such a loop can execute zero times.
3135     // If dupCond is false then we have a true do-while loop which we
3136     // always execute the loop once before performing the loop test
3137     if (!dupCond)
3138     {
3139         loopCount += 1;
3140         constInitX += iterInc;
3141     }
3142
3143     // bail if count is based on wrap-around math
3144     if (iterInc > 0)
3145     {
3146         if (constLimitX < constInitX)
3147         {
3148             return false;
3149         }
3150     }
3151     else if (constLimitX > constInitX)
3152     {
3153         return false;
3154     }
3155
3156     /* Compute the number of repetitions */
3157
3158     switch (testOper)
3159     {
3160         __int64 iterAtExitX;
3161
3162         case GT_EQ:
3163             /* something like "for (i=init; i == lim; i++)" doesn't make any sense */
3164             return false;
3165
3166         case GT_NE:
3167             /*  "for (i=init; i != lim; i+=const)" - this is tricky since it may
3168              *  have a constant number of iterations or loop forever -
3169              *  we have to compute (lim-init) mod iterInc to see if it is zero.
3170              * If mod iterInc is not zero then the limit test will miss an a wrap will occur
3171              * which is probably not what the end user wanted, but it is legal.
3172              */
3173
3174             if (iterInc > 0)
3175             {
3176                 /* Stepping by one, i.e. Mod with 1 is always zero */
3177                 if (iterInc != 1)
3178                 {
3179                     if (((constLimitX - constInitX) % iterInc) != 0)
3180                     {
3181                         return false;
3182                     }
3183                 }
3184             }
3185             else
3186             {
3187                 noway_assert(iterInc < 0);
3188                 /* Stepping by -1, i.e. Mod with 1 is always zero */
3189                 if (iterInc != -1)
3190                 {
3191                     if (((constInitX - constLimitX) % (-iterInc)) != 0)
3192                     {
3193                         return false;
3194                     }
3195                 }
3196             }
3197
3198             switch (iterOper)
3199             {
3200 #ifdef LEGACY_BACKEND
3201                 case GT_ASG_SUB:
3202 #endif
3203                 case GT_SUB:
3204                     iterInc = -iterInc;
3205                     __fallthrough;
3206
3207 #ifdef LEGACY_BACKEND
3208                 case GT_ASG_ADD:
3209 #endif
3210                 case GT_ADD:
3211                     if (constInitX != constLimitX)
3212                     {
3213                         loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1;
3214                     }
3215
3216                     iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
3217
3218                     if (unsTest)
3219                     {
3220                         iterAtExitX = (unsigned)iterAtExitX;
3221                     }
3222
3223                     // Check if iteration incr will cause overflow for small types
3224                     if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
3225                     {
3226                         return false;
3227                     }
3228
3229                     // iterator with 32bit overflow. Bad for TYP_(U)INT
3230                     if (iterAtExitX < constLimitX)
3231                     {
3232                         return false;
3233                     }
3234
3235                     *iterCount = loopCount;
3236                     return true;
3237
3238 #ifdef LEGACY_BACKEND
3239                 case GT_ASG_MUL:
3240                 case GT_ASG_DIV:
3241                 case GT_ASG_RSH:
3242                 case GT_ASG_LSH:
3243                 case GT_ASG_UDIV:
3244 #endif
3245                 case GT_MUL:
3246                 case GT_DIV:
3247                 case GT_RSH:
3248                 case GT_LSH:
3249                 case GT_UDIV:
3250                     return false;
3251
3252                 default:
3253                     noway_assert(!"Unknown operator for loop iterator");
3254                     return false;
3255             }
3256
3257         case GT_LT:
3258             switch (iterOper)
3259             {
3260 #ifdef LEGACY_BACKEND
3261                 case GT_ASG_SUB:
3262 #endif
3263                 case GT_SUB:
3264                     iterInc = -iterInc;
3265                     __fallthrough;
3266
3267 #ifdef LEGACY_BACKEND
3268                 case GT_ASG_ADD:
3269 #endif
3270                 case GT_ADD:
3271                     if (constInitX < constLimitX)
3272                     {
3273                         loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1;
3274                     }
3275
3276                     iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
3277
3278                     if (unsTest)
3279                     {
3280                         iterAtExitX = (unsigned)iterAtExitX;
3281                     }
3282
3283                     // Check if iteration incr will cause overflow for small types
3284                     if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
3285                     {
3286                         return false;
3287                     }
3288
3289                     // iterator with 32bit overflow. Bad for TYP_(U)INT
3290                     if (iterAtExitX < constLimitX)
3291                     {
3292                         return false;
3293                     }
3294
3295                     *iterCount = loopCount;
3296                     return true;
3297
3298 #ifdef LEGACY_BACKEND
3299                 case GT_ASG_MUL:
3300                 case GT_ASG_DIV:
3301                 case GT_ASG_RSH:
3302                 case GT_ASG_LSH:
3303                 case GT_ASG_UDIV:
3304 #endif
3305                 case GT_MUL:
3306                 case GT_DIV:
3307                 case GT_RSH:
3308                 case GT_LSH:
3309                 case GT_UDIV:
3310                     return false;
3311
3312                 default:
3313                     noway_assert(!"Unknown operator for loop iterator");
3314                     return false;
3315             }
3316
3317         case GT_LE:
3318             switch (iterOper)
3319             {
3320 #ifdef LEGACY_BACKEND
3321                 case GT_ASG_SUB:
3322 #endif
3323                 case GT_SUB:
3324                     iterInc = -iterInc;
3325                     __fallthrough;
3326
3327 #ifdef LEGACY_BACKEND
3328                 case GT_ASG_ADD:
3329 #endif
3330                 case GT_ADD:
3331                     if (constInitX <= constLimitX)
3332                     {
3333                         loopCount += (unsigned)((constLimitX - constInitX) / iterInc) + 1;
3334                     }
3335
3336                     iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
3337
3338                     if (unsTest)
3339                     {
3340                         iterAtExitX = (unsigned)iterAtExitX;
3341                     }
3342
3343                     // Check if iteration incr will cause overflow for small types
3344                     if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
3345                     {
3346                         return false;
3347                     }
3348
3349                     // iterator with 32bit overflow. Bad for TYP_(U)INT
3350                     if (iterAtExitX <= constLimitX)
3351                     {
3352                         return false;
3353                     }
3354
3355                     *iterCount = loopCount;
3356                     return true;
3357
3358 #ifdef LEGACY_BACKEND
3359                 case GT_ASG_MUL:
3360                 case GT_ASG_DIV:
3361                 case GT_ASG_RSH:
3362                 case GT_ASG_LSH:
3363                 case GT_ASG_UDIV:
3364 #endif
3365                 case GT_MUL:
3366                 case GT_DIV:
3367                 case GT_RSH:
3368                 case GT_LSH:
3369                 case GT_UDIV:
3370                     return false;
3371
3372                 default:
3373                     noway_assert(!"Unknown operator for loop iterator");
3374                     return false;
3375             }
3376
3377         case GT_GT:
3378             switch (iterOper)
3379             {
3380 #ifdef LEGACY_BACKEND
3381                 case GT_ASG_SUB:
3382 #endif
3383                 case GT_SUB:
3384                     iterInc = -iterInc;
3385                     __fallthrough;
3386
3387 #ifdef LEGACY_BACKEND
3388                 case GT_ASG_ADD:
3389 #endif
3390                 case GT_ADD:
3391                     if (constInitX > constLimitX)
3392                     {
3393                         loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1;
3394                     }
3395
3396                     iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
3397
3398                     if (unsTest)
3399                     {
3400                         iterAtExitX = (unsigned)iterAtExitX;
3401                     }
3402
3403                     // Check if small types will underflow
3404                     if (jitIterSmallUnderflow((int)iterAtExitX, iterOperType))
3405                     {
3406                         return false;
3407                     }
3408
3409                     // iterator with 32bit underflow. Bad for TYP_INT and unsigneds
3410                     if (iterAtExitX > constLimitX)
3411                     {
3412                         return false;
3413                     }
3414
3415                     *iterCount = loopCount;
3416                     return true;
3417
3418 #ifdef LEGACY_BACKEND
3419                 case GT_ASG_MUL:
3420                 case GT_ASG_DIV:
3421                 case GT_ASG_RSH:
3422                 case GT_ASG_LSH:
3423                 case GT_ASG_UDIV:
3424 #endif
3425                 case GT_MUL:
3426                 case GT_DIV:
3427                 case GT_RSH:
3428                 case GT_LSH:
3429                 case GT_UDIV:
3430                     return false;
3431
3432                 default:
3433                     noway_assert(!"Unknown operator for loop iterator");
3434                     return false;
3435             }
3436
3437         case GT_GE:
3438             switch (iterOper)
3439             {
3440 #ifdef LEGACY_BACKEND
3441                 case GT_ASG_SUB:
3442 #endif
3443                 case GT_SUB:
3444                     iterInc = -iterInc;
3445                     __fallthrough;
3446
3447 #ifdef LEGACY_BACKEND
3448                 case GT_ASG_ADD:
3449 #endif
3450                 case GT_ADD:
3451                     if (constInitX >= constLimitX)
3452                     {
3453                         loopCount += (unsigned)((constLimitX - constInitX) / iterInc) + 1;
3454                     }
3455
3456                     iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
3457
3458                     if (unsTest)
3459                     {
3460                         iterAtExitX = (unsigned)iterAtExitX;
3461                     }
3462
3463                     // Check if small types will underflow
3464                     if (jitIterSmallUnderflow((int)iterAtExitX, iterOperType))
3465                     {
3466                         return false;
3467                     }
3468
3469                     // iterator with 32bit underflow. Bad for TYP_INT and unsigneds
3470                     if (iterAtExitX >= constLimitX)
3471                     {
3472                         return false;
3473                     }
3474
3475                     *iterCount = loopCount;
3476                     return true;
3477
3478 #ifdef LEGACY_BACKEND
3479                 case GT_ASG_MUL:
3480                 case GT_ASG_DIV:
3481                 case GT_ASG_RSH:
3482                 case GT_ASG_LSH:
3483                 case GT_ASG_UDIV:
3484 #endif
3485                 case GT_MUL:
3486                 case GT_DIV:
3487                 case GT_RSH:
3488                 case GT_LSH:
3489                 case GT_UDIV:
3490                     return false;
3491
3492                 default:
3493                     noway_assert(!"Unknown operator for loop iterator");
3494                     return false;
3495             }
3496
3497         default:
3498             noway_assert(!"Unknown operator for loop condition");
3499     }
3500
3501     return false;
3502 }
3503
3504 /*****************************************************************************
3505  *
3506  *  Look for loop unrolling candidates and unroll them
3507  */
3508
3509 #ifdef _PREFAST_
3510 #pragma warning(push)
3511 #pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
3512 #endif
3513 void Compiler::optUnrollLoops()
3514 {
3515     if (compCodeOpt() == SMALL_CODE)
3516     {
3517         return;
3518     }
3519
3520     if (optLoopCount == 0)
3521     {
3522         return;
3523     }
3524
3525 #ifdef DEBUG
3526     if (JitConfig.JitNoUnroll())
3527     {
3528         return;
3529     }
3530 #endif
3531
3532 #ifdef DEBUG
3533     if (verbose)
3534     {
3535         printf("*************** In optUnrollLoops()\n");
3536     }
3537 #endif
3538     /* Look for loop unrolling candidates */
3539
3540     bool change = false;
3541
3542     // Visit loops from highest to lowest number to vist them in innermost
3543     // to outermost order
3544     for (unsigned lnum = optLoopCount - 1; lnum != ~0U; --lnum)
3545     {
3546         // This is necessary due to an apparent analysis limitation since
3547         // optLoopCount must be strictly greater than 0 upon entry and lnum
3548         // cannot wrap due to the loop termination condition.
3549         PREFAST_ASSUME(lnum != 0U - 1);
3550
3551         BasicBlock* block;
3552         BasicBlock* head;
3553         BasicBlock* bottom;
3554
3555         GenTree* loop;
3556         GenTree* test;
3557         GenTree* incr;
3558         GenTree* phdr;
3559         GenTree* init;
3560
3561         bool       dupCond;
3562         int        lval;
3563         int        lbeg;         // initial value for iterator
3564         int        llim;         // limit value for iterator
3565         unsigned   lvar;         // iterator lclVar #
3566         int        iterInc;      // value to increment the iterator
3567         genTreeOps iterOper;     // type of iterator increment (i.e. ADD, SUB, etc.)
3568         var_types  iterOperType; // type result of the oper (for overflow instrs)
3569         genTreeOps testOper;     // type of loop test (i.e. GT_LE, GT_GE, etc.)
3570         bool       unsTest;      // Is the comparison u/int
3571
3572         unsigned loopRetCount;  // number of BBJ_RETURN blocks in loop
3573         unsigned totalIter;     // total number of iterations in the constant loop
3574         unsigned loopFlags;     // actual lpFlags
3575         unsigned requiredFlags; // required lpFlags
3576
3577         static const int ITER_LIMIT[COUNT_OPT_CODE + 1] = {
3578             10, // BLENDED_CODE
3579             0,  // SMALL_CODE
3580             20, // FAST_CODE
3581             0   // COUNT_OPT_CODE
3582         };
3583
3584         noway_assert(ITER_LIMIT[SMALL_CODE] == 0);
3585         noway_assert(ITER_LIMIT[COUNT_OPT_CODE] == 0);
3586
3587         unsigned iterLimit = (unsigned)ITER_LIMIT[compCodeOpt()];
3588
3589 #ifdef DEBUG
3590         if (compStressCompile(STRESS_UNROLL_LOOPS, 50))
3591         {
3592             iterLimit *= 10;
3593         }
3594 #endif
3595
3596         static const int UNROLL_LIMIT_SZ[COUNT_OPT_CODE + 1] = {
3597             300, // BLENDED_CODE
3598             0,   // SMALL_CODE
3599             600, // FAST_CODE
3600             0    // COUNT_OPT_CODE
3601         };
3602
3603         noway_assert(UNROLL_LIMIT_SZ[SMALL_CODE] == 0);
3604         noway_assert(UNROLL_LIMIT_SZ[COUNT_OPT_CODE] == 0);
3605
3606         int unrollLimitSz = (unsigned)UNROLL_LIMIT_SZ[compCodeOpt()];
3607
3608         loopFlags = optLoopTable[lnum].lpFlags;
3609         // Check for required flags:
3610         // LPFLG_DO_WHILE - required because this transform only handles loops of this form
3611         // LPFLG_CONST - required because this transform only handles full unrolls
3612         // LPFLG_SIMD_LIMIT - included here as a heuristic, not for correctness/structural reasons
3613         requiredFlags = LPFLG_DO_WHILE | LPFLG_CONST | LPFLG_SIMD_LIMIT;
3614
3615 #ifdef DEBUG
3616         if (compStressCompile(STRESS_UNROLL_LOOPS, 50))
3617         {
3618             // In stress mode, quadruple the size limit, and drop
3619             // the restriction that loop limit must be Vector<T>.Count.
3620
3621             unrollLimitSz *= 4;
3622             requiredFlags &= ~LPFLG_SIMD_LIMIT;
3623         }
3624 #endif
3625
3626         /* Ignore the loop if we don't have a do-while
3627         that has a constant number of iterations */
3628
3629         if ((loopFlags & requiredFlags) != requiredFlags)
3630         {
3631             continue;
3632         }
3633
3634         /* ignore if removed or marked as not unrollable */
3635
3636         if (loopFlags & (LPFLG_DONT_UNROLL | LPFLG_REMOVED))
3637         {
3638             continue;
3639         }
3640
3641         head = optLoopTable[lnum].lpHead;
3642         noway_assert(head);
3643         bottom = optLoopTable[lnum].lpBottom;
3644         noway_assert(bottom);
3645
3646         /* Get the loop data:
3647             - initial constant
3648             - limit constant
3649             - iterator
3650             - iterator increment
3651             - increment operation type (i.e. ADD, SUB, etc...)
3652             - loop test type (i.e. GT_GE, GT_LT, etc...)
3653             */
3654
3655         lbeg     = optLoopTable[lnum].lpConstInit;
3656         llim     = optLoopTable[lnum].lpConstLimit();
3657         testOper = optLoopTable[lnum].lpTestOper();
3658
3659         lvar     = optLoopTable[lnum].lpIterVar();
3660         iterInc  = optLoopTable[lnum].lpIterConst();
3661         iterOper = optLoopTable[lnum].lpIterOper();
3662
3663         iterOperType = optLoopTable[lnum].lpIterOperType();
3664         unsTest      = (optLoopTable[lnum].lpTestTree->gtFlags & GTF_UNSIGNED) != 0;
3665
3666         if (lvaTable[lvar].lvAddrExposed)
3667         { // If the loop iteration variable is address-exposed then bail
3668             continue;
3669         }
3670         if (lvaTable[lvar].lvIsStructField)
3671         { // If the loop iteration variable is a promoted field from a struct then
3672             // bail
3673             continue;
3674         }
3675
3676         /* Locate the pre-header and initialization and increment/test statements */
3677
3678         phdr = head->bbTreeList;
3679         noway_assert(phdr);
3680         loop = bottom->bbTreeList;
3681         noway_assert(loop);
3682
3683         init = head->lastStmt();
3684         noway_assert(init && (init->gtNext == nullptr));
3685         test = bottom->lastStmt();
3686         noway_assert(test && (test->gtNext == nullptr));
3687         incr = test->gtPrev;
3688         noway_assert(incr);
3689
3690         if (init->gtFlags & GTF_STMT_CMPADD)
3691         {
3692             /* Must be a duplicated loop condition */
3693             noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
3694
3695             dupCond = true;
3696             init    = init->gtPrev;
3697             noway_assert(init);
3698         }
3699         else
3700         {
3701             dupCond = false;
3702         }
3703
3704         /* Find the number of iterations - the function returns false if not a constant number */
3705
3706         if (!optComputeLoopRep(lbeg, llim, iterInc, iterOper, iterOperType, testOper, unsTest, dupCond, &totalIter))
3707         {
3708             continue;
3709         }
3710
3711         /* Forget it if there are too many repetitions or not a constant loop */
3712
3713         if (totalIter > iterLimit)
3714         {
3715             continue;
3716         }
3717
3718         noway_assert(init->gtOper == GT_STMT);
3719         init = init->gtStmt.gtStmtExpr;
3720         noway_assert(test->gtOper == GT_STMT);
3721         test = test->gtStmt.gtStmtExpr;
3722         noway_assert(incr->gtOper == GT_STMT);
3723         incr = incr->gtStmt.gtStmtExpr;
3724
3725         // Don't unroll loops we don't understand.
3726         if (incr->gtOper != GT_ASG)
3727         {
3728             continue;
3729         }
3730         incr = incr->gtOp.gtOp2;
3731
3732         /* Make sure everything looks ok */
3733         if ((init->gtOper != GT_ASG) || (init->gtOp.gtOp1->gtOper != GT_LCL_VAR) ||
3734             (init->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) || (init->gtOp.gtOp2->gtOper != GT_CNS_INT) ||
3735             (init->gtOp.gtOp2->gtIntCon.gtIconVal != lbeg) ||
3736
3737             !((incr->gtOper == GT_ADD) || (incr->gtOper == GT_SUB)) || (incr->gtOp.gtOp1->gtOper != GT_LCL_VAR) ||
3738             (incr->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) || (incr->gtOp.gtOp2->gtOper != GT_CNS_INT) ||
3739             (incr->gtOp.gtOp2->gtIntCon.gtIconVal != iterInc) ||
3740
3741             (test->gtOper != GT_JTRUE))
3742         {
3743             noway_assert(!"Bad precondition in Compiler::optUnrollLoops()");
3744             continue;
3745         }
3746
3747         /* heuristic - Estimated cost in code size of the unrolled loop */
3748
3749         {
3750             ClrSafeInt<unsigned> loopCostSz; // Cost is size of one iteration
3751
3752             block         = head->bbNext;
3753             auto tryIndex = block->bbTryIndex;
3754
3755             loopRetCount = 0;
3756             for (;; block = block->bbNext)
3757             {
3758                 if (block->bbTryIndex != tryIndex)
3759                 {
3760                     // Unrolling would require cloning EH regions
3761                     goto DONE_LOOP;
3762                 }
3763
3764                 if (block->bbJumpKind == BBJ_RETURN)
3765                 {
3766                     ++loopRetCount;
3767                 }
3768
3769                 /* Visit all the statements in the block */
3770
3771                 for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
3772                 {
3773                     /* Calculate gtCostSz */
3774                     gtSetStmtInfo(stmt);
3775
3776                     /* Update loopCostSz */
3777                     loopCostSz += stmt->gtCostSz;
3778                 }
3779
3780                 if (block == bottom)
3781                 {
3782                     break;
3783                 }
3784             }
3785
3786 #ifdef JIT32_GCENCODER
3787             if (fgReturnCount + loopRetCount * (totalIter - 1) > SET_EPILOGCNT_MAX)
3788             {
3789                 // Jit32 GC encoder can't report more than SET_EPILOGCNT_MAX epilogs.
3790                 goto DONE_LOOP;
3791             }
3792 #endif // !JIT32_GCENCODER
3793
3794             /* Compute the estimated increase in code size for the unrolled loop */
3795
3796             ClrSafeInt<unsigned> fixedLoopCostSz(8);
3797
3798             ClrSafeInt<int> unrollCostSz = ClrSafeInt<int>(loopCostSz * ClrSafeInt<unsigned>(totalIter)) -
3799                                            ClrSafeInt<int>(loopCostSz + fixedLoopCostSz);
3800
3801             /* Don't unroll if too much code duplication would result. */
3802
3803             if (unrollCostSz.IsOverflow() || (unrollCostSz.Value() > unrollLimitSz))
3804             {
3805                 goto DONE_LOOP;
3806             }
3807
3808             /* Looks like a good idea to unroll this loop, let's do it! */
3809             CLANG_FORMAT_COMMENT_ANCHOR;
3810
3811 #ifdef DEBUG
3812             if (verbose)
3813             {
3814                 printf("\nUnrolling loop BB%02u", head->bbNext->bbNum);
3815                 if (head->bbNext->bbNum != bottom->bbNum)
3816                 {
3817                     printf("..BB%02u", bottom->bbNum);
3818                 }
3819                 printf(" over V%02u from %u to %u", lvar, lbeg, llim);
3820                 printf(" unrollCostSz = %d\n", unrollCostSz);
3821                 printf("\n");
3822             }
3823 #endif
3824         }
3825
3826         /* Create the unrolled loop statement list */
3827         {
3828             BlockToBlockMap blockMap(getAllocator());
3829             BasicBlock*     insertAfter = bottom;
3830
3831             for (lval = lbeg; totalIter; totalIter--)
3832             {
3833                 for (block = head->bbNext;; block = block->bbNext)
3834                 {
3835                     BasicBlock* newBlock = insertAfter =
3836                         fgNewBBafter(block->bbJumpKind, insertAfter, /*extendRegion*/ true);
3837                     blockMap.Set(block, newBlock);
3838
3839                     if (!BasicBlock::CloneBlockState(this, newBlock, block, lvar, lval))
3840                     {
3841                         // cloneExpr doesn't handle everything
3842                         BasicBlock* oldBottomNext = insertAfter->bbNext;
3843                         bottom->bbNext            = oldBottomNext;
3844                         oldBottomNext->bbPrev     = bottom;
3845                         optLoopTable[lnum].lpFlags |= LPFLG_DONT_UNROLL;
3846                         goto DONE_LOOP;
3847                     }
3848                     // Block weight should no longer have the loop multiplier
3849                     newBlock->modifyBBWeight(newBlock->bbWeight / BB_LOOP_WEIGHT);
3850                     // Jump dests are set in a post-pass; make sure CloneBlockState hasn't tried to set them.
3851                     assert(newBlock->bbJumpDest == nullptr);
3852
3853                     if (block == bottom)
3854                     {
3855                         // Remove the test; we're doing a full unroll.
3856
3857                         GenTreeStmt* testCopyStmt = newBlock->lastStmt();
3858                         GenTree*     testCopyExpr = testCopyStmt->gtStmt.gtStmtExpr;
3859                         assert(testCopyExpr->gtOper == GT_JTRUE);
3860                         GenTree* sideEffList = nullptr;
3861                         gtExtractSideEffList(testCopyExpr, &sideEffList, GTF_SIDE_EFFECT | GTF_ORDER_SIDEEFF);
3862                         if (sideEffList == nullptr)
3863                         {
3864                             fgRemoveStmt(newBlock, testCopyStmt);
3865                         }
3866                         else
3867                         {
3868                             testCopyStmt->gtStmt.gtStmtExpr = sideEffList;
3869                         }
3870                         newBlock->bbJumpKind = BBJ_NONE;
3871
3872                         // Exit this loop; we've walked all the blocks.
3873                         break;
3874                     }
3875                 }
3876
3877                 // Now redirect any branches within the newly-cloned iteration
3878                 for (block = head->bbNext; block != bottom; block = block->bbNext)
3879                 {
3880                     BasicBlock* newBlock = blockMap[block];
3881                     optCopyBlkDest(block, newBlock);
3882                     optRedirectBlock(newBlock, &blockMap);
3883                 }
3884
3885                 /* update the new value for the unrolled iterator */
3886
3887                 switch (iterOper)
3888                 {
3889                     case GT_ADD:
3890                         lval += iterInc;
3891                         break;
3892
3893                     case GT_SUB:
3894                         lval -= iterInc;
3895                         break;
3896
3897                     case GT_RSH:
3898                     case GT_LSH:
3899                         noway_assert(!"Unrolling not implemented for this loop iterator");
3900                         goto DONE_LOOP;
3901
3902                     default:
3903                         noway_assert(!"Unknown operator for constant loop iterator");
3904                         goto DONE_LOOP;
3905                 }
3906             }
3907
3908             // Gut the old loop body
3909             for (block = head->bbNext;; block = block->bbNext)
3910             {
3911                 block->bbTreeList = nullptr;
3912                 block->bbJumpKind = BBJ_NONE;
3913                 block->bbFlags &= ~(BBF_NEEDS_GCPOLL | BBF_LOOP_HEAD);
3914                 if (block->bbJumpDest != nullptr)
3915                 {
3916                     block->bbJumpDest = nullptr;
3917                 }
3918
3919                 if (block == bottom)
3920                 {
3921                     break;
3922                 }
3923             }
3924
3925             /* if the HEAD is a BBJ_COND drop the condition (and make HEAD a BBJ_NONE block) */
3926
3927             if (head->bbJumpKind == BBJ_COND)
3928             {
3929                 phdr = head->bbTreeList;
3930                 noway_assert(phdr);
3931                 test = phdr->gtPrev;
3932
3933                 noway_assert(test && (test->gtNext == nullptr));
3934                 noway_assert(test->gtOper == GT_STMT);
3935                 noway_assert(test->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
3936
3937                 init = test->gtPrev;
3938                 noway_assert(init && (init->gtNext == test));
3939                 noway_assert(init->gtOper == GT_STMT);
3940
3941                 init->gtNext     = nullptr;
3942                 phdr->gtPrev     = init;
3943                 head->bbJumpKind = BBJ_NONE;
3944                 head->bbFlags &= ~BBF_NEEDS_GCPOLL;
3945             }
3946             else
3947             {
3948                 /* the loop must execute */
3949                 noway_assert(head->bbJumpKind == BBJ_NONE);
3950             }
3951
3952 #ifdef DEBUG
3953             if (verbose)
3954             {
3955                 printf("Whole unrolled loop:\n");
3956
3957                 gtDispTree(init);
3958                 printf("\n");
3959                 fgDumpTrees(head->bbNext, insertAfter);
3960             }
3961 #endif
3962
3963             /* Remember that something has changed */
3964
3965             change = true;
3966
3967             /* Make sure to update loop table */
3968
3969             /* Use the LPFLG_REMOVED flag and update the bbLoopMask acordingly
3970                 * (also make head and bottom NULL - to hit an assert or GPF) */
3971
3972             optLoopTable[lnum].lpFlags |= LPFLG_REMOVED;
3973             optLoopTable[lnum].lpHead = optLoopTable[lnum].lpBottom = nullptr;
3974
3975             // Note if we created new BBJ_RETURNs
3976             fgReturnCount += loopRetCount * (totalIter - 1);
3977         }
3978
3979     DONE_LOOP:;
3980     }
3981
3982     if (change)
3983     {
3984         fgUpdateChangedFlowGraph();
3985     }
3986
3987 #ifdef DEBUG
3988     fgDebugCheckBBlist(true);
3989 #endif
3990 }
3991 #ifdef _PREFAST_
3992 #pragma warning(pop)
3993 #endif
3994
3995 /*****************************************************************************
3996  *
3997  *  Return false if there is a code path from 'topBB' to 'botBB' that might
3998  *  not execute a method call.
3999  */
4000
4001 bool Compiler::optReachWithoutCall(BasicBlock* topBB, BasicBlock* botBB)
4002 {
4003     // TODO-Cleanup: Currently BBF_GC_SAFE_POINT is not set for helper calls,
4004     // as some helper calls are neither interruptible nor hijackable.
4005     // When we can determine this, then we can set BBF_GC_SAFE_POINT for
4006     // those helpers too.
4007
4008     noway_assert(topBB->bbNum <= botBB->bbNum);
4009
4010     // We can always check topBB and botBB for any gc safe points and early out
4011
4012     if ((topBB->bbFlags | botBB->bbFlags) & BBF_GC_SAFE_POINT)
4013     {
4014         return false;
4015     }
4016
4017     // Otherwise we will need to rely upon the dominator sets
4018
4019     if (!fgDomsComputed)
4020     {
4021         // return a conservative answer of true when we don't have the dominator sets
4022         return true;
4023     }
4024
4025     BasicBlock* curBB = topBB;
4026     for (;;)
4027     {
4028         noway_assert(curBB);
4029
4030         // If we added a loop pre-header block then we will
4031         //  have a bbNum greater than fgLastBB, and we won't have
4032         //  any dominator information about this block, so skip it.
4033         //
4034         if (curBB->bbNum <= fgLastBB->bbNum)
4035         {
4036             noway_assert(curBB->bbNum <= botBB->bbNum);
4037
4038             // Does this block contain a gc safe point?
4039
4040             if (curBB->bbFlags & BBF_GC_SAFE_POINT)
4041             {
4042                 // Will this block always execute on the way to botBB ?
4043                 //
4044                 // Since we are checking every block in [topBB .. botBB] and we are using
4045                 // a lexical definition of a loop.
4046                 //  (all that we know is that is that botBB is a back-edge to topBB)
4047                 // Thus while walking blocks in this range we may encounter some blocks
4048                 // that are not really part of the loop, and so we need to perform
4049                 // some additional checks:
4050                 //
4051                 // We will check that the current 'curBB' is reachable from 'topBB'
4052                 // and that it dominates the block containing the back-edge 'botBB'
4053                 // When both of these are true then we know that the gcsafe point in 'curBB'
4054                 // will be encountered in the loop and we can return false
4055                 //
4056                 if (fgDominate(curBB, botBB) && fgReachable(topBB, curBB))
4057                 {
4058                     return false;
4059                 }
4060             }
4061             else
4062             {
4063                 // If we've reached the destination block, then we're done
4064
4065                 if (curBB == botBB)
4066                 {
4067                     break;
4068                 }
4069             }
4070         }
4071
4072         curBB = curBB->bbNext;
4073     }
4074
4075     // If we didn't find any blocks that contained a gc safe point and
4076     // also met the fgDominate and fgReachable criteria then we must return true
4077     //
4078     return true;
4079 }
4080
4081 /*****************************************************************************
4082  *
4083  * Find the loop termination test at the bottom of the loop
4084  */
4085
4086 static GenTree* optFindLoopTermTest(BasicBlock* bottom)
4087 {
4088     GenTree* testt = bottom->bbTreeList;
4089
4090     assert(testt && testt->gtOper == GT_STMT);
4091
4092     GenTree* result = testt->gtPrev;
4093
4094 #ifdef DEBUG
4095     while (testt->gtNext)
4096     {
4097         testt = testt->gtNext;
4098     }
4099
4100     assert(testt == result);
4101 #endif
4102
4103     return result;
4104 }
4105
4106 /*****************************************************************************
4107  * Optimize "jmp C; do{} C:while(cond);" loops to "if (cond){ do{}while(cond}; }"
4108  */
4109
4110 void Compiler::fgOptWhileLoop(BasicBlock* block)
4111 {
4112     noway_assert(!opts.MinOpts() && !opts.compDbgCode);
4113     noway_assert(compCodeOpt() != SMALL_CODE);
4114
4115     /*
4116         Optimize while loops into do { } while loop
4117         Our loop hoisting logic requires do { } while loops.
4118         Specifically, we're looking for the following case:
4119
4120                 ...
4121                 jmp test
4122         loop:
4123                 ...
4124                 ...
4125         test:
4126                 cond
4127                 jtrue   loop
4128
4129         If we find this, and the condition is simple enough, we change
4130         the loop to the following:
4131
4132                 ...
4133                 cond
4134                 jfalse done
4135                 // else fall-through
4136         loop:
4137                 ...
4138                 ...
4139         test:
4140                 cond
4141                 jtrue   loop
4142         done:
4143
4144      */
4145
4146     /* Does the BB end with an unconditional jump? */
4147
4148     if (block->bbJumpKind != BBJ_ALWAYS || (block->bbFlags & BBF_KEEP_BBJ_ALWAYS))
4149     { // It can't be one of the ones we use for our exception magic
4150         return;
4151     }
4152
4153     // It has to be a forward jump
4154     //  TODO-CQ: Check if we can also optimize the backwards jump as well.
4155     //
4156     if (fgIsForwardBranch(block) == false)
4157     {
4158         return;
4159     }
4160
4161     // Get hold of the jump target
4162     BasicBlock* bTest = block->bbJumpDest;
4163
4164     // Does the block consist of 'jtrue(cond) block' ?
4165     if (bTest->bbJumpKind != BBJ_COND)
4166     {
4167         return;
4168     }
4169
4170     // bTest must be a backwards jump to block->bbNext
4171     if (bTest->bbJumpDest != block->bbNext)
4172     {
4173         return;
4174     }
4175
4176     // Since test is a BBJ_COND it will have a bbNext
4177     noway_assert(bTest->bbNext);
4178
4179     // 'block' must be in the same try region as the condition, since we're going to insert
4180     // a duplicated condition in 'block', and the condition might include exception throwing code.
4181     if (!BasicBlock::sameTryRegion(block, bTest))
4182     {
4183         return;
4184     }
4185
4186     // We're going to change 'block' to branch to bTest->bbNext, so that also better be in the
4187     // same try region (or no try region) to avoid generating illegal flow.
4188     BasicBlock* bTestNext = bTest->bbNext;
4189     if (bTestNext->hasTryIndex() && !BasicBlock::sameTryRegion(block, bTestNext))
4190     {
4191         return;
4192     }
4193
4194     GenTree* condStmt = optFindLoopTermTest(bTest);
4195
4196     // bTest must only contain only a jtrue with no other stmts, we will only clone
4197     // the conditional, so any other statements will not get cloned
4198     //  TODO-CQ: consider cloning the whole bTest block as inserting it after block.
4199     //
4200     if (bTest->bbTreeList != condStmt)
4201     {
4202         return;
4203     }
4204
4205     /* Get to the condition node from the statement tree */
4206
4207     noway_assert(condStmt->gtOper == GT_STMT);
4208
4209     GenTree* condTree = condStmt->gtStmt.gtStmtExpr;
4210     noway_assert(condTree->gtOper == GT_JTRUE);
4211
4212     condTree = condTree->gtOp.gtOp1;
4213
4214     // The condTree has to be a RelOp comparison
4215     //  TODO-CQ: Check if we can also optimize the backwards jump as well.
4216     //
4217     if (condTree->OperIsCompare() == false)
4218     {
4219         return;
4220     }
4221
4222     /* We call gtPrepareCost to measure the cost of duplicating this tree */
4223
4224     gtPrepareCost(condTree);
4225     unsigned estDupCostSz = condTree->gtCostSz;
4226
4227     double loopIterations = (double)BB_LOOP_WEIGHT;
4228
4229     bool                 allProfileWeightsAreValid = false;
4230     BasicBlock::weight_t weightBlock               = block->bbWeight;
4231     BasicBlock::weight_t weightTest                = bTest->bbWeight;
4232     BasicBlock::weight_t weightNext                = block->bbNext->bbWeight;
4233
4234     // If we have profile data then we calculate the number of time
4235     // the loop will iterate into loopIterations
4236     if (fgIsUsingProfileWeights())
4237     {
4238         // Only rely upon the profile weight when all three of these blocks
4239         // have good profile weights
4240         if (block->hasProfileWeight() && bTest->hasProfileWeight() && block->bbNext->hasProfileWeight())
4241         {
4242             allProfileWeightsAreValid = true;
4243
4244             // If this while loop never iterates then don't bother transforming
4245             if (weightNext == 0)
4246             {
4247                 return;
4248             }
4249
4250             // with (weighNext > 0) we should also have (weightTest >= weightBlock)
4251             // if the profile weights are all valid.
4252             //
4253             //   weightNext is the number of time this loop iterates
4254             //   weightBlock is the number of times that we enter the while loop
4255             //   loopIterations is the average number of times that this loop iterates
4256             //
4257             if (weightTest >= weightBlock)
4258             {
4259                 loopIterations = (double)block->bbNext->bbWeight / (double)block->bbWeight;
4260             }
4261         }
4262     }
4263
4264     unsigned maxDupCostSz = 32;
4265
4266     // optFastCodeOrBlendedLoop(bTest->bbWeight) does not work here as we have not
4267     // set loop weights yet
4268     if ((compCodeOpt() == FAST_CODE) || compStressCompile(STRESS_DO_WHILE_LOOPS, 30))
4269     {
4270         maxDupCostSz *= 4;
4271     }
4272
4273     // If this loop iterates a lot then raise the maxDupCost
4274     if (loopIterations >= 12.0)
4275     {
4276         maxDupCostSz *= 2;
4277     }
4278     if (loopIterations >= 96.0)
4279     {
4280         maxDupCostSz *= 2;
4281     }
4282
4283     // If the loop condition has a shared static helper, we really want this loop converted
4284     // as not converting the loop will disable loop hoisting, meaning the shared helper will
4285     // be executed on every loop iteration.
4286     int countOfHelpers = 0;
4287     fgWalkTreePre(&condTree, CountSharedStaticHelper, &countOfHelpers);
4288
4289     if (countOfHelpers > 0 && compCodeOpt() != SMALL_CODE)
4290     {
4291         maxDupCostSz += 24 * min(countOfHelpers, (int)(loopIterations + 1.5));
4292     }
4293
4294     // If the compare has too high cost then we don't want to dup
4295
4296     bool costIsTooHigh = (estDupCostSz > maxDupCostSz);
4297
4298 #ifdef DEBUG
4299     if (verbose)
4300     {
4301         printf("\nDuplication of loop condition [%06u] is %s, because the cost of duplication (%i) is %s than %i,"
4302                "\n   loopIterations = %7.3f, countOfHelpers = %d, validProfileWeights = %s\n",
4303                condTree->gtTreeID, costIsTooHigh ? "not done" : "performed", estDupCostSz,
4304                costIsTooHigh ? "greater" : "less or equal", maxDupCostSz, loopIterations, countOfHelpers,
4305                allProfileWeightsAreValid ? "true" : "false");
4306     }
4307 #endif
4308
4309     if (costIsTooHigh)
4310     {
4311         return;
4312     }
4313
4314     /* Looks good - duplicate the condition test */
4315
4316     condTree->gtFlags |= GTF_RELOP_ZTT;
4317
4318     condTree = gtCloneExpr(condTree);
4319     gtReverseCond(condTree);
4320
4321     // Make sure clone expr copied the flag
4322     assert(condTree->gtFlags & GTF_RELOP_ZTT);
4323
4324     condTree = gtNewOperNode(GT_JTRUE, TYP_VOID, condTree);
4325
4326     /* Create a statement entry out of the condition and
4327        append the condition test at the end of 'block' */
4328
4329     GenTree* copyOfCondStmt = fgInsertStmtAtEnd(block, condTree);
4330
4331     copyOfCondStmt->gtFlags |= GTF_STMT_CMPADD;
4332
4333     if (opts.compDbgInfo)
4334     {
4335         copyOfCondStmt->gtStmt.gtStmtILoffsx = condStmt->gtStmt.gtStmtILoffsx;
4336     }
4337
4338     // Flag the block that received the copy as potentially having an array/vtable
4339     // reference if the block copied from did; this is a conservative guess.
4340     if (auto copyFlags = bTest->bbFlags & (BBF_HAS_VTABREF | BBF_HAS_IDX_LEN))
4341     {
4342         block->bbFlags |= copyFlags;
4343     }
4344
4345     // If we have profile data for all blocks and we know that we are cloning the
4346     //  bTest block into block and thus changing the control flow from block so
4347     //  that it no longer goes directly to bTest anymore, we have to adjust the
4348     //  weight of bTest by subtracting out the weight of block.
4349     //
4350     if (allProfileWeightsAreValid)
4351     {
4352         //
4353         // Some additional sanity checks before adjusting the weight of bTest
4354         //
4355         if ((weightNext > 0) && (weightTest >= weightBlock) && (weightTest != BB_MAX_WEIGHT))
4356         {
4357             // Get the two edge that flow out of bTest
4358             flowList* edgeToNext = fgGetPredForBlock(bTest->bbNext, bTest);
4359             flowList* edgeToJump = fgGetPredForBlock(bTest->bbJumpDest, bTest);
4360
4361             // Calculate the new weight for block bTest
4362
4363             BasicBlock::weight_t newWeightTest =
4364                 (weightTest > weightBlock) ? (weightTest - weightBlock) : BB_ZERO_WEIGHT;
4365             bTest->bbWeight = newWeightTest;
4366
4367             if (newWeightTest == BB_ZERO_WEIGHT)
4368             {
4369                 bTest->bbFlags |= BBF_RUN_RARELY;
4370                 // All out edge weights are set to zero
4371                 edgeToNext->flEdgeWeightMin = BB_ZERO_WEIGHT;
4372                 edgeToNext->flEdgeWeightMax = BB_ZERO_WEIGHT;
4373                 edgeToJump->flEdgeWeightMin = BB_ZERO_WEIGHT;
4374                 edgeToJump->flEdgeWeightMax = BB_ZERO_WEIGHT;
4375             }
4376             else
4377             {
4378                 // Update the our edge weights
4379                 edgeToNext->flEdgeWeightMin = BB_ZERO_WEIGHT;
4380                 edgeToNext->flEdgeWeightMax = min(edgeToNext->flEdgeWeightMax, newWeightTest);
4381                 edgeToJump->flEdgeWeightMin = BB_ZERO_WEIGHT;
4382                 edgeToJump->flEdgeWeightMax = min(edgeToJump->flEdgeWeightMax, newWeightTest);
4383             }
4384         }
4385     }
4386
4387     /* Change the block to end with a conditional jump */
4388
4389     block->bbJumpKind = BBJ_COND;
4390     block->bbJumpDest = bTest->bbNext;
4391
4392     /* Mark the jump dest block as being a jump target */
4393     block->bbJumpDest->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
4394
4395     /* Update bbRefs and bbPreds for 'block->bbNext' 'bTest' and 'bTest->bbNext' */
4396
4397     fgAddRefPred(block->bbNext, block);
4398
4399     fgRemoveRefPred(bTest, block);
4400     fgAddRefPred(bTest->bbNext, block);
4401
4402 #ifdef DEBUG
4403     if (verbose)
4404     {
4405         printf("\nDuplicating loop condition in BB%02u for loop (BB%02u - BB%02u)", block->bbNum, block->bbNext->bbNum,
4406                bTest->bbNum);
4407         printf("\nEstimated code size expansion is %d\n ", estDupCostSz);
4408
4409         gtDispTree(copyOfCondStmt);
4410     }
4411
4412 #endif
4413 }
4414
4415 /*****************************************************************************
4416  *
4417  *  Optimize the BasicBlock layout of the method
4418  */
4419
4420 void Compiler::optOptimizeLayout()
4421 {
4422     noway_assert(!opts.MinOpts() && !opts.compDbgCode);
4423
4424 #ifdef DEBUG
4425     if (verbose)
4426     {
4427         printf("*************** In optOptimizeLayout()\n");
4428         fgDispHandlerTab();
4429     }
4430
4431     /* Check that the flowgraph data (bbNum, bbRefs, bbPreds) is up-to-date */
4432     fgDebugCheckBBlist();
4433 #endif
4434
4435     noway_assert(fgModified == false);
4436
4437     for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
4438     {
4439         /* Make sure the appropriate fields are initialized */
4440
4441         if (block->bbWeight == BB_ZERO_WEIGHT)
4442         {
4443             /* Zero weighted block can't have a LOOP_HEAD flag */
4444             noway_assert(block->isLoopHead() == false);
4445             continue;
4446         }
4447
4448         assert(block->bbLoopNum == 0);
4449
4450         if (compCodeOpt() != SMALL_CODE)
4451         {
4452             /* Optimize "while(cond){}" loops to "cond; do{}while(cond);" */
4453
4454             fgOptWhileLoop(block);
4455         }
4456     }
4457
4458     if (fgModified)
4459     {
4460         // Recompute the edge weight if we have modified the flow graph in fgOptWhileLoop
4461         fgComputeEdgeWeights();
4462     }
4463
4464     fgUpdateFlowGraph(true);
4465     fgReorderBlocks();
4466     fgUpdateFlowGraph();
4467 }
4468
4469 /*****************************************************************************
4470  *
4471  *  Perform loop inversion, find and classify natural loops
4472  */
4473
4474 void Compiler::optOptimizeLoops()
4475 {
4476     noway_assert(!opts.MinOpts() && !opts.compDbgCode);
4477
4478 #ifdef DEBUG
4479     if (verbose)
4480     {
4481         printf("*************** In optOptimizeLoops()\n");
4482     }
4483 #endif
4484
4485     optSetBlockWeights();
4486
4487     /* Were there any loops in the flow graph? */
4488
4489     if (fgHasLoops)
4490     {
4491         /* now that we have dominator information we can find loops */
4492
4493         optFindNaturalLoops();
4494
4495         unsigned loopNum = 0;
4496
4497         /* Iterate over the flow graph, marking all loops */
4498
4499         /* We will use the following terminology:
4500          * top        - the first basic block in the loop (i.e. the head of the backward edge)
4501          * bottom     - the last block in the loop (i.e. the block from which we jump to the top)
4502          * lastBottom - used when we have multiple back-edges to the same top
4503          */
4504
4505         flowList* pred;
4506
4507         BasicBlock* top;
4508
4509         for (top = fgFirstBB; top; top = top->bbNext)
4510         {
4511             BasicBlock* foundBottom = nullptr;
4512
4513             for (pred = top->bbPreds; pred; pred = pred->flNext)
4514             {
4515                 /* Is this a loop candidate? - We look for "back edges" */
4516
4517                 BasicBlock* bottom = pred->flBlock;
4518
4519                 /* is this a backward edge? (from BOTTOM to TOP) */
4520
4521                 if (top->bbNum > bottom->bbNum)
4522                 {
4523                     continue;
4524                 }
4525
4526                 /* 'top' also must have the BBF_LOOP_HEAD flag set */
4527
4528                 if (top->isLoopHead() == false)
4529                 {
4530                     continue;
4531                 }
4532
4533                 /* We only consider back-edges that are BBJ_COND or BBJ_ALWAYS for loops */
4534
4535                 if ((bottom->bbJumpKind != BBJ_COND) && (bottom->bbJumpKind != BBJ_ALWAYS))
4536                 {
4537                     continue;
4538                 }
4539
4540                 /* the top block must be able to reach the bottom block */
4541                 if (!fgReachable(top, bottom))
4542                 {
4543                     continue;
4544                 }
4545
4546                 /* Found a new loop, record the longest backedge in foundBottom */
4547
4548                 if ((foundBottom == nullptr) || (bottom->bbNum > foundBottom->bbNum))
4549                 {
4550                     foundBottom = bottom;
4551                 }
4552             }
4553
4554             if (foundBottom)
4555             {
4556                 loopNum++;
4557 #ifdef DEBUG
4558                 /* Mark the loop header as such */
4559                 assert(FitsIn<unsigned char>(loopNum));
4560                 top->bbLoopNum = (unsigned char)loopNum;
4561 #endif
4562
4563                 /* Mark all blocks between 'top' and 'bottom' */
4564
4565                 optMarkLoopBlocks(top, foundBottom, false);
4566             }
4567
4568             // We track at most 255 loops
4569             if (loopNum == 255)
4570             {
4571 #if COUNT_LOOPS
4572                 totalUnnatLoopOverflows++;
4573 #endif
4574                 break;
4575             }
4576         }
4577
4578 #if COUNT_LOOPS
4579         totalUnnatLoopCount += loopNum;
4580 #endif
4581
4582 #ifdef DEBUG
4583         if (verbose)
4584         {
4585             if (loopNum > 0)
4586             {
4587                 printf("\nFound a total of %d loops.", loopNum);
4588                 printf("\nAfter loop weight marking:\n");
4589                 fgDispBasicBlocks();
4590                 printf("\n");
4591             }
4592         }
4593 #endif
4594         optLoopsMarked = true;
4595     }
4596 }
4597
4598 //------------------------------------------------------------------------
4599 // optDeriveLoopCloningConditions: Derive loop cloning conditions.
4600 //
4601 // Arguments:
4602 //     loopNum     -  the current loop index for which conditions are derived.
4603 //     context     -  data structure where all loop cloning info is kept.
4604 //
4605 // Return Value:
4606 //     "false" if conditions cannot be obtained. "true" otherwise.
4607 //     The cloning conditions are updated in the "conditions"[loopNum] field
4608 //     of the "context" parameter.
4609 //
4610 // Operation:
4611 //     Inspect the loop cloning optimization candidates and populate the conditions necessary
4612 //     for each optimization candidate. Checks if the loop stride is "> 0" if the loop
4613 //     condition is "less than". If the initializer is "var" init then adds condition
4614 //     "var >= 0", and if the loop is var limit then, "var >= 0" and "var <= a.len"
4615 //     are added to "context". These conditions are checked in the pre-header block
4616 //     and the cloning choice is made.
4617 //
4618 // Assumption:
4619 //      Callers should assume AND operation is used i.e., if all conditions are
4620 //      true, then take the fast path.
4621 //
4622 bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext* context)
4623 {
4624     JITDUMP("------------------------------------------------------------\n");
4625     JITDUMP("Deriving cloning conditions for L%02u\n", loopNum);
4626
4627     LoopDsc*                         loop     = &optLoopTable[loopNum];
4628     JitExpandArrayStack<LcOptInfo*>* optInfos = context->GetLoopOptInfo(loopNum);
4629
4630     if (loop->lpTestOper() == GT_LT)
4631     {
4632         // Stride conditions
4633         if (loop->lpIterConst() <= 0)
4634         {
4635             JITDUMP("> Stride %d is invalid\n", loop->lpIterConst());
4636             return false;
4637         }
4638
4639         // Init conditions
4640         if (loop->lpFlags & LPFLG_CONST_INIT)
4641         {
4642             // Only allowing const init at this time.
4643             if (loop->lpConstInit < 0)
4644             {
4645                 JITDUMP("> Init %d is invalid\n", loop->lpConstInit);
4646                 return false;
4647             }
4648         }
4649         else if (loop->lpFlags & LPFLG_VAR_INIT)
4650         {
4651             // limitVar >= 0
4652             LC_Condition geZero(GT_GE, LC_Expr(LC_Ident(loop->lpVarInit, LC_Ident::Var)),
4653                                 LC_Expr(LC_Ident(0, LC_Ident::Const)));
4654             context->EnsureConditions(loopNum)->Push(geZero);
4655         }
4656         else
4657         {
4658             JITDUMP("> Not variable init\n");
4659             return false;
4660         }
4661
4662         // Limit Conditions
4663         LC_Ident ident;
4664         if (loop->lpFlags & LPFLG_CONST_LIMIT)
4665         {
4666             int limit = loop->lpConstLimit();
4667             if (limit < 0)
4668             {
4669                 JITDUMP("> limit %d is invalid\n", limit);
4670                 return false;
4671             }
4672             ident = LC_Ident(static_cast<unsigned>(limit), LC_Ident::Const);
4673         }
4674         else if (loop->lpFlags & LPFLG_VAR_LIMIT)
4675         {
4676             unsigned limitLcl = loop->lpVarLimit();
4677             ident             = LC_Ident(limitLcl, LC_Ident::Var);
4678
4679             LC_Condition geZero(GT_GE, LC_Expr(ident), LC_Expr(LC_Ident(0, LC_Ident::Const)));
4680
4681             context->EnsureConditions(loopNum)->Push(geZero);
4682         }
4683         else if (loop->lpFlags & LPFLG_ARRLEN_LIMIT)
4684         {
4685             ArrIndex* index = new (getAllocator()) ArrIndex(getAllocator());
4686             if (!loop->lpArrLenLimit(this, index))
4687             {
4688                 JITDUMP("> ArrLen not matching");
4689                 return false;
4690             }
4691             ident = LC_Ident(LC_Array(LC_Array::Jagged, index, LC_Array::ArrLen));
4692
4693             // Ensure that this array must be dereference-able, before executing the actual condition.
4694             LC_Array array(LC_Array::Jagged, index, LC_Array::None);
4695             context->EnsureDerefs(loopNum)->Push(array);
4696         }
4697         else
4698         {
4699             JITDUMP("> Undetected limit\n");
4700             return false;
4701         }
4702
4703         for (unsigned i = 0; i < optInfos->Size(); ++i)
4704         {
4705             LcOptInfo* optInfo = optInfos->GetRef(i);
4706             switch (optInfo->GetOptType())
4707             {
4708                 case LcOptInfo::LcJaggedArray:
4709                 {
4710                     // limit <= arrLen
4711                     LcJaggedArrayOptInfo* arrIndexInfo = optInfo->AsLcJaggedArrayOptInfo();
4712                     LC_Array arrLen(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::ArrLen);
4713                     LC_Ident arrLenIdent = LC_Ident(arrLen);
4714
4715                     LC_Condition cond(GT_LE, LC_Expr(ident), LC_Expr(arrLenIdent));
4716                     context->EnsureConditions(loopNum)->Push(cond);
4717
4718                     // Ensure that this array must be dereference-able, before executing the actual condition.
4719                     LC_Array array(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::None);
4720                     context->EnsureDerefs(loopNum)->Push(array);
4721                 }
4722                 break;
4723                 case LcOptInfo::LcMdArray:
4724                 {
4725                     // limit <= mdArrLen
4726                     LcMdArrayOptInfo* mdArrInfo = optInfo->AsLcMdArrayOptInfo();
4727                     LC_Condition      cond(GT_LE, LC_Expr(ident),
4728                                       LC_Expr(LC_Ident(LC_Array(LC_Array::MdArray,
4729                                                                 mdArrInfo->GetArrIndexForDim(getAllocator()),
4730                                                                 mdArrInfo->dim, LC_Array::None))));
4731                     context->EnsureConditions(loopNum)->Push(cond);
4732                 }
4733                 break;
4734
4735                 default:
4736                     JITDUMP("Unknown opt\n");
4737                     return false;
4738             }
4739         }
4740         JITDUMP("Conditions: (");
4741         DBEXEC(verbose, context->PrintConditions(loopNum));
4742         JITDUMP(")\n");
4743         return true;
4744     }
4745     return false;
4746 }
4747
4748 //------------------------------------------------------------------------------------
4749 // optComputeDerefConditions: Derive loop cloning conditions for dereferencing arrays.
4750 //
4751 // Arguments:
4752 //     loopNum     -  the current loop index for which conditions are derived.
4753 //     context     -  data structure where all loop cloning info is kept.
4754 //
4755 // Return Value:
4756 //     "false" if conditions cannot be obtained. "true" otherwise.
4757 //     The deref conditions are updated in the "derefConditions"[loopNum] field
4758 //     of the "context" parameter.
4759 //
4760 // Definition of Deref Conditions:
4761 //     To be able to check for the loop cloning condition that (limitVar <= a.len)
4762 //     we should first be able to dereference "a". i.e., "a" is non-null.
4763 //
4764 //     Example:
4765 //
4766 //     for (i in 0..n)
4767 //       for (j in 0..n)
4768 //         for (k in 0..n)      // Inner most loop is being cloned. Cloning needs to check if
4769 //                              // (n <= a[i][j].len) and other safer conditions to take the fast path
4770 //           a[i][j][k] = 0;
4771 //
4772 //     Now, we want to deref a[i][j] to invoke length operator on it to perform the cloning fast path check.
4773 //     This involves deref of (a), (a[i]), (a[i][j]), therefore, the following should first
4774 //     be true to do the deref.
4775 //
4776 //     (a != null) && (i < a.len) && (a[i] != null) && (j < a[i].len) && (a[i][j] != null) --> (1)
4777 //
4778 //     Note the short circuiting AND. Implication: these conditions should be performed in separate
4779 //     blocks each of which will branch to slow path if the condition evaluates to false.
4780 //
4781 //     Now, imagine a situation where we have
4782 //      a[x][y][k] = 20 and a[i][j][k] = 0
4783 //     also in the inner most loop where x, y are parameters, then our conditions will have
4784 //     to include
4785 //     (x < a.len) &&
4786 //     (y < a[x].len)
4787 //     in addition to the above conditions (1) to get rid of bounds check on index 'k'
4788 //
4789 //     But these conditions can be checked together with conditions
4790 //     (i < a.len) without a need for a separate block. In summary, the conditions will be:
4791 //
4792 //     (a != null) &&
4793 //     ((i < a.len) & (x < a.len)) &&      <-- Note the bitwise AND here.
4794 //     (a[i] != null & a[x] != null) &&    <-- Note the bitwise AND here.
4795 //     (j < a[i].len & y < a[x].len) &&    <-- Note the bitwise AND here.
4796 //     (a[i][j] != null & a[x][y] != null) <-- Note the bitwise AND here.
4797 //
4798 //     This naturally yields a tree style pattern, where the nodes of the tree are
4799 //     the array and indices respectively.
4800 //
4801 //     Example:
4802 //         a => {
4803 //             i => {
4804 //                 j => {
4805 //                     k => {}
4806 //                 }
4807 //             },
4808 //             x => {
4809 //                 y => {
4810 //                     k => {}
4811 //                 }
4812 //             }
4813 //         }
4814 //
4815 //         Notice that the variables in the same levels can have their conditions combined in the
4816 //         same block with a bitwise AND. Whereas, the conditions in consecutive levels will be
4817 //         combined with a short-circuiting AND (i.e., different basic blocks).
4818 //
4819 //  Operation:
4820 //      Construct a tree of array indices and the array which will generate the optimal
4821 //      conditions for loop cloning.
4822 //
4823 //      a[i][j][k], b[i] and a[i][y][k] are the occurrences in the loop. Then, the tree should be:
4824 //
4825 //      a => {
4826 //          i => {
4827 //              j => {
4828 //                  k => {}
4829 //              },
4830 //              y => {
4831 //                  k => {}
4832 //              },
4833 //          }
4834 //      },
4835 //      b => {
4836 //          i => {}
4837 //      }
4838 //      In this method, we will construct such a tree by descending depth first into the array
4839 //      index operation and forming a tree structure as we encounter the array or the index variables.
4840 //
4841 //      This tree structure will then be used to generate conditions like below:
4842 //      (a != null) & (b != null) &&       // from the first level of the tree.
4843 //
4844 //      (i < a.len) & (i < b.len) &&       // from the second level of the tree. Levels can be combined.
4845 //      (a[i] != null) & (b[i] != null) && // from the second level of the tree.
4846 //
4847 //      (j < a[i].len) & (y < a[i].len) &&       // from the third level.
4848 //      (a[i][j] != null) & (a[i][y] != null) && // from the third level.
4849 //
4850 //      and so on.
4851 //
4852 //
4853 bool Compiler::optComputeDerefConditions(unsigned loopNum, LoopCloneContext* context)
4854 {
4855     JitExpandArrayStack<LC_Deref*> nodes(getAllocator());
4856     int                            maxRank = -1;
4857
4858     // Get the dereference-able arrays.
4859     JitExpandArrayStack<LC_Array>* deref = context->EnsureDerefs(loopNum);
4860
4861     // For each array in the dereference list, construct a tree,
4862     // where the nodes are array and index variables and an edge 'u-v'
4863     // exists if a node 'v' indexes node 'u' directly as in u[v] or an edge
4864     // 'u-v-w' transitively if u[v][w] occurs.
4865     for (unsigned i = 0; i < deref->Size(); ++i)
4866     {
4867         LC_Array& array = (*deref)[i];
4868
4869         // First populate the array base variable.
4870         LC_Deref* node = LC_Deref::Find(&nodes, array.arrIndex->arrLcl);
4871         if (node == nullptr)
4872         {
4873             node = new (getAllocator()) LC_Deref(array, 0 /*level*/);
4874             nodes.Push(node);
4875         }
4876
4877         // For each dimension (level) for the array, populate the tree with the variable
4878         // from that dimension.
4879         unsigned rank = (unsigned)array.GetDimRank();
4880         for (unsigned i = 0; i < rank; ++i)
4881         {
4882             node->EnsureChildren(getAllocator());
4883             LC_Deref* tmp = node->Find(array.arrIndex->indLcls[i]);
4884             if (tmp == nullptr)
4885             {
4886                 tmp = new (getAllocator()) LC_Deref(array, node->level + 1);
4887                 node->children->Push(tmp);
4888             }
4889
4890             // Descend one level down.
4891             node = tmp;
4892         }
4893
4894         // Keep the maxRank of all array dereferences.
4895         maxRank = max((int)rank, maxRank);
4896     }
4897
4898 #ifdef DEBUG
4899     if (verbose)
4900     {
4901         for (unsigned i = 0; i < nodes.Size(); ++i)
4902         {
4903             if (i != 0)
4904             {
4905                 printf(",");
4906             }
4907             nodes[i]->Print();
4908             printf("\n");
4909         }
4910     }
4911 #endif
4912
4913     if (maxRank == -1)
4914     {
4915         return false;
4916     }
4917
4918     // First level will always yield the null-check, since it is made of the array base variables.
4919     // All other levels (dimensions) will yield two conditions ex: (i < a.length && a[i] != null)
4920     // So add 1 after rank * 2.
4921     unsigned condBlocks = (unsigned)maxRank * 2 + 1;
4922
4923     // Heuristic to not create too many blocks;
4924     if (condBlocks > 4)
4925     {
4926         return false;
4927     }
4928
4929     // Derive conditions into an 'array of level x array of conditions' i.e., levelCond[levels][conds]
4930     JitExpandArrayStack<JitExpandArrayStack<LC_Condition>*>* levelCond =
4931         context->EnsureBlockConditions(loopNum, condBlocks);
4932     for (unsigned i = 0; i < nodes.Size(); ++i)
4933     {
4934         nodes[i]->DeriveLevelConditions(levelCond);
4935     }
4936
4937     DBEXEC(verbose, context->PrintBlockConditions(loopNum));
4938     return true;
4939 }
4940
4941 #ifdef DEBUG
4942 //----------------------------------------------------------------------------
4943 // optDebugLogLoopCloning:  Insert a call to jithelper that prints a message.
4944 //
4945 // Arguments:
4946 //      block        - the block in which the helper call needs to be inserted.
4947 //      insertBefore - the tree before which the helper call will be inserted.
4948 //
4949 void Compiler::optDebugLogLoopCloning(BasicBlock* block, GenTree* insertBefore)
4950 {
4951     if (JitConfig.JitDebugLogLoopCloning() == 0)
4952     {
4953         return;
4954     }
4955     GenTree* logCall = gtNewHelperCallNode(CORINFO_HELP_DEBUG_LOG_LOOP_CLONING, TYP_VOID);
4956     GenTree* stmt    = fgNewStmtFromTree(logCall);
4957     fgInsertStmtBefore(block, insertBefore, stmt);
4958     fgMorphBlockStmt(block, stmt->AsStmt() DEBUGARG("Debug log loop cloning"));
4959 }
4960 #endif
4961
4962 //------------------------------------------------------------------------
4963 // optPerformStaticOptimizations: Perform the optimizations for the optimization
4964 //      candidates gathered during the cloning phase.
4965 //
4966 // Arguments:
4967 //     loopNum     -  the current loop index for which the optimizations are performed.
4968 //     context     -  data structure where all loop cloning info is kept.
4969 //     dynamicPath -  If true, the optimization is performed in the fast path among the
4970 //                    cloned loops. If false, it means this is the only path (i.e.,
4971 //                    there is no slow path.)
4972 //
4973 // Operation:
4974 //      Perform the optimizations on the fast path i.e., the path in which the
4975 //      optimization candidates were collected at the time of identifying them.
4976 //      The candidates store all the information necessary (the tree/stmt/block
4977 //      they are from) to perform the optimization.
4978 //
4979 // Assumption:
4980 //      The unoptimized path is either already cloned when this method is called or
4981 //      there is no unoptimized path (got eliminated statically.) So this method
4982 //      performs the optimizations assuming that the path in which the candidates
4983 //      were collected is the fast path in which the optimizations will be performed.
4984 //
4985 void Compiler::optPerformStaticOptimizations(unsigned loopNum, LoopCloneContext* context DEBUGARG(bool dynamicPath))
4986 {
4987     JitExpandArrayStack<LcOptInfo*>* optInfos = context->GetLoopOptInfo(loopNum);
4988     for (unsigned i = 0; i < optInfos->Size(); ++i)
4989     {
4990         LcOptInfo* optInfo = optInfos->GetRef(i);
4991         switch (optInfo->GetOptType())
4992         {
4993             case LcOptInfo::LcJaggedArray:
4994             {
4995                 LcJaggedArrayOptInfo* arrIndexInfo = optInfo->AsLcJaggedArrayOptInfo();
4996                 compCurBB                          = arrIndexInfo->arrIndex.useBlock;
4997                 optRemoveRangeCheck(arrIndexInfo->arrIndex.bndsChks[arrIndexInfo->dim], arrIndexInfo->stmt);
4998                 DBEXEC(dynamicPath, optDebugLogLoopCloning(arrIndexInfo->arrIndex.useBlock, arrIndexInfo->stmt));
4999             }
5000             break;
5001             case LcOptInfo::LcMdArray:
5002                 // TODO-CQ: CLONE: Implement.
5003                 break;
5004             default:
5005                 break;
5006         }
5007     }
5008 }
5009
5010 //----------------------------------------------------------------------------
5011 //  optCanCloneLoops: Use the environment flag to determine whether loop
5012 //      cloning is allowed to be performed.
5013 //
5014 //  Return Value:
5015 //      Returns true in debug builds if COMPlus_JitCloneLoops flag is set.
5016 //      Disabled for retail for now.
5017 //
5018 bool Compiler::optCanCloneLoops()
5019 {
5020     // Enabled for retail builds now.
5021     unsigned cloneLoopsFlag = 1;
5022 #ifdef DEBUG
5023     cloneLoopsFlag = JitConfig.JitCloneLoops();
5024 #endif
5025     return (cloneLoopsFlag != 0);
5026 }
5027
5028 //----------------------------------------------------------------------------
5029 //  optIsLoopClonable: Determine whether this loop can be cloned.
5030 //
5031 //  Arguments:
5032 //      loopInd     loop index which needs to be checked if it can be cloned.
5033 //
5034 //  Return Value:
5035 //      Returns true if the loop can be cloned. If it returns false
5036 //      prints a message in debug as why the loop can't be cloned.
5037 //
5038 bool Compiler::optIsLoopClonable(unsigned loopInd)
5039 {
5040     // First, for now, make sure the loop doesn't have any embedded exception handling -- I don't want to tackle
5041     // inserting new EH regions in the exception table yet.
5042     BasicBlock* stopAt       = optLoopTable[loopInd].lpBottom->bbNext;
5043     unsigned    loopRetCount = 0;
5044     for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != stopAt; blk = blk->bbNext)
5045     {
5046         if (blk->bbJumpKind == BBJ_RETURN)
5047         {
5048             loopRetCount++;
5049         }
5050         if (bbIsTryBeg(blk))
5051         {
5052             JITDUMP("Loop cloning: rejecting loop %d in %s, because it has a try begin.\n", loopInd, info.compFullName);
5053             return false;
5054         }
5055     }
5056
5057     // Is the entry block a handler or filter start?  If so, then if we cloned, we could create a jump
5058     // into the middle of a handler (to go to the cloned copy.)  Reject.
5059     if (bbIsHandlerBeg(optLoopTable[loopInd].lpEntry))
5060     {
5061         JITDUMP("Loop cloning: rejecting loop because entry block is a handler start.\n");
5062         return false;
5063     }
5064
5065     // If the head and entry are in different EH regions, reject.
5066     if (!BasicBlock::sameEHRegion(optLoopTable[loopInd].lpHead, optLoopTable[loopInd].lpEntry))
5067     {
5068         JITDUMP("Loop cloning: rejecting loop because head and entry blocks are in different EH regions.\n");
5069         return false;
5070     }
5071
5072     // Is the first block after the last block of the loop a handler or filter start?
5073     // Usually, we create a dummy block after the orginal loop, to skip over the loop clone
5074     // and go to where the original loop did.  That raises problems when we don't actually go to
5075     // that block; this is one of those cases.  This could be fixed fairly easily; for example,
5076     // we could add a dummy nop block after the (cloned) loop bottom, in the same handler scope as the
5077     // loop.  This is just a corner to cut to get this working faster.
5078     BasicBlock* bbAfterLoop = optLoopTable[loopInd].lpBottom->bbNext;
5079     if (bbAfterLoop != nullptr && bbIsHandlerBeg(bbAfterLoop))
5080     {
5081         JITDUMP("Loop cloning: rejecting loop because next block after bottom is a handler start.\n");
5082         return false;
5083     }
5084
5085     // We've previously made a decision whether to have separate return epilogs, or branch to one.
5086     // There's a GCInfo limitation in the x86 case, so that there can be no more than SET_EPILOGCNT_MAX separate
5087     // epilogs.  Other architectures have a limit of 4 here for "historical reasons", but this should be revisited
5088     // (or return blocks should not be considered part of the loop, rendering this issue moot).
5089     unsigned epilogLimit = 4;
5090 #ifdef JIT32_GCENCODER
5091     epilogLimit = SET_EPILOGCNT_MAX;
5092 #endif // JIT32_GCENCODER
5093     if (fgReturnCount + loopRetCount > epilogLimit)
5094     {
5095         JITDUMP("Loop cloning: rejecting loop because it has %d returns; if added to previously-existing %d returns, "
5096                 "would exceed the limit of %d.\n",
5097                 loopRetCount, fgReturnCount, epilogLimit);
5098         return false;
5099     }
5100
5101     // Otherwise, we're going to add those return blocks.
5102     fgReturnCount += loopRetCount;
5103
5104     return true;
5105 }
5106
5107 /*****************************************************************************
5108  *
5109  *  Identify loop cloning opportunities, derive loop cloning conditions,
5110  *  perform loop cloning, use the derived conditions to choose which
5111  *  path to take.
5112  */
5113 void Compiler::optCloneLoops()
5114 {
5115     JITDUMP("\n*************** In optCloneLoops()\n");
5116     if (optLoopCount == 0 || !optCanCloneLoops())
5117     {
5118         return;
5119     }
5120
5121 #ifdef DEBUG
5122     if (verbose)
5123     {
5124         printf("Blocks/Trees at start of phase\n");
5125         fgDispBasicBlocks(true);
5126     }
5127 #endif
5128
5129     LoopCloneContext context(optLoopCount, getAllocator());
5130
5131     // Obtain array optimization candidates in the context.
5132     optObtainLoopCloningOpts(&context);
5133
5134     // For each loop, derive cloning conditions for the optimization candidates.
5135     for (unsigned i = 0; i < optLoopCount; ++i)
5136     {
5137         JitExpandArrayStack<LcOptInfo*>* optInfos = context.GetLoopOptInfo(i);
5138         if (optInfos == nullptr)
5139         {
5140             continue;
5141         }
5142
5143         if (!optDeriveLoopCloningConditions(i, &context) || !optComputeDerefConditions(i, &context))
5144         {
5145             JITDUMP("> Conditions could not be obtained\n");
5146             context.CancelLoopOptInfo(i);
5147         }
5148         else
5149         {
5150             bool allTrue  = false;
5151             bool anyFalse = false;
5152             context.EvaluateConditions(i, &allTrue, &anyFalse DEBUGARG(verbose));
5153             if (anyFalse)
5154             {
5155                 context.CancelLoopOptInfo(i);
5156             }
5157             if (allTrue)
5158             {
5159                 // Perform static optimizations on the fast path since we always
5160                 // have to take the cloned path.
5161                 optPerformStaticOptimizations(i, &context DEBUGARG(false));
5162
5163                 // No need to clone.
5164                 context.CancelLoopOptInfo(i);
5165             }
5166         }
5167     }
5168
5169 #if 0
5170     // The code in this #if has been useful in debugging loop cloning issues, by
5171     // enabling selective enablement of the loop cloning optimization according to
5172     // method hash.
5173 #ifdef DEBUG
5174     unsigned methHash = info.compMethodHash();
5175     char* lostr = getenv("loopclonehashlo");
5176     unsigned methHashLo = 0;
5177     if (lostr != NULL)
5178     {
5179         sscanf_s(lostr, "%x", &methHashLo);
5180         // methHashLo = (unsigned(atoi(lostr)) << 2);  // So we don't have to use negative numbers.
5181     }
5182     char* histr = getenv("loopclonehashhi");
5183     unsigned methHashHi = UINT32_MAX;
5184     if (histr != NULL)
5185     {
5186         sscanf_s(histr, "%x", &methHashHi);
5187         // methHashHi = (unsigned(atoi(histr)) << 2);  // So we don't have to use negative numbers.
5188     }
5189     if (methHash < methHashLo || methHash > methHashHi)
5190         return;
5191 #endif
5192 #endif
5193
5194     for (unsigned i = 0; i < optLoopCount; ++i)
5195     {
5196         if (context.GetLoopOptInfo(i) != nullptr)
5197         {
5198             optLoopsCloned++;
5199             context.OptimizeConditions(i DEBUGARG(verbose));
5200             context.OptimizeBlockConditions(i DEBUGARG(verbose));
5201             optCloneLoop(i, &context);
5202         }
5203     }
5204
5205 #ifdef DEBUG
5206     if (verbose)
5207     {
5208         printf("\nAfter loop cloning:\n");
5209         fgDispBasicBlocks(/*dumpTrees*/ true);
5210     }
5211 #endif
5212 }
5213
5214 void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context)
5215 {
5216     assert(loopInd < optLoopCount);
5217
5218     JITDUMP("\nCloning loop %d: [h: %d, f: %d, t: %d, e: %d, b: %d].\n", loopInd, optLoopTable[loopInd].lpHead->bbNum,
5219             optLoopTable[loopInd].lpFirst->bbNum, optLoopTable[loopInd].lpTop->bbNum,
5220             optLoopTable[loopInd].lpEntry->bbNum, optLoopTable[loopInd].lpBottom->bbNum);
5221
5222     // Determine the depth of the loop, so we can properly weight blocks added (outside the cloned loop blocks).
5223     unsigned depth         = optLoopDepth(loopInd);
5224     unsigned ambientWeight = 1;
5225     for (unsigned j = 0; j < depth; j++)
5226     {
5227         unsigned lastWeight = ambientWeight;
5228         ambientWeight *= BB_LOOP_WEIGHT;
5229         // If the multiplication overflowed, stick at max.
5230         // (Strictly speaking, a multiplication could overflow and still have a result
5231         // that is >= lastWeight...but if so, the original weight must be pretty large,
5232         // and it got bigger, so that's OK.)
5233         if (ambientWeight < lastWeight)
5234         {
5235             ambientWeight = BB_MAX_WEIGHT;
5236             break;
5237         }
5238     }
5239
5240     // If we're in a non-natural loop, the ambient weight might be higher than we computed above.
5241     // Be safe by taking the max with the head block's weight.
5242     ambientWeight = max(ambientWeight, optLoopTable[loopInd].lpHead->bbWeight);
5243
5244     // This is the containing loop, if any -- to label any blocks we create that are outside
5245     // the loop being cloned.
5246     unsigned char ambientLoop = optLoopTable[loopInd].lpParent;
5247
5248     // First, make sure that the loop has a unique header block, creating an empty one if necessary.
5249     optEnsureUniqueHead(loopInd, ambientWeight);
5250
5251     // We're going to make
5252
5253     // H --> E
5254     // F
5255     // T
5256     // E
5257     // B  ?-> T
5258     // X
5259     //
5260     //   become
5261     //
5262     // H ?-> E2
5263     // H2--> E    (Optional; if E == T == F, let H fall through to F/T/E)
5264     // F
5265     // T
5266     // E
5267     // B  ?-> T
5268     // X2--> X
5269     // F2
5270     // T2
5271     // E2
5272     // B2 ?-> T2
5273     // X
5274
5275     BasicBlock* h = optLoopTable[loopInd].lpHead;
5276     if (h->bbJumpKind != BBJ_NONE && h->bbJumpKind != BBJ_ALWAYS)
5277     {
5278         // Make a new block to be the unique entry to the loop.
5279         assert(h->bbJumpKind == BBJ_COND && h->bbNext == optLoopTable[loopInd].lpEntry);
5280         BasicBlock* newH = fgNewBBafter(BBJ_NONE, h,
5281                                         /*extendRegion*/ true);
5282         newH->bbWeight = (newH->isRunRarely() ? 0 : ambientWeight);
5283         BlockSetOps::Assign(this, newH->bbReach, h->bbReach);
5284         // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
5285         newH->bbNatLoopNum = ambientLoop;
5286         h                  = newH;
5287         optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h);
5288     }
5289
5290     // First, make X2 after B, if necessary.  (Not necessary if b is a BBJ_ALWAYS.)
5291     // "newPred" will be the predecessor of the blocks of the cloned loop.
5292     BasicBlock* b       = optLoopTable[loopInd].lpBottom;
5293     BasicBlock* newPred = b;
5294     if (b->bbJumpKind != BBJ_ALWAYS)
5295     {
5296         BasicBlock* x = b->bbNext;
5297         if (x != nullptr)
5298         {
5299             BasicBlock* x2 = fgNewBBafter(BBJ_ALWAYS, b, /*extendRegion*/ true);
5300             x2->bbWeight   = (x2->isRunRarely() ? 0 : ambientWeight);
5301
5302             // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
5303             x2->bbNatLoopNum = ambientLoop;
5304
5305             x2->bbJumpDest = x;
5306             BlockSetOps::Assign(this, x2->bbReach, h->bbReach);
5307             newPred = x2;
5308         }
5309     }
5310
5311     // Now we'll make "h2", after "h" to go to "e" -- unless the loop is a do-while,
5312     // so that "h" already falls through to "e" (e == t == f).
5313     BasicBlock* h2 = nullptr;
5314     if (optLoopTable[loopInd].lpHead->bbNext != optLoopTable[loopInd].lpEntry)
5315     {
5316         BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, optLoopTable[loopInd].lpHead,
5317                                       /*extendRegion*/ true);
5318         h2->bbWeight = (h2->isRunRarely() ? 0 : ambientWeight);
5319
5320         // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
5321         h2->bbNatLoopNum = ambientLoop;
5322
5323         h2->bbJumpDest = optLoopTable[loopInd].lpEntry;
5324         optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h2);
5325     }
5326
5327     // Now we'll clone the blocks of the loop body.
5328     BasicBlock* newFirst = nullptr;
5329     BasicBlock* newBot   = nullptr;
5330
5331     BlockToBlockMap* blockMap = new (getAllocator()) BlockToBlockMap(getAllocator());
5332     for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != optLoopTable[loopInd].lpBottom->bbNext;
5333          blk             = blk->bbNext)
5334     {
5335         BasicBlock* newBlk = fgNewBBafter(blk->bbJumpKind, newPred,
5336                                           /*extendRegion*/ true);
5337
5338         // Call CloneBlockState to make a copy of the block's statements (and attributes), and assert that it
5339         // has a return value indicating success, because optCanOptimizeByLoopCloningVisitor has already
5340         // checked them to guarantee they are clonable.
5341         bool cloneOk = BasicBlock::CloneBlockState(this, newBlk, blk);
5342         noway_assert(cloneOk);
5343         // TODO-Cleanup: The above clones the bbNatLoopNum, which is incorrect.  Eventually, we should probably insert
5344         // the cloned loop in the loop table.  For now, however, we'll just make these blocks be part of the surrounding
5345         // loop, if one exists -- the parent of the loop we're cloning.
5346         newBlk->bbNatLoopNum = optLoopTable[loopInd].lpParent;
5347
5348         if (newFirst == nullptr)
5349         {
5350             newFirst = newBlk;
5351         }
5352         newBot  = newBlk; // Continually overwrite to make sure we get the last one.
5353         newPred = newBlk;
5354         blockMap->Set(blk, newBlk);
5355     }
5356
5357     // Perform the static optimizations on the fast path.
5358     optPerformStaticOptimizations(loopInd, context DEBUGARG(true));
5359
5360     // Now go through the new blocks, remapping their jump targets within the loop.
5361     for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != optLoopTable[loopInd].lpBottom->bbNext;
5362          blk             = blk->bbNext)
5363     {
5364
5365         BasicBlock* newblk = nullptr;
5366         bool        b      = blockMap->Lookup(blk, &newblk);
5367         assert(b && newblk != nullptr);
5368
5369         assert(blk->bbJumpKind == newblk->bbJumpKind);
5370
5371         // First copy the jump destination(s) from "blk".
5372         optCopyBlkDest(blk, newblk);
5373
5374         // Now redirect the new block according to "blockMap".
5375         optRedirectBlock(newblk, blockMap);
5376     }
5377
5378     assert((h->bbJumpKind == BBJ_NONE && (h->bbNext == h2 || h->bbNext == optLoopTable[loopInd].lpEntry)) ||
5379            (h->bbJumpKind == BBJ_ALWAYS));
5380
5381     // If all the conditions are true, go to E2.
5382     BasicBlock* e2      = nullptr;
5383     bool        foundIt = blockMap->Lookup(optLoopTable[loopInd].lpEntry, &e2);
5384
5385     h->bbJumpKind = BBJ_COND;
5386
5387     // We will create the following structure
5388     //
5389     // cond0 (in h)  -?> cond1
5390     // slow          --> e2 (slow) always
5391     // !cond1        -?> slow
5392     // !cond2        -?> slow
5393     // ...
5394     // !condn        -?> slow
5395     // h2/entry (fast)
5396     //
5397     // We should always have block conditions, at the minimum, the array should be deref-able
5398     assert(context->HasBlockConditions(loopInd));
5399
5400     // Create a unique header for the slow path.
5401     BasicBlock* slowHead   = fgNewBBafter(BBJ_ALWAYS, h, true);
5402     slowHead->bbWeight     = (h->isRunRarely() ? 0 : ambientWeight);
5403     slowHead->bbNatLoopNum = ambientLoop;
5404     slowHead->bbJumpDest   = e2;
5405
5406     BasicBlock* condLast = optInsertLoopChoiceConditions(context, loopInd, h, slowHead);
5407     condLast->bbJumpDest = slowHead;
5408
5409     // If h2 is present it is already the head or replace 'h' by 'condLast'.
5410     if (h2 == nullptr)
5411     {
5412         optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, condLast);
5413     }
5414     assert(foundIt && e2 != nullptr);
5415
5416     // Don't unroll loops that we've cloned -- the unroller expects any loop it should unroll to
5417     // initialize the loop counter immediately before entering the loop, but we've left a shared
5418     // initialization of the loop counter up above the test that determines which version of the
5419     // loop to take.
5420     optLoopTable[loopInd].lpFlags |= LPFLG_DONT_UNROLL;
5421
5422     fgUpdateChangedFlowGraph();
5423 }
5424
5425 //--------------------------------------------------------------------------------------------------
5426 // optInsertLoopChoiceConditions - Insert the loop conditions for a loop between loop head and entry
5427 //
5428 // Arguments:
5429 //      context     loop cloning context variable
5430 //      loopNum     the loop index
5431 //      head        loop head for "loopNum"
5432 //      slowHead    the slow path loop head
5433 //
5434 // Return Values:
5435 //      None.
5436 //
5437 // Operation:
5438 //      Create the following structure.
5439 //
5440 //      Note below that the cond0 is inverted in head i.e., if true jump to cond1. This is because
5441 //      condn cannot jtrue to loop head h2. It has to be from a direct pred block.
5442 //
5443 //      cond0 (in h)  -?> cond1
5444 //      slowHead      --> e2 (slowHead) always
5445 //      !cond1        -?> slowHead
5446 //      !cond2        -?> slowHead
5447 //      ...
5448 //      !condn        -?> slowHead
5449 //      h2/entry (fast)
5450 //
5451 //      Insert condition 0 in 'h' and create other condition blocks and insert conditions in them.
5452 //
5453 BasicBlock* Compiler::optInsertLoopChoiceConditions(LoopCloneContext* context,
5454                                                     unsigned          loopNum,
5455                                                     BasicBlock*       head,
5456                                                     BasicBlock*       slowHead)
5457 {
5458     JITDUMP("Inserting loop cloning conditions\n");
5459     assert(context->HasBlockConditions(loopNum));
5460
5461     BasicBlock*                                              curCond   = head;
5462     JitExpandArrayStack<JitExpandArrayStack<LC_Condition>*>* levelCond = context->GetBlockConditions(loopNum);
5463     for (unsigned i = 0; i < levelCond->Size(); ++i)
5464     {
5465         bool isHeaderBlock = (curCond == head);
5466
5467         // Flip the condition if header block.
5468         context->CondToStmtInBlock(this, *((*levelCond)[i]), curCond, isHeaderBlock);
5469
5470         // Create each condition block ensuring wiring between them.
5471         BasicBlock* tmp     = fgNewBBafter(BBJ_COND, isHeaderBlock ? slowHead : curCond, true);
5472         curCond->bbJumpDest = isHeaderBlock ? tmp : slowHead;
5473         curCond             = tmp;
5474
5475         curCond->inheritWeight(head);
5476         curCond->bbNatLoopNum = head->bbNatLoopNum;
5477         JITDUMP("Created new block %02d for new level\n", curCond->bbNum);
5478     }
5479
5480     // Finally insert cloning conditions after all deref conditions have been inserted.
5481     context->CondToStmtInBlock(this, *(context->GetConditions(loopNum)), curCond, false);
5482     return curCond;
5483 }
5484
5485 void Compiler::optEnsureUniqueHead(unsigned loopInd, unsigned ambientWeight)
5486 {
5487     BasicBlock* h = optLoopTable[loopInd].lpHead;
5488     BasicBlock* t = optLoopTable[loopInd].lpTop;
5489     BasicBlock* e = optLoopTable[loopInd].lpEntry;
5490     BasicBlock* b = optLoopTable[loopInd].lpBottom;
5491
5492     // If "h" dominates the entry block, then it is the unique header.
5493     if (fgDominate(h, e))
5494     {
5495         return;
5496     }
5497
5498     // Otherwise, create a new empty header block, make it the pred of the entry block,
5499     // and redirect the preds of the entry block to go to this.
5500
5501     BasicBlock* beforeTop = t->bbPrev;
5502     // Make sure that the new block is in the same region as the loop.
5503     // (We will only create loops that are entirely within a region.)
5504     BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, beforeTop, true);
5505     // This is in the containing loop.
5506     h2->bbNatLoopNum = optLoopTable[loopInd].lpParent;
5507     h2->bbWeight     = (h2->isRunRarely() ? 0 : ambientWeight);
5508
5509     // We don't care where it was put; splice it between beforeTop and top.
5510     if (beforeTop->bbNext != h2)
5511     {
5512         h2->bbPrev->setNext(h2->bbNext); // Splice h2 out.
5513         beforeTop->setNext(h2);          // Splice h2 in, between beforeTop and t.
5514         h2->setNext(t);
5515     }
5516
5517     if (h2->bbNext != e)
5518     {
5519         h2->bbJumpKind = BBJ_ALWAYS;
5520         h2->bbJumpDest = e;
5521     }
5522     BlockSetOps::Assign(this, h2->bbReach, e->bbReach);
5523
5524     // Redirect paths from preds of "e" to go to "h2" instead of "e".
5525     BlockToBlockMap* blockMap = new (getAllocator()) BlockToBlockMap(getAllocator());
5526     blockMap->Set(e, h2);
5527
5528     for (flowList* predEntry = e->bbPreds; predEntry; predEntry = predEntry->flNext)
5529     {
5530         BasicBlock* predBlock = predEntry->flBlock;
5531
5532         // Skip if predBlock is in the loop.
5533         if (t->bbNum <= predBlock->bbNum && predBlock->bbNum <= b->bbNum)
5534         {
5535             continue;
5536         }
5537         optRedirectBlock(predBlock, blockMap);
5538     }
5539
5540     optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h2);
5541 }
5542
5543 /*****************************************************************************
5544  *
5545  *  Determine the kind of interference for the call.
5546  */
5547
5548 /* static */ inline Compiler::callInterf Compiler::optCallInterf(GenTreeCall* call)
5549 {
5550     // if not a helper, kills everything
5551     if (call->gtCallType != CT_HELPER)
5552     {
5553         return CALLINT_ALL;
5554     }
5555
5556     // setfield and array address store kill all indirections
5557     switch (eeGetHelperNum(call->gtCallMethHnd))
5558     {
5559         case CORINFO_HELP_ASSIGN_REF:         // Not strictly needed as we don't make a GT_CALL with this
5560         case CORINFO_HELP_CHECKED_ASSIGN_REF: // Not strictly needed as we don't make a GT_CALL with this
5561         case CORINFO_HELP_ASSIGN_BYREF:       // Not strictly needed as we don't make a GT_CALL with this
5562         case CORINFO_HELP_SETFIELDOBJ:
5563         case CORINFO_HELP_ARRADDR_ST:
5564
5565             return CALLINT_REF_INDIRS;
5566
5567         case CORINFO_HELP_SETFIELDFLOAT:
5568         case CORINFO_HELP_SETFIELDDOUBLE:
5569         case CORINFO_HELP_SETFIELD8:
5570         case CORINFO_HELP_SETFIELD16:
5571         case CORINFO_HELP_SETFIELD32:
5572         case CORINFO_HELP_SETFIELD64:
5573
5574             return CALLINT_SCL_INDIRS;
5575
5576         case CORINFO_HELP_ASSIGN_STRUCT: // Not strictly needed as we don't use this
5577         case CORINFO_HELP_MEMSET:        // Not strictly needed as we don't make a GT_CALL with this
5578         case CORINFO_HELP_MEMCPY:        // Not strictly needed as we don't make a GT_CALL with this
5579         case CORINFO_HELP_SETFIELDSTRUCT:
5580
5581             return CALLINT_ALL_INDIRS;
5582
5583         default:
5584             break;
5585     }
5586
5587     // other helpers kill nothing
5588     return CALLINT_NONE;
5589 }
5590
5591 /*****************************************************************************
5592  *
5593  *  See if the given tree can be computed in the given precision (which must
5594  *  be smaller than the type of the tree for this to make sense). If 'doit'
5595  *  is false, we merely check to see whether narrowing is possible; if we
5596  *  get called with 'doit' being true, we actually perform the narrowing.
5597  */
5598
5599 bool Compiler::optNarrowTree(GenTree* tree, var_types srct, var_types dstt, ValueNumPair vnpNarrow, bool doit)
5600 {
5601     genTreeOps oper;
5602     unsigned   kind;
5603
5604     noway_assert(tree);
5605     noway_assert(genActualType(tree->gtType) == genActualType(srct));
5606
5607     /* Assume we're only handling integer types */
5608     noway_assert(varTypeIsIntegral(srct));
5609     noway_assert(varTypeIsIntegral(dstt));
5610
5611     unsigned srcSize = genTypeSize(srct);
5612     unsigned dstSize = genTypeSize(dstt);
5613
5614     /* dstt must be smaller than srct to narrow */
5615     if (dstSize >= srcSize)
5616     {
5617         return false;
5618     }
5619
5620     /* Figure out what kind of a node we have */
5621     oper = tree->OperGet();
5622     kind = tree->OperKind();
5623
5624     if (GenTree::OperIsAssignment(oper))
5625     {
5626         noway_assert(doit == false);
5627         return false;
5628     }
5629
5630     ValueNumPair NoVNPair = ValueNumPair();
5631
5632     if (kind & GTK_LEAF)
5633     {
5634         switch (oper)
5635         {
5636             /* Constants can usually be narrowed by changing their value */
5637             CLANG_FORMAT_COMMENT_ANCHOR;
5638
5639 #ifndef _TARGET_64BIT_
5640             __int64 lval;
5641             __int64 lmask;
5642
5643             case GT_CNS_LNG:
5644                 lval  = tree->gtIntConCommon.LngValue();
5645                 lmask = 0;
5646
5647                 switch (dstt)
5648                 {
5649                     case TYP_BYTE:
5650                         lmask = 0x0000007F;
5651                         break;
5652                     case TYP_BOOL:
5653                     case TYP_UBYTE:
5654                         lmask = 0x000000FF;
5655                         break;
5656                     case TYP_SHORT:
5657                         lmask = 0x00007FFF;
5658                         break;
5659                     case TYP_USHORT:
5660                         lmask = 0x0000FFFF;
5661                         break;
5662                     case TYP_INT:
5663                         lmask = 0x7FFFFFFF;
5664                         break;
5665                     case TYP_UINT:
5666                         lmask = 0xFFFFFFFF;
5667                         break;
5668
5669                     default:
5670                         return false;
5671                 }
5672
5673                 if ((lval & lmask) != lval)
5674                     return false;
5675
5676                 if (doit)
5677                 {
5678                     tree->ChangeOperConst(GT_CNS_INT);
5679                     tree->gtType             = TYP_INT;
5680                     tree->gtIntCon.gtIconVal = (int)lval;
5681                     if (vnStore != nullptr)
5682                     {
5683                         fgValueNumberTreeConst(tree);
5684                     }
5685                 }
5686
5687                 return true;
5688 #endif
5689
5690             case GT_CNS_INT:
5691
5692                 ssize_t ival;
5693                 ival = tree->gtIntCon.gtIconVal;
5694                 ssize_t imask;
5695                 imask = 0;
5696
5697                 switch (dstt)
5698                 {
5699                     case TYP_BYTE:
5700                         imask = 0x0000007F;
5701                         break;
5702                     case TYP_BOOL:
5703                     case TYP_UBYTE:
5704                         imask = 0x000000FF;
5705                         break;
5706                     case TYP_SHORT:
5707                         imask = 0x00007FFF;
5708                         break;
5709                     case TYP_USHORT:
5710                         imask = 0x0000FFFF;
5711                         break;
5712 #ifdef _TARGET_64BIT_
5713                     case TYP_INT:
5714                         imask = 0x7FFFFFFF;
5715                         break;
5716                     case TYP_UINT:
5717                         imask = 0xFFFFFFFF;
5718                         break;
5719 #endif // _TARGET_64BIT_
5720                     default:
5721                         return false;
5722                 }
5723
5724                 if ((ival & imask) != ival)
5725                 {
5726                     return false;
5727                 }
5728
5729 #ifdef _TARGET_64BIT_
5730                 if (doit)
5731                 {
5732                     tree->gtType             = TYP_INT;
5733                     tree->gtIntCon.gtIconVal = (int)ival;
5734                     if (vnStore != nullptr)
5735                     {
5736                         fgValueNumberTreeConst(tree);
5737                     }
5738                 }
5739 #endif // _TARGET_64BIT_
5740
5741                 return true;
5742
5743             /* Operands that are in memory can usually be narrowed
5744                simply by changing their gtType */
5745
5746             case GT_LCL_VAR:
5747                 /* We only allow narrowing long -> int for a GT_LCL_VAR */
5748                 if (dstSize == sizeof(int))
5749                 {
5750                     goto NARROW_IND;
5751                 }
5752                 break;
5753
5754             case GT_CLS_VAR:
5755             case GT_LCL_FLD:
5756                 goto NARROW_IND;
5757             default:
5758                 break;
5759         }
5760
5761         noway_assert(doit == false);
5762         return false;
5763     }
5764
5765     if (kind & (GTK_BINOP | GTK_UNOP))
5766     {
5767         GenTree* op1;
5768         op1 = tree->gtOp.gtOp1;
5769         GenTree* op2;
5770         op2 = tree->gtOp.gtOp2;
5771
5772         switch (tree->gtOper)
5773         {
5774             case GT_AND:
5775                 noway_assert(genActualType(tree->gtType) == genActualType(op2->gtType));
5776
5777                 // Is op2 a small constant than can be narrowed into dstt?
5778                 // if so the result of the GT_AND will also fit into 'dstt' and can be narrowed
5779                 if ((op2->gtOper == GT_CNS_INT) && optNarrowTree(op2, srct, dstt, NoVNPair, false))
5780                 {
5781                     // We will change the type of the tree and narrow op2
5782                     //
5783                     if (doit)
5784                     {
5785                         tree->gtType = genActualType(dstt);
5786                         tree->SetVNs(vnpNarrow);
5787
5788                         optNarrowTree(op2, srct, dstt, NoVNPair, true);
5789                         // We may also need to cast away the upper bits of op1
5790                         if (srcSize == 8)
5791                         {
5792                             assert(tree->gtType == TYP_INT);
5793                             op1 = gtNewCastNode(TYP_INT, op1, false, TYP_INT);
5794 #ifdef DEBUG
5795                             op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
5796 #endif
5797                             tree->gtOp.gtOp1 = op1;
5798                         }
5799                     }
5800                     return true;
5801                 }
5802
5803                 goto COMMON_BINOP;
5804
5805             case GT_ADD:
5806             case GT_MUL:
5807
5808                 if (tree->gtOverflow() || varTypeIsSmall(dstt))
5809                 {
5810                     noway_assert(doit == false);
5811                     return false;
5812                 }
5813                 __fallthrough;
5814
5815             case GT_OR:
5816             case GT_XOR:
5817             COMMON_BINOP:
5818                 noway_assert(genActualType(tree->gtType) == genActualType(op1->gtType));
5819                 noway_assert(genActualType(tree->gtType) == genActualType(op2->gtType));
5820
5821                 if (gtIsActiveCSE_Candidate(op1) || gtIsActiveCSE_Candidate(op2) ||
5822                     !optNarrowTree(op1, srct, dstt, NoVNPair, doit) || !optNarrowTree(op2, srct, dstt, NoVNPair, doit))
5823                 {
5824                     noway_assert(doit == false);
5825                     return false;
5826                 }
5827
5828                 /* Simply change the type of the tree */
5829
5830                 if (doit)
5831                 {
5832                     if (tree->gtOper == GT_MUL && (tree->gtFlags & GTF_MUL_64RSLT))
5833                     {
5834                         tree->gtFlags &= ~GTF_MUL_64RSLT;
5835                     }
5836
5837                     tree->gtType = genActualType(dstt);
5838                     tree->SetVNs(vnpNarrow);
5839                 }
5840
5841                 return true;
5842
5843             case GT_IND:
5844
5845             NARROW_IND:
5846                 /* Simply change the type of the tree */
5847
5848                 if (doit && (dstSize <= genTypeSize(tree->gtType)))
5849                 {
5850                     tree->gtType = genSignedType(dstt);
5851                     tree->SetVNs(vnpNarrow);
5852
5853                     /* Make sure we don't mess up the variable type */
5854                     if ((oper == GT_LCL_VAR) || (oper == GT_LCL_FLD))
5855                     {
5856                         tree->gtFlags |= GTF_VAR_CAST;
5857                     }
5858                 }
5859
5860                 return true;
5861
5862             case GT_EQ:
5863             case GT_NE:
5864             case GT_LT:
5865             case GT_LE:
5866             case GT_GT:
5867             case GT_GE:
5868
5869                 /* These can always be narrowed since they only represent 0 or 1 */
5870                 return true;
5871
5872             case GT_CAST:
5873             {
5874                 var_types cast    = tree->CastToType();
5875                 var_types oprt    = op1->TypeGet();
5876                 unsigned  oprSize = genTypeSize(oprt);
5877
5878                 if (cast != srct)
5879                 {
5880                     return false;
5881                 }
5882
5883                 if (varTypeIsIntegralOrI(dstt) != varTypeIsIntegralOrI(oprt))
5884                 {
5885                     return false;
5886                 }
5887
5888                 if (tree->gtOverflow())
5889                 {
5890                     return false;
5891                 }
5892
5893                 /* Is this a cast from the type we're narrowing to or a smaller one? */
5894
5895                 if (oprSize <= dstSize)
5896                 {
5897                     /* Bash the target type of the cast */
5898
5899                     if (doit)
5900                     {
5901                         dstt = genSignedType(dstt);
5902
5903                         if (oprSize == dstSize)
5904                         {
5905                             // Same size: change the CAST into a NOP
5906                             tree->ChangeOper(GT_NOP);
5907                             tree->gtType     = dstt;
5908                             tree->gtOp.gtOp2 = nullptr;
5909                             tree->gtVNPair   = op1->gtVNPair; // Set to op1's ValueNumber
5910                         }
5911                         else
5912                         {
5913                             // oprSize is smaller
5914                             assert(oprSize < dstSize);
5915
5916                             // Change the CastToType in the GT_CAST node
5917                             tree->CastToType() = dstt;
5918
5919                             // The result type of a GT_CAST is never a small type.
5920                             // Use genActualType to widen dstt when it is a small types.
5921                             tree->gtType = genActualType(dstt);
5922                             tree->SetVNs(vnpNarrow);
5923                         }
5924                     }
5925
5926                     return true;
5927                 }
5928             }
5929                 return false;
5930
5931             case GT_COMMA:
5932                 if (!gtIsActiveCSE_Candidate(op2) && optNarrowTree(op2, srct, dstt, vnpNarrow, doit))
5933                 {
5934                     /* Simply change the type of the tree */
5935
5936                     if (doit)
5937                     {
5938                         tree->gtType = genActualType(dstt);
5939                         tree->SetVNs(vnpNarrow);
5940                     }
5941                     return true;
5942                 }
5943                 return false;
5944
5945             default:
5946                 noway_assert(doit == false);
5947                 return false;
5948         }
5949     }
5950
5951     return false;
5952 }
5953
5954 /*****************************************************************************
5955  *
5956  *  The following logic figures out whether the given variable is assigned
5957  *  somewhere in a list of basic blocks (or in an entire loop).
5958  */
5959
5960 Compiler::fgWalkResult Compiler::optIsVarAssgCB(GenTree** pTree, fgWalkData* data)
5961 {
5962     GenTree* tree = *pTree;
5963
5964     if (tree->OperIsAssignment())
5965     {
5966         GenTree*   dest     = tree->gtOp.gtOp1;
5967         genTreeOps destOper = dest->OperGet();
5968
5969         isVarAssgDsc* desc = (isVarAssgDsc*)data->pCallbackData;
5970         assert(desc && desc->ivaSelf == desc);
5971
5972         if (destOper == GT_LCL_VAR)
5973         {
5974             unsigned tvar = dest->gtLclVarCommon.gtLclNum;
5975             if (tvar < lclMAX_ALLSET_TRACKED)
5976             {
5977                 AllVarSetOps::AddElemD(data->compiler, desc->ivaMaskVal, tvar);
5978             }
5979             else
5980             {
5981                 desc->ivaMaskIncomplete = true;
5982             }
5983
5984             if (tvar == desc->ivaVar)
5985             {
5986                 if (tree != desc->ivaSkip)
5987                 {
5988                     return WALK_ABORT;
5989                 }
5990             }
5991         }
5992         else if (destOper == GT_LCL_FLD)
5993         {
5994             /* We can't track every field of every var. Moreover, indirections
5995                may access different parts of the var as different (but
5996                overlapping) fields. So just treat them as indirect accesses */
5997
5998             // unsigned    lclNum = dest->gtLclFld.gtLclNum;
5999             // noway_assert(lvaTable[lclNum].lvAddrTaken);
6000
6001             varRefKinds refs = varTypeIsGC(tree->TypeGet()) ? VR_IND_REF : VR_IND_SCL;
6002             desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | refs);
6003         }
6004         else if (destOper == GT_CLS_VAR)
6005         {
6006             desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | VR_GLB_VAR);
6007         }
6008         else if (destOper == GT_IND)
6009         {
6010             /* Set the proper indirection bits */
6011
6012             varRefKinds refs = varTypeIsGC(tree->TypeGet()) ? VR_IND_REF : VR_IND_SCL;
6013             desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | refs);
6014         }
6015     }
6016     else if (tree->gtOper == GT_CALL)
6017     {
6018         isVarAssgDsc* desc = (isVarAssgDsc*)data->pCallbackData;
6019         assert(desc && desc->ivaSelf == desc);
6020
6021         desc->ivaMaskCall = optCallInterf(tree->AsCall());
6022     }
6023
6024     return WALK_CONTINUE;
6025 }
6026
6027 /*****************************************************************************/
6028
6029 bool Compiler::optIsVarAssigned(BasicBlock* beg, BasicBlock* end, GenTree* skip, unsigned var)
6030 {
6031     bool         result;
6032     isVarAssgDsc desc;
6033
6034     desc.ivaSkip = skip;
6035 #ifdef DEBUG
6036     desc.ivaSelf = &desc;
6037 #endif
6038     desc.ivaVar      = var;
6039     desc.ivaMaskCall = CALLINT_NONE;
6040     AllVarSetOps::AssignNoCopy(this, desc.ivaMaskVal, AllVarSetOps::MakeEmpty(this));
6041
6042     for (;;)
6043     {
6044         noway_assert(beg);
6045
6046         for (GenTreeStmt* stmt = beg->firstStmt(); stmt; stmt = stmt->gtNextStmt)
6047         {
6048             noway_assert(stmt->gtOper == GT_STMT);
6049             if (fgWalkTreePre(&stmt->gtStmtExpr, optIsVarAssgCB, &desc))
6050             {
6051                 result = true;
6052                 goto DONE;
6053             }
6054         }
6055
6056         if (beg == end)
6057         {
6058             break;
6059         }
6060
6061         beg = beg->bbNext;
6062     }
6063
6064     result = false;
6065
6066 DONE:
6067
6068     return result;
6069 }
6070
6071 /*****************************************************************************/
6072 int Compiler::optIsSetAssgLoop(unsigned lnum, ALLVARSET_VALARG_TP vars, varRefKinds inds)
6073 {
6074     LoopDsc* loop;
6075
6076     /* Get hold of the loop descriptor */
6077
6078     noway_assert(lnum < optLoopCount);
6079     loop = optLoopTable + lnum;
6080
6081     /* Do we already know what variables are assigned within this loop? */
6082
6083     if (!(loop->lpFlags & LPFLG_ASGVARS_YES))
6084     {
6085         isVarAssgDsc desc;
6086
6087         BasicBlock* beg;
6088         BasicBlock* end;
6089
6090         /* Prepare the descriptor used by the tree walker call-back */
6091
6092         desc.ivaVar  = (unsigned)-1;
6093         desc.ivaSkip = nullptr;
6094 #ifdef DEBUG
6095         desc.ivaSelf = &desc;
6096 #endif
6097         AllVarSetOps::AssignNoCopy(this, desc.ivaMaskVal, AllVarSetOps::MakeEmpty(this));
6098         desc.ivaMaskInd        = VR_NONE;
6099         desc.ivaMaskCall       = CALLINT_NONE;
6100         desc.ivaMaskIncomplete = false;
6101
6102         /* Now walk all the statements of the loop */
6103
6104         beg = loop->lpHead->bbNext;
6105         end = loop->lpBottom;
6106
6107         for (/**/; /**/; beg = beg->bbNext)
6108         {
6109             noway_assert(beg);
6110
6111             for (GenTreeStmt* stmt = beg->FirstNonPhiDef(); stmt; stmt = stmt->gtNextStmt)
6112             {
6113                 noway_assert(stmt->gtOper == GT_STMT);
6114                 fgWalkTreePre(&stmt->gtStmtExpr, optIsVarAssgCB, &desc);
6115
6116                 if (desc.ivaMaskIncomplete)
6117                 {
6118                     loop->lpFlags |= LPFLG_ASGVARS_INC;
6119                 }
6120             }
6121
6122             if (beg == end)
6123             {
6124                 break;
6125             }
6126         }
6127
6128         AllVarSetOps::Assign(this, loop->lpAsgVars, desc.ivaMaskVal);
6129         loop->lpAsgInds = desc.ivaMaskInd;
6130         loop->lpAsgCall = desc.ivaMaskCall;
6131
6132         /* Now we know what variables are assigned in the loop */
6133
6134         loop->lpFlags |= LPFLG_ASGVARS_YES;
6135     }
6136
6137     /* Now we can finally test the caller's mask against the loop's */
6138     if (!AllVarSetOps::IsEmptyIntersection(this, loop->lpAsgVars, vars) || (loop->lpAsgInds & inds))
6139     {
6140         return 1;
6141     }
6142
6143     switch (loop->lpAsgCall)
6144     {
6145         case CALLINT_ALL:
6146
6147             /* Can't hoist if the call might have side effect on an indirection. */
6148
6149             if (loop->lpAsgInds != VR_NONE)
6150             {
6151                 return 1;
6152             }
6153
6154             break;
6155
6156         case CALLINT_REF_INDIRS:
6157
6158             /* Can't hoist if the call might have side effect on an ref indirection. */
6159
6160             if (loop->lpAsgInds & VR_IND_REF)
6161             {
6162                 return 1;
6163             }
6164
6165             break;
6166
6167         case CALLINT_SCL_INDIRS:
6168
6169             /* Can't hoist if the call might have side effect on an non-ref indirection. */
6170
6171             if (loop->lpAsgInds & VR_IND_SCL)
6172             {
6173                 return 1;
6174             }
6175
6176             break;
6177
6178         case CALLINT_ALL_INDIRS:
6179
6180             /* Can't hoist if the call might have side effect on any indirection. */
6181
6182             if (loop->lpAsgInds & (VR_IND_REF | VR_IND_SCL))
6183             {
6184                 return 1;
6185             }
6186
6187             break;
6188
6189         case CALLINT_NONE:
6190
6191             /* Other helpers kill nothing */
6192
6193             break;
6194
6195         default:
6196             noway_assert(!"Unexpected lpAsgCall value");
6197     }
6198
6199     return 0;
6200 }
6201
6202 void Compiler::optPerformHoistExpr(GenTree* origExpr, unsigned lnum)
6203 {
6204 #ifdef DEBUG
6205     if (verbose)
6206     {
6207         printf("\nHoisting a copy of ");
6208         printTreeID(origExpr);
6209         printf(" into PreHeader for loop L%02u <BB%02u..BB%02u>:\n", lnum, optLoopTable[lnum].lpFirst->bbNum,
6210                optLoopTable[lnum].lpBottom->bbNum);
6211         gtDispTree(origExpr);
6212         printf("\n");
6213     }
6214 #endif
6215
6216     // This loop has to be in a form that is approved for hoisting.
6217     assert(optLoopTable[lnum].lpFlags & LPFLG_HOISTABLE);
6218
6219     // Create a copy of the expression and mark it for CSE's.
6220     GenTree* hoistExpr = gtCloneExpr(origExpr, GTF_MAKE_CSE);
6221
6222     // At this point we should have a cloned expression, marked with the GTF_MAKE_CSE flag
6223     assert(hoistExpr != origExpr);
6224     assert(hoistExpr->gtFlags & GTF_MAKE_CSE);
6225
6226     GenTree* hoist = hoistExpr;
6227     // The value of the expression isn't used (unless it's an assignment).
6228     if (hoistExpr->OperGet() != GT_ASG)
6229     {
6230         hoist = gtUnusedValNode(hoistExpr);
6231     }
6232
6233     /* Put the statement in the preheader */
6234
6235     fgCreateLoopPreHeader(lnum);
6236
6237     BasicBlock* preHead = optLoopTable[lnum].lpHead;
6238     assert(preHead->bbJumpKind == BBJ_NONE);
6239
6240     // fgMorphTree and lvaRecursiveIncRefCounts requires that compCurBB be the block that contains
6241     // (or in this case, will contain) the expression.
6242     compCurBB = preHead;
6243
6244     // Increment the ref counts of any local vars appearing in "hoist".
6245     // Note that we need to do this before fgMorphTree() as fgMorph() could constant
6246     // fold away some of the lcl vars referenced by "hoist".
6247     lvaRecursiveIncRefCounts(hoist);
6248
6249     hoist = fgMorphTree(hoist);
6250
6251     GenTree* hoistStmt = gtNewStmt(hoist);
6252     hoistStmt->gtFlags |= GTF_STMT_CMPADD;
6253
6254     /* simply append the statement at the end of the preHead's list */
6255
6256     GenTree* treeList = preHead->bbTreeList;
6257
6258     if (treeList)
6259     {
6260         /* append after last statement */
6261
6262         GenTree* last = treeList->gtPrev;
6263         assert(last->gtNext == nullptr);
6264
6265         last->gtNext      = hoistStmt;
6266         hoistStmt->gtPrev = last;
6267         treeList->gtPrev  = hoistStmt;
6268     }
6269     else
6270     {
6271         /* Empty pre-header - store the single statement in the block */
6272
6273         preHead->bbTreeList = hoistStmt;
6274         hoistStmt->gtPrev   = hoistStmt;
6275     }
6276
6277     hoistStmt->gtNext = nullptr;
6278
6279 #ifdef DEBUG
6280     if (verbose)
6281     {
6282         printf("This hoisted copy placed in PreHeader (BB%02u):\n", preHead->bbNum);
6283         gtDispTree(hoist);
6284     }
6285 #endif
6286
6287     if (fgStmtListThreaded)
6288     {
6289         gtSetStmtInfo(hoistStmt);
6290         fgSetStmtSeq(hoistStmt);
6291     }
6292
6293 #ifdef DEBUG
6294     if (m_nodeTestData != nullptr)
6295     {
6296
6297         // What is the depth of the loop "lnum"?
6298         ssize_t  depth    = 0;
6299         unsigned lnumIter = lnum;
6300         while (optLoopTable[lnumIter].lpParent != BasicBlock::NOT_IN_LOOP)
6301         {
6302             depth++;
6303             lnumIter = optLoopTable[lnumIter].lpParent;
6304         }
6305
6306         NodeToTestDataMap* testData = GetNodeTestData();
6307
6308         TestLabelAndNum tlAndN;
6309         if (testData->Lookup(origExpr, &tlAndN) && tlAndN.m_tl == TL_LoopHoist)
6310         {
6311             if (tlAndN.m_num == -1)
6312             {
6313                 printf("Node ");
6314                 printTreeID(origExpr);
6315                 printf(" was declared 'do not hoist', but is being hoisted.\n");
6316                 assert(false);
6317             }
6318             else if (tlAndN.m_num != depth)
6319             {
6320                 printf("Node ");
6321                 printTreeID(origExpr);
6322                 printf(" was declared as hoistable from loop at nesting depth %d; actually hoisted from loop at depth "
6323                        "%d.\n",
6324                        tlAndN.m_num, depth);
6325                 assert(false);
6326             }
6327             else
6328             {
6329                 // We've correctly hoisted this, so remove the annotation.  Later, we'll check for any remaining "must
6330                 // hoist" annotations.
6331                 testData->Remove(origExpr);
6332                 // Now we insert an annotation to make sure that "hoistExpr" is actually CSE'd.
6333                 tlAndN.m_tl  = TL_CSE_Def;
6334                 tlAndN.m_num = m_loopHoistCSEClass++;
6335                 testData->Set(hoistExpr, tlAndN);
6336             }
6337         }
6338     }
6339 #endif
6340
6341 #if LOOP_HOIST_STATS
6342     if (!m_curLoopHasHoistedExpression)
6343     {
6344         m_loopsWithHoistedExpressions++;
6345         m_curLoopHasHoistedExpression = true;
6346     }
6347     m_totalHoistedExpressions++;
6348 #endif // LOOP_HOIST_STATS
6349 }
6350
6351 void Compiler::optHoistLoopCode()
6352 {
6353     // If we don't have any loops in the method then take an early out now.
6354     if (optLoopCount == 0)
6355     {
6356         return;
6357     }
6358
6359 #ifdef DEBUG
6360     unsigned jitNoHoist = JitConfig.JitNoHoist();
6361     if (jitNoHoist > 0)
6362     {
6363         return;
6364     }
6365 #endif
6366
6367 #if 0
6368     // The code in this #if has been useful in debugging loop cloning issues, by
6369     // enabling selective enablement of the loop cloning optimization according to
6370     // method hash.
6371 #ifdef DEBUG
6372     unsigned methHash = info.compMethodHash();
6373     char* lostr = getenv("loophoisthashlo");
6374     unsigned methHashLo = 0;
6375     if (lostr != NULL)
6376     {
6377         sscanf_s(lostr, "%x", &methHashLo);
6378         // methHashLo = (unsigned(atoi(lostr)) << 2);  // So we don't have to use negative numbers.
6379     }
6380     char* histr = getenv("loophoisthashhi");
6381     unsigned methHashHi = UINT32_MAX;
6382     if (histr != NULL)
6383     {
6384         sscanf_s(histr, "%x", &methHashHi);
6385         // methHashHi = (unsigned(atoi(histr)) << 2);  // So we don't have to use negative numbers.
6386     }
6387     if (methHash < methHashLo || methHash > methHashHi)
6388         return;
6389     printf("Doing loop hoisting in %s (0x%x).\n", info.compFullName, methHash);
6390 #endif // DEBUG
6391 #endif // 0     -- debugging loop cloning issues
6392
6393 #ifdef DEBUG
6394     if (verbose)
6395     {
6396         printf("\n*************** In optHoistLoopCode()\n");
6397         printf("Blocks/Trees before phase\n");
6398         fgDispBasicBlocks(true);
6399         printf("");
6400     }
6401 #endif
6402
6403     // Consider all the loop nests, in outer-to-inner order (thus hoisting expressions outside the largest loop in which
6404     // they are invariant.)
6405     LoopHoistContext hoistCtxt(this);
6406     for (unsigned lnum = 0; lnum < optLoopCount; lnum++)
6407     {
6408         if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED)
6409         {
6410             continue;
6411         }
6412
6413         if (optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP)
6414         {
6415             optHoistLoopNest(lnum, &hoistCtxt);
6416         }
6417     }
6418
6419 #if DEBUG
6420     if (fgModified)
6421     {
6422         if (verbose)
6423         {
6424             printf("Blocks/Trees after optHoistLoopCode() modified flowgraph\n");
6425             fgDispBasicBlocks(true);
6426             printf("");
6427         }
6428
6429         // Make sure that the predecessor lists are accurate
6430         fgDebugCheckBBlist();
6431     }
6432 #endif
6433
6434 #ifdef DEBUG
6435     // Test Data stuff..
6436     // If we have no test data, early out.
6437     if (m_nodeTestData == nullptr)
6438     {
6439         return;
6440     }
6441     NodeToTestDataMap* testData = GetNodeTestData();
6442     for (NodeToTestDataMap::KeyIterator ki = testData->Begin(); !ki.Equal(testData->End()); ++ki)
6443     {
6444         TestLabelAndNum tlAndN;
6445         GenTree*        node = ki.Get();
6446         bool            b    = testData->Lookup(node, &tlAndN);
6447         assert(b);
6448         if (tlAndN.m_tl != TL_LoopHoist)
6449         {
6450             continue;
6451         }
6452         // Otherwise, it is a loop hoist annotation.
6453         assert(tlAndN.m_num < 100); // >= 100 indicates nested static field address, should already have been moved.
6454         if (tlAndN.m_num >= 0)
6455         {
6456             printf("Node ");
6457             printTreeID(node);
6458             printf(" was declared 'must hoist', but has not been hoisted.\n");
6459             assert(false);
6460         }
6461     }
6462 #endif // DEBUG
6463 }
6464
6465 void Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt)
6466 {
6467     // Do this loop, then recursively do all nested loops.
6468     CLANG_FORMAT_COMMENT_ANCHOR;
6469
6470 #if LOOP_HOIST_STATS
6471     // Record stats
6472     m_curLoopHasHoistedExpression = false;
6473     m_loopsConsidered++;
6474 #endif // LOOP_HOIST_STATS
6475
6476     optHoistThisLoop(lnum, hoistCtxt);
6477
6478     VNSet* hoistedInCurLoop = hoistCtxt->ExtractHoistedInCurLoop();
6479
6480     if (optLoopTable[lnum].lpChild != BasicBlock::NOT_IN_LOOP)
6481     {
6482         // Add the ones hoisted in "lnum" to "hoistedInParents" for any nested loops.
6483         // TODO-Cleanup: we should have a set abstraction for loops.
6484         if (hoistedInCurLoop != nullptr)
6485         {
6486             for (VNSet::KeyIterator keys = hoistedInCurLoop->Begin(); !keys.Equal(hoistedInCurLoop->End()); ++keys)
6487             {
6488 #ifdef DEBUG
6489                 bool b;
6490                 assert(!hoistCtxt->m_hoistedInParentLoops.Lookup(keys.Get(), &b));
6491 #endif
6492                 hoistCtxt->m_hoistedInParentLoops.Set(keys.Get(), true);
6493             }
6494         }
6495
6496         for (unsigned child = optLoopTable[lnum].lpChild; child != BasicBlock::NOT_IN_LOOP;
6497              child          = optLoopTable[child].lpSibling)
6498         {
6499             optHoistLoopNest(child, hoistCtxt);
6500         }
6501
6502         // Now remove them.
6503         // TODO-Cleanup: we should have a set abstraction for loops.
6504         if (hoistedInCurLoop != nullptr)
6505         {
6506             for (VNSet::KeyIterator keys = hoistedInCurLoop->Begin(); !keys.Equal(hoistedInCurLoop->End()); ++keys)
6507             {
6508                 // Note that we asserted when we added these that they hadn't been members, so removing is appropriate.
6509                 hoistCtxt->m_hoistedInParentLoops.Remove(keys.Get());
6510             }
6511         }
6512     }
6513 }
6514
6515 void Compiler::optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt)
6516 {
6517     LoopDsc* pLoopDsc = &optLoopTable[lnum];
6518
6519     /* If loop was removed continue */
6520
6521     if (pLoopDsc->lpFlags & LPFLG_REMOVED)
6522     {
6523         return;
6524     }
6525
6526     /* Get the head and tail of the loop */
6527
6528     BasicBlock* head = pLoopDsc->lpHead;
6529     BasicBlock* tail = pLoopDsc->lpBottom;
6530     BasicBlock* lbeg = pLoopDsc->lpEntry;
6531
6532     // We must have a do-while loop
6533     if ((pLoopDsc->lpFlags & LPFLG_DO_WHILE) == 0)
6534     {
6535         return;
6536     }
6537
6538     // The loop-head must dominate the loop-entry.
6539     // TODO-CQ: Couldn't we make this true if it's not?
6540     if (!fgDominate(head, lbeg))
6541     {
6542         return;
6543     }
6544
6545     // if lbeg is the start of a new try block then we won't be able to hoist
6546     if (!BasicBlock::sameTryRegion(head, lbeg))
6547     {
6548         return;
6549     }
6550
6551     // We don't bother hoisting when inside of a catch block
6552     if ((lbeg->bbCatchTyp != BBCT_NONE) && (lbeg->bbCatchTyp != BBCT_FINALLY))
6553     {
6554         return;
6555     }
6556
6557     pLoopDsc->lpFlags |= LPFLG_HOISTABLE;
6558
6559     unsigned begn = lbeg->bbNum;
6560     unsigned endn = tail->bbNum;
6561
6562     // Ensure the per-loop sets/tables are empty.
6563     hoistCtxt->m_curLoopVnInvariantCache.RemoveAll();
6564
6565 #ifdef DEBUG
6566     if (verbose)
6567     {
6568         printf("optHoistLoopCode for loop L%02u <BB%02u..BB%02u>:\n", lnum, begn, endn);
6569         printf("  Loop body %s a call\n", pLoopDsc->lpContainsCall ? "contains" : "does not contain");
6570     }
6571 #endif
6572
6573     VARSET_TP loopVars(VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, pLoopDsc->lpVarUseDef));
6574
6575     pLoopDsc->lpVarInOutCount    = VarSetOps::Count(this, pLoopDsc->lpVarInOut);
6576     pLoopDsc->lpLoopVarCount     = VarSetOps::Count(this, loopVars);
6577     pLoopDsc->lpHoistedExprCount = 0;
6578
6579 #ifndef _TARGET_64BIT_
6580     unsigned longVarsCount = VarSetOps::Count(this, lvaLongVars);
6581
6582     if (longVarsCount > 0)
6583     {
6584         // Since 64-bit variables take up two registers on 32-bit targets, we increase
6585         //  the Counts such that each TYP_LONG variable counts twice.
6586         //
6587         VARSET_TP loopLongVars(VarSetOps::Intersection(this, loopVars, lvaLongVars));
6588         VARSET_TP inOutLongVars(VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, lvaLongVars));
6589
6590 #ifdef DEBUG
6591         if (verbose)
6592         {
6593             printf("\n  LONGVARS(%d)=", VarSetOps::Count(this, lvaLongVars));
6594             lvaDispVarSet(lvaLongVars);
6595         }
6596 #endif
6597         pLoopDsc->lpLoopVarCount += VarSetOps::Count(this, loopLongVars);
6598         pLoopDsc->lpVarInOutCount += VarSetOps::Count(this, inOutLongVars);
6599     }
6600 #endif // !_TARGET_64BIT_
6601
6602 #ifdef DEBUG
6603     if (verbose)
6604     {
6605         printf("\n  USEDEF  (%d)=", VarSetOps::Count(this, pLoopDsc->lpVarUseDef));
6606         lvaDispVarSet(pLoopDsc->lpVarUseDef);
6607
6608         printf("\n  INOUT   (%d)=", pLoopDsc->lpVarInOutCount);
6609         lvaDispVarSet(pLoopDsc->lpVarInOut);
6610
6611         printf("\n  LOOPVARS(%d)=", pLoopDsc->lpLoopVarCount);
6612         lvaDispVarSet(loopVars);
6613         printf("\n");
6614     }
6615 #endif
6616
6617     unsigned floatVarsCount = VarSetOps::Count(this, lvaFloatVars);
6618
6619     if (floatVarsCount > 0)
6620     {
6621         VARSET_TP loopFPVars(VarSetOps::Intersection(this, loopVars, lvaFloatVars));
6622         VARSET_TP inOutFPVars(VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, lvaFloatVars));
6623
6624         pLoopDsc->lpLoopVarFPCount     = VarSetOps::Count(this, loopFPVars);
6625         pLoopDsc->lpVarInOutFPCount    = VarSetOps::Count(this, inOutFPVars);
6626         pLoopDsc->lpHoistedFPExprCount = 0;
6627
6628         pLoopDsc->lpLoopVarCount -= pLoopDsc->lpLoopVarFPCount;
6629         pLoopDsc->lpVarInOutCount -= pLoopDsc->lpVarInOutFPCount;
6630
6631 #ifdef DEBUG
6632         if (verbose)
6633         {
6634             printf("  INOUT-FP(%d)=", pLoopDsc->lpVarInOutFPCount);
6635             lvaDispVarSet(inOutFPVars);
6636
6637             printf("\n  LOOPV-FP(%d)=", pLoopDsc->lpLoopVarFPCount);
6638             lvaDispVarSet(loopFPVars);
6639         }
6640 #endif
6641     }
6642     else // (floatVarsCount == 0)
6643     {
6644         pLoopDsc->lpLoopVarFPCount     = 0;
6645         pLoopDsc->lpVarInOutFPCount    = 0;
6646         pLoopDsc->lpHoistedFPExprCount = 0;
6647     }
6648
6649     // Find the set of definitely-executed blocks.
6650     // Ideally, the definitely-executed blocks are the ones that post-dominate the entry block.
6651     // Until we have post-dominators, we'll special-case for single-exit blocks.
6652     JitExpandArrayStack<BasicBlock*> defExec(getAllocatorLoopHoist());
6653     if (pLoopDsc->lpFlags & LPFLG_ONE_EXIT)
6654     {
6655         assert(pLoopDsc->lpExit != nullptr);
6656         BasicBlock* cur = pLoopDsc->lpExit;
6657         // Push dominators, until we reach "entry" or exit the loop.
6658         while (cur != nullptr && pLoopDsc->lpContains(cur) && cur != pLoopDsc->lpEntry)
6659         {
6660             defExec.Push(cur);
6661             cur = cur->bbIDom;
6662         }
6663         // If we didn't reach the entry block, give up and *just* push the entry block.
6664         if (cur != pLoopDsc->lpEntry)
6665         {
6666             defExec.Reset();
6667         }
6668         defExec.Push(pLoopDsc->lpEntry);
6669     }
6670     else // More than one exit
6671     {
6672         // We'll assume that only the entry block is definitely executed.
6673         // We could in the future do better.
6674         defExec.Push(pLoopDsc->lpEntry);
6675     }
6676
6677     while (defExec.Size() > 0)
6678     {
6679         // Consider in reverse order: dominator before dominatee.
6680         BasicBlock* blk = defExec.Pop();
6681         optHoistLoopExprsForBlock(blk, lnum, hoistCtxt);
6682     }
6683 }
6684
6685 // Hoist any expressions in "blk" that are invariant in loop "lnum" outside of "blk" and into a PreHead for loop "lnum".
6686 void Compiler::optHoistLoopExprsForBlock(BasicBlock* blk, unsigned lnum, LoopHoistContext* hoistCtxt)
6687 {
6688     LoopDsc* pLoopDsc                      = &optLoopTable[lnum];
6689     bool     firstBlockAndBeforeSideEffect = (blk == pLoopDsc->lpEntry);
6690     unsigned blkWeight                     = blk->getBBWeight(this);
6691
6692 #ifdef DEBUG
6693     if (verbose)
6694     {
6695         printf("    optHoistLoopExprsForBlock BB%02u (weight=%6s) of loop L%02u <BB%02u..BB%02u>, firstBlock is %s\n",
6696                blk->bbNum, refCntWtd2str(blkWeight), lnum, pLoopDsc->lpFirst->bbNum, pLoopDsc->lpBottom->bbNum,
6697                firstBlockAndBeforeSideEffect ? "true" : "false");
6698         if (blkWeight < (BB_UNITY_WEIGHT / 10))
6699         {
6700             printf("      block weight is too small to perform hoisting.\n");
6701         }
6702     }
6703 #endif
6704
6705     if (blkWeight < (BB_UNITY_WEIGHT / 10))
6706     {
6707         // Block weight is too small to perform hoisting.
6708         return;
6709     }
6710
6711     for (GenTreeStmt* stmt = blk->FirstNonPhiDef(); stmt; stmt = stmt->gtNextStmt)
6712     {
6713         GenTree* stmtTree = stmt->gtStmtExpr;
6714         bool     hoistable;
6715         bool     cctorDependent;
6716         (void)optHoistLoopExprsForTree(stmtTree, lnum, hoistCtxt, &firstBlockAndBeforeSideEffect, &hoistable,
6717                                        &cctorDependent);
6718         if (hoistable)
6719         {
6720             // we will try to hoist the top-level stmtTree
6721             optHoistCandidate(stmtTree, lnum, hoistCtxt);
6722         }
6723     }
6724 }
6725
6726 bool Compiler::optIsProfitableToHoistableTree(GenTree* tree, unsigned lnum)
6727 {
6728     LoopDsc* pLoopDsc = &optLoopTable[lnum];
6729
6730     bool loopContainsCall = pLoopDsc->lpContainsCall;
6731
6732     int availRegCount;
6733     int hoistedExprCount;
6734     int loopVarCount;
6735     int varInOutCount;
6736
6737     if (varTypeIsFloating(tree->TypeGet()))
6738     {
6739         hoistedExprCount = pLoopDsc->lpHoistedFPExprCount;
6740         loopVarCount     = pLoopDsc->lpLoopVarFPCount;
6741         varInOutCount    = pLoopDsc->lpVarInOutFPCount;
6742
6743         availRegCount = CNT_CALLEE_SAVED_FLOAT;
6744         if (!loopContainsCall)
6745         {
6746             availRegCount += CNT_CALLEE_TRASH_FLOAT - 1;
6747         }
6748 #ifdef _TARGET_ARM_
6749         // For ARM each double takes two FP registers
6750         // For now on ARM we won't track singles/doubles
6751         // and instead just assume that we always have doubles.
6752         //
6753         availRegCount /= 2;
6754 #endif
6755     }
6756     else
6757     {
6758         hoistedExprCount = pLoopDsc->lpHoistedExprCount;
6759         loopVarCount     = pLoopDsc->lpLoopVarCount;
6760         varInOutCount    = pLoopDsc->lpVarInOutCount;
6761
6762         availRegCount = CNT_CALLEE_SAVED - 1;
6763         if (!loopContainsCall)
6764         {
6765             availRegCount += CNT_CALLEE_TRASH - 1;
6766         }
6767 #ifndef _TARGET_64BIT_
6768         // For our 32-bit targets Long types take two registers.
6769         if (varTypeIsLong(tree->TypeGet()))
6770         {
6771             availRegCount = (availRegCount + 1) / 2;
6772         }
6773 #endif
6774     }
6775
6776     // decrement the availRegCount by the count of expression that we have already hoisted.
6777     availRegCount -= hoistedExprCount;
6778
6779     // the variables that are read/written inside the loop should
6780     // always be a subset of the InOut variables for the loop
6781     assert(loopVarCount <= varInOutCount);
6782
6783     // When loopVarCount >= availRegCount we believe that all of the
6784     // available registers will get used to hold LclVars inside the loop.
6785     // This pessimistically assumes that each loopVar has a conflicting
6786     // lifetime with every other loopVar.
6787     // For this case we will hoist the expression only if is profitable
6788     // to place it in a stack home location (gtCostEx >= 2*IND_COST_EX)
6789     // as we believe it will be placed in the stack or one of the other
6790     // loopVars will be spilled into the stack
6791     //
6792     if (loopVarCount >= availRegCount)
6793     {
6794         // Don't hoist expressions that are not heavy: tree->gtCostEx < (2*IND_COST_EX)
6795         if (tree->gtCostEx < (2 * IND_COST_EX))
6796         {
6797             return false;
6798         }
6799     }
6800
6801     // When varInOutCount < availRegCount we are know that there are
6802     // some available register(s) when we enter the loop body.
6803     // When varInOutCount == availRegCount there often will be a register
6804     // available when we enter the loop body, since a loop often defines a
6805     // LclVar on exit or there is often at least one LclVar that is worth
6806     // spilling to the stack to make way for this hoisted expression.
6807     // So we are willing hoist an expression with gtCostEx == MIN_CSE_COST
6808     //
6809     if (varInOutCount > availRegCount)
6810     {
6811         // Don't hoist expressions that barely meet CSE cost requirements: tree->gtCostEx == MIN_CSE_COST
6812         if (tree->gtCostEx <= MIN_CSE_COST + 1)
6813         {
6814             return false;
6815         }
6816     }
6817
6818     return true;
6819 }
6820
6821 //
6822 //  This function returns true if 'tree' is a loop invariant expression.
6823 //  It also sets '*pHoistable' to true if 'tree' can be hoisted into a loop PreHeader block,
6824 //  and sets '*pCctorDependent' if 'tree' is a function of a static field that must not be
6825 //  hoisted (even if '*pHoistable' is true) unless a preceding corresponding cctor init helper
6826 //  call is also hoisted.
6827 //
6828 bool Compiler::optHoistLoopExprsForTree(GenTree*          tree,
6829                                         unsigned          lnum,
6830                                         LoopHoistContext* hoistCtxt,
6831                                         bool*             pFirstBlockAndBeforeSideEffect,
6832                                         bool*             pHoistable,
6833                                         bool*             pCctorDependent)
6834 {
6835     // First do the children.
6836     // We must keep track of whether each child node was hoistable or not
6837     //
6838     unsigned nChildren = tree->NumChildren();
6839     bool     childrenHoistable[GenTree::MAX_CHILDREN];
6840     bool     childrenCctorDependent[GenTree::MAX_CHILDREN];
6841
6842     // Initialize the array elements for childrenHoistable[] to false
6843     for (unsigned i = 0; i < nChildren; i++)
6844     {
6845         childrenHoistable[i]      = false;
6846         childrenCctorDependent[i] = false;
6847     }
6848
6849     // Initclass CLS_VARs and IconHandles are the base cases of cctor dependent trees.
6850     // In the IconHandle case, it's of course the dereference, rather than the constant itself, that is
6851     // truly dependent on the cctor.  So a more precise approach would be to separately propagate
6852     // isCctorDependent and isAddressWhoseDereferenceWouldBeCctorDependent, but we don't for simplicity/throughput;
6853     // the constant itself would be considered non-hoistable anyway, since optIsCSEcandidate returns
6854     // false for constants.
6855     bool treeIsCctorDependent = ((tree->OperIs(GT_CLS_VAR) && ((tree->gtFlags & GTF_CLS_VAR_INITCLASS) != 0)) ||
6856                                  (tree->OperIs(GT_CNS_INT) && ((tree->gtFlags & GTF_ICON_INITCLASS) != 0)));
6857     bool treeIsInvariant = true;
6858     for (unsigned childNum = 0; childNum < nChildren; childNum++)
6859     {
6860         if (!optHoistLoopExprsForTree(tree->GetChild(childNum), lnum, hoistCtxt, pFirstBlockAndBeforeSideEffect,
6861                                       &childrenHoistable[childNum], &childrenCctorDependent[childNum]))
6862         {
6863             treeIsInvariant = false;
6864         }
6865
6866         if (childrenCctorDependent[childNum])
6867         {
6868             // Normally, a parent of a cctor-dependent tree is also cctor-dependent.
6869             treeIsCctorDependent = true;
6870
6871             // Check for the case where we can stop propagating cctor-dependent upwards.
6872             if (tree->OperIs(GT_COMMA) && (childNum == 1))
6873             {
6874                 GenTree* op1 = tree->gtGetOp1();
6875                 if (op1->OperIs(GT_CALL))
6876                 {
6877                     GenTreeCall* call = op1->AsCall();
6878                     if ((call->gtCallType == CT_HELPER) &&
6879                         s_helperCallProperties.MayRunCctor(eeGetHelperNum(call->gtCallMethHnd)))
6880                     {
6881                         // Hoisting the comma is ok because it would hoist the initialization along
6882                         // with the static field reference.
6883                         treeIsCctorDependent = false;
6884                         // Hoisting the static field without hoisting the initialization would be
6885                         // incorrect, make sure we consider the field (which we flagged as
6886                         // cctor-dependent) non-hoistable.
6887                         noway_assert(!childrenHoistable[childNum]);
6888                     }
6889                 }
6890             }
6891         }
6892     }
6893
6894     // If all the children of "tree" are hoistable, then "tree" itself can be hoisted,
6895     // unless it has a static var reference that can't be hoisted past its cctor call.
6896     bool treeIsHoistable = treeIsInvariant && !treeIsCctorDependent;
6897
6898     // But we must see if anything else prevents "tree" from being hoisted.
6899     //
6900     if (treeIsInvariant)
6901     {
6902         // Tree must be a suitable CSE candidate for us to be able to hoist it.
6903         treeIsHoistable &= optIsCSEcandidate(tree);
6904
6905         // If it's a call, it must be a helper call, and be pure.
6906         // Further, if it may run a cctor, it must be labeled as "Hoistable"
6907         // (meaning it won't run a cctor because the class is not precise-init).
6908         if (treeIsHoistable && tree->OperGet() == GT_CALL)
6909         {
6910             GenTreeCall* call = tree->AsCall();
6911             if (call->gtCallType != CT_HELPER)
6912             {
6913                 treeIsHoistable = false;
6914             }
6915             else
6916             {
6917                 CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
6918                 if (!s_helperCallProperties.IsPure(helpFunc))
6919                 {
6920                     treeIsHoistable = false;
6921                 }
6922                 else if (s_helperCallProperties.MayRunCctor(helpFunc) && (call->gtFlags & GTF_CALL_HOISTABLE) == 0)
6923                 {
6924                     treeIsHoistable = false;
6925                 }
6926             }
6927         }
6928
6929         if (treeIsHoistable)
6930         {
6931             if (!(*pFirstBlockAndBeforeSideEffect))
6932             {
6933                 // For now, we give up on an expression that might raise an exception if it is after the
6934                 // first possible global side effect (and we assume we're after that if we're not in the first block).
6935                 // TODO-CQ: this is when we might do loop cloning.
6936                 //
6937                 if ((tree->gtFlags & GTF_EXCEPT) != 0)
6938                 {
6939                     treeIsHoistable = false;
6940                 }
6941             }
6942         }
6943
6944         // Is the value of the whole tree loop invariant?
6945         treeIsInvariant =
6946             optVNIsLoopInvariant(tree->gtVNPair.GetLiberal(), lnum, &hoistCtxt->m_curLoopVnInvariantCache);
6947
6948         // Is the value of the whole tree loop invariant?
6949         if (!treeIsInvariant)
6950         {
6951             treeIsHoistable = false;
6952         }
6953     }
6954
6955     // Check if we need to set '*pFirstBlockAndBeforeSideEffect' to false.
6956     // If we encounter a tree with a call in it
6957     //  or if we see an assignment to global we set it to false.
6958     //
6959     // If we are already set to false then we can skip these checks
6960     //
6961     if (*pFirstBlockAndBeforeSideEffect)
6962     {
6963         // For this purpose, we only care about memory side effects.  We assume that expressions will
6964         // be hoisted so that they are evaluated in the same order as they would have been in the loop,
6965         // and therefore throw exceptions in the same order.  (So we don't use GTF_GLOBALLY_VISIBLE_SIDE_EFFECTS
6966         // here, since that includes exceptions.)
6967         if (tree->IsCall())
6968         {
6969             // If it's a call, it must be a helper call that does not mutate the heap.
6970             // Further, if it may run a cctor, it must be labeled as "Hoistable"
6971             // (meaning it won't run a cctor because the class is not precise-init).
6972             GenTreeCall* call = tree->AsCall();
6973             if (call->gtCallType != CT_HELPER)
6974             {
6975                 *pFirstBlockAndBeforeSideEffect = false;
6976             }
6977             else
6978             {
6979                 CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
6980                 if (s_helperCallProperties.MutatesHeap(helpFunc))
6981                 {
6982                     *pFirstBlockAndBeforeSideEffect = false;
6983                 }
6984                 else if (s_helperCallProperties.MayRunCctor(helpFunc) && (call->gtFlags & GTF_CALL_HOISTABLE) == 0)
6985                 {
6986                     *pFirstBlockAndBeforeSideEffect = false;
6987                 }
6988             }
6989         }
6990         else if (tree->OperIsAssignment())
6991         {
6992             // If the LHS of the assignment has a global reference, then assume it's a global side effect.
6993             GenTree* lhs = tree->gtOp.gtOp1;
6994             if (lhs->gtFlags & GTF_GLOB_REF)
6995             {
6996                 *pFirstBlockAndBeforeSideEffect = false;
6997             }
6998         }
6999         else if (tree->OperIsCopyBlkOp())
7000         {
7001             GenTree* args = tree->gtOp.gtOp1;
7002             assert(args->OperGet() == GT_LIST);
7003             if (args->gtOp.gtOp1->gtFlags & GTF_GLOB_REF)
7004             {
7005                 *pFirstBlockAndBeforeSideEffect = false;
7006             }
7007         }
7008     }
7009
7010     // If this 'tree' is hoistable then we return and the caller will
7011     // decide to hoist it as part of larger hoistable expression.
7012     //
7013     if (!treeIsHoistable)
7014     {
7015         // We are not hoistable so we will now hoist any hoistable children.
7016         //
7017         for (unsigned childNum = 0; childNum < nChildren; childNum++)
7018         {
7019             if (childrenHoistable[childNum])
7020             {
7021                 // We can't hoist the LHS of an assignment, isn't a real use.
7022                 if (childNum == 0 && (tree->OperIsAssignment()))
7023                 {
7024                     continue;
7025                 }
7026
7027                 GenTree* child = tree->GetChild(childNum);
7028
7029                 // We try to hoist this 'child' tree
7030                 optHoistCandidate(child, lnum, hoistCtxt);
7031             }
7032         }
7033     }
7034
7035     *pHoistable      = treeIsHoistable;
7036     *pCctorDependent = treeIsCctorDependent;
7037     return treeIsInvariant;
7038 }
7039
7040 void Compiler::optHoistCandidate(GenTree* tree, unsigned lnum, LoopHoistContext* hoistCtxt)
7041 {
7042     if (lnum == BasicBlock::NOT_IN_LOOP)
7043     {
7044         // The hoisted expression isn't valid at any loop head so don't hoist this expression.
7045         return;
7046     }
7047
7048     // The outer loop also must be suitable for hoisting...
7049     if ((optLoopTable[lnum].lpFlags & LPFLG_HOISTABLE) == 0)
7050     {
7051         return;
7052     }
7053
7054     // If the hoisted expression isn't valid at this loop head then break
7055     if (!optTreeIsValidAtLoopHead(tree, lnum))
7056     {
7057         return;
7058     }
7059
7060     // It must pass the hoistable profitablity tests for this loop level
7061     if (!optIsProfitableToHoistableTree(tree, lnum))
7062     {
7063         return;
7064     }
7065
7066     bool b;
7067     if (hoistCtxt->m_hoistedInParentLoops.Lookup(tree->gtVNPair.GetLiberal(), &b))
7068     {
7069         // already hoisted in a parent loop, so don't hoist this expression.
7070         return;
7071     }
7072
7073     if (hoistCtxt->GetHoistedInCurLoop(this)->Lookup(tree->gtVNPair.GetLiberal(), &b))
7074     {
7075         // already hoisted this expression in the current loop, so don't hoist this expression.
7076         return;
7077     }
7078
7079     // Expression can be hoisted
7080     optPerformHoistExpr(tree, lnum);
7081
7082     // Increment lpHoistedExprCount or lpHoistedFPExprCount
7083     if (!varTypeIsFloating(tree->TypeGet()))
7084     {
7085         optLoopTable[lnum].lpHoistedExprCount++;
7086 #ifndef _TARGET_64BIT_
7087         // For our 32-bit targets Long types take two registers.
7088         if (varTypeIsLong(tree->TypeGet()))
7089         {
7090             optLoopTable[lnum].lpHoistedExprCount++;
7091         }
7092 #endif
7093     }
7094     else // Floating point expr hoisted
7095     {
7096         optLoopTable[lnum].lpHoistedFPExprCount++;
7097     }
7098
7099     // Record the hoisted expression in hoistCtxt
7100     hoistCtxt->GetHoistedInCurLoop(this)->Set(tree->gtVNPair.GetLiberal(), true);
7101 }
7102
7103 bool Compiler::optVNIsLoopInvariant(ValueNum vn, unsigned lnum, VNToBoolMap* loopVnInvariantCache)
7104 {
7105     // If it is not a VN, is not loop-invariant.
7106     if (vn == ValueNumStore::NoVN)
7107     {
7108         return false;
7109     }
7110
7111     // We'll always short-circuit constants.
7112     if (vnStore->IsVNConstant(vn) || vn == vnStore->VNForVoid())
7113     {
7114         return true;
7115     }
7116
7117     // If we've done this query previously, don't repeat.
7118     bool previousRes = false;
7119     if (loopVnInvariantCache->Lookup(vn, &previousRes))
7120     {
7121         return previousRes;
7122     }
7123
7124     bool      res = true;
7125     VNFuncApp funcApp;
7126     if (vnStore->GetVNFunc(vn, &funcApp))
7127     {
7128         if (funcApp.m_func == VNF_PhiDef)
7129         {
7130             // First, make sure it's a "proper" phi -- the definition is a Phi application.
7131             VNFuncApp phiDefValFuncApp;
7132             if (!vnStore->GetVNFunc(funcApp.m_args[2], &phiDefValFuncApp) || phiDefValFuncApp.m_func != VNF_Phi)
7133             {
7134                 // It's not *really* a definition, rather a pass-through of some other VN.
7135                 // (This could occur, say if both sides of an if-then-else diamond made the
7136                 // same assignment to a variable.)
7137                 res = optVNIsLoopInvariant(funcApp.m_args[2], lnum, loopVnInvariantCache);
7138             }
7139             else
7140             {
7141                 // Is the definition within the loop?  If so, is not loop-invariant.
7142                 unsigned      lclNum = funcApp.m_args[0];
7143                 unsigned      ssaNum = funcApp.m_args[1];
7144                 LclSsaVarDsc* ssaDef = lvaTable[lclNum].GetPerSsaData(ssaNum);
7145                 res                  = !optLoopContains(lnum, ssaDef->m_defLoc.m_blk->bbNatLoopNum);
7146             }
7147         }
7148         else if (funcApp.m_func == VNF_PhiMemoryDef)
7149         {
7150             BasicBlock* defnBlk = reinterpret_cast<BasicBlock*>(vnStore->ConstantValue<ssize_t>(funcApp.m_args[0]));
7151             res                 = !optLoopContains(lnum, defnBlk->bbNatLoopNum);
7152         }
7153         else
7154         {
7155             for (unsigned i = 0; i < funcApp.m_arity; i++)
7156             {
7157                 // TODO-CQ: We need to either make sure that *all* VN functions
7158                 // always take VN args, or else have a list of arg positions to exempt, as implicitly
7159                 // constant.
7160                 if (!optVNIsLoopInvariant(funcApp.m_args[i], lnum, loopVnInvariantCache))
7161                 {
7162                     res = false;
7163                     break;
7164                 }
7165             }
7166         }
7167     }
7168     else
7169     {
7170         // Non-function "new, unique" VN's may be annotated with the loop nest where
7171         // their definition occurs.
7172         BasicBlock::loopNumber vnLoopNum = vnStore->LoopOfVN(vn);
7173
7174         if (vnLoopNum == MAX_LOOP_NUM)
7175         {
7176             res = false;
7177         }
7178         else
7179         {
7180             res = !optLoopContains(lnum, vnLoopNum);
7181         }
7182     }
7183
7184     loopVnInvariantCache->Set(vn, res);
7185     return res;
7186 }
7187
7188 bool Compiler::optTreeIsValidAtLoopHead(GenTree* tree, unsigned lnum)
7189 {
7190     if (tree->OperIsLocal())
7191     {
7192         GenTreeLclVarCommon* lclVar = tree->AsLclVarCommon();
7193         unsigned             lclNum = lclVar->gtLclNum;
7194
7195         // The lvlVar must be have an Ssa tracked lifetime
7196         if (fgExcludeFromSsa(lclNum))
7197         {
7198             return false;
7199         }
7200
7201         // If the loop does not contains the SSA def we can hoist it.
7202         if (!optLoopTable[lnum].lpContains(lvaTable[lclNum].GetPerSsaData(lclVar->GetSsaNum())->m_defLoc.m_blk))
7203         {
7204             return true;
7205         }
7206     }
7207     else if (tree->OperIsConst())
7208     {
7209         return true;
7210     }
7211     else // If every one of the children nodes are valid at this Loop's Head.
7212     {
7213         unsigned nChildren = tree->NumChildren();
7214         for (unsigned childNum = 0; childNum < nChildren; childNum++)
7215         {
7216             if (!optTreeIsValidAtLoopHead(tree->GetChild(childNum), lnum))
7217             {
7218                 return false;
7219             }
7220         }
7221         return true;
7222     }
7223     return false;
7224 }
7225
7226 /*****************************************************************************
7227  *
7228  *  Creates a pre-header block for the given loop - a preheader is a BBJ_NONE
7229  *  header. The pre-header will replace the current lpHead in the loop table.
7230  *  The loop has to be a do-while loop. Thus, all blocks dominated by lpHead
7231  *  will also be dominated by the loop-top, lpHead->bbNext.
7232  *
7233  */
7234
7235 void Compiler::fgCreateLoopPreHeader(unsigned lnum)
7236 {
7237     LoopDsc* pLoopDsc = &optLoopTable[lnum];
7238
7239     /* This loop has to be a "do-while" loop */
7240
7241     assert(pLoopDsc->lpFlags & LPFLG_DO_WHILE);
7242
7243     /* Have we already created a loop-preheader block? */
7244
7245     if (pLoopDsc->lpFlags & LPFLG_HAS_PREHEAD)
7246     {
7247         return;
7248     }
7249
7250     BasicBlock* head  = pLoopDsc->lpHead;
7251     BasicBlock* top   = pLoopDsc->lpTop;
7252     BasicBlock* entry = pLoopDsc->lpEntry;
7253
7254     // if 'entry' and 'head' are in different try regions then we won't be able to hoist
7255     if (!BasicBlock::sameTryRegion(head, entry))
7256     {
7257         return;
7258     }
7259
7260     // Ensure that lpHead always dominates lpEntry
7261
7262     noway_assert(fgDominate(head, entry));
7263
7264     /* Get hold of the first block of the loop body */
7265
7266     assert(top == entry);
7267
7268     /* Allocate a new basic block */
7269
7270     BasicBlock* preHead = bbNewBasicBlock(BBJ_NONE);
7271     preHead->bbFlags |= BBF_INTERNAL | BBF_LOOP_PREHEADER;
7272
7273     // Must set IL code offset
7274     preHead->bbCodeOffs = top->bbCodeOffs;
7275
7276     // Set the default value of the preHead weight in case we don't have
7277     // valid profile data and since this blocks weight is just an estimate
7278     // we clear any BBF_PROF_WEIGHT flag that we may have picked up from head.
7279     //
7280     preHead->inheritWeight(head);
7281     preHead->bbFlags &= ~BBF_PROF_WEIGHT;
7282
7283 #ifdef DEBUG
7284     if (verbose)
7285     {
7286         printf("\nCreated PreHeader (BB%02u) for loop L%02u (BB%02u - BB%02u), with weight = %s\n", preHead->bbNum,
7287                lnum, top->bbNum, pLoopDsc->lpBottom->bbNum, refCntWtd2str(preHead->getBBWeight(this)));
7288     }
7289 #endif
7290
7291     // The preheader block is part of the containing loop (if any).
7292     preHead->bbNatLoopNum = pLoopDsc->lpParent;
7293
7294     if (fgIsUsingProfileWeights() && (head->bbJumpKind == BBJ_COND))
7295     {
7296         if ((head->bbWeight == 0) || (head->bbNext->bbWeight == 0))
7297         {
7298             preHead->bbWeight = 0;
7299             preHead->bbFlags |= BBF_RUN_RARELY;
7300         }
7301         else
7302         {
7303             bool allValidProfileWeights =
7304                 (head->hasProfileWeight() && head->bbJumpDest->hasProfileWeight() && head->bbNext->hasProfileWeight());
7305
7306             if (allValidProfileWeights)
7307             {
7308                 double loopEnteredCount;
7309                 double loopSkippedCount;
7310
7311                 if (fgHaveValidEdgeWeights)
7312                 {
7313                     flowList* edgeToNext = fgGetPredForBlock(head->bbNext, head);
7314                     flowList* edgeToJump = fgGetPredForBlock(head->bbJumpDest, head);
7315                     noway_assert(edgeToNext != nullptr);
7316                     noway_assert(edgeToJump != nullptr);
7317
7318                     loopEnteredCount =
7319                         ((double)edgeToNext->flEdgeWeightMin + (double)edgeToNext->flEdgeWeightMax) / 2.0;
7320                     loopSkippedCount =
7321                         ((double)edgeToJump->flEdgeWeightMin + (double)edgeToJump->flEdgeWeightMax) / 2.0;
7322                 }
7323                 else
7324                 {
7325                     loopEnteredCount = (double)head->bbNext->bbWeight;
7326                     loopSkippedCount = (double)head->bbJumpDest->bbWeight;
7327                 }
7328
7329                 double loopTakenRatio = loopEnteredCount / (loopEnteredCount + loopSkippedCount);
7330
7331                 // Calculate a good approximation of the preHead's block weight
7332                 unsigned preHeadWeight = (unsigned)(((double)head->bbWeight * loopTakenRatio) + 0.5);
7333                 preHead->setBBWeight(max(preHeadWeight, 1));
7334                 noway_assert(!preHead->isRunRarely());
7335             }
7336         }
7337     }
7338
7339     // Link in the preHead block.
7340     fgInsertBBbefore(top, preHead);
7341
7342     // Ideally we would re-run SSA and VN if we optimized by doing loop hoisting.
7343     // However, that is too expensive at this point. Instead, we update the phi
7344     // node block references, if we created pre-header block due to hoisting.
7345     // This is sufficient because any definition participating in SSA that flowed
7346     // into the phi via the loop header block will now flow through the preheader
7347     // block from the header block.
7348
7349     for (GenTree* stmt = top->bbTreeList; stmt; stmt = stmt->gtNext)
7350     {
7351         GenTree* tree = stmt->gtStmt.gtStmtExpr;
7352         if (tree->OperGet() != GT_ASG)
7353         {
7354             break;
7355         }
7356         GenTree* op2 = tree->gtGetOp2();
7357         if (op2->OperGet() != GT_PHI)
7358         {
7359             break;
7360         }
7361         GenTreeArgList* args = op2->gtGetOp1()->AsArgList();
7362         while (args != nullptr)
7363         {
7364             GenTreePhiArg* phiArg = args->Current()->AsPhiArg();
7365             if (phiArg->gtPredBB == head)
7366             {
7367                 phiArg->gtPredBB = preHead;
7368             }
7369             args = args->Rest();
7370         }
7371     }
7372
7373     // The handler can't begin at the top of the loop.  If it did, it would be incorrect
7374     // to set the handler index on the pre header without updating the exception table.
7375     noway_assert(!top->hasHndIndex() || fgFirstBlockOfHandler(top) != top);
7376
7377     // Update the EH table to make the hoisted block part of the loop's EH block.
7378     fgExtendEHRegionBefore(top);
7379
7380     // TODO-CQ: set dominators for this block, to allow loop optimizations requiring them
7381     //        (e.g: hoisting expression in a loop with the same 'head' as this one)
7382
7383     /* Update the loop entry */
7384
7385     pLoopDsc->lpHead = preHead;
7386     pLoopDsc->lpFlags |= LPFLG_HAS_PREHEAD;
7387
7388     /* The new block becomes the 'head' of the loop - update bbRefs and bbPreds
7389        All predecessors of 'beg', (which is the entry in the loop)
7390        now have to jump to 'preHead', unless they are dominated by 'head' */
7391
7392     preHead->bbRefs = 0;
7393     fgAddRefPred(preHead, head);
7394     bool checkNestedLoops = false;
7395
7396     for (flowList* pred = top->bbPreds; pred; pred = pred->flNext)
7397     {
7398         BasicBlock* predBlock = pred->flBlock;
7399
7400         if (fgDominate(top, predBlock))
7401         {
7402             // note: if 'top' dominates predBlock, 'head' dominates predBlock too
7403             // (we know that 'head' dominates 'top'), but using 'top' instead of
7404             // 'head' in the test allows us to not enter here if 'predBlock == head'
7405
7406             if (predBlock != pLoopDsc->lpBottom)
7407             {
7408                 noway_assert(predBlock != head);
7409                 checkNestedLoops = true;
7410             }
7411             continue;
7412         }
7413
7414         switch (predBlock->bbJumpKind)
7415         {
7416             case BBJ_NONE:
7417                 noway_assert(predBlock == head);
7418                 break;
7419
7420             case BBJ_COND:
7421                 if (predBlock == head)
7422                 {
7423                     noway_assert(predBlock->bbJumpDest != top);
7424                     break;
7425                 }
7426                 __fallthrough;
7427
7428             case BBJ_ALWAYS:
7429             case BBJ_EHCATCHRET:
7430                 noway_assert(predBlock->bbJumpDest == top);
7431                 predBlock->bbJumpDest = preHead;
7432                 preHead->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
7433
7434                 if (predBlock == head)
7435                 {
7436                     // This is essentially the same case of predBlock being a BBJ_NONE. We may not be
7437                     // able to make this a BBJ_NONE if it's an internal block (for example, a leave).
7438                     // Just break, pred will be removed after switch.
7439                 }
7440                 else
7441                 {
7442                     fgRemoveRefPred(top, predBlock);
7443                     fgAddRefPred(preHead, predBlock);
7444                 }
7445                 break;
7446
7447             case BBJ_SWITCH:
7448                 unsigned jumpCnt;
7449                 jumpCnt = predBlock->bbJumpSwt->bbsCount;
7450                 BasicBlock** jumpTab;
7451                 jumpTab = predBlock->bbJumpSwt->bbsDstTab;
7452
7453                 do
7454                 {
7455                     assert(*jumpTab);
7456                     if ((*jumpTab) == top)
7457                     {
7458                         (*jumpTab) = preHead;
7459
7460                         fgRemoveRefPred(top, predBlock);
7461                         fgAddRefPred(preHead, predBlock);
7462                         preHead->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
7463                     }
7464                 } while (++jumpTab, --jumpCnt);
7465
7466             default:
7467                 noway_assert(!"Unexpected bbJumpKind");
7468                 break;
7469         }
7470     }
7471
7472     noway_assert(!fgGetPredForBlock(top, preHead));
7473     fgRemoveRefPred(top, head);
7474     fgAddRefPred(top, preHead);
7475
7476     /*
7477         If we found at least one back-edge in the flowgraph pointing to the top/entry of the loop
7478         (other than the back-edge of the loop we are considering) then we likely have nested
7479         do-while loops with the same entry block and inserting the preheader block changes the head
7480         of all the nested loops. Now we will update this piece of information in the loop table, and
7481         mark all nested loops as having a preheader (the preheader block can be shared among all nested
7482         do-while loops with the same entry block).
7483     */
7484     if (checkNestedLoops)
7485     {
7486         for (unsigned l = 0; l < optLoopCount; l++)
7487         {
7488             if (optLoopTable[l].lpHead == head)
7489             {
7490                 noway_assert(l != lnum); // pLoopDsc->lpHead was already changed from 'head' to 'preHead'
7491                 noway_assert(optLoopTable[l].lpEntry == top);
7492                 optUpdateLoopHead(l, optLoopTable[l].lpHead, preHead);
7493                 optLoopTable[l].lpFlags |= LPFLG_HAS_PREHEAD;
7494 #ifdef DEBUG
7495                 if (verbose)
7496                 {
7497                     printf("Same PreHeader (BB%02u) can be used for loop L%02u (BB%02u - BB%02u)\n\n", preHead->bbNum,
7498                            l, top->bbNum, optLoopTable[l].lpBottom->bbNum);
7499                 }
7500 #endif
7501             }
7502         }
7503     }
7504 }
7505
7506 bool Compiler::optBlockIsLoopEntry(BasicBlock* blk, unsigned* pLnum)
7507 {
7508     for (unsigned lnum = blk->bbNatLoopNum; lnum != BasicBlock::NOT_IN_LOOP; lnum = optLoopTable[lnum].lpParent)
7509     {
7510         if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED)
7511         {
7512             continue;
7513         }
7514         if (optLoopTable[lnum].lpEntry == blk)
7515         {
7516             *pLnum = lnum;
7517             return true;
7518         }
7519     }
7520     return false;
7521 }
7522
7523 void Compiler::optComputeLoopSideEffects()
7524 {
7525     unsigned lnum;
7526     for (lnum = 0; lnum < optLoopCount; lnum++)
7527     {
7528         VarSetOps::AssignNoCopy(this, optLoopTable[lnum].lpVarInOut, VarSetOps::MakeEmpty(this));
7529         VarSetOps::AssignNoCopy(this, optLoopTable[lnum].lpVarUseDef, VarSetOps::MakeEmpty(this));
7530         optLoopTable[lnum].lpContainsCall = false;
7531     }
7532
7533     for (lnum = 0; lnum < optLoopCount; lnum++)
7534     {
7535         if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED)
7536         {
7537             continue;
7538         }
7539
7540         if (optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP)
7541         { // Is outermost...
7542             optComputeLoopNestSideEffects(lnum);
7543         }
7544     }
7545
7546     VarSetOps::AssignNoCopy(this, lvaFloatVars, VarSetOps::MakeEmpty(this));
7547 #ifndef _TARGET_64BIT_
7548     VarSetOps::AssignNoCopy(this, lvaLongVars, VarSetOps::MakeEmpty(this));
7549 #endif
7550
7551     for (unsigned i = 0; i < lvaCount; i++)
7552     {
7553         LclVarDsc* varDsc = &lvaTable[i];
7554         if (varDsc->lvTracked)
7555         {
7556             if (varTypeIsFloating(varDsc->lvType))
7557             {
7558                 VarSetOps::AddElemD(this, lvaFloatVars, varDsc->lvVarIndex);
7559             }
7560 #ifndef _TARGET_64BIT_
7561             else if (varTypeIsLong(varDsc->lvType))
7562             {
7563                 VarSetOps::AddElemD(this, lvaLongVars, varDsc->lvVarIndex);
7564             }
7565 #endif
7566         }
7567     }
7568 }
7569
7570 void Compiler::optComputeLoopNestSideEffects(unsigned lnum)
7571 {
7572     assert(optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP); // Requires: lnum is outermost.
7573     BasicBlock* botNext = optLoopTable[lnum].lpBottom->bbNext;
7574     for (BasicBlock* bbInLoop = optLoopTable[lnum].lpFirst; bbInLoop != botNext; bbInLoop = bbInLoop->bbNext)
7575     {
7576         optComputeLoopSideEffectsOfBlock(bbInLoop);
7577     }
7578 }
7579
7580 void Compiler::optComputeLoopSideEffectsOfBlock(BasicBlock* blk)
7581 {
7582     unsigned mostNestedLoop = blk->bbNatLoopNum;
7583     assert(mostNestedLoop != BasicBlock::NOT_IN_LOOP);
7584
7585     AddVariableLivenessAllContainingLoops(mostNestedLoop, blk);
7586
7587     // MemoryKinds for which an in-loop call or store has arbitrary effects.
7588     MemoryKindSet memoryHavoc = emptyMemoryKindSet;
7589
7590     // Now iterate over the remaining statements, and their trees.
7591     for (GenTree* stmts = blk->FirstNonPhiDef(); (stmts != nullptr); stmts = stmts->gtNext)
7592     {
7593         for (GenTree* tree = stmts->gtStmt.gtStmtList; (tree != nullptr); tree = tree->gtNext)
7594         {
7595             genTreeOps oper = tree->OperGet();
7596
7597             // Even after we set memoryHavoc we still may want to know if a loop contains calls
7598             if (memoryHavoc == fullMemoryKindSet)
7599             {
7600                 if (oper == GT_CALL)
7601                 {
7602                     // Record that this loop contains a call
7603                     AddContainsCallAllContainingLoops(mostNestedLoop);
7604                 }
7605
7606                 // If we just set lpContainsCall or it was previously set
7607                 if (optLoopTable[mostNestedLoop].lpContainsCall)
7608                 {
7609                     // We can early exit after both memoryHavoc and lpContainsCall are both set to true.
7610                     break;
7611                 }
7612
7613                 // We are just looking for GT_CALL nodes after memoryHavoc was set.
7614                 continue;
7615             }
7616
7617             // otherwise memoryHavoc is not set for at least one heap ID
7618             assert(memoryHavoc != fullMemoryKindSet);
7619
7620             // This body is a distillation of the memory side-effect code of value numbering.
7621             // We also do a very limited analysis if byref PtrTo values, to cover some cases
7622             // that the compiler creates.
7623
7624             if (GenTree::OperIsAssignment(oper))
7625             {
7626                 GenTree* lhs = tree->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true);
7627
7628                 if (lhs->OperGet() == GT_IND)
7629                 {
7630                     GenTree*      arg           = lhs->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true);
7631                     FieldSeqNode* fldSeqArrElem = nullptr;
7632
7633                     if ((tree->gtFlags & GTF_IND_VOLATILE) != 0)
7634                     {
7635                         memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
7636                         continue;
7637                     }
7638
7639                     ArrayInfo arrInfo;
7640
7641                     if (arg->TypeGet() == TYP_BYREF && arg->OperGet() == GT_LCL_VAR)
7642                     {
7643                         // If it's a local byref for which we recorded a value number, use that...
7644                         GenTreeLclVar* argLcl = arg->AsLclVar();
7645                         if (!fgExcludeFromSsa(argLcl->GetLclNum()))
7646                         {
7647                             ValueNum argVN =
7648                                 lvaTable[argLcl->GetLclNum()].GetPerSsaData(argLcl->GetSsaNum())->m_vnPair.GetLiberal();
7649                             VNFuncApp funcApp;
7650                             if (argVN != ValueNumStore::NoVN && vnStore->GetVNFunc(argVN, &funcApp) &&
7651                                 funcApp.m_func == VNF_PtrToArrElem)
7652                             {
7653                                 assert(vnStore->IsVNHandle(funcApp.m_args[0]));
7654                                 CORINFO_CLASS_HANDLE elemType =
7655                                     CORINFO_CLASS_HANDLE(vnStore->ConstantValue<size_t>(funcApp.m_args[0]));
7656                                 AddModifiedElemTypeAllContainingLoops(mostNestedLoop, elemType);
7657                                 // Don't set memoryHavoc for GcHeap below.  Do set memoryHavoc for ByrefExposed
7658                                 // (conservatively assuming that a byref may alias the array element)
7659                                 memoryHavoc |= memoryKindSet(ByrefExposed);
7660                                 continue;
7661                             }
7662                         }
7663                         // Otherwise...
7664                         memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
7665                     }
7666                     // Is the LHS an array index expression?
7667                     else if (lhs->ParseArrayElemForm(this, &arrInfo, &fldSeqArrElem))
7668                     {
7669                         // We actually ignore "fldSeq" -- any modification to an S[], at any
7670                         // field of "S", will lose all information about the array type.
7671                         CORINFO_CLASS_HANDLE elemTypeEq = EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType);
7672                         AddModifiedElemTypeAllContainingLoops(mostNestedLoop, elemTypeEq);
7673                         // Conservatively assume byrefs may alias this array element
7674                         memoryHavoc |= memoryKindSet(ByrefExposed);
7675                     }
7676                     else
7677                     {
7678                         // We are only interested in IsFieldAddr()'s fldSeq out parameter.
7679                         //
7680                         GenTree*      obj          = nullptr; // unused
7681                         GenTree*      staticOffset = nullptr; // unused
7682                         FieldSeqNode* fldSeq       = nullptr;
7683
7684                         if (arg->IsFieldAddr(this, &obj, &staticOffset, &fldSeq) &&
7685                             (fldSeq != FieldSeqStore::NotAField()))
7686                         {
7687                             // Get the first (object) field from field seq.  GcHeap[field] will yield the "field map".
7688                             assert(fldSeq != nullptr);
7689                             if (fldSeq->IsFirstElemFieldSeq())
7690                             {
7691                                 fldSeq = fldSeq->m_next;
7692                                 assert(fldSeq != nullptr);
7693                             }
7694
7695                             AddModifiedFieldAllContainingLoops(mostNestedLoop, fldSeq->m_fieldHnd);
7696                             // Conservatively assume byrefs may alias this object.
7697                             memoryHavoc |= memoryKindSet(ByrefExposed);
7698                         }
7699                         else
7700                         {
7701                             memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
7702                         }
7703                     }
7704                 }
7705                 else if (lhs->OperIsBlk())
7706                 {
7707                     GenTreeLclVarCommon* lclVarTree;
7708                     bool                 isEntire;
7709                     if (!tree->DefinesLocal(this, &lclVarTree, &isEntire))
7710                     {
7711                         // For now, assume arbitrary side effects on GcHeap/ByrefExposed...
7712                         memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
7713                     }
7714                     else if (lvaVarAddrExposed(lclVarTree->gtLclNum))
7715                     {
7716                         memoryHavoc |= memoryKindSet(ByrefExposed);
7717                     }
7718                 }
7719                 else if (lhs->OperGet() == GT_CLS_VAR)
7720                 {
7721                     AddModifiedFieldAllContainingLoops(mostNestedLoop, lhs->gtClsVar.gtClsVarHnd);
7722                     // Conservatively assume byrefs may alias this static field
7723                     memoryHavoc |= memoryKindSet(ByrefExposed);
7724                 }
7725                 // Otherwise, must be local lhs form.  I should assert that.
7726                 else if (lhs->OperGet() == GT_LCL_VAR)
7727                 {
7728                     GenTreeLclVar* lhsLcl = lhs->AsLclVar();
7729                     GenTree*       rhs    = tree->gtOp.gtOp2;
7730                     ValueNum       rhsVN  = rhs->gtVNPair.GetLiberal();
7731                     // If we gave the RHS a value number, propagate it.
7732                     if (rhsVN != ValueNumStore::NoVN)
7733                     {
7734                         rhsVN = vnStore->VNNormVal(rhsVN);
7735                         if (!fgExcludeFromSsa(lhsLcl->GetLclNum()))
7736                         {
7737                             lvaTable[lhsLcl->GetLclNum()]
7738                                 .GetPerSsaData(lhsLcl->GetSsaNum())
7739                                 ->m_vnPair.SetLiberal(rhsVN);
7740                         }
7741                     }
7742                     // If the local is address-exposed, count this as ByrefExposed havoc
7743                     if (lvaVarAddrExposed(lhsLcl->gtLclNum))
7744                     {
7745                         memoryHavoc |= memoryKindSet(ByrefExposed);
7746                     }
7747                 }
7748             }
7749             else // not GenTree::OperIsAssignment(oper)
7750             {
7751                 switch (oper)
7752                 {
7753                     case GT_COMMA:
7754                         tree->gtVNPair = tree->gtOp.gtOp2->gtVNPair;
7755                         break;
7756
7757                     case GT_ADDR:
7758                         // Is it an addr of a array index expression?
7759                         {
7760                             GenTree* addrArg = tree->gtOp.gtOp1;
7761                             if (addrArg->OperGet() == GT_IND)
7762                             {
7763                                 // Is the LHS an array index expression?
7764                                 if (addrArg->gtFlags & GTF_IND_ARR_INDEX)
7765                                 {
7766                                     ArrayInfo arrInfo;
7767                                     bool      b = GetArrayInfoMap()->Lookup(addrArg, &arrInfo);
7768                                     assert(b);
7769                                     CORINFO_CLASS_HANDLE elemType =
7770                                         EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType);
7771                                     tree->gtVNPair.SetBoth(
7772                                         vnStore->VNForFunc(TYP_BYREF, VNF_PtrToArrElem,
7773                                                            vnStore->VNForHandle(ssize_t(elemType), GTF_ICON_CLASS_HDL),
7774                                                            // The rest are dummy arguments.
7775                                                            vnStore->VNForNull(), vnStore->VNForNull(),
7776                                                            vnStore->VNForNull()));
7777                                 }
7778                             }
7779                         }
7780                         break;
7781
7782                     case GT_LOCKADD: // Binop
7783                     case GT_XADD:    // Binop
7784                     case GT_XCHG:    // Binop
7785                     case GT_CMPXCHG: // Specialop
7786                     {
7787                         memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
7788                     }
7789                     break;
7790
7791                     case GT_CALL:
7792                     {
7793                         GenTreeCall* call = tree->AsCall();
7794
7795                         // Record that this loop contains a call
7796                         AddContainsCallAllContainingLoops(mostNestedLoop);
7797
7798                         if (call->gtCallType == CT_HELPER)
7799                         {
7800                             CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
7801                             if (s_helperCallProperties.MutatesHeap(helpFunc))
7802                             {
7803                                 memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
7804                             }
7805                             else if (s_helperCallProperties.MayRunCctor(helpFunc))
7806                             {
7807                                 // If the call is labeled as "Hoistable", then we've checked the
7808                                 // class that would be constructed, and it is not precise-init, so
7809                                 // the cctor will not be run by this call.  Otherwise, it might be,
7810                                 // and might have arbitrary side effects.
7811                                 if ((tree->gtFlags & GTF_CALL_HOISTABLE) == 0)
7812                                 {
7813                                     memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
7814                                 }
7815                             }
7816                         }
7817                         else
7818                         {
7819                             memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
7820                         }
7821                         break;
7822                     }
7823
7824                     default:
7825                         // All other gtOper node kinds, leave 'memoryHavoc' unchanged (i.e. false)
7826                         break;
7827                 }
7828             }
7829         }
7830     }
7831
7832     if (memoryHavoc != emptyMemoryKindSet)
7833     {
7834         // Record that all loops containing this block have memory havoc effects.
7835         unsigned lnum = mostNestedLoop;
7836         while (lnum != BasicBlock::NOT_IN_LOOP)
7837         {
7838             for (MemoryKind memoryKind : allMemoryKinds())
7839             {
7840                 if ((memoryHavoc & memoryKindSet(memoryKind)) != 0)
7841                 {
7842                     optLoopTable[lnum].lpLoopHasMemoryHavoc[memoryKind] = true;
7843                 }
7844             }
7845             lnum = optLoopTable[lnum].lpParent;
7846         }
7847     }
7848 }
7849
7850 // Marks the containsCall information to "lnum" and any parent loops.
7851 void Compiler::AddContainsCallAllContainingLoops(unsigned lnum)
7852 {
7853     assert(0 <= lnum && lnum < optLoopCount);
7854     while (lnum != BasicBlock::NOT_IN_LOOP)
7855     {
7856         optLoopTable[lnum].lpContainsCall = true;
7857         lnum                              = optLoopTable[lnum].lpParent;
7858     }
7859 }
7860
7861 // Adds the variable liveness information for 'blk' to 'this' LoopDsc
7862 void Compiler::LoopDsc::AddVariableLiveness(Compiler* comp, BasicBlock* blk)
7863 {
7864     VarSetOps::UnionD(comp, this->lpVarInOut, blk->bbLiveIn);
7865     VarSetOps::UnionD(comp, this->lpVarInOut, blk->bbLiveOut);
7866
7867     VarSetOps::UnionD(comp, this->lpVarUseDef, blk->bbVarUse);
7868     VarSetOps::UnionD(comp, this->lpVarUseDef, blk->bbVarDef);
7869 }
7870
7871 // Adds the variable liveness information for 'blk' to "lnum" and any parent loops.
7872 void Compiler::AddVariableLivenessAllContainingLoops(unsigned lnum, BasicBlock* blk)
7873 {
7874     assert(0 <= lnum && lnum < optLoopCount);
7875     while (lnum != BasicBlock::NOT_IN_LOOP)
7876     {
7877         optLoopTable[lnum].AddVariableLiveness(this, blk);
7878         lnum = optLoopTable[lnum].lpParent;
7879     }
7880 }
7881
7882 // Adds "fldHnd" to the set of modified fields of "lnum" and any parent loops.
7883 void Compiler::AddModifiedFieldAllContainingLoops(unsigned lnum, CORINFO_FIELD_HANDLE fldHnd)
7884 {
7885     assert(0 <= lnum && lnum < optLoopCount);
7886     while (lnum != BasicBlock::NOT_IN_LOOP)
7887     {
7888         optLoopTable[lnum].AddModifiedField(this, fldHnd);
7889         lnum = optLoopTable[lnum].lpParent;
7890     }
7891 }
7892
7893 // Adds "elemType" to the set of modified array element types of "lnum" and any parent loops.
7894 void Compiler::AddModifiedElemTypeAllContainingLoops(unsigned lnum, CORINFO_CLASS_HANDLE elemClsHnd)
7895 {
7896     assert(0 <= lnum && lnum < optLoopCount);
7897     while (lnum != BasicBlock::NOT_IN_LOOP)
7898     {
7899         optLoopTable[lnum].AddModifiedElemType(this, elemClsHnd);
7900         lnum = optLoopTable[lnum].lpParent;
7901     }
7902 }
7903
7904 /*****************************************************************************
7905  *
7906  *  Helper passed to Compiler::fgWalkAllTreesPre() to decrement the LclVar usage counts
7907  *  The 'keepList'is either a single tree or a list of trees that are formed by
7908  *  one or more GT_COMMA nodes.  It is the kept side-effects as returned by the
7909  *  gtExtractSideEffList method.
7910  */
7911
7912 /* static */
7913 Compiler::fgWalkResult Compiler::optRemoveTreeVisitor(GenTree** pTree, fgWalkData* data)
7914 {
7915     GenTree*  tree     = *pTree;
7916     Compiler* comp     = data->compiler;
7917     GenTree*  keepList = (GenTree*)(data->pCallbackData);
7918
7919     // We may have a non-NULL side effect list that is being kept
7920     //
7921     if (keepList)
7922     {
7923         GenTree* keptTree = keepList;
7924         while (keptTree->OperGet() == GT_COMMA)
7925         {
7926             assert(keptTree->OperKind() & GTK_SMPOP);
7927             GenTree* op1 = keptTree->gtOp.gtOp1;
7928             GenTree* op2 = keptTree->gtGetOp2();
7929
7930             // For the GT_COMMA case the op1 is part of the orginal CSE tree
7931             // that is being kept because it contains some side-effect
7932             //
7933             if (tree == op1)
7934             {
7935                 // This tree and all of its sub trees are being kept.
7936                 return WALK_SKIP_SUBTREES;
7937             }
7938
7939             // For the GT_COMMA case the op2 are the remaining side-effects of the orginal CSE tree
7940             // which can again be another GT_COMMA or the final side-effect part
7941             //
7942             keptTree = op2;
7943         }
7944         if (tree == keptTree)
7945         {
7946             // This tree and all of its sub trees are being kept.
7947             return WALK_SKIP_SUBTREES;
7948         }
7949     }
7950
7951     // This node is being removed from the graph of GenTree*
7952
7953     // Look for any local variable references
7954
7955     if (tree->gtOper == GT_LCL_VAR && comp->lvaLocalVarRefCounted)
7956     {
7957         unsigned   lclNum;
7958         LclVarDsc* varDsc;
7959
7960         /* This variable ref is going away, decrease its ref counts */
7961
7962         lclNum = tree->gtLclVarCommon.gtLclNum;
7963         assert(lclNum < comp->lvaCount);
7964         varDsc = comp->lvaTable + lclNum;
7965
7966         // make sure it's been initialized
7967         assert(comp->compCurBB != nullptr);
7968         assert(comp->compCurBB->bbWeight <= BB_MAX_WEIGHT);
7969
7970         /* Decrement its lvRefCnt and lvRefCntWtd */
7971
7972         // Use getBBWeight to determine the proper block weight.
7973         // This impacts the block weights when we have IBC data.
7974         varDsc->decRefCnts(comp->compCurBB->getBBWeight(comp), comp);
7975     }
7976
7977     return WALK_CONTINUE;
7978 }
7979
7980 /*****************************************************************************
7981  *
7982  *  Routine called to decrement the LclVar ref counts when removing a tree
7983  *  during the remove RangeCheck phase.
7984  *  This method will decrement the refcounts for any LclVars used below 'deadTree',
7985  *  unless the node is found in the 'keepList' (which are saved side effects)
7986  *  The keepList is communicated using the walkData.pCallbackData field
7987  *  Also the compCurBB must be set to the current BasicBlock  which contains
7988  *  'deadTree' as we need to fetch the block weight when decrementing the ref counts.
7989  */
7990
7991 void Compiler::optRemoveTree(GenTree* deadTree, GenTree* keepList)
7992 {
7993     // We communicate this value using the walkData.pCallbackData field
7994     //
7995     fgWalkTreePre(&deadTree, optRemoveTreeVisitor, (void*)keepList);
7996 }
7997
7998 //------------------------------------------------------------------------------
7999 // optRemoveRangeCheck : Given an array index node, mark it as not needing a range check.
8000 //
8001 // Arguments:
8002 //    tree   -  Range check tree
8003 //    stmt   -  Statement the tree belongs to
8004
8005 void Compiler::optRemoveRangeCheck(GenTree* tree, GenTree* stmt)
8006 {
8007 #if !REARRANGE_ADDS
8008     noway_assert(!"can't remove range checks without REARRANGE_ADDS right now");
8009 #endif
8010
8011     noway_assert(stmt->gtOper == GT_STMT);
8012     noway_assert(tree->gtOper == GT_COMMA);
8013
8014     GenTree* bndsChkTree = tree->gtOp.gtOp1;
8015
8016     noway_assert(bndsChkTree->OperIsBoundsCheck());
8017
8018     GenTreeBoundsChk* bndsChk = tree->gtOp.gtOp1->AsBoundsChk();
8019
8020 #ifdef DEBUG
8021     if (verbose)
8022     {
8023         printf("Before optRemoveRangeCheck:\n");
8024         gtDispTree(tree);
8025     }
8026 #endif
8027
8028     GenTree* sideEffList = nullptr;
8029
8030     gtExtractSideEffList(bndsChkTree, &sideEffList, GTF_ASG);
8031
8032     // Decrement the ref counts for any LclVars that are being deleted
8033     //
8034     optRemoveTree(bndsChkTree, sideEffList);
8035
8036     // Just replace the bndsChk with a NOP as an operand to the GT_COMMA, if there are no side effects.
8037     tree->gtOp.gtOp1 = (sideEffList != nullptr) ? sideEffList : gtNewNothingNode();
8038     // TODO-CQ: We should also remove the GT_COMMA, but in any case we can no longer CSE the GT_COMMA.
8039     tree->gtFlags |= GTF_DONT_CSE;
8040
8041     gtUpdateSideEffects(stmt, tree);
8042
8043     /* Recalculate the gtCostSz, etc... */
8044     gtSetStmtInfo(stmt);
8045
8046     /* Re-thread the nodes if necessary */
8047     if (fgStmtListThreaded)
8048     {
8049         fgSetStmtSeq(stmt);
8050     }
8051
8052 #ifdef DEBUG
8053     if (verbose)
8054     {
8055         printf("After optRemoveRangeCheck:\n");
8056         gtDispTree(tree);
8057     }
8058 #endif
8059 }
8060
8061 /*****************************************************************************
8062  * Return the scale in an array reference, given a pointer to the
8063  * multiplication node.
8064  */
8065
8066 ssize_t Compiler::optGetArrayRefScaleAndIndex(GenTree* mul, GenTree** pIndex DEBUGARG(bool bRngChk))
8067 {
8068     assert(mul);
8069     assert(mul->gtOper == GT_MUL || mul->gtOper == GT_LSH);
8070     assert(mul->gtOp.gtOp2->IsCnsIntOrI());
8071
8072     ssize_t scale = mul->gtOp.gtOp2->gtIntConCommon.IconValue();
8073
8074     if (mul->gtOper == GT_LSH)
8075     {
8076         scale = ((ssize_t)1) << scale;
8077     }
8078
8079     GenTree* index = mul->gtOp.gtOp1;
8080
8081     if (index->gtOper == GT_MUL && index->gtOp.gtOp2->IsCnsIntOrI())
8082     {
8083         // case of two cascading multiplications for constant int (e.g.  * 20 morphed to * 5 * 4):
8084         // When index->gtOper is GT_MUL and index->gtOp.gtOp2->gtOper is GT_CNS_INT (i.e. * 5),
8085         //     we can bump up the scale from 4 to 5*4, and then change index to index->gtOp.gtOp1.
8086         // Otherwise, we cannot optimize it. We will simply keep the original scale and index.
8087         scale *= index->gtOp.gtOp2->gtIntConCommon.IconValue();
8088         index = index->gtOp.gtOp1;
8089     }
8090
8091     assert(!bRngChk || index->gtOper != GT_COMMA);
8092
8093     if (pIndex)
8094     {
8095         *pIndex = index;
8096     }
8097
8098     return scale;
8099 }
8100
8101 /*****************************************************************************
8102  * Find the last assignment to of the local variable in the block. Return
8103  * RHS or NULL. If any local variable in the RHS has been killed in
8104  * intervening code, return NULL. If the variable being searched for is killed
8105  * in the intervening code, return NULL.
8106  *
8107  */
8108
8109 GenTree* Compiler::optFindLocalInit(BasicBlock* block,
8110                                     GenTree*    local,
8111                                     VARSET_TP*  pKilledInOut,
8112                                     bool*       pLhsRhsKilledAfterInit)
8113 {
8114     assert(pKilledInOut);
8115     assert(pLhsRhsKilledAfterInit);
8116
8117     *pLhsRhsKilledAfterInit = false;
8118
8119     unsigned LclNum = local->gtLclVarCommon.gtLclNum;
8120
8121     GenTree* list = block->bbTreeList;
8122     if (list == nullptr)
8123     {
8124         return nullptr;
8125     }
8126
8127     GenTree* rhs  = nullptr;
8128     GenTree* stmt = list;
8129     do
8130     {
8131         stmt = stmt->gtPrev;
8132         if (stmt == nullptr)
8133         {
8134             break;
8135         }
8136
8137         GenTree* tree = stmt->gtStmt.gtStmtExpr;
8138         // If we encounter an assignment to a local variable,
8139         if (tree->OperIsAssignment() && tree->gtOp.gtOp1->gtOper == GT_LCL_VAR)
8140         {
8141             // And the assigned variable equals the input local,
8142             if (tree->gtOp.gtOp1->gtLclVarCommon.gtLclNum == LclNum)
8143             {
8144                 // If the assignment is '=' and it is not a conditional, then return rhs.
8145                 if (tree->gtOper == GT_ASG && !(tree->gtFlags & GTF_COLON_COND))
8146                 {
8147                     rhs = tree->gtOp.gtOp2;
8148                 }
8149                 // If the assignment is 'op=' or a conditional equal, then the search ends here,
8150                 // as we found a kill to the input local.
8151                 else
8152                 {
8153                     *pLhsRhsKilledAfterInit = true;
8154                     assert(rhs == nullptr);
8155                 }
8156                 break;
8157             }
8158             else
8159             {
8160                 LclVarDsc* varDsc = optIsTrackedLocal(tree->gtOp.gtOp1);
8161                 if (varDsc == nullptr)
8162                 {
8163                     return nullptr;
8164                 }
8165                 VarSetOps::AddElemD(this, *pKilledInOut, varDsc->lvVarIndex);
8166             }
8167         }
8168     } while (stmt != list);
8169
8170     if (rhs == nullptr)
8171     {
8172         return nullptr;
8173     }
8174
8175     // If any local in the RHS is killed in intervening code, or RHS has an indirection, return NULL.
8176     varRefKinds rhsRefs = VR_NONE;
8177     VARSET_TP   rhsLocals(VarSetOps::UninitVal());
8178     bool        b = lvaLclVarRefs(rhs, nullptr, &rhsRefs, &rhsLocals);
8179     if (!b || !VarSetOps::IsEmptyIntersection(this, rhsLocals, *pKilledInOut) || (rhsRefs != VR_NONE))
8180     {
8181         // If RHS has been indirectly referenced, consider it a write and a kill.
8182         *pLhsRhsKilledAfterInit = true;
8183         return nullptr;
8184     }
8185
8186     return rhs;
8187 }
8188
8189 //------------------------------------------------------------------------------
8190 // optObtainLoopCloningOpts: Identify optimization candidates and update
8191 //      the "context" for array optimizations.
8192 //
8193 // Arguments:
8194 //     context     -  data structure where all loop cloning info is kept. The
8195 //                    optInfo fields of the context are updated with the
8196 //                    identified optimization candidates.
8197 //
8198 void Compiler::optObtainLoopCloningOpts(LoopCloneContext* context)
8199 {
8200     for (unsigned i = 0; i < optLoopCount; i++)
8201     {
8202         JITDUMP("Considering loop %d to clone for optimizations.\n", i);
8203         if (optIsLoopClonable(i))
8204         {
8205             if (!(optLoopTable[i].lpFlags & LPFLG_REMOVED))
8206             {
8207                 optIdentifyLoopOptInfo(i, context);
8208             }
8209         }
8210         JITDUMP("------------------------------------------------------------\n");
8211     }
8212     JITDUMP("\n");
8213 }
8214
8215 //------------------------------------------------------------------------
8216 // optIdentifyLoopOptInfo: Identify loop optimization candidates an also
8217 //      check if the loop is suitable for the optimizations performed.
8218 //
8219 // Arguments:
8220 //     loopNum     -  the current loop index for which conditions are derived.
8221 //     context     -  data structure where all loop cloning candidates will be
8222 //                    updated.
8223 //
8224 // Return Value:
8225 //     If the loop is not suitable for the optimizations, return false - context
8226 //     should not contain any optimization candidate for the loop if false.
8227 //     Else return true.
8228 //
8229 // Operation:
8230 //      Check if the loop is well formed for this optimization and identify the
8231 //      optimization candidates and update the "context" parameter with all the
8232 //      contextual information necessary to perform the optimization later.
8233 //
8234 bool Compiler::optIdentifyLoopOptInfo(unsigned loopNum, LoopCloneContext* context)
8235 {
8236     noway_assert(loopNum < optLoopCount);
8237
8238     LoopDsc* pLoop = &optLoopTable[loopNum];
8239
8240     if (!(pLoop->lpFlags & LPFLG_ITER))
8241     {
8242         JITDUMP("> No iter flag on loop %d.\n", loopNum);
8243         return false;
8244     }
8245
8246     unsigned ivLclNum = pLoop->lpIterVar();
8247     if (lvaVarAddrExposed(ivLclNum))
8248     {
8249         JITDUMP("> Rejected V%02u as iter var because is address-exposed.\n", ivLclNum);
8250         return false;
8251     }
8252
8253     BasicBlock* head = pLoop->lpHead;
8254     BasicBlock* end  = pLoop->lpBottom;
8255     BasicBlock* beg  = head->bbNext;
8256
8257     if (end->bbJumpKind != BBJ_COND)
8258     {
8259         JITDUMP("> Couldn't find termination test.\n");
8260         return false;
8261     }
8262
8263     if (end->bbJumpDest != beg)
8264     {
8265         JITDUMP("> Branch at loop 'end' not looping to 'begin'.\n");
8266         return false;
8267     }
8268
8269     // TODO-CQ: CLONE: Mark increasing or decreasing loops.
8270     if ((
8271 #ifdef LEGACY_BACKEND
8272             pLoop->lpIterOper() != GT_ASG_ADD &&
8273 #endif
8274             pLoop->lpIterOper() != GT_ADD) ||
8275         (pLoop->lpIterConst() != 1))
8276     {
8277         JITDUMP("> Loop iteration operator not matching\n");
8278         return false;
8279     }
8280
8281     if ((pLoop->lpFlags & LPFLG_CONST_LIMIT) == 0 && (pLoop->lpFlags & LPFLG_VAR_LIMIT) == 0 &&
8282         (pLoop->lpFlags & LPFLG_ARRLEN_LIMIT) == 0)
8283     {
8284         JITDUMP("> Loop limit is neither constant, variable or array length\n");
8285         return false;
8286     }
8287
8288     if (!(((pLoop->lpTestOper() == GT_LT || pLoop->lpTestOper() == GT_LE) && (pLoop->lpIterOper() == GT_ADD
8289 #ifdef LEGACY_BACKEND
8290                                                                               || pLoop->lpIterOper() == GT_ASG_ADD
8291 #endif
8292                                                                               )) ||
8293           ((pLoop->lpTestOper() == GT_GT || pLoop->lpTestOper() == GT_GE) && (pLoop->lpIterOper() == GT_SUB
8294 #ifdef LEGACY_BACKEND
8295                                                                               || pLoop->lpIterOper() == GT_ASG_SUB
8296 #endif
8297                                                                               ))))
8298     {
8299         JITDUMP("> Loop test (%s) doesn't agree with the direction (%s) of the pLoop->\n",
8300                 GenTree::OpName(pLoop->lpTestOper()), GenTree::OpName(pLoop->lpIterOper()));
8301         return false;
8302     }
8303
8304     if (!(pLoop->lpTestTree->OperKind() & GTK_RELOP) || !(pLoop->lpTestTree->gtFlags & GTF_RELOP_ZTT))
8305     {
8306         JITDUMP("> Loop inversion NOT present, loop test [%06u] may not protect entry from head.\n",
8307                 pLoop->lpTestTree->gtTreeID);
8308         return false;
8309     }
8310
8311 #ifdef DEBUG
8312     GenTree* op1 = pLoop->lpIterator();
8313     noway_assert((op1->gtOper == GT_LCL_VAR) && (op1->gtLclVarCommon.gtLclNum == ivLclNum));
8314 #endif
8315
8316     JITDUMP("Checking blocks BB%02d..BB%02d for optimization candidates\n", beg->bbNum,
8317             end->bbNext ? end->bbNext->bbNum : 0);
8318
8319     LoopCloneVisitorInfo info(context, loopNum, nullptr);
8320     for (BasicBlock* block = beg; block != end->bbNext; block = block->bbNext)
8321     {
8322         compCurBB = block;
8323         for (GenTree* stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
8324         {
8325             info.stmt               = stmt;
8326             const bool lclVarsOnly  = false;
8327             const bool computeStack = false;
8328             fgWalkTreePre(&stmt->gtStmt.gtStmtExpr, optCanOptimizeByLoopCloningVisitor, &info, lclVarsOnly,
8329                           computeStack);
8330         }
8331     }
8332
8333     return true;
8334 }
8335
8336 //---------------------------------------------------------------------------------------------------------------
8337 //  optExtractArrIndex: Try to extract the array index from "tree".
8338 //
8339 //  Arguments:
8340 //      tree        the tree to be checked if it is the array [] operation.
8341 //      result      the extracted GT_INDEX information is updated in result.
8342 //      lhsNum      for the root level (function is recursive) callers should be BAD_VAR_NUM.
8343 //
8344 //  Return Value:
8345 //      Returns true if array index can be extracted, else, return false. See assumption about
8346 //      what will be extracted. The "result" variable's rank parameter is advanced for every
8347 //      dimension of [] encountered.
8348 //
8349 //  Operation:
8350 //      Given a "tree" extract the GT_INDEX node in "result" as ArrIndex. In FlowGraph morph
8351 //      we have converted a GT_INDEX tree into a scaled index base offset expression. We need
8352 //      to reconstruct this to be able to know if this is an array access.
8353 //
8354 //  Assumption:
8355 //      The method extracts only if the array base and indices are GT_LCL_VAR.
8356 //
8357 //  TODO-CQ: CLONE: After morph make sure this method extracts values before morph.
8358 //
8359 //    [000000001AF828D8] ---XG-------                     indir     int
8360 //    [000000001AF872C8] ------------                           const     long   16 Fseq[#FirstElem]
8361 //    [000000001AF87340] ------------                        +         byref
8362 //    [000000001AF87160] -------N----                                 const     long   2
8363 //    [000000001AF871D8] ------------                              <<        long
8364 //    [000000001AF870C0] ------------                                 cast      long <- int
8365 //    [000000001AF86F30] i-----------                                    lclVar    int    V04 loc0
8366 //    [000000001AF87250] ------------                           +         byref
8367 //    [000000001AF86EB8] ------------                              lclVar    ref    V01 arg1
8368 //    [000000001AF87468] ---XG-------                  comma     int
8369 //    [000000001AF87020] ---X--------                     arrBndsChk void
8370 //    [000000001AF86FA8] ---X--------                        arrLen    int
8371 //    [000000001AF827E8] ------------                           lclVar    ref    V01 arg1
8372 //    [000000001AF82860] ------------                        lclVar    int    V04 loc0
8373 //    [000000001AF829F0] -A-XG-------               =         int
8374 //    [000000001AF82978] D------N----                  lclVar    int    V06 tmp0
8375 //
8376 bool Compiler::optExtractArrIndex(GenTree* tree, ArrIndex* result, unsigned lhsNum)
8377 {
8378     if (tree->gtOper != GT_COMMA)
8379     {
8380         return false;
8381     }
8382     GenTree* before = tree->gtGetOp1();
8383     if (before->gtOper != GT_ARR_BOUNDS_CHECK)
8384     {
8385         return false;
8386     }
8387     GenTreeBoundsChk* arrBndsChk = before->AsBoundsChk();
8388     if (arrBndsChk->gtIndex->gtOper != GT_LCL_VAR)
8389     {
8390         return false;
8391     }
8392
8393     // For span we may see gtArrLen is a local var or local field.
8394     // We won't try and extract those.
8395     const genTreeOps arrayOp = arrBndsChk->gtArrLen->gtOper;
8396
8397     if ((arrayOp == GT_LCL_VAR) || (arrayOp == GT_LCL_FLD))
8398     {
8399         return false;
8400     }
8401     if (arrBndsChk->gtArrLen->gtGetOp1()->gtOper != GT_LCL_VAR)
8402     {
8403         return false;
8404     }
8405     unsigned arrLcl = arrBndsChk->gtArrLen->gtGetOp1()->gtLclVarCommon.gtLclNum;
8406     if (lhsNum != BAD_VAR_NUM && arrLcl != lhsNum)
8407     {
8408         return false;
8409     }
8410
8411     unsigned indLcl = arrBndsChk->gtIndex->gtLclVarCommon.gtLclNum;
8412
8413     GenTree* after = tree->gtGetOp2();
8414
8415     if (after->gtOper != GT_IND)
8416     {
8417         return false;
8418     }
8419     // It used to be the case that arrBndsChks for struct types would fail the previous check because
8420     // after->gtOper was an address (for a block op).  In order to avoid asmDiffs we will for now
8421     // return false if the type of 'after' is a struct type.  (This was causing us to clone loops
8422     // that we were not previously cloning.)
8423     // TODO-1stClassStructs: Remove this check to enable optimization of array bounds checks for struct
8424     // types.
8425     if (varTypeIsStruct(after))
8426     {
8427         return false;
8428     }
8429
8430     GenTree* sibo = after->gtGetOp1();
8431     if (sibo->gtOper != GT_ADD)
8432     {
8433         return false;
8434     }
8435     GenTree* sib = sibo->gtGetOp1();
8436     GenTree* ofs = sibo->gtGetOp2();
8437     if (ofs->gtOper != GT_CNS_INT)
8438     {
8439         return false;
8440     }
8441     if (sib->gtOper != GT_ADD)
8442     {
8443         return false;
8444     }
8445     GenTree* si   = sib->gtGetOp2();
8446     GenTree* base = sib->gtGetOp1();
8447     if (si->gtOper != GT_LSH)
8448     {
8449         return false;
8450     }
8451     if (base->OperGet() != GT_LCL_VAR || base->gtLclVarCommon.gtLclNum != arrLcl)
8452     {
8453         return false;
8454     }
8455     GenTree* scale = si->gtGetOp2();
8456     GenTree* index = si->gtGetOp1();
8457     if (scale->gtOper != GT_CNS_INT)
8458     {
8459         return false;
8460     }
8461 #ifdef _TARGET_AMD64_
8462     if (index->gtOper != GT_CAST)
8463     {
8464         return false;
8465     }
8466     GenTree* indexVar = index->gtGetOp1();
8467 #else
8468     GenTree* indexVar = index;
8469 #endif
8470     if (indexVar->gtOper != GT_LCL_VAR || indexVar->gtLclVarCommon.gtLclNum != indLcl)
8471     {
8472         return false;
8473     }
8474     if (lhsNum == BAD_VAR_NUM)
8475     {
8476         result->arrLcl = arrLcl;
8477     }
8478     result->indLcls.Push(indLcl);
8479     result->bndsChks.Push(tree);
8480     result->useBlock = compCurBB;
8481     result->rank++;
8482
8483     return true;
8484 }
8485
8486 //---------------------------------------------------------------------------------------------------------------
8487 //  optReconstructArrIndex: Reconstruct array index.
8488 //
8489 //  Arguments:
8490 //      tree        the tree to be checked if it is an array [][][] operation.
8491 //      result      the extracted GT_INDEX information.
8492 //      lhsNum      for the root level (function is recursive) callers should be BAD_VAR_NUM.
8493 //
8494 //  Return Value:
8495 //      Returns true if array index can be extracted, else, return false. "rank" field in
8496 //      "result" contains the array access depth. The "indLcls" fields contain the indices.
8497 //
8498 //  Operation:
8499 //      Recursively look for a list of array indices. In the example below, we encounter,
8500 //      V03 = ((V05 = V00[V01]), (V05[V02])) which corresponds to access of V00[V01][V02]
8501 //      The return value would then be:
8502 //      ArrIndex result { arrLcl: V00, indLcls: [V01, V02], rank: 2 }
8503 //
8504 //      V00[V01][V02] would be morphed as:
8505 //
8506 //      [000000001B366848] ---XG-------                        indir     int
8507 //      [000000001B36BC50] ------------                                 V05 + (V02 << 2) + 16
8508 //      [000000001B36C200] ---XG-------                     comma     int
8509 //      [000000001B36BDB8] ---X--------                        arrBndsChk(V05, V02)
8510 //      [000000001B36C278] -A-XG-------                  comma     int
8511 //      [000000001B366730] R--XG-------                           indir     ref
8512 //      [000000001B36C2F0] ------------                             V00 + (V01 << 3) + 24
8513 //      [000000001B36C818] ---XG-------                        comma     ref
8514 //      [000000001B36C458] ---X--------                           arrBndsChk(V00, V01)
8515 //      [000000001B36BB60] -A-XG-------                     =         ref
8516 //      [000000001B36BAE8] D------N----                        lclVar    ref    V05 tmp2
8517 //      [000000001B36A668] -A-XG-------               =         int
8518 //      [000000001B36A5F0] D------N----                  lclVar    int    V03 tmp0
8519 //
8520 //  Assumption:
8521 //      The method extracts only if the array base and indices are GT_LCL_VAR.
8522 //
8523 bool Compiler::optReconstructArrIndex(GenTree* tree, ArrIndex* result, unsigned lhsNum)
8524 {
8525     // If we can extract "tree" (which is a top level comma) return.
8526     if (optExtractArrIndex(tree, result, lhsNum))
8527     {
8528         return true;
8529     }
8530     // We have a comma (check if array base expr is computed in "before"), descend further.
8531     else if (tree->OperGet() == GT_COMMA)
8532     {
8533         GenTree* before = tree->gtGetOp1();
8534         // "before" should evaluate an array base for the "after" indexing.
8535         if (before->OperGet() != GT_ASG)
8536         {
8537             return false;
8538         }
8539         GenTree* lhs = before->gtGetOp1();
8540         GenTree* rhs = before->gtGetOp2();
8541
8542         // "rhs" should contain an GT_INDEX
8543         if (!lhs->IsLocal() || !optReconstructArrIndex(rhs, result, lhsNum))
8544         {
8545             return false;
8546         }
8547         unsigned lhsNum = lhs->gtLclVarCommon.gtLclNum;
8548         GenTree* after  = tree->gtGetOp2();
8549         // Pass the "lhsNum", so we can verify if indeed it is used as the array base.
8550         return optExtractArrIndex(after, result, lhsNum);
8551     }
8552     return false;
8553 }
8554
8555 /* static */
8556 Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloningVisitor(GenTree** pTree, Compiler::fgWalkData* data)
8557 {
8558     return data->compiler->optCanOptimizeByLoopCloning(*pTree, (LoopCloneVisitorInfo*)data->pCallbackData);
8559 }
8560
8561 //-------------------------------------------------------------------------
8562 //  optIsStackLocalInvariant: Is stack local invariant in loop.
8563 //
8564 //  Arguments:
8565 //      loopNum      The loop in which the variable is tested for invariance.
8566 //      lclNum       The local that is tested for invariance in the loop.
8567 //
8568 //  Return Value:
8569 //      Returns true if the variable is loop invariant in loopNum.
8570 //
8571 bool Compiler::optIsStackLocalInvariant(unsigned loopNum, unsigned lclNum)
8572 {
8573     if (lvaVarAddrExposed(lclNum))
8574     {
8575         return false;
8576     }
8577     if (optIsVarAssgLoop(loopNum, lclNum))
8578     {
8579         return false;
8580     }
8581     return true;
8582 }
8583
8584 //----------------------------------------------------------------------------------------------
8585 //  optCanOptimizeByLoopCloning: Check if the tree can be optimized by loop cloning and if so,
8586 //      identify as potential candidate and update the loop context.
8587 //
8588 //  Arguments:
8589 //      tree         The tree encountered during the tree walk.
8590 //      info         Supplies information about the current block or stmt in which the tree is.
8591 //                   Also supplies the "context" pointer for updating with loop cloning
8592 //                   candidates. Also supplies loopNum.
8593 //
8594 //  Operation:
8595 //      If array index can be reconstructed, check if the iter var of the loop matches the
8596 //      array index var in some dim. Also ensure other index vars before the identified
8597 //      dim are loop invariant.
8598 //
8599 //  Return Value:
8600 //      Skip sub trees if the optimization candidate is identified or else continue walking
8601 //
8602 Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTree* tree, LoopCloneVisitorInfo* info)
8603 {
8604     ArrIndex arrIndex(getAllocator());
8605
8606     // Check if array index can be optimized.
8607     if (optReconstructArrIndex(tree, &arrIndex, BAD_VAR_NUM))
8608     {
8609         assert(tree->gtOper == GT_COMMA);
8610 #ifdef DEBUG
8611         if (verbose)
8612         {
8613             JITDUMP("Found ArrIndex at tree ");
8614             printTreeID(tree);
8615             printf(" which is equivalent to: ");
8616             arrIndex.Print();
8617             JITDUMP("\n");
8618         }
8619 #endif
8620         if (!optIsStackLocalInvariant(info->loopNum, arrIndex.arrLcl))
8621         {
8622             return WALK_SKIP_SUBTREES;
8623         }
8624
8625         // Walk the dimensions and see if iterVar of the loop is used as index.
8626         for (unsigned dim = 0; dim < arrIndex.rank; ++dim)
8627         {
8628             // Is index variable also used as the loop iter var.
8629             if (arrIndex.indLcls[dim] == optLoopTable[info->loopNum].lpIterVar())
8630             {
8631                 // Check the previous indices are all loop invariant.
8632                 for (unsigned dim2 = 0; dim2 < dim; ++dim2)
8633                 {
8634                     if (optIsVarAssgLoop(info->loopNum, arrIndex.indLcls[dim2]))
8635                     {
8636                         JITDUMP("V%02d is assigned in loop\n", arrIndex.indLcls[dim2]);
8637                         return WALK_SKIP_SUBTREES;
8638                     }
8639                 }
8640 #ifdef DEBUG
8641                 if (verbose)
8642                 {
8643                     JITDUMP("Loop %d can be cloned for ArrIndex ", info->loopNum);
8644                     arrIndex.Print();
8645                     JITDUMP(" on dim %d\n", dim);
8646                 }
8647 #endif
8648                 // Update the loop context.
8649                 info->context->EnsureLoopOptInfo(info->loopNum)
8650                     ->Push(new (this, CMK_LoopOpt) LcJaggedArrayOptInfo(arrIndex, dim, info->stmt));
8651             }
8652             else
8653             {
8654                 JITDUMP("Induction V%02d is not used as index on dim %d\n", optLoopTable[info->loopNum].lpIterVar(),
8655                         dim);
8656             }
8657         }
8658         return WALK_SKIP_SUBTREES;
8659     }
8660     else if (tree->gtOper == GT_ARR_ELEM)
8661     {
8662         // TODO-CQ: CLONE: Implement.
8663         return WALK_SKIP_SUBTREES;
8664     }
8665     return WALK_CONTINUE;
8666 }
8667
8668 struct optRangeCheckDsc
8669 {
8670     Compiler* pCompiler;
8671     bool      bValidIndex;
8672 };
8673 /*
8674     Walk to make sure that only locals and constants are contained in the index
8675     for a range check
8676 */
8677 Compiler::fgWalkResult Compiler::optValidRangeCheckIndex(GenTree** pTree, fgWalkData* data)
8678 {
8679     GenTree*          tree  = *pTree;
8680     optRangeCheckDsc* pData = (optRangeCheckDsc*)data->pCallbackData;
8681
8682     if (tree->gtOper == GT_IND || tree->gtOper == GT_CLS_VAR || tree->gtOper == GT_FIELD || tree->gtOper == GT_LCL_FLD)
8683     {
8684         pData->bValidIndex = false;
8685         return WALK_ABORT;
8686     }
8687
8688     if (tree->gtOper == GT_LCL_VAR)
8689     {
8690         if (pData->pCompiler->lvaTable[tree->gtLclVarCommon.gtLclNum].lvAddrExposed)
8691         {
8692             pData->bValidIndex = false;
8693             return WALK_ABORT;
8694         }
8695     }
8696
8697     return WALK_CONTINUE;
8698 }
8699
8700 /*
8701     returns true if a range check can legally be removed (for the moment it checks
8702     that the array is a local array (non subject to racing conditions) and that the
8703     index is either a constant or a local
8704 */
8705 bool Compiler::optIsRangeCheckRemovable(GenTree* tree)
8706 {
8707     noway_assert(tree->gtOper == GT_ARR_BOUNDS_CHECK);
8708     GenTreeBoundsChk* bndsChk = tree->AsBoundsChk();
8709     GenTree*          pArray  = bndsChk->GetArray();
8710     if (pArray == nullptr && !bndsChk->gtArrLen->IsCnsIntOrI())
8711     {
8712         return false;
8713     }
8714     GenTree* pIndex = bndsChk->gtIndex;
8715
8716     // The length must be a constant (the pArray == NULL case) or the array reference must be a local.
8717     // Otherwise we can be targeted by malicious race-conditions.
8718     if (pArray != nullptr)
8719     {
8720         if (pArray->gtOper != GT_LCL_VAR)
8721         {
8722
8723 #ifdef DEBUG
8724             if (verbose)
8725             {
8726                 printf("Can't remove range check if the array isn't referenced with a local\n");
8727                 gtDispTree(pArray);
8728             }
8729 #endif
8730             return false;
8731         }
8732         else
8733         {
8734             noway_assert(pArray->gtType == TYP_REF);
8735             noway_assert(pArray->gtLclVarCommon.gtLclNum < lvaCount);
8736
8737             if (lvaTable[pArray->gtLclVarCommon.gtLclNum].lvAddrExposed)
8738             {
8739                 // If the array address has been taken, don't do the optimization
8740                 // (this restriction can be lowered a bit, but i don't think it's worth it)
8741                 CLANG_FORMAT_COMMENT_ANCHOR;
8742 #ifdef DEBUG
8743                 if (verbose)
8744                 {
8745                     printf("Can't remove range check if the array has its address taken\n");
8746                     gtDispTree(pArray);
8747                 }
8748 #endif
8749                 return false;
8750             }
8751         }
8752     }
8753
8754     optRangeCheckDsc Data;
8755     Data.pCompiler   = this;
8756     Data.bValidIndex = true;
8757
8758     fgWalkTreePre(&pIndex, optValidRangeCheckIndex, &Data);
8759
8760     if (!Data.bValidIndex)
8761     {
8762 #ifdef DEBUG
8763         if (verbose)
8764         {
8765             printf("Can't remove range check with this index");
8766             gtDispTree(pIndex);
8767         }
8768 #endif
8769
8770         return false;
8771     }
8772
8773     return true;
8774 }
8775
8776 /******************************************************************************
8777  *
8778  * Replace x==null with (x|x)==0 if x is a GC-type.
8779  * This will stress code-gen and the emitter to make sure they support such trees.
8780  */
8781
8782 #ifdef DEBUG
8783
8784 void Compiler::optOptimizeBoolsGcStress(BasicBlock* condBlock)
8785 {
8786     if (!compStressCompile(STRESS_OPT_BOOLS_GC, 20))
8787     {
8788         return;
8789     }
8790
8791     noway_assert(condBlock->bbJumpKind == BBJ_COND);
8792     GenTree* condStmt = condBlock->bbTreeList->gtPrev->gtStmt.gtStmtExpr;
8793
8794     noway_assert(condStmt->gtOper == GT_JTRUE);
8795
8796     bool     isBool;
8797     GenTree* relop;
8798
8799     GenTree* comparand = optIsBoolCond(condStmt, &relop, &isBool);
8800
8801     if (comparand == nullptr || !varTypeIsGC(comparand->TypeGet()))
8802     {
8803         return;
8804     }
8805
8806     if (comparand->gtFlags & (GTF_ASG | GTF_CALL | GTF_ORDER_SIDEEFF))
8807     {
8808         return;
8809     }
8810
8811     GenTree* comparandClone = gtCloneExpr(comparand);
8812
8813     // Bump up the ref-counts of any variables in 'comparandClone'
8814     compCurBB = condBlock;
8815     IncLclVarRefCountsVisitor::WalkTree(this, comparandClone);
8816
8817     noway_assert(relop->gtOp.gtOp1 == comparand);
8818     genTreeOps oper   = compStressCompile(STRESS_OPT_BOOLS_GC, 50) ? GT_OR : GT_AND;
8819     relop->gtOp.gtOp1 = gtNewOperNode(oper, TYP_I_IMPL, comparand, comparandClone);
8820
8821     // Comparand type is already checked, and we have const int, there is no harm
8822     // morphing it into a TYP_I_IMPL.
8823     noway_assert(relop->gtOp.gtOp2->gtOper == GT_CNS_INT);
8824     relop->gtOp.gtOp2->gtType = TYP_I_IMPL;
8825 }
8826
8827 #endif
8828
8829 /******************************************************************************
8830  * Function used by folding of boolean conditionals
8831  * Given a GT_JTRUE node, checks that it is a boolean comparison of the form
8832  *    "if (boolVal ==/!=  0/1)". This is translated into a GT_EQ node with "op1"
8833  *    being a boolean lclVar and "op2" the const 0/1.
8834  * On success, the comparand (ie. boolVal) is returned.   Else NULL.
8835  * compPtr returns the compare node (i.e. GT_EQ or GT_NE node)
8836  * boolPtr returns whether the comparand is a boolean value (must be 0 or 1).
8837  * When return boolPtr equal to true, if the comparison was against a 1 (i.e true)
8838  * value then we morph the tree by reversing the GT_EQ/GT_NE and change the 1 to 0.
8839  */
8840
8841 GenTree* Compiler::optIsBoolCond(GenTree* condBranch, GenTree** compPtr, bool* boolPtr)
8842 {
8843     bool isBool = false;
8844
8845     noway_assert(condBranch->gtOper == GT_JTRUE);
8846     GenTree* cond = condBranch->gtOp.gtOp1;
8847
8848     /* The condition must be "!= 0" or "== 0" */
8849
8850     if ((cond->gtOper != GT_EQ) && (cond->gtOper != GT_NE))
8851     {
8852         return nullptr;
8853     }
8854
8855     /* Return the compare node to the caller */
8856
8857     *compPtr = cond;
8858
8859     /* Get hold of the comparands */
8860
8861     GenTree* opr1 = cond->gtOp.gtOp1;
8862     GenTree* opr2 = cond->gtOp.gtOp2;
8863
8864     if (opr2->gtOper != GT_CNS_INT)
8865     {
8866         return nullptr;
8867     }
8868
8869     if (!opr2->IsIntegralConst(0) && !opr2->IsIntegralConst(1))
8870     {
8871         return nullptr;
8872     }
8873
8874     ssize_t ival2 = opr2->gtIntCon.gtIconVal;
8875
8876     /* Is the value a boolean?
8877      * We can either have a boolean expression (marked GTF_BOOLEAN) or
8878      * a local variable that is marked as being boolean (lvIsBoolean) */
8879
8880     if (opr1->gtFlags & GTF_BOOLEAN)
8881     {
8882         isBool = true;
8883     }
8884     else if ((opr1->gtOper == GT_CNS_INT) && (opr1->IsIntegralConst(0) || opr1->IsIntegralConst(1)))
8885     {
8886         isBool = true;
8887     }
8888     else if (opr1->gtOper == GT_LCL_VAR)
8889     {
8890         /* is it a boolean local variable */
8891
8892         unsigned lclNum = opr1->gtLclVarCommon.gtLclNum;
8893         noway_assert(lclNum < lvaCount);
8894
8895         if (lvaTable[lclNum].lvIsBoolean)
8896         {
8897             isBool = true;
8898         }
8899     }
8900
8901     /* Was our comparison against the constant 1 (i.e. true) */
8902     if (ival2 == 1)
8903     {
8904         // If this is a boolean expression tree we can reverse the relop
8905         // and change the true to false.
8906         if (isBool)
8907         {
8908             gtReverseCond(cond);
8909             opr2->gtIntCon.gtIconVal = 0;
8910         }
8911         else
8912         {
8913             return nullptr;
8914         }
8915     }
8916
8917     *boolPtr = isBool;
8918     return opr1;
8919 }
8920
8921 void Compiler::optOptimizeBools()
8922 {
8923 #ifdef DEBUG
8924     if (verbose)
8925     {
8926         printf("*************** In optOptimizeBools()\n");
8927         if (verboseTrees)
8928         {
8929             printf("Blocks/Trees before phase\n");
8930             fgDispBasicBlocks(true);
8931         }
8932     }
8933 #endif
8934     bool change;
8935
8936     do
8937     {
8938         change = false;
8939
8940         for (BasicBlock* b1 = fgFirstBB; b1; b1 = b1->bbNext)
8941         {
8942             /* We're only interested in conditional jumps here */
8943
8944             if (b1->bbJumpKind != BBJ_COND)
8945             {
8946                 continue;
8947             }
8948
8949             /* If there is no next block, we're done */
8950
8951             BasicBlock* b2 = b1->bbNext;
8952             if (!b2)
8953             {
8954                 break;
8955             }
8956
8957             /* The next block must not be marked as BBF_DONT_REMOVE */
8958             if (b2->bbFlags & BBF_DONT_REMOVE)
8959             {
8960                 continue;
8961             }
8962
8963             /* The next block also needs to be a condition */
8964
8965             if (b2->bbJumpKind != BBJ_COND)
8966             {
8967 #ifdef DEBUG
8968                 optOptimizeBoolsGcStress(b1);
8969 #endif
8970                 continue;
8971             }
8972
8973             bool sameTarget; // Do b1 and b2 have the same bbJumpDest?
8974
8975             if (b1->bbJumpDest == b2->bbJumpDest)
8976             {
8977                 /* Given the following sequence of blocks :
8978                         B1: brtrue(t1, BX)
8979                         B2: brtrue(t2, BX)
8980                         B3:
8981                    we wil try to fold it to :
8982                         B1: brtrue(t1|t2, BX)
8983                         B3:
8984                 */
8985
8986                 sameTarget = true;
8987             }
8988             else if (b1->bbJumpDest == b2->bbNext) /*b1->bbJumpDest->bbNum == n1+2*/
8989             {
8990                 /* Given the following sequence of blocks :
8991                         B1: brtrue(t1, B3)
8992                         B2: brtrue(t2, BX)
8993                         B3:
8994                    we will try to fold it to :
8995                         B1: brtrue((!t1)&&t2, BX)
8996                         B3:
8997                 */
8998
8999                 sameTarget = false;
9000             }
9001             else
9002             {
9003                 continue;
9004             }
9005
9006             /* The second block must contain a single statement */
9007
9008             GenTree* s2 = b2->bbTreeList;
9009             if (s2->gtPrev != s2)
9010             {
9011                 continue;
9012             }
9013
9014             noway_assert(s2->gtOper == GT_STMT);
9015             GenTree* t2 = s2->gtStmt.gtStmtExpr;
9016             noway_assert(t2->gtOper == GT_JTRUE);
9017
9018             /* Find the condition for the first block */
9019
9020             GenTree* s1 = b1->bbTreeList->gtPrev;
9021
9022             noway_assert(s1->gtOper == GT_STMT);
9023             GenTree* t1 = s1->gtStmt.gtStmtExpr;
9024             noway_assert(t1->gtOper == GT_JTRUE);
9025
9026             if (b2->countOfInEdges() > 1)
9027             {
9028                 continue;
9029             }
9030
9031             /* Find the branch conditions of b1 and b2 */
9032
9033             bool bool1, bool2;
9034
9035             GenTree* c1 = optIsBoolCond(t1, &t1, &bool1);
9036             if (!c1)
9037             {
9038                 continue;
9039             }
9040
9041             GenTree* c2 = optIsBoolCond(t2, &t2, &bool2);
9042             if (!c2)
9043             {
9044                 continue;
9045             }
9046
9047             noway_assert(t1->gtOper == GT_EQ || t1->gtOper == GT_NE && t1->gtOp.gtOp1 == c1);
9048             noway_assert(t2->gtOper == GT_EQ || t2->gtOper == GT_NE && t2->gtOp.gtOp1 == c2);
9049
9050             // Leave out floats where the bit-representation is more complicated
9051             // - there are two representations for 0.
9052             //
9053             if (varTypeIsFloating(c1->TypeGet()) || varTypeIsFloating(c2->TypeGet()))
9054             {
9055                 continue;
9056             }
9057
9058             // Make sure the types involved are of the same sizes
9059             if (genTypeSize(c1->TypeGet()) != genTypeSize(c2->TypeGet()))
9060             {
9061                 continue;
9062             }
9063             if (genTypeSize(t1->TypeGet()) != genTypeSize(t2->TypeGet()))
9064             {
9065                 continue;
9066             }
9067 #ifdef _TARGET_ARMARCH_
9068             // Skip the small operand which we cannot encode.
9069             if (varTypeIsSmall(c1->TypeGet()))
9070                 continue;
9071 #endif
9072             /* The second condition must not contain side effects */
9073
9074             if (c2->gtFlags & GTF_GLOB_EFFECT)
9075             {
9076                 continue;
9077             }
9078
9079             /* The second condition must not be too expensive */
9080
9081             gtPrepareCost(c2);
9082
9083             if (c2->gtCostEx > 12)
9084             {
9085                 continue;
9086             }
9087
9088             genTreeOps foldOp;
9089             genTreeOps cmpOp;
9090             var_types  foldType = c1->TypeGet();
9091             if (varTypeIsGC(foldType))
9092             {
9093                 foldType = TYP_I_IMPL;
9094             }
9095
9096             if (sameTarget)
9097             {
9098                 /* Both conditions must be the same */
9099
9100                 if (t1->gtOper != t2->gtOper)
9101                 {
9102                     continue;
9103                 }
9104
9105                 if (t1->gtOper == GT_EQ)
9106                 {
9107                     /* t1:c1==0 t2:c2==0 ==> Branch to BX if either value is 0
9108                        So we will branch to BX if (c1&c2)==0 */
9109
9110                     foldOp = GT_AND;
9111                     cmpOp  = GT_EQ;
9112                 }
9113                 else
9114                 {
9115                     /* t1:c1!=0 t2:c2!=0 ==> Branch to BX if either value is non-0
9116                        So we will branch to BX if (c1|c2)!=0 */
9117
9118                     foldOp = GT_OR;
9119                     cmpOp  = GT_NE;
9120                 }
9121             }
9122             else
9123             {
9124                 /* The b1 condition must be the reverse of the b2 condition */
9125
9126                 if (t1->gtOper == t2->gtOper)
9127                 {
9128                     continue;
9129                 }
9130
9131                 if (t1->gtOper == GT_EQ)
9132                 {
9133                     /* t1:c1==0 t2:c2!=0 ==> Branch to BX if both values are non-0
9134                        So we will branch to BX if (c1&c2)!=0 */
9135
9136                     foldOp = GT_AND;
9137                     cmpOp  = GT_NE;
9138                 }
9139                 else
9140                 {
9141                     /* t1:c1!=0 t2:c2==0 ==> Branch to BX if both values are 0
9142                        So we will branch to BX if (c1|c2)==0 */
9143
9144                     foldOp = GT_OR;
9145                     cmpOp  = GT_EQ;
9146                 }
9147             }
9148
9149             // Anding requires both values to be 0 or 1
9150
9151             if ((foldOp == GT_AND) && (!bool1 || !bool2))
9152             {
9153                 continue;
9154             }
9155
9156             //
9157             // Now update the trees
9158             //
9159             GenTree* cmpOp1 = gtNewOperNode(foldOp, foldType, c1, c2);
9160             if (bool1 && bool2)
9161             {
9162                 /* When we 'OR'/'AND' two booleans, the result is boolean as well */
9163                 cmpOp1->gtFlags |= GTF_BOOLEAN;
9164             }
9165
9166             t1->SetOper(cmpOp);
9167             t1->gtOp.gtOp1         = cmpOp1;
9168             t1->gtOp.gtOp2->gtType = foldType; // Could have been varTypeIsGC()
9169
9170 #if FEATURE_SET_FLAGS
9171             // For comparisons against zero we will have the GTF_SET_FLAGS set
9172             // and this can cause an assert to fire in fgMoveOpsLeft(GenTree* tree)
9173             // during the CSE phase.
9174             //
9175             // So make sure to clear any GTF_SET_FLAGS bit on these operations
9176             // as they are no longer feeding directly into a comparisons against zero
9177
9178             // Make sure that the GTF_SET_FLAGS bit is cleared.
9179             // Fix 388436 ARM JitStress WP7
9180             c1->gtFlags &= ~GTF_SET_FLAGS;
9181             c2->gtFlags &= ~GTF_SET_FLAGS;
9182
9183             // The new top level node that we just created does feed directly into
9184             // a comparison against zero, so set the GTF_SET_FLAGS bit so that
9185             // we generate an instuction that sets the flags, which allows us
9186             // to omit the cmp with zero instruction.
9187
9188             // Request that the codegen for cmpOp1 sets the condition flags
9189             // when it generates the code for cmpOp1.
9190             //
9191             cmpOp1->gtRequestSetFlags();
9192 #endif
9193
9194             flowList* edge1 = fgGetPredForBlock(b1->bbJumpDest, b1);
9195             flowList* edge2;
9196
9197             /* Modify the target of the conditional jump and update bbRefs and bbPreds */
9198
9199             if (sameTarget)
9200             {
9201                 edge2 = fgGetPredForBlock(b2->bbJumpDest, b2);
9202             }
9203             else
9204             {
9205                 edge2 = fgGetPredForBlock(b2->bbNext, b2);
9206
9207                 fgRemoveRefPred(b1->bbJumpDest, b1);
9208
9209                 b1->bbJumpDest = b2->bbJumpDest;
9210
9211                 fgAddRefPred(b2->bbJumpDest, b1);
9212             }
9213
9214             noway_assert(edge1 != nullptr);
9215             noway_assert(edge2 != nullptr);
9216
9217             BasicBlock::weight_t edgeSumMin = edge1->flEdgeWeightMin + edge2->flEdgeWeightMin;
9218             BasicBlock::weight_t edgeSumMax = edge1->flEdgeWeightMax + edge2->flEdgeWeightMax;
9219             if ((edgeSumMax >= edge1->flEdgeWeightMax) && (edgeSumMax >= edge2->flEdgeWeightMax))
9220             {
9221                 edge1->flEdgeWeightMin = edgeSumMin;
9222                 edge1->flEdgeWeightMax = edgeSumMax;
9223             }
9224             else
9225             {
9226                 edge1->flEdgeWeightMin = BB_ZERO_WEIGHT;
9227                 edge1->flEdgeWeightMax = BB_MAX_WEIGHT;
9228             }
9229
9230             /* Get rid of the second block (which is a BBJ_COND) */
9231
9232             noway_assert(b1->bbJumpKind == BBJ_COND);
9233             noway_assert(b2->bbJumpKind == BBJ_COND);
9234             noway_assert(b1->bbJumpDest == b2->bbJumpDest);
9235             noway_assert(b1->bbNext == b2);
9236             noway_assert(b2->bbNext);
9237
9238             fgUnlinkBlock(b2);
9239             b2->bbFlags |= BBF_REMOVED;
9240
9241             // If b2 was the last block of a try or handler, update the EH table.
9242
9243             ehUpdateForDeletedBlock(b2);
9244
9245             /* Update bbRefs and bbPreds */
9246
9247             /* Replace pred 'b2' for 'b2->bbNext' with 'b1'
9248              * Remove  pred 'b2' for 'b2->bbJumpDest' */
9249
9250             fgReplacePred(b2->bbNext, b2, b1);
9251
9252             fgRemoveRefPred(b2->bbJumpDest, b2);
9253
9254             /* Update the block numbers and try again */
9255
9256             change = true;
9257             /*
9258                         do
9259                         {
9260                             b2->bbNum = ++n1;
9261                             b2 = b2->bbNext;
9262                         }
9263                         while (b2);
9264             */
9265
9266             // Update loop table
9267             fgUpdateLoopsAfterCompacting(b1, b2);
9268
9269 #ifdef DEBUG
9270             if (verbose)
9271             {
9272                 printf("Folded %sboolean conditions of BB%02u and BB%02u to :\n", c2->OperIsLeaf() ? "" : "non-leaf ",
9273                        b1->bbNum, b2->bbNum);
9274                 gtDispTree(s1);
9275                 printf("\n");
9276             }
9277 #endif
9278         }
9279     } while (change);
9280
9281 #ifdef DEBUG
9282     fgDebugCheckBBlist();
9283 #endif
9284 }