From e50e96340963b15401a1c1032fa1b35c8beead2c Mon Sep 17 00:00:00 2001 From: Joseph Tremoulet Date: Wed, 19 Oct 2016 14:21:07 -0400 Subject: [PATCH] Unroll loops in inner-to-outer order There's no need for fixpoint iteration; the loop indices are a pre-order, so walking them in reverse order will visit inner loops before outer ones. Commit migrated from https://github.com/dotnet/coreclr/commit/56dac6e09a732cf371baae2b0d696efb93861824 --- src/coreclr/src/jit/optimizer.cpp | 470 +++++++++++++++++++------------------- 1 file changed, 230 insertions(+), 240 deletions(-) diff --git a/src/coreclr/src/jit/optimizer.cpp b/src/coreclr/src/jit/optimizer.cpp index 51bec96..5b5fb8b 100644 --- a/src/coreclr/src/jit/optimizer.cpp +++ b/src/coreclr/src/jit/optimizer.cpp @@ -2834,299 +2834,294 @@ void Compiler::optUnrollLoops() #endif /* Look for loop unrolling candidates */ - /* Double loop so that after unrolling an inner loop we set change to true - * and we then go back over all of the loop candidates and try to unroll - * the next outer loop, until we don't unroll any loops, - * then change will be false and we are done. - */ - for (;;) - { - bool change = false; + bool change = false; + + // Visit loops from highest to lowest number to vist them in innermost + // to outermost order + for (unsigned lnum = optLoopCount - 1; lnum != ~0U; --lnum) + { + BasicBlock* block; + BasicBlock* head; + BasicBlock* bottom; + + GenTree* loop; + GenTree* test; + GenTree* incr; + GenTree* phdr; + GenTree* init; + + bool dupCond; + int lval; + int lbeg; // initial value for iterator + int llim; // limit value for iterator + unsigned lvar; // iterator lclVar # + int iterInc; // value to increment the iterator + genTreeOps iterOper; // type of iterator increment (i.e. ADD, SUB, etc.) + var_types iterOperType; // type result of the oper (for overflow instrs) + genTreeOps testOper; // type of loop test (i.e. GT_LE, GT_GE, etc.) + bool unsTest; // Is the comparison u/int + + unsigned totalIter; // total number of iterations in the constant loop + unsigned loopCostSz; // Cost is size of one iteration + unsigned loopFlags; // actual lpFlags + unsigned requiredFlags; // required lpFlags + + GenTree* loopList; // new stmt list of the unrolled loop + GenTree* loopLast; + + static const int ITER_LIMIT[COUNT_OPT_CODE + 1] = { + 10, // BLENDED_CODE + 0, // SMALL_CODE + 20, // FAST_CODE + 0 // COUNT_OPT_CODE + }; + + noway_assert(ITER_LIMIT[SMALL_CODE] == 0); + noway_assert(ITER_LIMIT[COUNT_OPT_CODE] == 0); + + unsigned iterLimit = (unsigned)ITER_LIMIT[compCodeOpt()]; - for (unsigned lnum = 0; lnum < optLoopCount; lnum++) +#ifdef DEBUG + if (compStressCompile(STRESS_UNROLL_LOOPS, 50)) { - BasicBlock* block; - BasicBlock* head; - BasicBlock* bottom; - - GenTree* loop; - GenTree* test; - GenTree* incr; - GenTree* phdr; - GenTree* init; - - bool dupCond; - int lval; - int lbeg; // initial value for iterator - int llim; // limit value for iterator - unsigned lvar; // iterator lclVar # - int iterInc; // value to increment the iterator - genTreeOps iterOper; // type of iterator increment (i.e. ADD, SUB, etc.) - var_types iterOperType; // type result of the oper (for overflow instrs) - genTreeOps testOper; // type of loop test (i.e. GT_LE, GT_GE, etc.) - bool unsTest; // Is the comparison u/int - - unsigned totalIter; // total number of iterations in the constant loop - unsigned loopCostSz; // Cost is size of one iteration - unsigned loopFlags; // actual lpFlags - unsigned requiredFlags; // required lpFlags - - GenTree* loopList; // new stmt list of the unrolled loop - GenTree* loopLast; + iterLimit *= 10; + } +#endif - static const int ITER_LIMIT[COUNT_OPT_CODE + 1] = { - 10, // BLENDED_CODE - 0, // SMALL_CODE - 20, // FAST_CODE - 0 // COUNT_OPT_CODE - }; + static const int UNROLL_LIMIT_SZ[COUNT_OPT_CODE + 1] = { + 30, // BLENDED_CODE + 0, // SMALL_CODE + 60, // FAST_CODE + 0 // COUNT_OPT_CODE + }; - noway_assert(ITER_LIMIT[SMALL_CODE] == 0); - noway_assert(ITER_LIMIT[COUNT_OPT_CODE] == 0); + noway_assert(UNROLL_LIMIT_SZ[SMALL_CODE] == 0); + noway_assert(UNROLL_LIMIT_SZ[COUNT_OPT_CODE] == 0); - unsigned iterLimit = (unsigned)ITER_LIMIT[compCodeOpt()]; + int unrollLimitSz = (unsigned)UNROLL_LIMIT_SZ[compCodeOpt()]; #ifdef DEBUG - if (compStressCompile(STRESS_UNROLL_LOOPS, 50)) - { - iterLimit *= 10; - } + if (compStressCompile(STRESS_UNROLL_LOOPS, 50)) + { + unrollLimitSz *= 10; + } #endif - static const int UNROLL_LIMIT_SZ[COUNT_OPT_CODE + 1] = { - 30, // BLENDED_CODE - 0, // SMALL_CODE - 60, // FAST_CODE - 0 // COUNT_OPT_CODE - }; - - noway_assert(UNROLL_LIMIT_SZ[SMALL_CODE] == 0); - noway_assert(UNROLL_LIMIT_SZ[COUNT_OPT_CODE] == 0); + loopFlags = optLoopTable[lnum].lpFlags; + requiredFlags = LPFLG_DO_WHILE | LPFLG_ONE_EXIT | LPFLG_CONST; - int unrollLimitSz = (unsigned)UNROLL_LIMIT_SZ[compCodeOpt()]; + /* Ignore the loop if we don't have a do-while with a single exit + that has a constant number of iterations */ -#ifdef DEBUG - if (compStressCompile(STRESS_UNROLL_LOOPS, 50)) - { - unrollLimitSz *= 10; - } -#endif + if ((loopFlags & requiredFlags) != requiredFlags) + { + continue; + } - loopFlags = optLoopTable[lnum].lpFlags; - requiredFlags = LPFLG_DO_WHILE | LPFLG_ONE_EXIT | LPFLG_CONST; + /* ignore if removed or marked as not unrollable */ - /* Ignore the loop if we don't have a do-while with a single exit - that has a constant number of iterations */ + if (optLoopTable[lnum].lpFlags & (LPFLG_DONT_UNROLL | LPFLG_REMOVED)) + { + continue; + } - if ((loopFlags & requiredFlags) != requiredFlags) - { - continue; - } + head = optLoopTable[lnum].lpHead; + noway_assert(head); + bottom = optLoopTable[lnum].lpBottom; + noway_assert(bottom); - /* ignore if removed or marked as not unrollable */ + /* The single exit must be at the bottom of the loop */ + noway_assert(optLoopTable[lnum].lpExit); + if (optLoopTable[lnum].lpExit != bottom) + { + continue; + } - if (optLoopTable[lnum].lpFlags & (LPFLG_DONT_UNROLL | LPFLG_REMOVED)) - { - continue; - } + /* Unrolling loops with jumps in them is not worth the headache + * Later we might consider unrolling loops after un-switching */ - head = optLoopTable[lnum].lpHead; - noway_assert(head); - bottom = optLoopTable[lnum].lpBottom; - noway_assert(bottom); + block = head; + do + { + block = block->bbNext; + noway_assert(block); - /* The single exit must be at the bottom of the loop */ - noway_assert(optLoopTable[lnum].lpExit); - if (optLoopTable[lnum].lpExit != bottom) + if (block->bbJumpKind != BBJ_NONE) { - continue; + if (block != bottom) + { + goto DONE_LOOP; + } } + } while (block != bottom); - /* Unrolling loops with jumps in them is not worth the headache - * Later we might consider unrolling loops after un-switching */ + /* Get the loop data: + - initial constant + - limit constant + - iterator + - iterator increment + - increment operation type (i.e. ADD, SUB, etc...) + - loop test type (i.e. GT_GE, GT_LT, etc...) + */ - block = head; - do - { - block = block->bbNext; - noway_assert(block); + lbeg = optLoopTable[lnum].lpConstInit; + llim = optLoopTable[lnum].lpConstLimit(); + testOper = optLoopTable[lnum].lpTestOper(); - if (block->bbJumpKind != BBJ_NONE) - { - if (block != bottom) - { - goto DONE_LOOP; - } - } - } while (block != bottom); + lvar = optLoopTable[lnum].lpIterVar(); + iterInc = optLoopTable[lnum].lpIterConst(); + iterOper = optLoopTable[lnum].lpIterOper(); - /* Get the loop data: - - initial constant - - limit constant - - iterator - - iterator increment - - increment operation type (i.e. ADD, SUB, etc...) - - loop test type (i.e. GT_GE, GT_LT, etc...) - */ + iterOperType = optLoopTable[lnum].lpIterOperType(); + unsTest = (optLoopTable[lnum].lpTestTree->gtFlags & GTF_UNSIGNED) != 0; - lbeg = optLoopTable[lnum].lpConstInit; - llim = optLoopTable[lnum].lpConstLimit(); - testOper = optLoopTable[lnum].lpTestOper(); + if (lvaTable[lvar].lvAddrExposed) + { // If the loop iteration variable is address-exposed then bail + continue; + } + if (lvaTable[lvar].lvIsStructField) + { // If the loop iteration variable is a promoted field from a struct then + // bail + continue; + } - lvar = optLoopTable[lnum].lpIterVar(); - iterInc = optLoopTable[lnum].lpIterConst(); - iterOper = optLoopTable[lnum].lpIterOper(); + /* Locate the pre-header and initialization and increment/test statements */ - iterOperType = optLoopTable[lnum].lpIterOperType(); - unsTest = (optLoopTable[lnum].lpTestTree->gtFlags & GTF_UNSIGNED) != 0; + phdr = head->bbTreeList; + noway_assert(phdr); + loop = bottom->bbTreeList; + noway_assert(loop); - if (lvaTable[lvar].lvAddrExposed) - { // If the loop iteration variable is address-exposed then bail - continue; - } - if (lvaTable[lvar].lvIsStructField) - { // If the loop iteration variable is a promoted field from a struct then - // bail - continue; - } + init = head->lastStmt(); + noway_assert(init && (init->gtNext == nullptr)); + test = bottom->lastStmt(); + noway_assert(test && (test->gtNext == nullptr)); + incr = test->gtPrev; + noway_assert(incr); - /* Locate the pre-header and initialization and increment/test statements */ + if (init->gtFlags & GTF_STMT_CMPADD) + { + /* Must be a duplicated loop condition */ + noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE); - phdr = head->bbTreeList; - noway_assert(phdr); - loop = bottom->bbTreeList; - noway_assert(loop); + dupCond = true; + init = init->gtPrev; + noway_assert(init); + } + else + { + dupCond = false; + } - init = head->lastStmt(); - noway_assert(init && (init->gtNext == nullptr)); - test = bottom->lastStmt(); - noway_assert(test && (test->gtNext == nullptr)); - incr = test->gtPrev; - noway_assert(incr); + /* Find the number of iterations - the function returns false if not a constant number */ - if (init->gtFlags & GTF_STMT_CMPADD) - { - /* Must be a duplicated loop condition */ - noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE); - - dupCond = true; - init = init->gtPrev; - noway_assert(init); - } - else - { - dupCond = false; - } + if (!optComputeLoopRep(lbeg, llim, iterInc, iterOper, iterOperType, testOper, unsTest, dupCond, &totalIter)) + { + continue; + } - /* Find the number of iterations - the function returns false if not a constant number */ + /* Forget it if there are too many repetitions or not a constant loop */ - if (!optComputeLoopRep(lbeg, llim, iterInc, iterOper, iterOperType, testOper, unsTest, dupCond, &totalIter)) - { - continue; - } + if (totalIter > iterLimit) + { + continue; + } - /* Forget it if there are too many repetitions or not a constant loop */ + noway_assert(init->gtOper == GT_STMT); + init = init->gtStmt.gtStmtExpr; + noway_assert(test->gtOper == GT_STMT); + test = test->gtStmt.gtStmtExpr; + noway_assert(incr->gtOper == GT_STMT); + incr = incr->gtStmt.gtStmtExpr; - if (totalIter > iterLimit) - { - continue; - } + // Don't unroll loops we don't understand. + if (incr->gtOper != GT_ASG) + { + continue; + } + incr = incr->gtOp.gtOp2; - noway_assert(init->gtOper == GT_STMT); - init = init->gtStmt.gtStmtExpr; - noway_assert(test->gtOper == GT_STMT); - test = test->gtStmt.gtStmtExpr; - noway_assert(incr->gtOper == GT_STMT); - incr = incr->gtStmt.gtStmtExpr; + /* Make sure everything looks ok */ + if ((init->gtOper != GT_ASG) || (init->gtOp.gtOp1->gtOper != GT_LCL_VAR) || + (init->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) || (init->gtOp.gtOp2->gtOper != GT_CNS_INT) || + (init->gtOp.gtOp2->gtIntCon.gtIconVal != lbeg) || - // Don't unroll loops we don't understand. - if (incr->gtOper != GT_ASG) - { - continue; - } - incr = incr->gtOp.gtOp2; + !((incr->gtOper == GT_ADD) || (incr->gtOper == GT_SUB)) || (incr->gtOp.gtOp1->gtOper != GT_LCL_VAR) || + (incr->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) || (incr->gtOp.gtOp2->gtOper != GT_CNS_INT) || + (incr->gtOp.gtOp2->gtIntCon.gtIconVal != iterInc) || - /* Make sure everything looks ok */ - if ((init->gtOper != GT_ASG) || (init->gtOp.gtOp1->gtOper != GT_LCL_VAR) || - (init->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) || (init->gtOp.gtOp2->gtOper != GT_CNS_INT) || - (init->gtOp.gtOp2->gtIntCon.gtIconVal != lbeg) || + (test->gtOper != GT_JTRUE)) + { + noway_assert(!"Bad precondition in Compiler::optUnrollLoops()"); + continue; + } - !((incr->gtOper == GT_ADD) || (incr->gtOper == GT_SUB)) || (incr->gtOp.gtOp1->gtOper != GT_LCL_VAR) || - (incr->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) || (incr->gtOp.gtOp2->gtOper != GT_CNS_INT) || - (incr->gtOp.gtOp2->gtIntCon.gtIconVal != iterInc) || + /* heuristic - Estimated cost in code size of the unrolled loop */ - (test->gtOper != GT_JTRUE)) - { - noway_assert(!"Bad precondition in Compiler::optUnrollLoops()"); - continue; - } + loopCostSz = 0; - /* heuristic - Estimated cost in code size of the unrolled loop */ + block = head; - loopCostSz = 0; + do + { + block = block->bbNext; - block = head; + /* Visit all the statements in the block */ - do + for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt) { - block = block->bbNext; + /* Get the expression and stop if end reached */ - /* Visit all the statements in the block */ - - for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt) + GenTreePtr expr = stmt->gtStmtExpr; + if (expr == incr) { - /* Get the expression and stop if end reached */ - - GenTreePtr expr = stmt->gtStmtExpr; - if (expr == incr) - { - break; - } + break; + } - /* Calculate gtCostSz */ - gtSetStmtInfo(stmt); + /* Calculate gtCostSz */ + gtSetStmtInfo(stmt); - /* Update loopCostSz */ - loopCostSz += stmt->gtCostSz; - } - } while (block != bottom); + /* Update loopCostSz */ + loopCostSz += stmt->gtCostSz; + } + } while (block != bottom); - /* Compute the estimated increase in code size for the unrolled loop */ + /* Compute the estimated increase in code size for the unrolled loop */ - unsigned int fixedLoopCostSz; - fixedLoopCostSz = 8; + unsigned int fixedLoopCostSz; + fixedLoopCostSz = 8; - int unrollCostSz; - unrollCostSz = (loopCostSz * totalIter) - (loopCostSz + fixedLoopCostSz); + int unrollCostSz; + unrollCostSz = (loopCostSz * totalIter) - (loopCostSz + fixedLoopCostSz); - /* Don't unroll if too much code duplication would result. */ + /* Don't unroll if too much code duplication would result. */ - if (unrollCostSz > unrollLimitSz) - { - /* prevent this loop from being revisited */ - optLoopTable[lnum].lpFlags |= LPFLG_DONT_UNROLL; - goto DONE_LOOP; - } + if (unrollCostSz > unrollLimitSz) + { + /* prevent this loop from being revisited */ + optLoopTable[lnum].lpFlags |= LPFLG_DONT_UNROLL; + goto DONE_LOOP; + } - /* Looks like a good idea to unroll this loop, let's do it! */ - CLANG_FORMAT_COMMENT_ANCHOR; + /* Looks like a good idea to unroll this loop, let's do it! */ + CLANG_FORMAT_COMMENT_ANCHOR; #ifdef DEBUG - if (verbose) + if (verbose) + { + printf("\nUnrolling loop BB%02u", head->bbNext->bbNum); + if (head->bbNext->bbNum != bottom->bbNum) { - printf("\nUnrolling loop BB%02u", head->bbNext->bbNum); - if (head->bbNext->bbNum != bottom->bbNum) - { - printf("..BB%02u", bottom->bbNum); - } - printf(" over V%02u from %u to %u", lvar, lbeg, llim); - printf(" unrollCostSz = %d\n", unrollCostSz); - printf("\n"); + printf("..BB%02u", bottom->bbNum); } + printf(" over V%02u from %u to %u", lvar, lbeg, llim); + printf(" unrollCostSz = %d\n", unrollCostSz); + printf("\n"); + } #endif - /* Create the unrolled loop statement list */ - + /* Create the unrolled loop statement list */ + { loopList = loopLast = nullptr; for (lval = lbeg; totalIter; totalIter--) @@ -3239,8 +3234,8 @@ void Compiler::optUnrollLoops() fgRemoveRefPred(head->bbNext, bottom); /* Now change the initialization statement in the HEAD to "lvar = lval;" - * (the last value of the iterator in the loop) - * and drop the jump condition since the unrolled loop will always execute */ + * (the last value of the iterator in the loop) + * and drop the jump condition since the unrolled loop will always execute */ init->gtOp.gtOp2->gtIntCon.gtIconVal = lval; @@ -3302,18 +3297,13 @@ void Compiler::optUnrollLoops() /* Make sure to update loop table */ /* Use the LPFLG_REMOVED flag and update the bbLoopMask acordingly - * (also make head and bottom NULL - to hit an assert or GPF) */ + * (also make head and bottom NULL - to hit an assert or GPF) */ optLoopTable[lnum].lpFlags |= LPFLG_REMOVED; optLoopTable[lnum].lpHead = optLoopTable[lnum].lpBottom = nullptr; - - DONE_LOOP:; } - if (!change) - { - break; - } + DONE_LOOP:; } #ifdef DEBUG -- 2.7.4