Stop maintaining cost estimates in the RyuJIT backend. (#6849)
author Pat Gavlin <pgavlin@gmail.com>
Tue, 23 Aug 2016 16:36:27 +0000 (09:36 -0700)
committer GitHub <noreply@github.com>
Tue, 23 Aug 2016 16:36:27 +0000 (09:36 -0700)
* Summarize HIR cost estimates on Compiler.

These cost estimates are made on HIR and are not used by the backend.
Stash them before converting the function to LIR and report them at the
end of compilation.

* Stop maintaining costs in the backend.

The backend does not use the tree cost estimates.

* Remove a bit more cost maintenance.

* Address PR feedback.

src/jit/codegenarm.cpp
src/jit/codegenarm64.cpp
src/jit/codegenxarch.cpp
src/jit/compiler.cpp
src/jit/compiler.h
src/jit/decomposelongs.cpp
src/jit/lir.cpp
src/jit/lower.cpp
src/jit/lsra.cpp
src/jit/rationalize.cpp

index 8ded006..4ce8230 100644 (file)
@@ -174,8 +174,6 @@ void CodeGen::genCodeForBBlist()
 
 #ifdef DEBUG
     genInterruptibleUsed = true;
-    unsigned totalCostEx = 0;
-    unsigned totalCostSz = 0;
 
     // You have to be careful if you create basic blocks from now on
     compiler->fgSafeBasicBlockCreation = false;
@@ -515,14 +513,6 @@ void CodeGen::genCodeForBBlist()
                     }
                 }
             }
-
-            // TODO-LIR: the cost accounting performed below is incorrect: each operator's cost includes the
-            //           cost of its operands, so the total cost of the block is grossly overestimated. Fixing
-            //           this requires the ability to calculate the cost of the operator itself.
-            //
-            // totalCostEx += (UINT64)node->gtCostEx * block->getBBWeight(compiler);
-            // totalCostSz += (UINT64)node->gtCostSz;
-
 #endif // DEBUG
 
             genCodeForTreeNode(node);
@@ -859,7 +849,7 @@ void CodeGen::genCodeForBBlist()
     if (compiler->verbose)
     {
         printf("\n# ");
-        printf("totalCostEx = %6d, totalCostSz = %5d ", totalCostEx, totalCostSz);
+        printf("compCycleEstimate = %6d, compSizeEstimate = %5d ", compiler->compCycleEstimate, compiler->compSizeEstimate);
         printf("%s\n", compiler->info.compFullName);
     }
 #endif
index 75d00b2..3470c2e 100644 (file)
@@ -1419,8 +1419,6 @@ void CodeGen::genCodeForBBlist()
 
 #ifdef DEBUG
     genInterruptibleUsed = true;
-    UINT64 totalCostEx   = 0;
-    UINT64 totalCostSz   = 0;
 
     // You have to be careful if you create basic blocks from now on
     compiler->fgSafeBasicBlockCreation = false;
@@ -1741,14 +1739,6 @@ void CodeGen::genCodeForBBlist()
                     }
                 }
             }
-
-            // TODO-LIR: the cost accounting performed below is incorrect: each operator's cost includes the
-            //           cost of its operands, so the total cost of the block is grossly overestimated. Fixing
-            //           this requires the ability to calculate the cost of the operator itself.
-            //
-            // totalCostEx += (UINT64)node->gtCostEx * block->getBBWeight(compiler);
-            // totalCostSz += (UINT64)node->gtCostSz;
-
 #endif // DEBUG
 
             genCodeForTreeNode(node);
@@ -2077,7 +2067,7 @@ void CodeGen::genCodeForBBlist()
     if (compiler->verbose)
     {
         printf("\n# ");
-        printf("totalCostEx = %6d, totalCostSz = %5d ", totalCostEx, totalCostSz);
+        printf("compCycleEstimate = %6d, compSizeEstimate = %5d ", compiler->compCycleEstimate, compiler->compSizeEstimate);
         printf("%s\n", compiler->info.compFullName);
     }
 #endif
index eb40069..5e35067 100755 (executable)
@@ -331,9 +331,6 @@ void CodeGen::genCodeForBBlist()
 
 #ifdef DEBUG
     genInterruptibleUsed = true;
-    unsigned stmtNum     = 0;
-    UINT64   totalCostEx = 0;
-    UINT64   totalCostSz = 0;
 
     // You have to be careful if you create basic blocks from now on
     compiler->fgSafeBasicBlockCreation = false;
@@ -663,14 +660,6 @@ void CodeGen::genCodeForBBlist()
                     }
                 }
             }
-
-            // TODO-LIR: the cost accounting performed below is incorrect: each operator's cost includes the
-            //           cost of its operands, so the total cost of the block is grossly overestimated. Fixing
-            //           this requires the ability to calculate the cost of the operator itself.
-            //
-            // totalCostEx += (UINT64)node->gtCostEx * block->getBBWeight(compiler);
-            // totalCostSz += (UINT64)node->gtCostSz;
-
 #endif // DEBUG
 
             genCodeForTreeNode(node);
@@ -1106,7 +1095,7 @@ void CodeGen::genCodeForBBlist()
     if (compiler->verbose)
     {
         printf("\n# ");
-        printf("totalCostEx = %6d, totalCostSz = %5d ", totalCostEx, totalCostSz);
+        printf("compCycleEstimate = %6d, compSizeEstimate = %5d ", compiler->compCycleEstimate, compiler->compSizeEstimate);
         printf("%s\n", compiler->info.compFullName);
     }
 #endif
index 6c38f87..c8a362c 100644 (file)
@@ -4424,6 +4424,21 @@ void Compiler::compCompile(void** methodCodePtr, ULONG* methodCodeSize, CORJIT_F
 
 #ifdef DEBUG
     fgDebugCheckLinks(compStressCompile(STRESS_REMORPH_TREES, 50));
+
+    // Stash the current estimate of the function's size if necessary.
+    if (verbose)
+    {
+        compSizeEstimate = 0;
+        compCycleEstimate = 0;
+        for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext)
+        {
+            for (GenTreeStmt* statement = block->firstStmt(); statement != nullptr; statement = statement->getNextStmt())
+            {
+                compSizeEstimate += statement->GetCostSz();
+                compCycleEstimate += statement->GetCostEx();
+            }
+        }
+    }
 #endif
 
 #ifndef LEGACY_BACKEND
index 0d76b84..39b8121 100644 (file)
@@ -7336,6 +7336,8 @@ public:
     bool    compCodeGenDone;
     int64_t compNumStatementLinksTraversed; // # of links traversed while doing debug checks
     bool    fgNormalizeEHDone;              // Has the flowgraph EH normalization phase been done?
+    size_t  compSizeEstimate;               // The estimated size of the method as per `gtSetEvalOrder`.
+    size_t  compCycleEstimate;              // The estimated cycle count of the method as per `gtSetEvalOrder`
 #endif                                      // DEBUG
 
     bool fgLocalVarLivenessDone; // Note that this one is used outside of debug.
index 8277a3c..d0502af 100644 (file)
@@ -316,7 +316,6 @@ GenTree* DecomposeLongs::DecomposeLclVar(LIR::Use& use)
     loResult->gtType  = TYP_INT;
 
     GenTree* hiResult = m_compiler->gtNewLclLNode(varNum, TYP_INT);
-    hiResult->CopyCosts(loResult);
     BlockRange().InsertAfter(loResult, hiResult);
 
     if (varDsc->lvPromoted)
@@ -365,7 +364,6 @@ GenTree* DecomposeLongs::DecomposeLclFld(LIR::Use& use)
     loResult->gtType        = TYP_INT;
 
     GenTree* hiResult = m_compiler->gtNewLclFldNode(loResult->gtLclNum, TYP_INT, loResult->gtLclOffs + 4);
-    hiResult->CopyCosts(loResult);
     BlockRange().InsertAfter(loResult, hiResult);
 
     return FinalizeDecomposition(use, loResult, hiResult);
@@ -439,7 +437,6 @@ GenTree* DecomposeLongs::DecomposeStoreLclVar(LIR::Use& use)
     m_compiler->lvaIncRefCnts(tree);
     m_compiler->lvaIncRefCnts(hiStore);
 
-    hiStore->CopyCosts(tree);
     BlockRange().InsertAfter(tree, hiStore);
 
     return hiStore->gtNext;
@@ -474,7 +471,6 @@ GenTree* DecomposeLongs::DecomposeCast(LIR::Use& use)
                 BlockRange().Remove(tree);
 
                 hiResult = new (m_compiler, GT_CNS_INT) GenTreeIntCon(TYP_INT, 0);
-                hiResult->CopyCosts(loResult);
                 BlockRange().InsertAfter(loResult, hiResult);
             }
             else
@@ -513,7 +509,6 @@ GenTree* DecomposeLongs::DecomposeCnsLng(LIR::Use& use)
     loResult->gtType = TYP_INT;
 
     GenTree* hiResult = new (m_compiler, GT_CNS_INT) GenTreeIntCon(TYP_INT, hiVal);
-    hiResult->CopyCosts(loResult);
     BlockRange().InsertAfter(loResult, hiResult);
 
     return FinalizeDecomposition(use, loResult, hiResult);
@@ -712,8 +707,6 @@ GenTree* DecomposeLongs::DecomposeStoreInd(LIR::Use& use)
     storeIndHigh->gtFlags = (storeIndLow->gtFlags & (GTF_ALL_EFFECT | GTF_LIVENESS_MASK));
     storeIndHigh->gtFlags |= GTF_REVERSE_OPS;
 
-    m_compiler->gtPrepareCost(storeIndHigh);
-
     BlockRange().InsertAfter(storeIndLow, dataHigh, addrBaseHigh, addrHigh, storeIndHigh);
 
     return storeIndHigh;
@@ -790,8 +783,6 @@ GenTree* DecomposeLongs::DecomposeInd(LIR::Use& use)
         new (m_compiler, GT_LEA) GenTreeAddrMode(TYP_REF, addrBaseHigh, nullptr, 0, genTypeSize(TYP_INT));
     GenTreePtr indHigh = new (m_compiler, GT_IND) GenTreeIndir(GT_IND, TYP_INT, addrHigh, nullptr);
 
-    m_compiler->gtPrepareCost(indHigh);
-
     BlockRange().InsertAfter(indLow, addrBaseHigh, addrHigh, indHigh);
 
     return FinalizeDecomposition(use, indLow, indHigh);
@@ -824,7 +815,6 @@ GenTree* DecomposeLongs::DecomposeNot(LIR::Use& use)
     loResult->gtOp.gtOp1 = loOp1;
 
     GenTree* hiResult = new (m_compiler, GT_NOT) GenTreeOp(GT_NOT, TYP_INT, hiOp1, nullptr);
-    hiResult->CopyCosts(loResult);
     BlockRange().InsertAfter(loResult, hiResult);
 
     return FinalizeDecomposition(use, loResult, hiResult);
@@ -872,9 +862,6 @@ GenTree* DecomposeLongs::DecomposeNeg(LIR::Use& use)
     GenTree* hiResult = m_compiler->gtNewOperNode(GT_NEG, TYP_INT, hiAdjust);
     hiResult->gtFlags = tree->gtFlags;
 
-    // Annotate new nodes with costs. This will re-cost the hiOp1 tree as well.
-    m_compiler->gtPrepareCost(hiResult);
-
     BlockRange().InsertAfter(loResult, zero, hiAdjust, hiResult);
 
     return FinalizeDecomposition(use, loResult, hiResult);
@@ -940,7 +927,6 @@ GenTree* DecomposeLongs::DecomposeArith(LIR::Use& use)
     loResult->gtOp.gtOp2 = loOp2;
 
     GenTree* hiResult = new (m_compiler, oper) GenTreeOp(GetHiOper(oper), TYP_INT, hiOp1, hiOp2);
-    hiResult->CopyCosts(loResult);
     BlockRange().InsertAfter(loResult, hiResult);
 
     if ((oper == GT_ADD) || (oper == GT_SUB))
index 1aab172..a6f392a 100644 (file)
@@ -268,11 +268,9 @@ unsigned LIR::Use::ReplaceWithLclVar(Compiler* compiler, unsigned blockWeight, u
     compiler->lvaTable[lclNum].incRefCnts(blockWeight, compiler);
 
     GenTreeLclVar* store = compiler->gtNewTempAssign(lclNum, node)->AsLclVar();
-    store->CopyCosts(node);
 
     GenTree* load =
         new (compiler, GT_LCL_VAR) GenTreeLclVar(store->TypeGet(), store->AsLclVarCommon()->GetLclNum(), BAD_IL_OFFSET);
-    compiler->gtPrepareCost(load);
 
     m_range->InsertAfter(node, store, load);
 
index e596526..ae94ac0 100644 (file)
@@ -830,9 +830,6 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP
                                                                 info->structDesc.eightByteSizes[ctr]),
                         argListPtr->gtOp.gtOp1);
 
-                    // CopyCosts
-                    newOper->CopyCosts(argListPtr->gtOp.gtOp1);
-
                     // Splice in the new GT_PUTARG_REG node in the GT_LIST
                     ReplaceArgWithPutArgOrCopy(&argListPtr->gtOp.gtOp1, newOper);
                 }
@@ -863,9 +860,6 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP
                 // Create a new GT_PUTARG_REG node with op1
                 GenTreePtr newOper = comp->gtNewOperNode(GT_PUTARG_REG, curTyp, curOp);
 
-                // CopyCosts
-                newOper->CopyCosts(argListPtr->gtOp.gtOp1);
-
                 // Splice in the new GT_PUTARG_REG node in the GT_LIST
                 ReplaceArgWithPutArgOrCopy(&argListPtr->gtOp.gtOp1, newOper);
             }
@@ -932,8 +926,6 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP
 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
     }
 
-    putArg->CopyCosts(arg);
-
     if (arg->InReg())
     {
         putArg->SetInReg();
@@ -1052,7 +1044,6 @@ void Lowering::LowerArg(GenTreeCall* call, GenTreePtr* ppArg)
             var_types  intType = (type == TYP_DOUBLE) ? TYP_LONG : TYP_INT;
             GenTreePtr intArg  = comp->gtNewOperNode(GT_COPY, intType, arg);
 
-            intArg->CopyCosts(arg);
             info->node = intArg;
             ReplaceArgWithPutArgOrCopy(ppArg, intArg);
 
@@ -2068,7 +2059,6 @@ GenTree* Lowering::LowerDelegateInvoke(GenTreeCall* call)
         GenTreeAddrMode(TYP_REF, originalThisExpr, nullptr, 0, comp->eeGetEEInfo()->offsetOfDelegateInstance);
 
     GenTree* newThis = comp->gtNewOperNode(GT_IND, TYP_REF, newThisAddr);
-    newThis->SetCosts(IND_COST_EX, 2);
 
     BlockRange().InsertAfter(originalThisExpr, newThisAddr, newThis);
 
@@ -3078,7 +3068,6 @@ GenTree* Lowering::TryCreateAddrMode(LIR::Use&& use, bool isIndir)
 
     GenTreeAddrMode* addrMode = new (comp, GT_LEA) GenTreeAddrMode(addrModeType, base, index, scale, offset);
 
-    addrMode->CopyCosts(addr);
     addrMode->gtRsvdRegs = addr->gtRsvdRegs;
     addrMode->gtFlags |= (addr->gtFlags & (GTF_ALL_EFFECT | GTF_IND_FLAGS));
 
@@ -3504,10 +3493,6 @@ GenTree* Lowering::LowerArrElem(GenTree* node)
     GenTreePtr leaNode = new (comp, GT_LEA) GenTreeAddrMode(arrElem->TypeGet(), leaBase, leaIndexNode, scale, offset);
     leaNode->gtFlags |= GTF_REVERSE_OPS;
 
-    // Set the costs for all of the new nodes. Depends on the new nodes all participating in the
-    // dataflow tree rooted at `leaNode`.
-    comp->gtPrepareCost(leaNode);
-
     BlockRange().InsertBefore(insertionPoint, leaNode);
 
     LIR::Use arrElemUse;
index 6d87fa9..1db8524 100644 (file)
@@ -7601,7 +7601,6 @@ void LinearScan::insertCopyOrReload(BasicBlock* block, GenTreePtr tree, unsigned
 
         GenTreeCopyOrReload* newNode = new (compiler, oper) GenTreeCopyOrReload(oper, treeType, tree);
         assert(refPosition->registerAssignment != RBM_NONE);
-        newNode->CopyCosts(tree);
         newNode->SetRegNumByIdx(refPosition->assignedReg(), multiRegIdx);
         newNode->gtLsraInfo.isLsraAdded   = true;
         newNode->gtLsraInfo.isLocalDefUse = false;
index 1127c8f..d23d877 100644 (file)
@@ -335,12 +335,7 @@ void Rationalizer::RewriteCopyBlk(LIR::Use& use)
         // Since destination is known to be a SIMD type, src must be a SIMD type too
         // though we cannot figure it out easily enough. Transform src into
         // GT_IND(src) of simdType.
-        //
-        // We need to initialize costs on indir so that CopyCosts() while creating
-        // an addrmode will not hit asserts. These costs are not used further down
-        // but setting them to a reasonable value based on the logic in gtSetEvalOrder().
         GenTree* indir = comp->gtNewOperNode(GT_IND, simdType, srcAddr);
-        indir->SetCosts(IND_COST_EX, 2);
         BlockRange().InsertAfter(srcAddr, indir);
 
         cpBlk->gtGetOp1()->gtOp.gtOp2 = indir;
@@ -484,7 +479,6 @@ void Rationalizer::RewriteNodeAsCall(GenTree**             use,
     // Create the call node
     GenTreeCall* call = comp->gtNewCallNode(CT_USER_FUNC, callHnd, tree->gtType, args);
     call              = comp->fgMorphArgs(call);
-    call->CopyCosts(tree);
 #ifdef FEATURE_READYTORUN_COMPILER
     call->gtCall.setEntryPoint(entryPoint);
 #endif
@@ -774,8 +768,6 @@ void Rationalizer::RewriteAssignment(LIR::Use& use)
                 store->gtFlags |= GTF_REVERSE_OPS;
             }
 
-            store->CopyCosts(assignment);
-
             // TODO: JIT dump
 
             // Remove the GT_IND node and replace the assignment node with the store
@@ -989,7 +981,6 @@ Compiler::fgWalkResult Rationalizer::RewriteNode(GenTree** useEdge, ArrayStack<G
             if (!isLHSOfAssignment)
             {
                 GenTree* ind = comp->gtNewOperNode(GT_IND, node->TypeGet(), node);
-                ind->CopyCosts(node);
 
                 node->SetOper(GT_CLS_VAR_ADDR);
                 node->gtType = TYP_BYREF;
@@ -1082,8 +1073,6 @@ Compiler::fgWalkResult Rationalizer::RewriteNode(GenTree** useEdge, ArrayStack<G
                 GenTree*     address = new (comp, GT_LEA) GenTreeAddrMode(TYP_BYREF, simdNode->gtOp1, simdNode->gtOp2,
                                                                       baseTypeSize, offsetof(CORINFO_Array, u1Elems));
                 GenTree* ind = comp->gtNewOperNode(GT_IND, simdType, address);
-                address->CopyCosts(simdNode);
-                ind->CopyCosts(simdNode);
 
                 BlockRange().InsertBefore(simdNode, address, ind);
                 use.ReplaceWith(comp, ind);