JIT: change basic block weight to float (#45052)
author Andy Ayers <andya@microsoft.com>
Tue, 24 Nov 2020 22:25:04 +0000 (14:25 -0800)
committer GitHub <noreply@github.com>
Tue, 24 Nov 2020 22:25:04 +0000 (14:25 -0800)
Change the core data type for basic block weights from unsigned to float,
to simplify overall calculations and allow for a wider dynamic range.

Many changes are straightforward, but a few are worth noting:
* LSRA needs a true max weight, so I had to introduce infinity.
* I removed some of the overflow checking as floats naturally saturate.
* The simple geometric loop weight scaling (*8 per loop nest level) leads
  to some very large counts in some tests (15 level loop nests). We may
  want to rethink this and scale less aggressively in deep nests.
* Morph's use of the weighted ref counts for RCS_EARLY is nonstandard
  and the values are not actually weights, so I just added a cast back to unsigned.
* Several places in the jit seem to try to compare or combine unweighted
  and weighted counts; I don't think this makes sense, but I have left them as is.
* Lower, LIR, and Decompose were passing around weights but never using them.
* I had to introduce a special new weight for the inline projection we do
  for the prejit root.

These changes lead to small numbers of diffs, mostly places where small rounding
changes have altered heuristics; notably:
* cse weights
* LSRA's initial take on whether a parameter should be enregistered

Overall diff impact is a wash.

There are almost no diffs without PGO/IBC data. Diffs are slightly more
prominent in the Roslyn assemblies prejitted with some IBC.

I've tried to keep the format of weights the same in dumps (in most places)
and see minimal diffs in dumps too.

24 files changed:
src/coreclr/src/jit/assertionprop.cpp
src/coreclr/src/jit/block.h
src/coreclr/src/jit/codegencommon.cpp
src/coreclr/src/jit/compiler.h
src/coreclr/src/jit/compiler.hpp
src/coreclr/src/jit/decomposelongs.cpp
src/coreclr/src/jit/decomposelongs.h
src/coreclr/src/jit/emit.cpp
src/coreclr/src/jit/flowgraph.cpp
src/coreclr/src/jit/importer.cpp
src/coreclr/src/jit/lclvars.cpp
src/coreclr/src/jit/lir.cpp
src/coreclr/src/jit/lir.h
src/coreclr/src/jit/liveness.cpp
src/coreclr/src/jit/lower.cpp
src/coreclr/src/jit/lower.h
src/coreclr/src/jit/lsra.cpp
src/coreclr/src/jit/lsra.h
src/coreclr/src/jit/morph.cpp
src/coreclr/src/jit/optcse.cpp
src/coreclr/src/jit/optimizer.cpp
src/coreclr/src/jit/regalloc.cpp
src/coreclr/src/jit/utils.cpp
src/coreclr/src/jit/utils.h

index cf2c264..0953ca5 100644 (file)
@@ -129,7 +129,7 @@ void Compiler::optAddCopies()
         }
 
         // We require that the weighted ref count be significant.
-        if (varDsc->lvRefCntWtd() <= (BB_LOOP_WEIGHT * BB_UNITY_WEIGHT / 2))
+        if (varDsc->lvRefCntWtd() <= (BB_LOOP_WEIGHT_SCALE * BB_UNITY_WEIGHT / 2))
         {
             continue;
         }
@@ -143,7 +143,8 @@ void Compiler::optAddCopies()
         BlockSet paramImportantUseDom(BlockSetOps::MakeFull(this));
 
         // This will be threshold for determining heavier-than-average uses
-        unsigned paramAvgWtdRefDiv2 = (varDsc->lvRefCntWtd() + varDsc->lvRefCnt() / 2) / (varDsc->lvRefCnt() * 2);
+        BasicBlock::weight_t paramAvgWtdRefDiv2 =
+            (varDsc->lvRefCntWtd() + varDsc->lvRefCnt() / 2) / (varDsc->lvRefCnt() * 2);
 
         bool paramFoundImportantUse = false;
 
@@ -306,9 +307,9 @@ void Compiler::optAddCopies()
             /* dominates all the uses of the local variable         */
 
             /* Our default is to use the first block */
-            BasicBlock* bestBlock  = fgFirstBB;
-            unsigned    bestWeight = bestBlock->getBBWeight(this);
-            BasicBlock* block      = bestBlock;
+            BasicBlock*          bestBlock  = fgFirstBB;
+            BasicBlock::weight_t bestWeight = bestBlock->getBBWeight(this);
+            BasicBlock*          block      = bestBlock;
 
 #ifdef DEBUG
             if (verbose)
index 949b246..246b321 100644 (file)
@@ -514,16 +514,14 @@ struct BasicBlock : private LIR::Range
     const char* dspToString(int blockNumPadding = 0);
 #endif // DEBUG
 
-    typedef unsigned weight_t; // Type used to hold block and edge weights
-                               // Note that for CLR v2.0 and earlier our
-                               // block weights were stored using unsigned shorts
+    // Type used to hold block and edge weights
+    typedef float weight_t;
 
-#define BB_UNITY_WEIGHT 100 // how much a normal execute once block weights
-#define BB_LOOP_WEIGHT 8    // how much more loops are weighted
-#define BB_ZERO_WEIGHT 0
-#define BB_MAX_WEIGHT UINT32_MAX // we're using an 'unsigned' for the weight
-#define BB_VERY_HOT_WEIGHT 256   // how many average hits a BB has (per BBT scenario run) for this block
-                                 // to be considered as very hot
+#define BB_UNITY_WEIGHT 100.0f       // how much a normal execute once block weighs
+#define BB_UNITY_WEIGHT_UNSIGNED 100 // how much a normal execute once block weighs
+#define BB_LOOP_WEIGHT_SCALE 8.0f    // synthetic profile scale factor for loops
+#define BB_ZERO_WEIGHT 0.0f
+#define BB_MAX_WEIGHT FLT_MAX // maximum finite weight  -- needs rethinking.
 
     weight_t bbWeight; // The dynamic execution weight of this block
 
@@ -551,7 +549,7 @@ struct BasicBlock : private LIR::Range
     }
 
     // setBBProfileWeight -- Set the profile-derived weight for a basic block
-    void setBBProfileWeight(unsigned weight)
+    void setBBProfileWeight(weight_t weight)
     {
         this->bbFlags |= BBF_PROF_WEIGHT;
         this->bbWeight = weight;
index edb9c4d..1cbf0aa 100644 (file)
@@ -2203,7 +2203,7 @@ void CodeGen::genGenerateMachineCode()
 
         if (compiler->fgHaveProfileData())
         {
-            printf("; with IBC profile data, edge weights are %s, and fgCalledCount is %u\n",
+            printf("; with IBC profile data, edge weights are %s, and fgCalledCount is %.0f\n",
                    compiler->fgHaveValidEdgeWeights ? "valid" : "invalid", compiler->fgCalledCount);
         }
 
index 179befb..d09372b 100644 (file)
@@ -5526,7 +5526,7 @@ protected:
 
     bool fgHaveProfileData();
     void fgComputeProfileScale();
-    bool fgGetProfileWeightForBasicBlock(IL_OFFSET offset, unsigned* weight);
+    bool fgGetProfileWeightForBasicBlock(IL_OFFSET offset, BasicBlock::weight_t* weight);
     void fgInstrumentMethod();
 
 public:
@@ -5538,10 +5538,10 @@ public:
     }
 
     // fgProfileRunsCount - returns total number of scenario runs for the profile data
-    //                      or BB_UNITY_WEIGHT when we aren't using profile data.
+    //                      or BB_UNITY_WEIGHT_UNSIGNED when we aren't using profile data.
     unsigned fgProfileRunsCount()
     {
-        return fgIsUsingProfileWeights() ? fgNumProfileRuns : BB_UNITY_WEIGHT;
+        return fgIsUsingProfileWeights() ? fgNumProfileRuns : BB_UNITY_WEIGHT_UNSIGNED;
     }
 
 //-------- Insert a statement at the start or end of a basic block --------
@@ -6080,7 +6080,7 @@ public:
     // non-loop predecessors other than the head entry, create a new, empty block that goes (only) to the entry,
     // and redirects the preds of the entry to this new block.)  Sets the weight of the newly created block to
     // "ambientWeight".
-    void optEnsureUniqueHead(unsigned loopInd, unsigned ambientWeight);
+    void optEnsureUniqueHead(unsigned loopInd, BasicBlock::weight_t ambientWeight);
 
     void optUnrollLoops(); // Unrolls loops (needs to have cost info)
 
@@ -6485,8 +6485,8 @@ protected:
         unsigned short csdDefCount; // definition   count
         unsigned short csdUseCount; // use          count  (excluding the implicit uses at defs)
 
-        unsigned csdDefWtCnt; // weighted def count
-        unsigned csdUseWtCnt; // weighted use count  (excluding the implicit uses at defs)
+        BasicBlock::weight_t csdDefWtCnt; // weighted def count
+        BasicBlock::weight_t csdUseWtCnt; // weighted use count  (excluding the implicit uses at defs)
 
         GenTree*    csdTree;  // treenode containing the 1st occurrence
         Statement*  csdStmt;  // stmt containing the 1st occurrence
@@ -6599,13 +6599,13 @@ protected:
 #endif // FEATURE_VALNUM_CSE
 
 #if FEATURE_ANYCSE
-    bool     optDoCSE;             // True when we have found a duplicate CSE tree
-    bool     optValnumCSE_phase;   // True when we are executing the optValnumCSE_phase
-    unsigned optCSECandidateTotal; // Grand total of CSE candidates for both Lexical and ValNum
-    unsigned optCSECandidateCount; // Count of CSE's candidates, reset for Lexical and ValNum CSE's
-    unsigned optCSEstart;          // The first local variable number that is a CSE
-    unsigned optCSEcount;          // The total count of CSE's introduced.
-    unsigned optCSEweight;         // The weight of the current block when we are doing PerformCSE
+    bool                 optDoCSE;             // True when we have found a duplicate CSE tree
+    bool                 optValnumCSE_phase;   // True when we are executing the optValnumCSE_phase
+    unsigned             optCSECandidateTotal; // Grand total of CSE candidates for both Lexical and ValNum
+    unsigned             optCSECandidateCount; // Count of CSE's candidates, reset for Lexical and ValNum CSE's
+    unsigned             optCSEstart;          // The first local variable number that is a CSE
+    unsigned             optCSEcount;          // The total count of CSE's introduced.
+    BasicBlock::weight_t optCSEweight;         // The weight of the current block when we are doing PerformCSE
 
     bool optIsCSEcandidate(GenTree* tree);
 
@@ -7723,11 +7723,11 @@ public:
         return codeGen->doDoubleAlign();
     }
     DWORD getCanDoubleAlign();
-    bool shouldDoubleAlign(unsigned refCntStk,
-                           unsigned refCntReg,
-                           unsigned refCntWtdReg,
-                           unsigned refCntStkParam,
-                           unsigned refCntWtdStkDbl);
+    bool shouldDoubleAlign(unsigned             refCntStk,
+                           unsigned             refCntReg,
+                           BasicBlock::weight_t refCntWtdReg,
+                           unsigned             refCntStkParam,
+                           BasicBlock::weight_t refCntWtdStkDbl);
 #endif // DOUBLE_ALIGN
 
     bool IsFullPtrRegMapRequired()
index aad8329..1ffe017 100644 (file)
@@ -856,7 +856,7 @@ inline unsigned int genCSEnum2bit(unsigned index)
 
 #ifdef DEBUG
 const char* genES2str(BitVecTraits* traits, EXPSET_TP set);
-const char* refCntWtd2str(unsigned refCntWtd);
+const char* refCntWtd2str(BasicBlock::weight_t refCntWtd);
 #endif
 
 /*
@@ -1841,15 +1841,9 @@ inline void LclVarDsc::incRefCnts(BasicBlock::weight_t weight, Compiler* comp, R
                 weight *= 2;
             }
 
-            unsigned newWeight = lvRefCntWtd(state) + weight;
-            if (newWeight >= lvRefCntWtd(state))
-            { // lvRefCntWtd is an "unsigned".  Don't overflow it
-                setLvRefCntWtd(newWeight, state);
-            }
-            else
-            { // On overflow we assign UINT32_MAX
-                setLvRefCntWtd(UINT32_MAX, state);
-            }
+            BasicBlock::weight_t newWeight = lvRefCntWtd(state) + weight;
+            assert(newWeight >= lvRefCntWtd(state));
+            setLvRefCntWtd(newWeight, state);
         }
     }
 
@@ -3612,11 +3606,11 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 
 // are we compiling for fast code, or are we compiling for blended code and
 // inside a loop?
-// We return true for BLENDED_CODE if the Block executes more than BB_LOOP_WEIGHT/2
+// We return true for BLENDED_CODE if the Block executes more than BB_LOOP_WEIGHT_SCALE/2
 inline bool Compiler::optFastCodeOrBlendedLoop(BasicBlock::weight_t bbWeight)
 {
     return (compCodeOpt() == FAST_CODE) ||
-           ((compCodeOpt() == BLENDED_CODE) && (bbWeight > (BB_LOOP_WEIGHT / 2 * BB_UNITY_WEIGHT)));
+           ((compCodeOpt() == BLENDED_CODE) && (bbWeight > ((BB_LOOP_WEIGHT_SCALE / 2) * BB_UNITY_WEIGHT)));
 }
 
 // are we running on a Intel Pentium 4?
index 064453d..3365089 100644 (file)
@@ -61,9 +61,7 @@ void DecomposeLongs::DecomposeBlock(BasicBlock* block)
 {
     assert(block == m_compiler->compCurBB); // compCurBB must already be set.
     assert(block->isEmpty() || block->IsLIR());
-
-    m_blockWeight = block->getBBWeight(m_compiler);
-    m_range       = &LIR::AsRange(block);
+    m_range = &LIR::AsRange(block);
     DecomposeRangeHelper();
 }
 
@@ -75,20 +73,17 @@ void DecomposeLongs::DecomposeBlock(BasicBlock* block)
 //
 // Arguments:
 //    compiler    - The compiler context.
-//    blockWeight - The weight of the block into which the range will be
-//                  inserted.
 //    range       - The range to decompose.
 //
 // Return Value:
 //    None.
 //
-void DecomposeLongs::DecomposeRange(Compiler* compiler, unsigned blockWeight, LIR::Range& range)
+void DecomposeLongs::DecomposeRange(Compiler* compiler, LIR::Range& range)
 {
     assert(compiler != nullptr);
 
     DecomposeLongs decomposer(compiler);
-    decomposer.m_blockWeight = blockWeight;
-    decomposer.m_range       = &range;
+    decomposer.m_range = &range;
 
     decomposer.DecomposeRangeHelper();
 }
@@ -626,7 +621,7 @@ GenTree* DecomposeLongs::DecomposeCast(LIR::Use& use)
             else
             {
                 LIR::Use src(Range(), &(cast->AsOp()->gtOp1), cast);
-                unsigned lclNum = src.ReplaceWithLclVar(m_compiler, m_blockWeight);
+                unsigned lclNum = src.ReplaceWithLclVar(m_compiler);
 
                 loResult = src.Def();
 
@@ -768,14 +763,14 @@ GenTree* DecomposeLongs::DecomposeStoreInd(LIR::Use& use)
 
     // Save address to a temp. It is used in storeIndLow and storeIndHigh trees.
     LIR::Use address(Range(), &tree->AsOp()->gtOp1, tree);
-    address.ReplaceWithLclVar(m_compiler, m_blockWeight);
+    address.ReplaceWithLclVar(m_compiler);
     JITDUMP("[DecomposeStoreInd]: Saving address tree to a temp var:\n");
     DISPTREERANGE(Range(), address.Def());
 
     if (!gtLong->AsOp()->gtOp1->OperIsLeaf())
     {
         LIR::Use op1(Range(), &gtLong->AsOp()->gtOp1, gtLong);
-        op1.ReplaceWithLclVar(m_compiler, m_blockWeight);
+        op1.ReplaceWithLclVar(m_compiler);
         JITDUMP("[DecomposeStoreInd]: Saving low data tree to a temp var:\n");
         DISPTREERANGE(Range(), op1.Def());
     }
@@ -783,7 +778,7 @@ GenTree* DecomposeLongs::DecomposeStoreInd(LIR::Use& use)
     if (!gtLong->AsOp()->gtOp2->OperIsLeaf())
     {
         LIR::Use op2(Range(), &gtLong->AsOp()->gtOp2, gtLong);
-        op2.ReplaceWithLclVar(m_compiler, m_blockWeight);
+        op2.ReplaceWithLclVar(m_compiler);
         JITDUMP("[DecomposeStoreInd]: Saving high data tree to a temp var:\n");
         DISPTREERANGE(Range(), op2.Def());
     }
@@ -841,7 +836,7 @@ GenTree* DecomposeLongs::DecomposeInd(LIR::Use& use)
     GenTree* indLow = use.Def();
 
     LIR::Use address(Range(), &indLow->AsOp()->gtOp1, indLow);
-    address.ReplaceWithLclVar(m_compiler, m_blockWeight);
+    address.ReplaceWithLclVar(m_compiler);
     JITDUMP("[DecomposeInd]: Saving addr tree to a temp var:\n");
     DISPTREERANGE(Range(), address.Def());
 
@@ -1151,7 +1146,7 @@ GenTree* DecomposeLongs::DecomposeShift(LIR::Use& use)
                         // x = x << 32
 
                         LIR::Use loOp1Use(Range(), &gtLong->AsOp()->gtOp1, gtLong);
-                        loOp1Use.ReplaceWithLclVar(m_compiler, m_blockWeight);
+                        loOp1Use.ReplaceWithLclVar(m_compiler);
 
                         hiResult = loOp1Use.Def();
                         Range().Remove(gtLong);
@@ -1434,10 +1429,10 @@ GenTree* DecomposeLongs::DecomposeRotate(LIR::Use& use)
     {
         // If the rotate amount is 32, then swap hi and lo
         LIR::Use loOp1Use(Range(), &gtLong->AsOp()->gtOp1, gtLong);
-        loOp1Use.ReplaceWithLclVar(m_compiler, m_blockWeight);
+        loOp1Use.ReplaceWithLclVar(m_compiler);
 
         LIR::Use hiOp1Use(Range(), &gtLong->AsOp()->gtOp2, gtLong);
-        hiOp1Use.ReplaceWithLclVar(m_compiler, m_blockWeight);
+        hiOp1Use.ReplaceWithLclVar(m_compiler);
 
         hiResult              = loOp1Use.Def();
         loResult              = hiOp1Use.Def();
@@ -1821,7 +1816,7 @@ GenTree* DecomposeLongs::StoreNodeToVar(LIR::Use& use)
     }
 
     // Otherwise, we need to force var = call()
-    unsigned varNum                              = use.ReplaceWithLclVar(m_compiler, m_blockWeight);
+    unsigned varNum                              = use.ReplaceWithLclVar(m_compiler);
     m_compiler->lvaTable[varNum].lvIsMultiRegRet = true;
 
     // Decompose the new LclVar use
@@ -1848,7 +1843,7 @@ GenTree* DecomposeLongs::RepresentOpAsLocalVar(GenTree* op, GenTree* user, GenTr
     else
     {
         LIR::Use opUse(Range(), edge, user);
-        opUse.ReplaceWithLclVar(m_compiler, m_blockWeight);
+        opUse.ReplaceWithLclVar(m_compiler);
         return *edge;
     }
 }
index a9a75f5..cc3bdda 100644 (file)
@@ -25,7 +25,7 @@ public:
     void PrepareForDecomposition();
     void DecomposeBlock(BasicBlock* block);
 
-    static void DecomposeRange(Compiler* compiler, unsigned blockWeight, LIR::Range& range);
+    static void DecomposeRange(Compiler* compiler, LIR::Range& range);
 
 private:
     inline LIR::Range& Range() const
@@ -69,7 +69,6 @@ private:
 
     // Data
     Compiler*   m_compiler;
-    unsigned    m_blockWeight;
     LIR::Range* m_range;
 };
 
index 08d547e..57be302 100644 (file)
@@ -4755,7 +4755,7 @@ unsigned emitter::emitEndCodeGen(Compiler* comp,
     // code to be 16-byte aligned.
     //
     // 1. For ngen code with IBC data, use 16-byte alignment if the method
-    //    has been called more than BB_VERY_HOT_WEIGHT times.
+    //    has been called more than ScenarioHotWeight times.
     // 2. For JITed code and ngen code without IBC data, use 16-byte alignment
     //    when the code is 16 bytes or smaller. We align small getters/setters
     //    because of they are penalized heavily on certain hardware when not 16-byte
@@ -4764,7 +4764,8 @@ unsigned emitter::emitEndCodeGen(Compiler* comp,
     //
     if (emitComp->fgHaveProfileData())
     {
-        if (emitComp->fgCalledCount > (BB_VERY_HOT_WEIGHT * emitComp->fgProfileRunsCount()))
+        const float scenarioHotWeight = 256.0f;
+        if (emitComp->fgCalledCount > (scenarioHotWeight * emitComp->fgProfileRunsCount()))
         {
             allocMemFlag = CORJIT_ALLOCMEM_FLG_16BYTE_ALIGN;
         }
index 5e41398..dac8835 100644 (file)
@@ -299,7 +299,7 @@ void Compiler::fgComputeProfileScale()
     //
     if (calleeWeight < callSiteWeight)
     {
-        JITDUMP("   ... callee entry count %d is less than call site count %d\n", calleeWeight, callSiteWeight);
+        JITDUMP("   ... callee entry count %f is less than call site count %f\n", calleeWeight, callSiteWeight);
         impInlineInfo->profileScaleState = InlineInfo::ProfileScaleState::UNAVAILABLE;
         return;
     }
@@ -310,7 +310,7 @@ void Compiler::fgComputeProfileScale()
     impInlineInfo->profileScaleFactor = scale;
     impInlineInfo->profileScaleState  = InlineInfo::ProfileScaleState::KNOWN;
 
-    JITDUMP("   call site count %u callee entry count %u scale %f\n", callSiteWeight, calleeWeight, scale);
+    JITDUMP("   call site count %f callee entry count %f scale %f\n", callSiteWeight, calleeWeight, scale);
 }
 
 //------------------------------------------------------------------------
@@ -323,10 +323,10 @@ void Compiler::fgComputeProfileScale()
 // Returns:
 //   true if data was found
 //
-bool Compiler::fgGetProfileWeightForBasicBlock(IL_OFFSET offset, unsigned* weightWB)
+bool Compiler::fgGetProfileWeightForBasicBlock(IL_OFFSET offset, BasicBlock::weight_t* weightWB)
 {
     noway_assert(weightWB != nullptr);
-    unsigned weight = 0;
+    BasicBlock::weight_t weight = 0;
 
 #ifdef DEBUG
     unsigned hashSeed = fgStressBBProf();
@@ -345,17 +345,17 @@ bool Compiler::fgGetProfileWeightForBasicBlock(IL_OFFSET offset, unsigned* weigh
         }
         else if (hash % 11 == 0)
         {
-            weight = (hash % 23) * (hash % 29) * (hash % 31);
+            weight = (BasicBlock::weight_t)(hash % 23) * (hash % 29) * (hash % 31);
         }
         else
         {
-            weight = (hash % 17) * (hash % 19);
+            weight = (BasicBlock::weight_t)(hash % 17) * (hash % 19);
         }
 
         // The first block is never given a weight of zero
         if ((offset == 0) && (weight == 0))
         {
-            weight = 1 + (hash % 5);
+            weight = (BasicBlock::weight_t)1 + (hash % 5);
         }
 
         *weightWB = weight;
@@ -372,7 +372,7 @@ bool Compiler::fgGetProfileWeightForBasicBlock(IL_OFFSET offset, unsigned* weigh
     {
         if (fgBlockCounts[i].ILOffset == offset)
         {
-            *weightWB = fgBlockCounts[i].ExecutionCount;
+            *weightWB = (BasicBlock::weight_t)fgBlockCounts[i].ExecutionCount;
             return true;
         }
     }
@@ -5816,7 +5816,7 @@ unsigned Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, F
         curBBdesc->bbCodeOffs    = curBBoffs;
         curBBdesc->bbCodeOffsEnd = nxtBBoffs;
 
-        unsigned profileWeight;
+        BasicBlock::weight_t profileWeight;
 
         if (fgGetProfileWeightForBasicBlock(curBBoffs, &profileWeight))
         {
@@ -5824,7 +5824,8 @@ unsigned Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, F
             {
                 if (impInlineInfo->profileScaleState == InlineInfo::ProfileScaleState::KNOWN)
                 {
-                    profileWeight = (unsigned)(impInlineInfo->profileScaleFactor * profileWeight);
+                    double scaledWeight = impInlineInfo->profileScaleFactor * profileWeight;
+                    profileWeight       = (BasicBlock::weight_t)scaledWeight;
                 }
             }
 
@@ -13201,7 +13202,7 @@ void Compiler::fgPrintEdgeWeights()
 
                 if (edge->edgeWeightMin() < BB_MAX_WEIGHT)
                 {
-                    printf("(%u", edge->edgeWeightMin());
+                    printf("(%f", edge->edgeWeightMin());
                 }
                 else
                 {
@@ -13211,7 +13212,7 @@ void Compiler::fgPrintEdgeWeights()
                 {
                     if (edge->edgeWeightMax() < BB_MAX_WEIGHT)
                     {
-                        printf("..%u", edge->edgeWeightMax());
+                        printf("..%f", edge->edgeWeightMax());
                     }
                     else
                     {
@@ -13492,7 +13493,7 @@ void Compiler::fgComputeCalledCount(BasicBlock::weight_t returnWeight)
 #if DEBUG
     if (verbose)
     {
-        printf("We are using the Profile Weights and fgCalledCount is %d.\n", fgCalledCount);
+        printf("We are using the Profile Weights and fgCalledCount is %.0f.\n", fgCalledCount);
     }
 #endif
 }
@@ -13614,8 +13615,8 @@ void Compiler::fgComputeEdgeWeights()
                 slop = BasicBlock::GetSlopFraction(bSrc, bDst) + 1;
                 if (bSrc->bbJumpKind == BBJ_COND)
                 {
-                    int       diff;
-                    flowList* otherEdge;
+                    BasicBlock::weight_t diff;
+                    flowList*            otherEdge;
                     if (bSrc->bbNext == bDst)
                     {
                         otherEdge = fgGetPredForBlock(bSrc->bbJumpDest, bSrc);
@@ -13628,7 +13629,7 @@ void Compiler::fgComputeEdgeWeights()
                     noway_assert(otherEdge->edgeWeightMin() <= otherEdge->edgeWeightMax());
 
                     // Adjust edge->flEdgeWeightMin up or adjust otherEdge->flEdgeWeightMax down
-                    diff = ((int)bSrc->bbWeight) - ((int)edge->edgeWeightMin() + (int)otherEdge->edgeWeightMax());
+                    diff = bSrc->bbWeight - (edge->edgeWeightMin() + otherEdge->edgeWeightMax());
                     if (diff > 0)
                     {
                         assignOK &= edge->setEdgeWeightMinChecked(edge->edgeWeightMin() + diff, slop, &usedSlop);
@@ -13640,7 +13641,7 @@ void Compiler::fgComputeEdgeWeights()
                     }
 
                     // Adjust otherEdge->flEdgeWeightMin up or adjust edge->flEdgeWeightMax down
-                    diff = ((int)bSrc->bbWeight) - ((int)otherEdge->edgeWeightMin() + (int)edge->edgeWeightMax());
+                    diff = bSrc->bbWeight - (otherEdge->edgeWeightMin() + edge->edgeWeightMax());
                     if (diff > 0)
                     {
                         assignOK &=
@@ -13660,12 +13661,12 @@ void Compiler::fgComputeEdgeWeights()
                     }
 #ifdef DEBUG
                     // Now edge->flEdgeWeightMin and otherEdge->flEdgeWeightMax) should add up to bSrc->bbWeight
-                    diff = ((int)bSrc->bbWeight) - ((int)edge->edgeWeightMin() + (int)otherEdge->edgeWeightMax());
-                    noway_assert((-((int)slop) <= diff) && (diff <= ((int)slop)));
+                    diff = bSrc->bbWeight - (edge->edgeWeightMin() + otherEdge->edgeWeightMax());
+                    assert(((-slop) <= diff) && (diff <= slop));
 
                     // Now otherEdge->flEdgeWeightMin and edge->flEdgeWeightMax) should add up to bSrc->bbWeight
-                    diff = ((int)bSrc->bbWeight) - ((int)otherEdge->edgeWeightMin() + (int)edge->edgeWeightMax());
-                    noway_assert((-((int)slop) <= diff) && (diff <= ((int)slop)));
+                    diff = bSrc->bbWeight - (otherEdge->edgeWeightMin() + edge->edgeWeightMax());
+                    assert(((-slop) <= diff) && (diff <= slop));
 #endif // DEBUG
                 }
             }
@@ -13691,8 +13692,8 @@ void Compiler::fgComputeEdgeWeights()
                     bDstWeight -= fgCalledCount;
                 }
 
-                UINT64 minEdgeWeightSum = 0;
-                UINT64 maxEdgeWeightSum = 0;
+                BasicBlock::weight_t minEdgeWeightSum = 0;
+                BasicBlock::weight_t maxEdgeWeightSum = 0;
 
                 // Calculate the sums of the minimum and maximum edge weights
                 for (edge = bDst->bbPreds; edge != nullptr; edge = edge->flNext)
@@ -13718,12 +13719,12 @@ void Compiler::fgComputeEdgeWeights()
                     // otherMaxEdgesWeightSum is the sum of all of the other edges flEdgeWeightMax values
                     // This can be used to compute a lower bound for our minimum edge weight
                     noway_assert(maxEdgeWeightSum >= edge->edgeWeightMax());
-                    UINT64 otherMaxEdgesWeightSum = maxEdgeWeightSum - edge->edgeWeightMax();
+                    BasicBlock::weight_t otherMaxEdgesWeightSum = maxEdgeWeightSum - edge->edgeWeightMax();
 
                     // otherMinEdgesWeightSum is the sum of all of the other edges flEdgeWeightMin values
                     // This can be used to compute an upper bound for our maximum edge weight
                     noway_assert(minEdgeWeightSum >= edge->edgeWeightMin());
-                    UINT64 otherMinEdgesWeightSum = minEdgeWeightSum - edge->edgeWeightMin();
+                    BasicBlock::weight_t otherMinEdgesWeightSum = minEdgeWeightSum - edge->edgeWeightMin();
 
                     if (bDstWeight >= otherMaxEdgesWeightSum)
                     {
@@ -15247,9 +15248,9 @@ bool Compiler::fgOptimizeBranch(BasicBlock* bJump)
             {
                 newWeightDest = (weightDest - weightJump);
             }
-            if (weightDest >= (BB_LOOP_WEIGHT * BB_UNITY_WEIGHT) / 2)
+            if (weightDest >= (BB_LOOP_WEIGHT_SCALE * BB_UNITY_WEIGHT) / 2)
             {
-                newWeightDest = (weightDest * 2) / (BB_LOOP_WEIGHT * BB_UNITY_WEIGHT);
+                newWeightDest = (weightDest * 2) / (BB_LOOP_WEIGHT_SCALE * BB_UNITY_WEIGHT);
             }
             if (newWeightDest > 0)
             {
@@ -19987,7 +19988,7 @@ bool Compiler::fgDumpFlowGraph(Phases phase)
 
         if (fgHaveProfileData())
         {
-            fprintf(fgxFile, "\n    calledCount=\"%d\"", fgCalledCount);
+            fprintf(fgxFile, "\n    calledCount=\"%f\"", fgCalledCount);
             fprintf(fgxFile, "\n    profileData=\"true\"");
         }
         if (compHndBBtabCount > 0)
@@ -20158,7 +20159,7 @@ bool Compiler::fgDumpFlowGraph(Phases phase)
 
                     if (validWeights)
                     {
-                        unsigned edgeWeight = (edge->edgeWeightMin() + edge->edgeWeightMax()) / 2;
+                        BasicBlock::weight_t edgeWeight = (edge->edgeWeightMin() + edge->edgeWeightMax()) / 2;
                         fprintf(fgxFile, "%slabel=\"%7.2f\"", sep, (double)edgeWeight / weightDivisor);
                     }
 
@@ -20183,7 +20184,7 @@ bool Compiler::fgDumpFlowGraph(Phases phase)
                     }
                     if (validWeights)
                     {
-                        unsigned edgeWeight = (edge->edgeWeightMin() + edge->edgeWeightMax()) / 2;
+                        BasicBlock::weight_t edgeWeight = (edge->edgeWeightMin() + edge->edgeWeightMax()) / 2;
                         fprintf(fgxFile, "\n            weight=");
                         fprintfDouble(fgxFile, ((double)edgeWeight) / weightDivisor);
 
@@ -20418,13 +20419,13 @@ void Compiler::fgTableDispBasicBlock(BasicBlock* block, int ibcColWidth /* = 0 *
             if (weight <= 99999 * BB_UNITY_WEIGHT)
             {
                 // print weight in this format ddddd.
-                printf("%5u.", (weight + (BB_UNITY_WEIGHT / 2)) / BB_UNITY_WEIGHT);
+                printf("%5u.", (unsigned)FloatingPointUtils::round(weight / BB_UNITY_WEIGHT));
             }
             else // print weight in terms of k (i.e. 156k )
             {
                 // print weight in this format dddddk
                 BasicBlock::weight_t weightK = weight / 1000;
-                printf("%5uk", (weightK + (BB_UNITY_WEIGHT / 2)) / BB_UNITY_WEIGHT);
+                printf("%5uk", (unsigned)FloatingPointUtils::round(weightK / BB_UNITY_WEIGHT));
             }
         }
         else // print weight in this format ddd.dd
@@ -20432,7 +20433,6 @@ void Compiler::fgTableDispBasicBlock(BasicBlock* block, int ibcColWidth /* = 0 *
             printf("%6s", refCntWtd2str(weight));
         }
     }
-    printf(" ");
 
     //
     // Display optional IBC weight column.
@@ -20443,7 +20443,7 @@ void Compiler::fgTableDispBasicBlock(BasicBlock* block, int ibcColWidth /* = 0 *
     {
         if (block->hasProfileWeight())
         {
-            printf("%*u", ibcColWidth, block->bbWeight);
+            printf("%*u", ibcColWidth, (unsigned)FloatingPointUtils::round(block->bbWeight));
         }
         else
         {
index 4e3e50b..3924437 100644 (file)
@@ -18879,9 +18879,13 @@ void Compiler::impMakeDiscretionaryInlineObservations(InlineInfo* pInlineInfo, I
         frequency = InlineCallsiteFrequency::BORING;
     }
 
-    // Also capture the block weight of the call site.  In the prejit
-    // root case, assume there's some hot call site for this method.
-    unsigned weight = 0;
+    // Also capture the block weight of the call site.
+    //
+    // In the prejit root case, assume at runtime there might be a hot call site
+    // for this method, so we won't prematurely conclude this method should never
+    // be inlined.
+    //
+    BasicBlock::weight_t weight = 0;
 
     if (pInlineInfo != nullptr)
     {
@@ -18889,11 +18893,12 @@ void Compiler::impMakeDiscretionaryInlineObservations(InlineInfo* pInlineInfo, I
     }
     else
     {
-        weight = BB_MAX_WEIGHT;
+        const float prejitHotCallerWeight = 1000000.0f;
+        weight                            = prejitHotCallerWeight;
     }
 
     inlineResult->NoteInt(InlineObservation::CALLSITE_FREQUENCY, static_cast<int>(frequency));
-    inlineResult->NoteInt(InlineObservation::CALLSITE_WEIGHT, static_cast<int>(weight));
+    inlineResult->NoteInt(InlineObservation::CALLSITE_WEIGHT, (int)(weight));
 
     // If the call site has profile data, report the relative frequency of the site.
     //
index 1dd05c2..f6f5426 100644 (file)
@@ -3147,47 +3147,9 @@ BasicBlock::weight_t BasicBlock::getBBWeight(Compiler* comp)
 
         // Normalize the bbWeights by multiplying by BB_UNITY_WEIGHT and dividing by the calledCount.
         //
-        // 1. For methods that do not have IBC data the called weight will always be 100 (BB_UNITY_WEIGHT)
-        //     and the entry point bbWeight value is almost always 100 (BB_UNITY_WEIGHT)
-        // 2.  For methods that do have IBC data the called weight is the actual number of calls
-        //     from the IBC data and the entry point bbWeight value is almost always the actual
-        //     number of calls from the IBC data.
-        //
-        // "almost always" - except for the rare case where a loop backedge jumps to BB01
-        //
-        // We also perform a rounding operation by adding half of the 'calledCount' before performing
-        // the division.
-        //
-        // Thus for both cases we will return 100 (BB_UNITY_WEIGHT) for the entry point BasicBlock
-        //
-        // Note that with a 100 (BB_UNITY_WEIGHT) values between 1 and 99 represent decimal fractions.
-        // (i.e. 33 represents 33% and 75 represents 75%, and values greater than 100 require
-        //  some kind of loop backedge)
-        //
-
-        if (this->bbWeight < (BB_MAX_WEIGHT / BB_UNITY_WEIGHT))
-        {
-            // Calculate the result using unsigned arithmetic
-            weight_t result = ((this->bbWeight * BB_UNITY_WEIGHT) + (calledCount / 2)) / calledCount;
-
-            // We don't allow a value of zero, as that would imply rarely run
-            return max(1, result);
-        }
-        else
-        {
-            // Calculate the full result using floating point
-            double fullResult = ((double)this->bbWeight * (double)BB_UNITY_WEIGHT) / (double)calledCount;
+        weight_t fullResult = this->bbWeight * BB_UNITY_WEIGHT / calledCount;
 
-            if (fullResult < (double)BB_MAX_WEIGHT)
-            {
-                // Add 0.5 and truncate to unsigned
-                return (weight_t)(fullResult + 0.5);
-            }
-            else
-            {
-                return BB_MAX_WEIGHT;
-            }
-        }
+        return fullResult;
     }
 }
 
@@ -3261,17 +3223,19 @@ public:
         // Break the tie by:
         //   - Increasing the weight by 2   if we are a register arg.
         //   - Increasing the weight by 0.5 if we are a GC type.
+        //
+        // Review: seems odd that this is mixing counts and weights.
 
         if (weight1 != 0)
         {
             if (dsc1->lvIsRegArg)
             {
-                weight2 += 2 * BB_UNITY_WEIGHT;
+                weight2 += 2 * BB_UNITY_WEIGHT_UNSIGNED;
             }
 
             if (varTypeIsGC(dsc1->TypeGet()))
             {
-                weight1 += BB_UNITY_WEIGHT / 2;
+                weight1 += BB_UNITY_WEIGHT_UNSIGNED / 2;
             }
         }
 
@@ -3279,12 +3243,12 @@ public:
         {
             if (dsc2->lvIsRegArg)
             {
-                weight2 += 2 * BB_UNITY_WEIGHT;
+                weight2 += 2 * BB_UNITY_WEIGHT_UNSIGNED;
             }
 
             if (varTypeIsGC(dsc2->TypeGet()))
             {
-                weight2 += BB_UNITY_WEIGHT / 2;
+                weight2 += BB_UNITY_WEIGHT_UNSIGNED / 2;
             }
         }
 
@@ -3328,8 +3292,8 @@ public:
         assert(!dsc1->lvRegister);
         assert(!dsc2->lvRegister);
 
-        unsigned weight1 = dsc1->lvRefCntWtd();
-        unsigned weight2 = dsc2->lvRefCntWtd();
+        BasicBlock::weight_t weight1 = dsc1->lvRefCntWtd();
+        BasicBlock::weight_t weight2 = dsc2->lvRefCntWtd();
 
 #ifndef TARGET_ARM
         // ARM-TODO: this was disabled for ARM under !FEATURE_FP_REGALLOC; it was probably a left-over from
index 290b3fd..08257c0 100644 (file)
@@ -243,14 +243,13 @@ void LIR::Use::ReplaceWith(Compiler* compiler, GenTree* replacement)
 //
 // Arguments:
 //    compiler - The Compiler context.
-//    blockWeight - The weight of the basic block that contains the use.
 //    lclNum - The local to use for temporary storage. If BAD_VAR_NUM (the
 //             default) is provided, this method will create and use a new
 //             local var.
 //
 // Return Value: The number of the local var used for temporary storage.
 //
-unsigned LIR::Use::ReplaceWithLclVar(Compiler* compiler, unsigned blockWeight, unsigned lclNum)
+unsigned LIR::Use::ReplaceWithLclVar(Compiler* compiler, unsigned lclNum)
 {
     assert(IsInitialized());
     assert(compiler != nullptr);
index 460a24e..5348b9e 100644 (file)
@@ -74,7 +74,7 @@ public:
         bool IsDummyUse() const;
 
         void ReplaceWith(Compiler* compiler, GenTree* replacement);
-        unsigned ReplaceWithLclVar(Compiler* compiler, unsigned blockWeight, unsigned lclNum = BAD_VAR_NUM);
+        unsigned ReplaceWithLclVar(Compiler* compiler, unsigned lclNum = BAD_VAR_NUM);
     };
 
     //------------------------------------------------------------------------
index cbc4ebb..c90571f 100644 (file)
@@ -1015,8 +1015,7 @@ void Compiler::fgExtendDbgLifetimes()
                     initRange.InsertBefore(nullptr, zero, store);
 
 #if !defined(TARGET_64BIT)
-                    unsigned blockWeight = block->getBBWeight(this);
-                    DecomposeLongs::DecomposeRange(this, blockWeight, initRange);
+                    DecomposeLongs::DecomposeRange(this, initRange);
 #endif // !defined(TARGET_64BIT)
                     m_pLowering->LowerRange(block, initRange);
 
index 7a090b5..96a14f9 100644 (file)
@@ -5186,9 +5186,9 @@ bool Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod)
         // add == true (when divisor == 7 for example):
         //     mulhi = dividend MULHI magic
         //     div   = (((dividend SUB mulhi) RSZ 1) ADD mulhi)) RSZ (shift - 1)
-        const bool     requiresAdjustment       = add;
-        const bool     requiresDividendMultiuse = requiresAdjustment || !isDiv;
-        const unsigned curBBWeight              = m_block->getBBWeight(comp);
+        const bool                 requiresAdjustment       = add;
+        const bool                 requiresDividendMultiuse = requiresAdjustment || !isDiv;
+        const BasicBlock::weight_t curBBWeight              = m_block->getBBWeight(comp);
 
         if (requiresDividendMultiuse)
         {
@@ -5375,10 +5375,10 @@ GenTree* Lowering::LowerConstIntDivOrMod(GenTree* node)
         // For -3 we need:
         //     mulhi -= dividend                    ; requires sub adjust
         //     div = signbit(mulhi) + sar(mulhi, 1) ; requires shift adjust
-        bool     requiresAddSubAdjust     = signum(divisorValue) != signum(magic);
-        bool     requiresShiftAdjust      = shift != 0;
-        bool     requiresDividendMultiuse = requiresAddSubAdjust || !isDiv;
-        unsigned curBBWeight              = comp->compCurBB->getBBWeight(comp);
+        bool                 requiresAddSubAdjust     = signum(divisorValue) != signum(magic);
+        bool                 requiresShiftAdjust      = shift != 0;
+        bool                 requiresDividendMultiuse = requiresAddSubAdjust || !isDiv;
+        BasicBlock::weight_t curBBWeight              = comp->compCurBB->getBBWeight(comp);
 
         if (requiresDividendMultiuse)
         {
index ff13302..c8500c0 100644 (file)
@@ -217,7 +217,7 @@ private:
         GenTree* oldUseNode = use.Def();
         if ((oldUseNode->gtOper != GT_LCL_VAR) || (tempNum != BAD_VAR_NUM))
         {
-            use.ReplaceWithLclVar(comp, m_block->getBBWeight(comp), tempNum);
+            use.ReplaceWithLclVar(comp, tempNum);
             GenTree* newUseNode = use.Def();
             ContainCheckRange(oldUseNode->gtNext, newUseNode);
             return newUseNode->AsLclVar();
index 6ee1f39..84f3020 100644 (file)
@@ -179,10 +179,10 @@ void lsraAssignRegToTree(GenTree* tree, regNumber reg, unsigned regIdx)
 //
 // Returns:
 //    Weight of ref position.
-unsigned LinearScan::getWeight(RefPosition* refPos)
+BasicBlock::weight_t LinearScan::getWeight(RefPosition* refPos)
 {
-    unsigned weight;
-    GenTree* treeNode = refPos->treeNode;
+    BasicBlock::weight_t weight;
+    GenTree*             treeNode = refPos->treeNode;
 
     if (treeNode != nullptr)
     {
@@ -1037,8 +1037,8 @@ int LinearScan::compareBlocksForSequencing(BasicBlock* block1, BasicBlock* block
 {
     if (useBlockWeights)
     {
-        unsigned weight1 = block1->getBBWeight(compiler);
-        unsigned weight2 = block2->getBBWeight(compiler);
+        BasicBlock::weight_t weight1 = block1->getBBWeight(compiler);
+        BasicBlock::weight_t weight2 = block2->getBBWeight(compiler);
 
         if (weight1 > weight2)
         {
@@ -1620,13 +1620,13 @@ void LinearScan::identifyCandidates()
     // This is defined as thresholdLargeVectorRefCntWtd, as we are likely to use the same mechanism
     // for vectors on Arm64, though the actual value may differ.
 
-    unsigned int floatVarCount        = 0;
-    unsigned int thresholdFPRefCntWtd = 4 * BB_UNITY_WEIGHT;
-    unsigned int maybeFPRefCntWtd     = 2 * BB_UNITY_WEIGHT;
-    VARSET_TP    fpMaybeCandidateVars(VarSetOps::UninitVal());
+    unsigned int         floatVarCount        = 0;
+    BasicBlock::weight_t thresholdFPRefCntWtd = 4 * BB_UNITY_WEIGHT;
+    BasicBlock::weight_t maybeFPRefCntWtd     = 2 * BB_UNITY_WEIGHT;
+    VARSET_TP            fpMaybeCandidateVars(VarSetOps::UninitVal());
 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
-    unsigned int largeVectorVarCount           = 0;
-    unsigned int thresholdLargeVectorRefCntWtd = 4 * BB_UNITY_WEIGHT;
+    unsigned int         largeVectorVarCount           = 0;
+    BasicBlock::weight_t thresholdLargeVectorRefCntWtd = 4 * BB_UNITY_WEIGHT;
 #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
     if (enregisterLocalVars)
     {
@@ -1638,13 +1638,13 @@ void LinearScan::identifyCandidates()
 #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
     }
 #if DOUBLE_ALIGN
-    unsigned refCntStk       = 0;
-    unsigned refCntReg       = 0;
-    unsigned refCntWtdReg    = 0;
-    unsigned refCntStkParam  = 0; // sum of     ref counts for all stack based parameters
-    unsigned refCntWtdStkDbl = 0; // sum of wtd ref counts for stack based doubles
-    doDoubleAlign            = false;
-    bool checkDoubleAlign    = true;
+    unsigned             refCntStk       = 0;
+    unsigned             refCntReg       = 0;
+    BasicBlock::weight_t refCntWtdReg    = 0;
+    unsigned             refCntStkParam  = 0; // sum of     ref counts for all stack based parameters
+    BasicBlock::weight_t refCntWtdStkDbl = 0; // sum of wtd ref counts for stack based doubles
+    doDoubleAlign                        = false;
+    bool checkDoubleAlign                = true;
     if (compiler->codeGen->isFramePointerRequired() || compiler->opts.MinOpts())
     {
         checkDoubleAlign = false;
@@ -1802,7 +1802,7 @@ void LinearScan::identifyCandidates()
             {
                 largeVectorVarCount++;
                 VarSetOps::AddElemD(compiler, largeVectorVars, varDsc->lvVarIndex);
-                unsigned refCntWtd = varDsc->lvRefCntWtd();
+                BasicBlock::weight_t refCntWtd = varDsc->lvRefCntWtd();
                 if (refCntWtd >= thresholdLargeVectorRefCntWtd)
                 {
                     VarSetOps::AddElemD(compiler, largeVectorCalleeSaveCandidateVars, varDsc->lvVarIndex);
@@ -1813,7 +1813,7 @@ void LinearScan::identifyCandidates()
                 if (regType(type) == FloatRegisterType)
             {
                 floatVarCount++;
-                unsigned refCntWtd = varDsc->lvRefCntWtd();
+                BasicBlock::weight_t refCntWtd = varDsc->lvRefCntWtd();
                 if (varDsc->lvIsRegArg)
                 {
                     // Don't count the initial reference for register params.  In those cases,
@@ -1861,8 +1861,8 @@ void LinearScan::identifyCandidates()
         // the lclVars allocated to the frame pointer.
         // => Here, estimate of the EBP refCnt and weighted refCnt is a wild guess.
         //
-        unsigned refCntEBP    = refCntReg / 8;
-        unsigned refCntWtdEBP = refCntWtdReg / 8;
+        unsigned             refCntEBP    = refCntReg / 8;
+        BasicBlock::weight_t refCntWtdEBP = refCntWtdReg / 8;
 
         doDoubleAlign =
             compiler->shouldDoubleAlign(refCntStk, refCntEBP, refCntWtdEBP, refCntStkParam, refCntWtdStkDbl);
@@ -3297,7 +3297,9 @@ regNumber LinearScan::tryAllocateFreeReg(Interval* currentInterval, RefPosition*
 //
 // Note: This helper is designed to be used only from allocateBusyReg() and canSpillDoubleReg()
 //
-bool LinearScan::canSpillReg(RegRecord* physRegRecord, LsraLocation refLocation, unsigned* recentAssignedRefWeight)
+bool LinearScan::canSpillReg(RegRecord*            physRegRecord,
+                             LsraLocation          refLocation,
+                             BasicBlock::weight_t* recentAssignedRefWeight)
 {
     assert(physRegRecord->assignedInterval != nullptr);
     RefPosition* recentAssignedRef = physRegRecord->assignedInterval->recentRefPosition;
@@ -3335,14 +3337,14 @@ bool LinearScan::canSpillReg(RegRecord* physRegRecord, LsraLocation refLocation,
 //    This helper is designed to be used only from allocateBusyReg() and canSpillDoubleReg().
 //    The recentAssignedRefWeight is not updated if either register cannot be spilled.
 //
-bool LinearScan::canSpillDoubleReg(RegRecord*   physRegRecord,
-                                   LsraLocation refLocation,
-                                   unsigned*    recentAssignedRefWeight)
+bool LinearScan::canSpillDoubleReg(RegRecord*            physRegRecord,
+                                   LsraLocation          refLocation,
+                                   BasicBlock::weight_t* recentAssignedRefWeight)
 {
     assert(genIsValidDoubleReg(physRegRecord->regNum));
-    bool     retVal  = true;
-    unsigned weight  = BB_ZERO_WEIGHT;
-    unsigned weight2 = BB_ZERO_WEIGHT;
+    bool                 retVal  = true;
+    BasicBlock::weight_t weight  = BB_ZERO_WEIGHT;
+    BasicBlock::weight_t weight2 = BB_ZERO_WEIGHT;
 
     RegRecord* physRegRecord2 = findAnotherHalfRegRec(physRegRecord);
 
@@ -3686,9 +3688,9 @@ regNumber LinearScan::allocateBusyReg(Interval* current, RefPosition* refPositio
 #ifdef TARGET_ARM
     RegRecord* farthestRefPhysRegRecord2 = nullptr;
 #endif
-    LsraLocation farthestLocation = MinLocation;
-    LsraLocation refLocation      = refPosition->nodeLocation;
-    unsigned     farthestRefPosWeight;
+    LsraLocation         farthestLocation = MinLocation;
+    LsraLocation         refLocation      = refPosition->nodeLocation;
+    BasicBlock::weight_t farthestRefPosWeight;
     if (allocateIfProfitable)
     {
         // If allocating a reg is optional, we will consider those ref positions
@@ -3703,7 +3705,7 @@ regNumber LinearScan::allocateBusyReg(Interval* current, RefPosition* refPositio
         // initialized to MinLocation, the first available ref position
         // will be selected as spill candidate and its weight as the
         // farthestRefPosWeight.
-        farthestRefPosWeight = BB_MAX_WEIGHT;
+        farthestRefPosWeight = FloatingPointUtils::infinite_float();
     }
 
     for (regNumber regNum : Registers(regType))
@@ -3725,10 +3727,10 @@ regNumber LinearScan::allocateBusyReg(Interval* current, RefPosition* refPositio
 
         // We've passed the preliminary checks for a spill candidate.
         // Now, if we have a recentAssignedRef, check that it is going to be OK to spill it.
-        Interval*    assignedInterval        = physRegRecord->assignedInterval;
-        unsigned     recentAssignedRefWeight = BB_ZERO_WEIGHT;
-        RefPosition* recentAssignedRef       = nullptr;
-        RefPosition* recentAssignedRef2      = nullptr;
+        Interval*            assignedInterval        = physRegRecord->assignedInterval;
+        BasicBlock::weight_t recentAssignedRefWeight = BB_ZERO_WEIGHT;
+        RefPosition*         recentAssignedRef       = nullptr;
+        RefPosition*         recentAssignedRef2      = nullptr;
 #ifdef TARGET_ARM
         if (current->registerType == TYP_DOUBLE)
         {
@@ -9176,13 +9178,13 @@ void LinearScan::updateLsraStat(LsraStat stat, unsigned bbNum)
 //
 void LinearScan::dumpLsraStats(FILE* file)
 {
-    unsigned sumSpillCount         = 0;
-    unsigned sumCopyRegCount       = 0;
-    unsigned sumResolutionMovCount = 0;
-    unsigned sumSplitEdgeCount     = 0;
-    UINT64   wtdSpillCount         = 0;
-    UINT64   wtdCopyRegCount       = 0;
-    UINT64   wtdResolutionMovCount = 0;
+    unsigned             sumSpillCount         = 0;
+    unsigned             sumCopyRegCount       = 0;
+    unsigned             sumResolutionMovCount = 0;
+    unsigned             sumSplitEdgeCount     = 0;
+    BasicBlock::weight_t wtdSpillCount         = 0;
+    BasicBlock::weight_t wtdCopyRegCount       = 0;
+    BasicBlock::weight_t wtdResolutionMovCount = 0;
 
     fprintf(file, "----------\n");
     fprintf(file, "LSRA Stats");
@@ -9227,18 +9229,18 @@ void LinearScan::dumpLsraStats(FILE* file)
         sumResolutionMovCount += resolutionMovCount;
         sumSplitEdgeCount += splitEdgeCount;
 
-        wtdSpillCount += (UINT64)spillCount * block->bbWeight;
-        wtdCopyRegCount += (UINT64)copyRegCount * block->bbWeight;
-        wtdResolutionMovCount += (UINT64)resolutionMovCount * block->bbWeight;
+        wtdSpillCount += spillCount * block->bbWeight;
+        wtdCopyRegCount += copyRegCount * block->bbWeight;
+        wtdResolutionMovCount += resolutionMovCount * block->bbWeight;
     }
 
     fprintf(file, "Total Tracked Vars:  %d\n", compiler->lvaTrackedCount);
     fprintf(file, "Total Reg Cand Vars: %d\n", regCandidateVarCount);
     fprintf(file, "Total number of Intervals: %d\n", static_cast<unsigned>(intervals.size() - 1));
     fprintf(file, "Total number of RefPositions: %d\n", static_cast<unsigned>(refPositions.size() - 1));
-    fprintf(file, "Total Spill Count: %d    Weighted: %I64u\n", sumSpillCount, wtdSpillCount);
-    fprintf(file, "Total CopyReg Count: %d   Weighted: %I64u\n", sumCopyRegCount, wtdCopyRegCount);
-    fprintf(file, "Total ResolutionMov Count: %d    Weighted: %I64u\n", sumResolutionMovCount, wtdResolutionMovCount);
+    fprintf(file, "Total Spill Count: %d    Weighted: %f\n", sumSpillCount, wtdSpillCount);
+    fprintf(file, "Total CopyReg Count: %d   Weighted: %f\n", sumCopyRegCount, wtdCopyRegCount);
+    fprintf(file, "Total ResolutionMov Count: %d    Weighted: %f\n", sumResolutionMovCount, wtdResolutionMovCount);
     fprintf(file, "Total number of split edges: %d\n", sumSplitEdgeCount);
 
     // compute total number of spill temps created
index 0b1d994..0d443f9 100644 (file)
@@ -976,7 +976,9 @@ private:
     bool isSecondHalfReg(RegRecord* regRec, Interval* interval);
     RegRecord* getSecondHalfRegRec(RegRecord* regRec);
     RegRecord* findAnotherHalfRegRec(RegRecord* regRec);
-    bool canSpillDoubleReg(RegRecord* physRegRecord, LsraLocation refLocation, unsigned* recentAssignedRefWeight);
+    bool canSpillDoubleReg(RegRecord*            physRegRecord,
+                           LsraLocation          refLocation,
+                           BasicBlock::weight_t* recentAssignedRefWeight);
     void unassignDoublePhysReg(RegRecord* doubleRegRecord);
 #endif
     void updateAssignedInterval(RegRecord* reg, Interval* interval, RegisterType regType);
@@ -984,7 +986,7 @@ private:
     bool canRestorePreviousInterval(RegRecord* regRec, Interval* assignedInterval);
     bool isAssignedToInterval(Interval* interval, RegRecord* regRec);
     bool isRefPositionActive(RefPosition* refPosition, LsraLocation refLocation);
-    bool canSpillReg(RegRecord* physRegRecord, LsraLocation refLocation, unsigned* recentAssignedRefWeight);
+    bool canSpillReg(RegRecord* physRegRecord, LsraLocation refLocation, BasicBlock::weight_t* recentAssignedRefWeight);
     bool isRegInUse(RegRecord* regRec, RefPosition* refPosition);
 
     // insert refpositions representing prolog zero-inits which will be added later
@@ -1135,7 +1137,7 @@ private:
 
     void associateRefPosWithInterval(RefPosition* rp);
 
-    unsigned getWeight(RefPosition* refPos);
+    BasicBlock::weight_t getWeight(RefPosition* refPos);
 
     /*****************************************************************************
      * Register management
index 25d6bfb..c688677 100644 (file)
@@ -18117,7 +18117,7 @@ void Compiler::fgRetypeImplicitByRefArgs()
                 // arguments to calls. We undo promotion unless we see enough non-call uses.
                 //
                 const unsigned totalAppearances = varDsc->lvRefCnt(RCS_EARLY);
-                const unsigned callAppearances  = varDsc->lvRefCntWtd(RCS_EARLY);
+                const unsigned callAppearances  = (unsigned)varDsc->lvRefCntWtd(RCS_EARLY);
                 assert(totalAppearances >= callAppearances);
                 const unsigned nonCallAppearances = totalAppearances - callAppearances;
 
index f5dc3f2..2684b49 100644 (file)
@@ -1393,11 +1393,11 @@ void Compiler::optValnumCSE_Availablity()
 
                 if (IS_CSE_INDEX(tree->gtCSEnum))
                 {
-                    unsigned CSEnum               = GET_CSE_INDEX(tree->gtCSEnum);
-                    unsigned CseAvailBit          = genCSEnum2bit(CSEnum) * 2;
-                    unsigned cseAvailCrossCallBit = CseAvailBit + 1;
-                    CSEdsc*  desc                 = optCSEfindDsc(CSEnum);
-                    unsigned stmw                 = block->getBBWeight(this);
+                    unsigned             CSEnum               = GET_CSE_INDEX(tree->gtCSEnum);
+                    unsigned             CseAvailBit          = genCSEnum2bit(CSEnum) * 2;
+                    unsigned             cseAvailCrossCallBit = CseAvailBit + 1;
+                    CSEdsc*              desc                 = optCSEfindDsc(CSEnum);
+                    BasicBlock::weight_t stmw                 = block->getBBWeight(this);
 
                     isUse = BitVecOps::IsMember(cseLivenessTraits, available_cses, CseAvailBit);
                     isDef = !isUse; // If it isn't a CSE use, it is a CSE def
@@ -1704,8 +1704,8 @@ class CSE_Heuristic
     Compiler* m_pCompiler;
     unsigned  m_addCSEcount;
 
-    unsigned               aggressiveRefCnt;
-    unsigned               moderateRefCnt;
+    BasicBlock::weight_t   aggressiveRefCnt;
+    BasicBlock::weight_t   moderateRefCnt;
     unsigned               enregCount; // count of the number of predicted enregistered variables
     bool                   largeFrame;
     bool                   hugeFrame;
@@ -1965,8 +1965,8 @@ public:
         if (m_pCompiler->verbose)
         {
             printf("\n");
-            printf("Aggressive CSE Promotion cutoff is %u\n", aggressiveRefCnt);
-            printf("Moderate CSE Promotion cutoff is %u\n", moderateRefCnt);
+            printf("Aggressive CSE Promotion cutoff is %f\n", aggressiveRefCnt);
+            printf("Moderate CSE Promotion cutoff is %f\n", moderateRefCnt);
             printf("enregCount is %u\n", enregCount);
             printf("Framesize estimate is 0x%04X\n", frameSize);
             printf("We have a %s frame\n", hugeFrame ? "huge" : (largeFrame ? "large" : "small"));
@@ -2001,9 +2001,9 @@ public:
                 Compiler::CSEdsc* dsc  = sortTab[cnt];
                 GenTree*          expr = dsc->csdTree;
 
-                unsigned def;
-                unsigned use;
-                unsigned cost;
+                BasicBlock::weight_t def;
+                BasicBlock::weight_t use;
+                unsigned             cost;
 
                 if (CodeOptKind() == Compiler::SMALL_CODE)
                 {
@@ -2020,14 +2020,14 @@ public:
 
                 if (!Compiler::Is_Shared_Const_CSE(dsc->csdHashKey))
                 {
-                    printf("CSE #%02u, {$%-3x, $%-3x} useCnt=%d: [def=%3u, use=%3u, cost=%3u%s]\n        :: ",
+                    printf("CSE #%02u, {$%-3x, $%-3x} useCnt=%d: [def=%3f, use=%3f, cost=%3u%s]\n        :: ",
                            dsc->csdIndex, dsc->csdHashKey, dsc->defExcSetPromise, dsc->csdUseCount, def, use, cost,
                            dsc->csdLiveAcrossCall ? ", call" : "      ");
                 }
                 else
                 {
                     size_t kVal = Compiler::Decode_Shared_Const_CSE_Value(dsc->csdHashKey);
-                    printf("CSE #%02u, {K_%p} useCnt=%d: [def=%3u, use=%3u, cost=%3u%s]\n        :: ", dsc->csdIndex,
+                    printf("CSE #%02u, {K_%p} useCnt=%d: [def=%3f, use=%3f, cost=%3u%s]\n        :: ", dsc->csdIndex,
                            dspPtr(kVal), dsc->csdUseCount, def, use, cost,
                            dsc->csdLiveAcrossCall ? ", call" : "      ");
                 }
@@ -2050,11 +2050,11 @@ public:
         CSE_Heuristic*    m_context;
         Compiler::CSEdsc* m_CseDsc;
 
-        unsigned m_cseIndex;
-        unsigned m_defCount;
-        unsigned m_useCount;
-        unsigned m_Cost;
-        unsigned m_Size;
+        unsigned             m_cseIndex;
+        BasicBlock::weight_t m_defCount;
+        BasicBlock::weight_t m_useCount;
+        unsigned             m_Cost;
+        unsigned             m_Size;
 
         // When this Candidate is successfully promoted to a CSE we record
         // the following information about what category was used when promoting it.
@@ -2104,11 +2104,11 @@ public:
         {
             return m_cseIndex;
         }
-        unsigned DefCount()
+        BasicBlock::weight_t DefCount()
         {
             return m_defCount;
         }
-        unsigned UseCount()
+        BasicBlock::weight_t UseCount()
         {
             return m_useCount;
         }
@@ -2336,14 +2336,14 @@ public:
         unsigned cse_def_cost;
         unsigned cse_use_cost;
 
-        unsigned no_cse_cost    = 0;
-        unsigned yes_cse_cost   = 0;
-        unsigned extra_yes_cost = 0;
-        unsigned extra_no_cost  = 0;
+        BasicBlock::weight_t no_cse_cost    = 0;
+        BasicBlock::weight_t yes_cse_cost   = 0;
+        unsigned             extra_yes_cost = 0;
+        unsigned             extra_no_cost  = 0;
 
         // The 'cseRefCnt' is the RefCnt that we will have if we promote this CSE into a new LclVar
         // Each CSE Def will contain two Refs and each CSE Use will have one Ref of this new LclVar
-        unsigned cseRefCnt = (candidate->DefCount() * 2) + candidate->UseCount();
+        BasicBlock::weight_t cseRefCnt = (candidate->DefCount() * 2) + candidate->UseCount();
 
         bool      canEnregister = true;
         unsigned  slotCount     = 1;
@@ -2381,7 +2381,7 @@ public:
 #ifdef DEBUG
                 if (m_pCompiler->verbose)
                 {
-                    printf("Aggressive CSE Promotion (%u >= %u)\n", cseRefCnt, aggressiveRefCnt);
+                    printf("Aggressive CSE Promotion (%f >= %f)\n", cseRefCnt, aggressiveRefCnt);
                 }
 #endif
                 // With aggressive promotion we expect that the candidate will be enregistered
@@ -2480,7 +2480,7 @@ public:
 #ifdef DEBUG
                 if (m_pCompiler->verbose)
                 {
-                    printf("Aggressive CSE Promotion (%u >= %u)\n", cseRefCnt, aggressiveRefCnt);
+                    printf("Aggressive CSE Promotion (%f >= %f)\n", cseRefCnt, aggressiveRefCnt);
                 }
 #endif
                 // With aggressive promotion we expect that the candidate will be enregistered
@@ -2499,7 +2499,7 @@ public:
 #ifdef DEBUG
                     if (m_pCompiler->verbose)
                     {
-                        printf("Moderate CSE Promotion (CSE never live at call) (%u >= %u)\n", cseRefCnt,
+                        printf("Moderate CSE Promotion (CSE never live at call) (%f >= %f)\n", cseRefCnt,
                                moderateRefCnt);
                     }
 #endif
@@ -2511,7 +2511,7 @@ public:
 #ifdef DEBUG
                     if (m_pCompiler->verbose)
                     {
-                        printf("Moderate CSE Promotion (%s) (%u >= %u)\n",
+                        printf("Moderate CSE Promotion (%s) (%f >= %f)\n",
                                candidate->LiveAcrossCall() ? "CSE is live across a call" : "not enregisterable",
                                cseRefCnt, moderateRefCnt);
                     }
@@ -2544,7 +2544,7 @@ public:
 #ifdef DEBUG
                     if (m_pCompiler->verbose)
                     {
-                        printf("Conservative CSE Promotion (%s) (%u < %u)\n",
+                        printf("Conservative CSE Promotion (%s) (%f < %f)\n",
                                candidate->LiveAcrossCall() ? "CSE is live across a call" : "not enregisterable",
                                cseRefCnt, moderateRefCnt);
                     }
@@ -2557,7 +2557,7 @@ public:
 #ifdef DEBUG
                     if (m_pCompiler->verbose)
                     {
-                        printf("Conservative CSE Promotion (%u < %u)\n", cseRefCnt, moderateRefCnt);
+                        printf("Conservative CSE Promotion (%f < %f)\n", cseRefCnt, moderateRefCnt);
                     }
 #endif
                     cse_def_cost = 2;
@@ -2589,7 +2589,7 @@ public:
             if ((enregCount < (CNT_CALLEE_ENREG * 3 / 2)) || varTypeIsFloating(candidate->Expr()->TypeGet()))
             {
                 // Extra cost in case we have to spill/restore a caller saved register
-                extra_yes_cost = BB_UNITY_WEIGHT;
+                extra_yes_cost = BB_UNITY_WEIGHT_UNSIGNED;
 
                 if (cseRefCnt < moderateRefCnt) // If Conservative CSE promotion
                 {
@@ -2623,7 +2623,7 @@ public:
                     cse_use_cost += 2;
                 }
 
-                extra_yes_cost = (BB_UNITY_WEIGHT * spillSimdRegInProlog) * 3;
+                extra_yes_cost = (BB_UNITY_WEIGHT_UNSIGNED * spillSimdRegInProlog) * 3;
             }
 #endif // FEATURE_SIMD
         }
@@ -2649,14 +2649,14 @@ public:
 #ifdef DEBUG
         if (m_pCompiler->verbose)
         {
-            printf("cseRefCnt=%d, aggressiveRefCnt=%d, moderateRefCnt=%d\n", cseRefCnt, aggressiveRefCnt,
+            printf("cseRefCnt=%f, aggressiveRefCnt=%f, moderateRefCnt=%f\n", cseRefCnt, aggressiveRefCnt,
                    moderateRefCnt);
-            printf("defCnt=%d, useCnt=%d, cost=%d, size=%d%s\n", candidate->DefCount(), candidate->UseCount(),
+            printf("defCnt=%f, useCnt=%f, cost=%d, size=%d%s\n", candidate->DefCount(), candidate->UseCount(),
                    candidate->Cost(), candidate->Size(), candidate->LiveAcrossCall() ? ", LiveAcrossCall" : "");
             printf("def_cost=%d, use_cost=%d, extra_no_cost=%d, extra_yes_cost=%d\n", cse_def_cost, cse_use_cost,
                    extra_no_cost, extra_yes_cost);
 
-            printf("CSE cost savings check (%u >= %u) %s\n", no_cse_cost, yes_cse_cost,
+            printf("CSE cost savings check (%f >= %f) %s\n", no_cse_cost, yes_cse_cost,
                    (no_cse_cost >= yes_cse_cost) ? "passes" : "fails");
         }
 #endif // DEBUG
@@ -2673,7 +2673,7 @@ public:
             /* In stress mode we will make some extra CSEs */
             if (no_cse_cost > 0)
             {
-                int percentage = (no_cse_cost * 100) / yes_cse_cost;
+                int percentage = (int)((no_cse_cost * 100) / yes_cse_cost);
 
                 if (m_pCompiler->compStressCompile(Compiler::STRESS_MAKE_CSE, percentage))
                 {
@@ -2719,14 +2719,14 @@ public:
     // It will also put cse0 into SSA if there is just one def.
     void PerformCSE(CSE_Candidate* successfulCandidate)
     {
-        unsigned cseRefCnt = (successfulCandidate->DefCount() * 2) + successfulCandidate->UseCount();
+        BasicBlock::weight_t cseRefCnt = (successfulCandidate->DefCount() * 2) + successfulCandidate->UseCount();
 
         if (successfulCandidate->LiveAcrossCall() != 0)
         {
             // As we introduce new LclVars for these CSE we slightly
             // increase the cutoffs for aggressive and moderate CSE's
             //
-            int incr = BB_UNITY_WEIGHT;
+            BasicBlock::weight_t incr = BB_UNITY_WEIGHT;
 
             if (cseRefCnt > aggressiveRefCnt)
             {
index 49ced3a..b7562f6 100644 (file)
@@ -133,7 +133,7 @@ void Compiler::optMarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk, bool ex
        Thus we increase each block by 7 times the weight of
        the loop header block,
        if the loops are all properly formed gives us:
-       (assuming that BB_LOOP_WEIGHT is 8)
+       (assuming that BB_LOOP_WEIGHT_SCALE is 8)
 
           1 -- non loop basic block
           8 -- single loop nesting
@@ -217,7 +217,7 @@ void Compiler::optMarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk, bool ex
             {
                 noway_assert(curBlk->bbWeight > BB_ZERO_WEIGHT);
 
-                unsigned weight;
+                BasicBlock::weight_t weight;
 
                 if (curBlk->hasProfileWeight())
                 {
@@ -228,11 +228,11 @@ void Compiler::optMarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk, bool ex
                 {
                     if (dominates)
                     {
-                        weight = curBlk->bbWeight * BB_LOOP_WEIGHT;
+                        weight = curBlk->bbWeight * BB_LOOP_WEIGHT_SCALE;
                     }
                     else
                     {
-                        weight = curBlk->bbWeight * (BB_LOOP_WEIGHT / 2);
+                        weight = curBlk->bbWeight * (BB_LOOP_WEIGHT_SCALE / 2);
                     }
 
                     //
@@ -357,7 +357,7 @@ void Compiler::optUnmarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk)
         //
         if (!curBlk->isRunRarely() && fgReachable(curBlk, begBlk) && fgReachable(begBlk, curBlk))
         {
-            unsigned weight = curBlk->bbWeight;
+            BasicBlock::weight_t weight = curBlk->bbWeight;
 
             // Don't unmark blocks that are set to BB_MAX_WEIGHT
             // Don't unmark blocks when we are using profile weights
@@ -372,7 +372,7 @@ void Compiler::optUnmarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk)
                 {
                     /* Merging of blocks can disturb the Dominates
                        information (see RAID #46649) */
-                    if (weight < BB_LOOP_WEIGHT)
+                    if (weight < BB_LOOP_WEIGHT_SCALE)
                     {
                         weight *= 2;
                     }
@@ -384,9 +384,9 @@ void Compiler::optUnmarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk)
                     weight = BB_MAX_WEIGHT;
                 }
 
-                assert(weight >= BB_LOOP_WEIGHT);
+                assert(weight >= BB_LOOP_WEIGHT_SCALE);
 
-                curBlk->modifyBBWeight(weight / BB_LOOP_WEIGHT);
+                curBlk->modifyBBWeight(weight / BB_LOOP_WEIGHT_SCALE);
             }
 
 #ifdef DEBUG
@@ -3782,7 +3782,7 @@ void Compiler::optUnrollLoops()
                         goto DONE_LOOP;
                     }
                     // Block weight should no longer have the loop multiplier
-                    newBlock->modifyBBWeight(newBlock->bbWeight / BB_LOOP_WEIGHT);
+                    newBlock->modifyBBWeight(newBlock->bbWeight / BB_LOOP_WEIGHT_SCALE);
                     // Jump dests are set in a post-pass; make sure CloneBlockState hasn't tried to set them.
                     assert(newBlock->bbJumpDest == nullptr);
 
@@ -4162,7 +4162,7 @@ void Compiler::fgOptWhileLoop(BasicBlock* block)
     gtPrepareCost(condTree);
     unsigned estDupCostSz = condTree->GetCostSz();
 
-    double loopIterations = (double)BB_LOOP_WEIGHT;
+    double loopIterations = (double)BB_LOOP_WEIGHT_SCALE;
 
     bool                 allProfileWeightsAreValid = false;
     BasicBlock::weight_t weightBlock               = block->bbWeight;
@@ -5154,21 +5154,13 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context)
             optLoopTable[loopInd].lpEntry->bbNum, optLoopTable[loopInd].lpBottom->bbNum);
 
     // Determine the depth of the loop, so we can properly weight blocks added (outside the cloned loop blocks).
-    unsigned depth         = optLoopDepth(loopInd);
-    unsigned ambientWeight = 1;
+    unsigned             depth         = optLoopDepth(loopInd);
+    BasicBlock::weight_t ambientWeight = 1;
     for (unsigned j = 0; j < depth; j++)
     {
-        unsigned lastWeight = ambientWeight;
-        ambientWeight *= BB_LOOP_WEIGHT;
-        // If the multiplication overflowed, stick at max.
-        // (Strictly speaking, a multiplication could overflow and still have a result
-        // that is >= lastWeight...but if so, the original weight must be pretty large,
-        // and it got bigger, so that's OK.)
-        if (ambientWeight < lastWeight)
-        {
-            ambientWeight = BB_MAX_WEIGHT;
-            break;
-        }
+        BasicBlock::weight_t lastWeight = ambientWeight;
+        ambientWeight *= BB_LOOP_WEIGHT_SCALE;
+        assert(ambientWeight > lastWeight);
     }
 
     // If we're in a non-natural loop, the ambient weight might be higher than we computed above.
@@ -5416,7 +5408,7 @@ BasicBlock* Compiler::optInsertLoopChoiceConditions(LoopCloneContext* context,
     return curCond;
 }
 
-void Compiler::optEnsureUniqueHead(unsigned loopInd, unsigned ambientWeight)
+void Compiler::optEnsureUniqueHead(unsigned loopInd, BasicBlock::weight_t ambientWeight)
 {
     BasicBlock* h = optLoopTable[loopInd].lpHead;
     BasicBlock* t = optLoopTable[loopInd].lpTop;
@@ -7185,8 +7177,8 @@ void Compiler::optHoistLoopBlocks(unsigned loopNum, ArrayStack<BasicBlock*>* blo
 
     while (!blocks->Empty())
     {
-        BasicBlock* block       = blocks->Pop();
-        unsigned    blockWeight = block->getBBWeight(this);
+        BasicBlock*          block       = blocks->Pop();
+        BasicBlock::weight_t blockWeight = block->getBBWeight(this);
 
         JITDUMP("    optHoistLoopBlocks " FMT_BB " (weight=%6s) of loop L%02u <" FMT_BB ".." FMT_BB
                 ">, firstBlock is %s\n",
@@ -7412,8 +7404,8 @@ void Compiler::fgCreateLoopPreHeader(unsigned lnum)
 
             if (allValidProfileWeights)
             {
-                double loopEnteredCount;
-                double loopSkippedCount;
+                BasicBlock::weight_t loopEnteredCount;
+                BasicBlock::weight_t loopSkippedCount;
 
                 if (fgHaveValidEdgeWeights)
                 {
@@ -7422,21 +7414,19 @@ void Compiler::fgCreateLoopPreHeader(unsigned lnum)
                     noway_assert(edgeToNext != nullptr);
                     noway_assert(edgeToJump != nullptr);
 
-                    loopEnteredCount =
-                        ((double)edgeToNext->edgeWeightMin() + (double)edgeToNext->edgeWeightMax()) / 2.0;
-                    loopSkippedCount =
-                        ((double)edgeToJump->edgeWeightMin() + (double)edgeToJump->edgeWeightMax()) / 2.0;
+                    loopEnteredCount = (edgeToNext->edgeWeightMin() + edgeToNext->edgeWeightMax()) / 2.0f;
+                    loopSkippedCount = (edgeToJump->edgeWeightMin() + edgeToJump->edgeWeightMax()) / 2.0f;
                 }
                 else
                 {
-                    loopEnteredCount = (double)head->bbNext->bbWeight;
-                    loopSkippedCount = (double)head->bbJumpDest->bbWeight;
+                    loopEnteredCount = head->bbNext->bbWeight;
+                    loopSkippedCount = head->bbJumpDest->bbWeight;
                 }
 
-                double loopTakenRatio = loopEnteredCount / (loopEnteredCount + loopSkippedCount);
+                BasicBlock::weight_t loopTakenRatio = loopEnteredCount / (loopEnteredCount + loopSkippedCount);
 
                 // Calculate a good approximation of the preHead's block weight
-                unsigned preHeadWeight = (unsigned)(((double)head->bbWeight * loopTakenRatio) + 0.5);
+                BasicBlock::weight_t preHeadWeight = (head->bbWeight * loopTakenRatio) + 0.5f;
                 preHead->setBBWeight(max(preHeadWeight, 1));
                 noway_assert(!preHead->isRunRarely());
             }
index 8dc4930..5e609b6 100644 (file)
@@ -61,8 +61,11 @@ DWORD Compiler::getCanDoubleAlign()
 //    Otherwise, we compare the weighted ref count of ebp-enregistered variables against double the
 //    ref count for double-aligned values.
 //
-bool Compiler::shouldDoubleAlign(
-    unsigned refCntStk, unsigned refCntEBP, unsigned refCntWtdEBP, unsigned refCntStkParam, unsigned refCntWtdStkDbl)
+bool Compiler::shouldDoubleAlign(unsigned             refCntStk,
+                                 unsigned             refCntEBP,
+                                 BasicBlock::weight_t refCntWtdEBP,
+                                 unsigned             refCntStkParam,
+                                 BasicBlock::weight_t refCntWtdStkDbl)
 {
     bool           doDoubleAlign        = false;
     const unsigned DBL_ALIGN_SETUP_SIZE = 7;
@@ -78,10 +81,10 @@ bool Compiler::shouldDoubleAlign(
 
     JITDUMP("\nDouble alignment:\n");
     JITDUMP("  Bytes that could be saved by not using EBP frame: %i\n", bytesUsed);
-    JITDUMP("  Sum of weighted ref counts for EBP enregistered variables: %i\n", refCntWtdEBP);
-    JITDUMP("  Sum of weighted ref counts for weighted stack based doubles: %i\n", refCntWtdStkDbl);
+    JITDUMP("  Sum of weighted ref counts for EBP enregistered variables: %f\n", refCntWtdEBP);
+    JITDUMP("  Sum of weighted ref counts for weighted stack based doubles: %f\n", refCntWtdStkDbl);
 
-    if (bytesUsed > ((refCntWtdStkDbl * misaligned_weight) / BB_UNITY_WEIGHT))
+    if (((BasicBlock::weight_t)bytesUsed) > ((refCntWtdStkDbl * misaligned_weight) / BB_UNITY_WEIGHT))
     {
         JITDUMP("    Predicting not to double-align ESP to save %d bytes of code.\n", bytesUsed);
     }
index 1f041a3..c973f6f 100644 (file)
@@ -646,7 +646,7 @@ const char* genES2str(BitVecTraits* traits, EXPSET_TP set)
     return temp;
 }
 
-const char* refCntWtd2str(unsigned refCntWtd)
+const char* refCntWtd2str(BasicBlock::weight_t refCntWtd)
 {
     const int    bufSize = 17;
     static char  num1[bufSize];
@@ -663,16 +663,27 @@ const char* refCntWtd2str(unsigned refCntWtd)
     }
     else
     {
-        unsigned valueInt  = refCntWtd / BB_UNITY_WEIGHT;
-        unsigned valueFrac = refCntWtd % BB_UNITY_WEIGHT;
+        float scaledWeight = refCntWtd / BB_UNITY_WEIGHT; // weight expressed in units of BB_UNITY_WEIGHT
+        float intPart      = (float)floor(scaledWeight);  // integral part, used for the "whole number" format below
+        bool  isLarge      = intPart > 1e9;
+        bool  isSmall      = (scaledWeight < 1e-2) && (scaledWeight != 0); // test the value itself: floor() is never in (0, 0.01)
 
-        if (valueFrac == 0)
+        // Use g format for high dynamic range counts.
+        //
+        if (isLarge || isSmall)
         {
-            sprintf_s(temp, bufSize, "%u   ", valueInt);
+            sprintf_s(temp, bufSize, "%.2g", scaledWeight);
         }
         else
         {
-            sprintf_s(temp, bufSize, "%u.%02u", valueInt, (valueFrac * 100 / BB_UNITY_WEIGHT));
+            if (intPart == scaledWeight)
+            {
+                sprintf_s(temp, bufSize, "%lld   ", (long long)intPart);
+            }
+            else
+            {
+                sprintf_s(temp, bufSize, "%.2f", scaledWeight);
+            }
         }
     }
     return temp;
@@ -1836,6 +1847,18 @@ unsigned CountDigits(unsigned num, unsigned base /* = 10 */)
     return count;
 }
 
+unsigned CountDigits(float num, unsigned base /* = 10 */) // number of digits in the integer part of num, in 'base'
+{
+    assert(2 <= base && base <= 16); // sanity check
+    unsigned count = 1;              // values below 'base' (including negatives and NaN) report one digit
+    while ((num >= base) && ((num / base) < num)) // 2nd test fails only for +infinity, which division never shrinks
+    {
+        num /= base;
+        ++count;
+    }
+    return count;
+}
+
 #endif // DEBUG
 
 double FloatingPointUtils::convertUInt64ToDouble(unsigned __int64 uIntVal)
@@ -2080,6 +2103,21 @@ bool FloatingPointUtils::isNormal(float x)
 }
 
 //------------------------------------------------------------------------
+// infinite_float: return an infinite float value
+//
+// Returns:
+//    Infinite float value.
+//
+// Notes:
+//    This is the predefined constant HUGE_VALF on many platforms.
+//
+float FloatingPointUtils::infinite_float()
+{
+    int32_t bits = 0x7F800000; // IEEE 754 single-precision +infinity: sign 0, exponent all ones, mantissa 0
+    return *reinterpret_cast<float*>(&bits); // NOTE(review): reads the int32_t storage through a float*; assumes a 32-bit IEEE 754 float and that this pointer-cast punning is tolerated by every supported compiler -- confirm
+}
+
+//------------------------------------------------------------------------
 // hasPreciseReciprocal: check double for precise reciprocal. E.g. 2.0 <--> 0.5
 //
 // Arguments:
index 149ef88..112367a 100644 (file)
@@ -643,6 +643,7 @@ public:
  * Used when outputting strings.
  */
 unsigned CountDigits(unsigned num, unsigned base = 10);
+unsigned CountDigits(float num, unsigned base = 10); // float overload: counts digits of the integer part of num
 
 #endif // DEBUG
 
@@ -669,6 +670,8 @@ public:
     static bool hasPreciseReciprocal(double x);
 
     static bool hasPreciseReciprocal(float x);
+
+    static float infinite_float(); // returns the float with bit pattern 0x7F800000 (+infinity in IEEE 754)
 };
 
 // The CLR requires that critical section locks be initialized via its ClrCreateCriticalSection API...but