}
// We require that the weighted ref count be significant.
- if (varDsc->lvRefCntWtd() <= (BB_LOOP_WEIGHT * BB_UNITY_WEIGHT / 2))
+ if (varDsc->lvRefCntWtd() <= (BB_LOOP_WEIGHT_SCALE * BB_UNITY_WEIGHT / 2))
{
continue;
}
BlockSet paramImportantUseDom(BlockSetOps::MakeFull(this));
// This will be threshold for determining heavier-than-average uses
- unsigned paramAvgWtdRefDiv2 = (varDsc->lvRefCntWtd() + varDsc->lvRefCnt() / 2) / (varDsc->lvRefCnt() * 2);
+ BasicBlock::weight_t paramAvgWtdRefDiv2 =
+ (varDsc->lvRefCntWtd() + varDsc->lvRefCnt() / 2) / (varDsc->lvRefCnt() * 2);
bool paramFoundImportantUse = false;
/* dominates all the uses of the local variable */
/* Our default is to use the first block */
- BasicBlock* bestBlock = fgFirstBB;
- unsigned bestWeight = bestBlock->getBBWeight(this);
- BasicBlock* block = bestBlock;
+ BasicBlock* bestBlock = fgFirstBB;
+ BasicBlock::weight_t bestWeight = bestBlock->getBBWeight(this);
+ BasicBlock* block = bestBlock;
#ifdef DEBUG
if (verbose)
const char* dspToString(int blockNumPadding = 0);
#endif // DEBUG
- typedef unsigned weight_t; // Type used to hold block and edge weights
- // Note that for CLR v2.0 and earlier our
- // block weights were stored using unsigned shorts
+ // Type used to hold block and edge weights
+ typedef float weight_t;
-#define BB_UNITY_WEIGHT 100 // how much a normal execute once block weights
-#define BB_LOOP_WEIGHT 8 // how much more loops are weighted
-#define BB_ZERO_WEIGHT 0
-#define BB_MAX_WEIGHT UINT32_MAX // we're using an 'unsigned' for the weight
-#define BB_VERY_HOT_WEIGHT 256 // how many average hits a BB has (per BBT scenario run) for this block
- // to be considered as very hot
+#define BB_UNITY_WEIGHT 100.0f // how much a normal execute once block weighs
+#define BB_UNITY_WEIGHT_UNSIGNED 100 // integral alias of BB_UNITY_WEIGHT, for callers that need an unsigned weight
+#define BB_LOOP_WEIGHT_SCALE 8.0f // synthetic profile scale factor for loops
+#define BB_ZERO_WEIGHT 0.0f
+#define BB_MAX_WEIGHT FLT_MAX // maximum finite weight (requires <float.h>/<cfloat>) -- saturation semantics need rethinking.
weight_t bbWeight; // The dynamic execution weight of this block
}
// setBBProfileWeight -- Set the profile-derived weight for a basic block
- void setBBProfileWeight(unsigned weight)
+ void setBBProfileWeight(weight_t weight)
{
this->bbFlags |= BBF_PROF_WEIGHT;
this->bbWeight = weight;
if (compiler->fgHaveProfileData())
{
- printf("; with IBC profile data, edge weights are %s, and fgCalledCount is %u\n",
+ printf("; with IBC profile data, edge weights are %s, and fgCalledCount is %.0f\n",
compiler->fgHaveValidEdgeWeights ? "valid" : "invalid", compiler->fgCalledCount);
}
bool fgHaveProfileData();
void fgComputeProfileScale();
- bool fgGetProfileWeightForBasicBlock(IL_OFFSET offset, unsigned* weight);
+ bool fgGetProfileWeightForBasicBlock(IL_OFFSET offset, BasicBlock::weight_t* weight);
void fgInstrumentMethod();
public:
}
// fgProfileRunsCount - returns total number of scenario runs for the profile data
- // or BB_UNITY_WEIGHT when we aren't using profile data.
+ // or BB_UNITY_WEIGHT_UNSIGNED when we aren't using profile data.
unsigned fgProfileRunsCount()
{
- return fgIsUsingProfileWeights() ? fgNumProfileRuns : BB_UNITY_WEIGHT;
+ return fgIsUsingProfileWeights() ? fgNumProfileRuns : BB_UNITY_WEIGHT_UNSIGNED;
}
//-------- Insert a statement at the start or end of a basic block --------
// non-loop predecessors other than the head entry, create a new, empty block that goes (only) to the entry,
// and redirects the preds of the entry to this new block.) Sets the weight of the newly created block to
// "ambientWeight".
- void optEnsureUniqueHead(unsigned loopInd, unsigned ambientWeight);
+ void optEnsureUniqueHead(unsigned loopInd, BasicBlock::weight_t ambientWeight);
void optUnrollLoops(); // Unrolls loops (needs to have cost info)
unsigned short csdDefCount; // definition count
unsigned short csdUseCount; // use count (excluding the implicit uses at defs)
- unsigned csdDefWtCnt; // weighted def count
- unsigned csdUseWtCnt; // weighted use count (excluding the implicit uses at defs)
+ BasicBlock::weight_t csdDefWtCnt; // weighted def count
+ BasicBlock::weight_t csdUseWtCnt; // weighted use count (excluding the implicit uses at defs)
GenTree* csdTree; // treenode containing the 1st occurrence
Statement* csdStmt; // stmt containing the 1st occurrence
#endif // FEATURE_VALNUM_CSE
#if FEATURE_ANYCSE
- bool optDoCSE; // True when we have found a duplicate CSE tree
- bool optValnumCSE_phase; // True when we are executing the optValnumCSE_phase
- unsigned optCSECandidateTotal; // Grand total of CSE candidates for both Lexical and ValNum
- unsigned optCSECandidateCount; // Count of CSE's candidates, reset for Lexical and ValNum CSE's
- unsigned optCSEstart; // The first local variable number that is a CSE
- unsigned optCSEcount; // The total count of CSE's introduced.
- unsigned optCSEweight; // The weight of the current block when we are doing PerformCSE
+ bool optDoCSE; // True when we have found a duplicate CSE tree
+ bool optValnumCSE_phase; // True when we are executing the optValnumCSE_phase
+ unsigned optCSECandidateTotal; // Grand total of CSE candidates for both Lexical and ValNum
+ unsigned optCSECandidateCount; // Count of CSE's candidates, reset for Lexical and ValNum CSE's
+ unsigned optCSEstart; // The first local variable number that is a CSE
+ unsigned optCSEcount; // The total count of CSE's introduced.
+ BasicBlock::weight_t optCSEweight; // The weight of the current block when we are doing PerformCSE
bool optIsCSEcandidate(GenTree* tree);
return codeGen->doDoubleAlign();
}
DWORD getCanDoubleAlign();
- bool shouldDoubleAlign(unsigned refCntStk,
- unsigned refCntReg,
- unsigned refCntWtdReg,
- unsigned refCntStkParam,
- unsigned refCntWtdStkDbl);
+ bool shouldDoubleAlign(unsigned refCntStk,
+ unsigned refCntReg,
+ BasicBlock::weight_t refCntWtdReg,
+ unsigned refCntStkParam,
+ BasicBlock::weight_t refCntWtdStkDbl);
#endif // DOUBLE_ALIGN
bool IsFullPtrRegMapRequired()
#ifdef DEBUG
const char* genES2str(BitVecTraits* traits, EXPSET_TP set);
-const char* refCntWtd2str(unsigned refCntWtd);
+const char* refCntWtd2str(BasicBlock::weight_t refCntWtd);
#endif
/*
weight *= 2;
}
- unsigned newWeight = lvRefCntWtd(state) + weight;
- if (newWeight >= lvRefCntWtd(state))
- { // lvRefCntWtd is an "unsigned". Don't overflow it
- setLvRefCntWtd(newWeight, state);
- }
- else
- { // On overflow we assign UINT32_MAX
- setLvRefCntWtd(UINT32_MAX, state);
- }
+ BasicBlock::weight_t newWeight = lvRefCntWtd(state) + weight;
+ assert(newWeight >= lvRefCntWtd(state));
+ setLvRefCntWtd(newWeight, state);
}
}
// are we compiling for fast code, or are we compiling for blended code and
// inside a loop?
-// We return true for BLENDED_CODE if the Block executes more than BB_LOOP_WEIGHT/2
+// We return true for BLENDED_CODE if the Block executes more than (BB_LOOP_WEIGHT_SCALE / 2) times per call
inline bool Compiler::optFastCodeOrBlendedLoop(BasicBlock::weight_t bbWeight)
{
return (compCodeOpt() == FAST_CODE) ||
- ((compCodeOpt() == BLENDED_CODE) && (bbWeight > (BB_LOOP_WEIGHT / 2 * BB_UNITY_WEIGHT)));
+ ((compCodeOpt() == BLENDED_CODE) && (bbWeight > ((BB_LOOP_WEIGHT_SCALE / 2) * BB_UNITY_WEIGHT)));
}
// are we running on a Intel Pentium 4?
{
assert(block == m_compiler->compCurBB); // compCurBB must already be set.
assert(block->isEmpty() || block->IsLIR());
-
- m_blockWeight = block->getBBWeight(m_compiler);
- m_range = &LIR::AsRange(block);
+ m_range = &LIR::AsRange(block);
DecomposeRangeHelper();
}
//
// Arguments:
// compiler - The compiler context.
-// blockWeight - The weight of the block into which the range will be
-// inserted.
// range - The range to decompose.
//
// Return Value:
// None.
//
-void DecomposeLongs::DecomposeRange(Compiler* compiler, unsigned blockWeight, LIR::Range& range)
+void DecomposeLongs::DecomposeRange(Compiler* compiler, LIR::Range& range)
{
assert(compiler != nullptr);
DecomposeLongs decomposer(compiler);
- decomposer.m_blockWeight = blockWeight;
- decomposer.m_range = ⦥
+ decomposer.m_range = ⦥
decomposer.DecomposeRangeHelper();
}
else
{
LIR::Use src(Range(), &(cast->AsOp()->gtOp1), cast);
- unsigned lclNum = src.ReplaceWithLclVar(m_compiler, m_blockWeight);
+ unsigned lclNum = src.ReplaceWithLclVar(m_compiler);
loResult = src.Def();
// Save address to a temp. It is used in storeIndLow and storeIndHigh trees.
LIR::Use address(Range(), &tree->AsOp()->gtOp1, tree);
- address.ReplaceWithLclVar(m_compiler, m_blockWeight);
+ address.ReplaceWithLclVar(m_compiler);
JITDUMP("[DecomposeStoreInd]: Saving address tree to a temp var:\n");
DISPTREERANGE(Range(), address.Def());
if (!gtLong->AsOp()->gtOp1->OperIsLeaf())
{
LIR::Use op1(Range(), >Long->AsOp()->gtOp1, gtLong);
- op1.ReplaceWithLclVar(m_compiler, m_blockWeight);
+ op1.ReplaceWithLclVar(m_compiler);
JITDUMP("[DecomposeStoreInd]: Saving low data tree to a temp var:\n");
DISPTREERANGE(Range(), op1.Def());
}
if (!gtLong->AsOp()->gtOp2->OperIsLeaf())
{
LIR::Use op2(Range(), >Long->AsOp()->gtOp2, gtLong);
- op2.ReplaceWithLclVar(m_compiler, m_blockWeight);
+ op2.ReplaceWithLclVar(m_compiler);
JITDUMP("[DecomposeStoreInd]: Saving high data tree to a temp var:\n");
DISPTREERANGE(Range(), op2.Def());
}
GenTree* indLow = use.Def();
LIR::Use address(Range(), &indLow->AsOp()->gtOp1, indLow);
- address.ReplaceWithLclVar(m_compiler, m_blockWeight);
+ address.ReplaceWithLclVar(m_compiler);
JITDUMP("[DecomposeInd]: Saving addr tree to a temp var:\n");
DISPTREERANGE(Range(), address.Def());
// x = x << 32
LIR::Use loOp1Use(Range(), >Long->AsOp()->gtOp1, gtLong);
- loOp1Use.ReplaceWithLclVar(m_compiler, m_blockWeight);
+ loOp1Use.ReplaceWithLclVar(m_compiler);
hiResult = loOp1Use.Def();
Range().Remove(gtLong);
{
// If the rotate amount is 32, then swap hi and lo
LIR::Use loOp1Use(Range(), >Long->AsOp()->gtOp1, gtLong);
- loOp1Use.ReplaceWithLclVar(m_compiler, m_blockWeight);
+ loOp1Use.ReplaceWithLclVar(m_compiler);
LIR::Use hiOp1Use(Range(), >Long->AsOp()->gtOp2, gtLong);
- hiOp1Use.ReplaceWithLclVar(m_compiler, m_blockWeight);
+ hiOp1Use.ReplaceWithLclVar(m_compiler);
hiResult = loOp1Use.Def();
loResult = hiOp1Use.Def();
}
// Otherwise, we need to force var = call()
- unsigned varNum = use.ReplaceWithLclVar(m_compiler, m_blockWeight);
+ unsigned varNum = use.ReplaceWithLclVar(m_compiler);
m_compiler->lvaTable[varNum].lvIsMultiRegRet = true;
// Decompose the new LclVar use
else
{
LIR::Use opUse(Range(), edge, user);
- opUse.ReplaceWithLclVar(m_compiler, m_blockWeight);
+ opUse.ReplaceWithLclVar(m_compiler);
return *edge;
}
}
void PrepareForDecomposition();
void DecomposeBlock(BasicBlock* block);
- static void DecomposeRange(Compiler* compiler, unsigned blockWeight, LIR::Range& range);
+ static void DecomposeRange(Compiler* compiler, LIR::Range& range);
private:
inline LIR::Range& Range() const
// Data
Compiler* m_compiler;
- unsigned m_blockWeight;
LIR::Range* m_range;
};
// code to be 16-byte aligned.
//
// 1. For ngen code with IBC data, use 16-byte alignment if the method
- // has been called more than BB_VERY_HOT_WEIGHT times.
+ // has been called more than ScenarioHotWeight times.
// 2. For JITed code and ngen code without IBC data, use 16-byte alignment
// when the code is 16 bytes or smaller. We align small getters/setters
// because of they are penalized heavily on certain hardware when not 16-byte
//
if (emitComp->fgHaveProfileData())
{
- if (emitComp->fgCalledCount > (BB_VERY_HOT_WEIGHT * emitComp->fgProfileRunsCount()))
+ const float scenarioHotWeight = 256.0f;
+ if (emitComp->fgCalledCount > (scenarioHotWeight * emitComp->fgProfileRunsCount()))
{
allocMemFlag = CORJIT_ALLOCMEM_FLG_16BYTE_ALIGN;
}
//
if (calleeWeight < callSiteWeight)
{
- JITDUMP(" ... callee entry count %d is less than call site count %d\n", calleeWeight, callSiteWeight);
+ JITDUMP(" ... callee entry count %f is less than call site count %f\n", calleeWeight, callSiteWeight);
impInlineInfo->profileScaleState = InlineInfo::ProfileScaleState::UNAVAILABLE;
return;
}
impInlineInfo->profileScaleFactor = scale;
impInlineInfo->profileScaleState = InlineInfo::ProfileScaleState::KNOWN;
- JITDUMP(" call site count %u callee entry count %u scale %f\n", callSiteWeight, calleeWeight, scale);
+ JITDUMP(" call site count %f callee entry count %f scale %f\n", callSiteWeight, calleeWeight, scale);
}
//------------------------------------------------------------------------
// Returns:
// true if data was found
//
-bool Compiler::fgGetProfileWeightForBasicBlock(IL_OFFSET offset, unsigned* weightWB)
+bool Compiler::fgGetProfileWeightForBasicBlock(IL_OFFSET offset, BasicBlock::weight_t* weightWB)
{
noway_assert(weightWB != nullptr);
- unsigned weight = 0;
+ BasicBlock::weight_t weight = 0;
#ifdef DEBUG
unsigned hashSeed = fgStressBBProf();
}
else if (hash % 11 == 0)
{
- weight = (hash % 23) * (hash % 29) * (hash % 31);
+ weight = (BasicBlock::weight_t)(hash % 23) * (hash % 29) * (hash % 31);
}
else
{
- weight = (hash % 17) * (hash % 19);
+ weight = (BasicBlock::weight_t)(hash % 17) * (hash % 19);
}
// The first block is never given a weight of zero
if ((offset == 0) && (weight == 0))
{
- weight = 1 + (hash % 5);
+ weight = (BasicBlock::weight_t)1 + (hash % 5);
}
*weightWB = weight;
{
if (fgBlockCounts[i].ILOffset == offset)
{
- *weightWB = fgBlockCounts[i].ExecutionCount;
+ *weightWB = (BasicBlock::weight_t)fgBlockCounts[i].ExecutionCount;
return true;
}
}
curBBdesc->bbCodeOffs = curBBoffs;
curBBdesc->bbCodeOffsEnd = nxtBBoffs;
- unsigned profileWeight;
+ BasicBlock::weight_t profileWeight;
if (fgGetProfileWeightForBasicBlock(curBBoffs, &profileWeight))
{
{
if (impInlineInfo->profileScaleState == InlineInfo::ProfileScaleState::KNOWN)
{
- profileWeight = (unsigned)(impInlineInfo->profileScaleFactor * profileWeight);
+ double scaledWeight = impInlineInfo->profileScaleFactor * profileWeight;
+ profileWeight = (BasicBlock::weight_t)scaledWeight;
}
}
if (edge->edgeWeightMin() < BB_MAX_WEIGHT)
{
- printf("(%u", edge->edgeWeightMin());
+ printf("(%f", edge->edgeWeightMin());
}
else
{
{
if (edge->edgeWeightMax() < BB_MAX_WEIGHT)
{
- printf("..%u", edge->edgeWeightMax());
+ printf("..%f", edge->edgeWeightMax());
}
else
{
#if DEBUG
if (verbose)
{
- printf("We are using the Profile Weights and fgCalledCount is %d.\n", fgCalledCount);
+ printf("We are using the Profile Weights and fgCalledCount is %.0f.\n", fgCalledCount);
}
#endif
}
slop = BasicBlock::GetSlopFraction(bSrc, bDst) + 1;
if (bSrc->bbJumpKind == BBJ_COND)
{
- int diff;
- flowList* otherEdge;
+ BasicBlock::weight_t diff;
+ flowList* otherEdge;
if (bSrc->bbNext == bDst)
{
otherEdge = fgGetPredForBlock(bSrc->bbJumpDest, bSrc);
noway_assert(otherEdge->edgeWeightMin() <= otherEdge->edgeWeightMax());
// Adjust edge->flEdgeWeightMin up or adjust otherEdge->flEdgeWeightMax down
- diff = ((int)bSrc->bbWeight) - ((int)edge->edgeWeightMin() + (int)otherEdge->edgeWeightMax());
+ diff = bSrc->bbWeight - (edge->edgeWeightMin() + otherEdge->edgeWeightMax());
if (diff > 0)
{
assignOK &= edge->setEdgeWeightMinChecked(edge->edgeWeightMin() + diff, slop, &usedSlop);
}
// Adjust otherEdge->flEdgeWeightMin up or adjust edge->flEdgeWeightMax down
- diff = ((int)bSrc->bbWeight) - ((int)otherEdge->edgeWeightMin() + (int)edge->edgeWeightMax());
+ diff = bSrc->bbWeight - (otherEdge->edgeWeightMin() + edge->edgeWeightMax());
if (diff > 0)
{
assignOK &=
}
#ifdef DEBUG
// Now edge->flEdgeWeightMin and otherEdge->flEdgeWeightMax) should add up to bSrc->bbWeight
- diff = ((int)bSrc->bbWeight) - ((int)edge->edgeWeightMin() + (int)otherEdge->edgeWeightMax());
- noway_assert((-((int)slop) <= diff) && (diff <= ((int)slop)));
+ diff = bSrc->bbWeight - (edge->edgeWeightMin() + otherEdge->edgeWeightMax());
+ assert(((-slop) <= diff) && (diff <= slop));
// Now otherEdge->flEdgeWeightMin and edge->flEdgeWeightMax) should add up to bSrc->bbWeight
- diff = ((int)bSrc->bbWeight) - ((int)otherEdge->edgeWeightMin() + (int)edge->edgeWeightMax());
- noway_assert((-((int)slop) <= diff) && (diff <= ((int)slop)));
+ diff = bSrc->bbWeight - (otherEdge->edgeWeightMin() + edge->edgeWeightMax());
+ assert(((-slop) <= diff) && (diff <= slop));
#endif // DEBUG
}
}
bDstWeight -= fgCalledCount;
}
- UINT64 minEdgeWeightSum = 0;
- UINT64 maxEdgeWeightSum = 0;
+ BasicBlock::weight_t minEdgeWeightSum = 0;
+ BasicBlock::weight_t maxEdgeWeightSum = 0;
// Calculate the sums of the minimum and maximum edge weights
for (edge = bDst->bbPreds; edge != nullptr; edge = edge->flNext)
// otherMaxEdgesWeightSum is the sum of all of the other edges flEdgeWeightMax values
// This can be used to compute a lower bound for our minimum edge weight
noway_assert(maxEdgeWeightSum >= edge->edgeWeightMax());
- UINT64 otherMaxEdgesWeightSum = maxEdgeWeightSum - edge->edgeWeightMax();
+ BasicBlock::weight_t otherMaxEdgesWeightSum = maxEdgeWeightSum - edge->edgeWeightMax();
// otherMinEdgesWeightSum is the sum of all of the other edges flEdgeWeightMin values
// This can be used to compute an upper bound for our maximum edge weight
noway_assert(minEdgeWeightSum >= edge->edgeWeightMin());
- UINT64 otherMinEdgesWeightSum = minEdgeWeightSum - edge->edgeWeightMin();
+ BasicBlock::weight_t otherMinEdgesWeightSum = minEdgeWeightSum - edge->edgeWeightMin();
if (bDstWeight >= otherMaxEdgesWeightSum)
{
{
newWeightDest = (weightDest - weightJump);
}
- if (weightDest >= (BB_LOOP_WEIGHT * BB_UNITY_WEIGHT) / 2)
+ if (weightDest >= (BB_LOOP_WEIGHT_SCALE * BB_UNITY_WEIGHT) / 2)
{
- newWeightDest = (weightDest * 2) / (BB_LOOP_WEIGHT * BB_UNITY_WEIGHT);
+ newWeightDest = (weightDest * 2) / (BB_LOOP_WEIGHT_SCALE * BB_UNITY_WEIGHT);
}
if (newWeightDest > 0)
{
if (fgHaveProfileData())
{
- fprintf(fgxFile, "\n calledCount=\"%d\"", fgCalledCount);
+ fprintf(fgxFile, "\n calledCount=\"%f\"", fgCalledCount);
fprintf(fgxFile, "\n profileData=\"true\"");
}
if (compHndBBtabCount > 0)
if (validWeights)
{
- unsigned edgeWeight = (edge->edgeWeightMin() + edge->edgeWeightMax()) / 2;
+ BasicBlock::weight_t edgeWeight = (edge->edgeWeightMin() + edge->edgeWeightMax()) / 2;
fprintf(fgxFile, "%slabel=\"%7.2f\"", sep, (double)edgeWeight / weightDivisor);
}
}
if (validWeights)
{
- unsigned edgeWeight = (edge->edgeWeightMin() + edge->edgeWeightMax()) / 2;
+ BasicBlock::weight_t edgeWeight = (edge->edgeWeightMin() + edge->edgeWeightMax()) / 2;
fprintf(fgxFile, "\n weight=");
fprintfDouble(fgxFile, ((double)edgeWeight) / weightDivisor);
if (weight <= 99999 * BB_UNITY_WEIGHT)
{
// print weight in this format ddddd.
- printf("%5u.", (weight + (BB_UNITY_WEIGHT / 2)) / BB_UNITY_WEIGHT);
+ printf("%5u.", (unsigned)FloatingPointUtils::round(weight / BB_UNITY_WEIGHT));
}
else // print weight in terms of k (i.e. 156k )
{
// print weight in this format dddddk
BasicBlock::weight_t weightK = weight / 1000;
- printf("%5uk", (weightK + (BB_UNITY_WEIGHT / 2)) / BB_UNITY_WEIGHT);
+ printf("%5uk", (unsigned)FloatingPointUtils::round(weightK / BB_UNITY_WEIGHT));
}
}
else // print weight in this format ddd.dd
printf("%6s", refCntWtd2str(weight));
}
}
- printf(" ");
//
// Display optional IBC weight column.
{
if (block->hasProfileWeight())
{
- printf("%*u", ibcColWidth, block->bbWeight);
+ printf("%*u", ibcColWidth, (unsigned)FloatingPointUtils::round(block->bbWeight));
}
else
{
frequency = InlineCallsiteFrequency::BORING;
}
- // Also capture the block weight of the call site. In the prejit
- // root case, assume there's some hot call site for this method.
- unsigned weight = 0;
+ // Also capture the block weight of the call site.
+ //
+ // In the prejit root case, assume at runtime there might be a hot call site
+ // for this method, so we won't prematurely conclude this method should never
+ // be inlined.
+ //
+ BasicBlock::weight_t weight = 0;
if (pInlineInfo != nullptr)
{
}
else
{
- weight = BB_MAX_WEIGHT;
+ const float prejitHotCallerWeight = 1000000.0f;
+ weight = prejitHotCallerWeight;
}
inlineResult->NoteInt(InlineObservation::CALLSITE_FREQUENCY, static_cast<int>(frequency));
- inlineResult->NoteInt(InlineObservation::CALLSITE_WEIGHT, static_cast<int>(weight));
+ inlineResult->NoteInt(InlineObservation::CALLSITE_WEIGHT, (int)(weight));
// If the call site has profile data, report the relative frequency of the site.
//
// Normalize the bbWeights by multiplying by BB_UNITY_WEIGHT and dividing by the calledCount.
//
- // 1. For methods that do not have IBC data the called weight will always be 100 (BB_UNITY_WEIGHT)
- // and the entry point bbWeight value is almost always 100 (BB_UNITY_WEIGHT)
- // 2. For methods that do have IBC data the called weight is the actual number of calls
- // from the IBC data and the entry point bbWeight value is almost always the actual
- // number of calls from the IBC data.
- //
- // "almost always" - except for the rare case where a loop backedge jumps to BB01
- //
- // We also perform a rounding operation by adding half of the 'calledCount' before performing
- // the division.
- //
- // Thus for both cases we will return 100 (BB_UNITY_WEIGHT) for the entry point BasicBlock
- //
- // Note that with a 100 (BB_UNITY_WEIGHT) values between 1 and 99 represent decimal fractions.
- // (i.e. 33 represents 33% and 75 represents 75%, and values greater than 100 require
- // some kind of loop backedge)
- //
-
- if (this->bbWeight < (BB_MAX_WEIGHT / BB_UNITY_WEIGHT))
- {
- // Calculate the result using unsigned arithmetic
- weight_t result = ((this->bbWeight * BB_UNITY_WEIGHT) + (calledCount / 2)) / calledCount;
-
- // We don't allow a value of zero, as that would imply rarely run
- return max(1, result);
- }
- else
- {
- // Calculate the full result using floating point
- double fullResult = ((double)this->bbWeight * (double)BB_UNITY_WEIGHT) / (double)calledCount;
+ weight_t fullResult = this->bbWeight * BB_UNITY_WEIGHT / calledCount;
- if (fullResult < (double)BB_MAX_WEIGHT)
- {
- // Add 0.5 and truncate to unsigned
- return (weight_t)(fullResult + 0.5);
- }
- else
- {
- return BB_MAX_WEIGHT;
- }
- }
+ return fullResult;
}
}
// Break the tie by:
// - Increasing the weight by 2 if we are a register arg.
// - Increasing the weight by 0.5 if we are a GC type.
+ //
+ // Review: seems odd that this is mixing counts and weights.
if (weight1 != 0)
{
if (dsc1->lvIsRegArg)
{
- weight2 += 2 * BB_UNITY_WEIGHT;
+ weight2 += 2 * BB_UNITY_WEIGHT_UNSIGNED;
}
if (varTypeIsGC(dsc1->TypeGet()))
{
- weight1 += BB_UNITY_WEIGHT / 2;
+ weight1 += BB_UNITY_WEIGHT_UNSIGNED / 2;
}
}
{
if (dsc2->lvIsRegArg)
{
- weight2 += 2 * BB_UNITY_WEIGHT;
+ weight2 += 2 * BB_UNITY_WEIGHT_UNSIGNED;
}
if (varTypeIsGC(dsc2->TypeGet()))
{
- weight2 += BB_UNITY_WEIGHT / 2;
+ weight2 += BB_UNITY_WEIGHT_UNSIGNED / 2;
}
}
assert(!dsc1->lvRegister);
assert(!dsc2->lvRegister);
- unsigned weight1 = dsc1->lvRefCntWtd();
- unsigned weight2 = dsc2->lvRefCntWtd();
+ BasicBlock::weight_t weight1 = dsc1->lvRefCntWtd();
+ BasicBlock::weight_t weight2 = dsc2->lvRefCntWtd();
#ifndef TARGET_ARM
// ARM-TODO: this was disabled for ARM under !FEATURE_FP_REGALLOC; it was probably a left-over from
//
// Arguments:
// compiler - The Compiler context.
-// blockWeight - The weight of the basic block that contains the use.
// lclNum - The local to use for temporary storage. If BAD_VAR_NUM (the
// default) is provided, this method will create and use a new
// local var.
//
// Return Value: The number of the local var used for temporary storage.
//
-unsigned LIR::Use::ReplaceWithLclVar(Compiler* compiler, unsigned blockWeight, unsigned lclNum)
+unsigned LIR::Use::ReplaceWithLclVar(Compiler* compiler, unsigned lclNum)
{
assert(IsInitialized());
assert(compiler != nullptr);
bool IsDummyUse() const;
void ReplaceWith(Compiler* compiler, GenTree* replacement);
- unsigned ReplaceWithLclVar(Compiler* compiler, unsigned blockWeight, unsigned lclNum = BAD_VAR_NUM);
+ unsigned ReplaceWithLclVar(Compiler* compiler, unsigned lclNum = BAD_VAR_NUM);
};
//------------------------------------------------------------------------
initRange.InsertBefore(nullptr, zero, store);
#if !defined(TARGET_64BIT)
- unsigned blockWeight = block->getBBWeight(this);
- DecomposeLongs::DecomposeRange(this, blockWeight, initRange);
+ DecomposeLongs::DecomposeRange(this, initRange);
#endif // !defined(TARGET_64BIT)
m_pLowering->LowerRange(block, initRange);
// add == true (when divisor == 7 for example):
// mulhi = dividend MULHI magic
// div = (((dividend SUB mulhi) RSZ 1) ADD mulhi)) RSZ (shift - 1)
- const bool requiresAdjustment = add;
- const bool requiresDividendMultiuse = requiresAdjustment || !isDiv;
- const unsigned curBBWeight = m_block->getBBWeight(comp);
+ const bool requiresAdjustment = add;
+ const bool requiresDividendMultiuse = requiresAdjustment || !isDiv;
+ const BasicBlock::weight_t curBBWeight = m_block->getBBWeight(comp);
if (requiresDividendMultiuse)
{
// For -3 we need:
// mulhi -= dividend ; requires sub adjust
// div = signbit(mulhi) + sar(mulhi, 1) ; requires shift adjust
- bool requiresAddSubAdjust = signum(divisorValue) != signum(magic);
- bool requiresShiftAdjust = shift != 0;
- bool requiresDividendMultiuse = requiresAddSubAdjust || !isDiv;
- unsigned curBBWeight = comp->compCurBB->getBBWeight(comp);
+ bool requiresAddSubAdjust = signum(divisorValue) != signum(magic);
+ bool requiresShiftAdjust = shift != 0;
+ bool requiresDividendMultiuse = requiresAddSubAdjust || !isDiv;
+ BasicBlock::weight_t curBBWeight = comp->compCurBB->getBBWeight(comp);
if (requiresDividendMultiuse)
{
GenTree* oldUseNode = use.Def();
if ((oldUseNode->gtOper != GT_LCL_VAR) || (tempNum != BAD_VAR_NUM))
{
- use.ReplaceWithLclVar(comp, m_block->getBBWeight(comp), tempNum);
+ use.ReplaceWithLclVar(comp, tempNum);
GenTree* newUseNode = use.Def();
ContainCheckRange(oldUseNode->gtNext, newUseNode);
return newUseNode->AsLclVar();
//
// Returns:
// Weight of ref position.
-unsigned LinearScan::getWeight(RefPosition* refPos)
+BasicBlock::weight_t LinearScan::getWeight(RefPosition* refPos)
{
- unsigned weight;
- GenTree* treeNode = refPos->treeNode;
+ BasicBlock::weight_t weight;
+ GenTree* treeNode = refPos->treeNode;
if (treeNode != nullptr)
{
{
if (useBlockWeights)
{
- unsigned weight1 = block1->getBBWeight(compiler);
- unsigned weight2 = block2->getBBWeight(compiler);
+ BasicBlock::weight_t weight1 = block1->getBBWeight(compiler);
+ BasicBlock::weight_t weight2 = block2->getBBWeight(compiler);
if (weight1 > weight2)
{
// This is defined as thresholdLargeVectorRefCntWtd, as we are likely to use the same mechanism
// for vectors on Arm64, though the actual value may differ.
- unsigned int floatVarCount = 0;
- unsigned int thresholdFPRefCntWtd = 4 * BB_UNITY_WEIGHT;
- unsigned int maybeFPRefCntWtd = 2 * BB_UNITY_WEIGHT;
- VARSET_TP fpMaybeCandidateVars(VarSetOps::UninitVal());
+ unsigned int floatVarCount = 0;
+ BasicBlock::weight_t thresholdFPRefCntWtd = 4 * BB_UNITY_WEIGHT;
+ BasicBlock::weight_t maybeFPRefCntWtd = 2 * BB_UNITY_WEIGHT;
+ VARSET_TP fpMaybeCandidateVars(VarSetOps::UninitVal());
#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
- unsigned int largeVectorVarCount = 0;
- unsigned int thresholdLargeVectorRefCntWtd = 4 * BB_UNITY_WEIGHT;
+ unsigned int largeVectorVarCount = 0;
+ BasicBlock::weight_t thresholdLargeVectorRefCntWtd = 4 * BB_UNITY_WEIGHT;
#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
if (enregisterLocalVars)
{
#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
}
#if DOUBLE_ALIGN
- unsigned refCntStk = 0;
- unsigned refCntReg = 0;
- unsigned refCntWtdReg = 0;
- unsigned refCntStkParam = 0; // sum of ref counts for all stack based parameters
- unsigned refCntWtdStkDbl = 0; // sum of wtd ref counts for stack based doubles
- doDoubleAlign = false;
- bool checkDoubleAlign = true;
+ unsigned refCntStk = 0;
+ unsigned refCntReg = 0;
+ BasicBlock::weight_t refCntWtdReg = 0;
+ unsigned refCntStkParam = 0; // sum of ref counts for all stack based parameters
+ BasicBlock::weight_t refCntWtdStkDbl = 0; // sum of wtd ref counts for stack based doubles
+ doDoubleAlign = false;
+ bool checkDoubleAlign = true;
if (compiler->codeGen->isFramePointerRequired() || compiler->opts.MinOpts())
{
checkDoubleAlign = false;
{
largeVectorVarCount++;
VarSetOps::AddElemD(compiler, largeVectorVars, varDsc->lvVarIndex);
- unsigned refCntWtd = varDsc->lvRefCntWtd();
+ BasicBlock::weight_t refCntWtd = varDsc->lvRefCntWtd();
if (refCntWtd >= thresholdLargeVectorRefCntWtd)
{
VarSetOps::AddElemD(compiler, largeVectorCalleeSaveCandidateVars, varDsc->lvVarIndex);
if (regType(type) == FloatRegisterType)
{
floatVarCount++;
- unsigned refCntWtd = varDsc->lvRefCntWtd();
+ BasicBlock::weight_t refCntWtd = varDsc->lvRefCntWtd();
if (varDsc->lvIsRegArg)
{
// Don't count the initial reference for register params. In those cases,
// the lclVars allocated to the frame pointer.
// => Here, estimate of the EBP refCnt and weighted refCnt is a wild guess.
//
- unsigned refCntEBP = refCntReg / 8;
- unsigned refCntWtdEBP = refCntWtdReg / 8;
+ unsigned refCntEBP = refCntReg / 8;
+ BasicBlock::weight_t refCntWtdEBP = refCntWtdReg / 8;
doDoubleAlign =
compiler->shouldDoubleAlign(refCntStk, refCntEBP, refCntWtdEBP, refCntStkParam, refCntWtdStkDbl);
//
// Note: This helper is designed to be used only from allocateBusyReg() and canSpillDoubleReg()
//
-bool LinearScan::canSpillReg(RegRecord* physRegRecord, LsraLocation refLocation, unsigned* recentAssignedRefWeight)
+bool LinearScan::canSpillReg(RegRecord* physRegRecord,
+ LsraLocation refLocation,
+ BasicBlock::weight_t* recentAssignedRefWeight)
{
assert(physRegRecord->assignedInterval != nullptr);
RefPosition* recentAssignedRef = physRegRecord->assignedInterval->recentRefPosition;
// This helper is designed to be used only from allocateBusyReg() and canSpillDoubleReg().
// The recentAssignedRefWeight is not updated if either register cannot be spilled.
//
-bool LinearScan::canSpillDoubleReg(RegRecord* physRegRecord,
- LsraLocation refLocation,
- unsigned* recentAssignedRefWeight)
+bool LinearScan::canSpillDoubleReg(RegRecord* physRegRecord,
+ LsraLocation refLocation,
+ BasicBlock::weight_t* recentAssignedRefWeight)
{
assert(genIsValidDoubleReg(physRegRecord->regNum));
- bool retVal = true;
- unsigned weight = BB_ZERO_WEIGHT;
- unsigned weight2 = BB_ZERO_WEIGHT;
+ bool retVal = true;
+ BasicBlock::weight_t weight = BB_ZERO_WEIGHT;
+ BasicBlock::weight_t weight2 = BB_ZERO_WEIGHT;
RegRecord* physRegRecord2 = findAnotherHalfRegRec(physRegRecord);
#ifdef TARGET_ARM
RegRecord* farthestRefPhysRegRecord2 = nullptr;
#endif
- LsraLocation farthestLocation = MinLocation;
- LsraLocation refLocation = refPosition->nodeLocation;
- unsigned farthestRefPosWeight;
+ LsraLocation farthestLocation = MinLocation;
+ LsraLocation refLocation = refPosition->nodeLocation;
+ BasicBlock::weight_t farthestRefPosWeight;
if (allocateIfProfitable)
{
// If allocating a reg is optional, we will consider those ref positions
// initialized to MinLocation, the first available ref position
// will be selected as spill candidate and its weight as the
// fathestRefPosWeight.
- farthestRefPosWeight = BB_MAX_WEIGHT;
+ farthestRefPosWeight = FloatingPointUtils::infinite_float();
}
for (regNumber regNum : Registers(regType))
// We've passed the preliminary checks for a spill candidate.
// Now, if we have a recentAssignedRef, check that it is going to be OK to spill it.
- Interval* assignedInterval = physRegRecord->assignedInterval;
- unsigned recentAssignedRefWeight = BB_ZERO_WEIGHT;
- RefPosition* recentAssignedRef = nullptr;
- RefPosition* recentAssignedRef2 = nullptr;
+ Interval* assignedInterval = physRegRecord->assignedInterval;
+ BasicBlock::weight_t recentAssignedRefWeight = BB_ZERO_WEIGHT;
+ RefPosition* recentAssignedRef = nullptr;
+ RefPosition* recentAssignedRef2 = nullptr;
#ifdef TARGET_ARM
if (current->registerType == TYP_DOUBLE)
{
//
void LinearScan::dumpLsraStats(FILE* file)
{
- unsigned sumSpillCount = 0;
- unsigned sumCopyRegCount = 0;
- unsigned sumResolutionMovCount = 0;
- unsigned sumSplitEdgeCount = 0;
- UINT64 wtdSpillCount = 0;
- UINT64 wtdCopyRegCount = 0;
- UINT64 wtdResolutionMovCount = 0;
+ unsigned sumSpillCount = 0;
+ unsigned sumCopyRegCount = 0;
+ unsigned sumResolutionMovCount = 0;
+ unsigned sumSplitEdgeCount = 0;
+ BasicBlock::weight_t wtdSpillCount = 0;
+ BasicBlock::weight_t wtdCopyRegCount = 0;
+ BasicBlock::weight_t wtdResolutionMovCount = 0;
fprintf(file, "----------\n");
fprintf(file, "LSRA Stats");
sumResolutionMovCount += resolutionMovCount;
sumSplitEdgeCount += splitEdgeCount;
- wtdSpillCount += (UINT64)spillCount * block->bbWeight;
- wtdCopyRegCount += (UINT64)copyRegCount * block->bbWeight;
- wtdResolutionMovCount += (UINT64)resolutionMovCount * block->bbWeight;
+ wtdSpillCount += spillCount * block->bbWeight;
+ wtdCopyRegCount += copyRegCount * block->bbWeight;
+ wtdResolutionMovCount += resolutionMovCount * block->bbWeight;
}
fprintf(file, "Total Tracked Vars: %d\n", compiler->lvaTrackedCount);
fprintf(file, "Total Reg Cand Vars: %d\n", regCandidateVarCount);
fprintf(file, "Total number of Intervals: %d\n", static_cast<unsigned>(intervals.size() - 1));
fprintf(file, "Total number of RefPositions: %d\n", static_cast<unsigned>(refPositions.size() - 1));
- fprintf(file, "Total Spill Count: %d Weighted: %I64u\n", sumSpillCount, wtdSpillCount);
- fprintf(file, "Total CopyReg Count: %d Weighted: %I64u\n", sumCopyRegCount, wtdCopyRegCount);
- fprintf(file, "Total ResolutionMov Count: %d Weighted: %I64u\n", sumResolutionMovCount, wtdResolutionMovCount);
+ fprintf(file, "Total Spill Count: %d Weighted: %f\n", sumSpillCount, wtdSpillCount);
+ fprintf(file, "Total CopyReg Count: %d Weighted: %f\n", sumCopyRegCount, wtdCopyRegCount);
+ fprintf(file, "Total ResolutionMov Count: %d Weighted: %f\n", sumResolutionMovCount, wtdResolutionMovCount);
fprintf(file, "Total number of split edges: %d\n", sumSplitEdgeCount);
// compute total number of spill temps created
bool isSecondHalfReg(RegRecord* regRec, Interval* interval);
RegRecord* getSecondHalfRegRec(RegRecord* regRec);
RegRecord* findAnotherHalfRegRec(RegRecord* regRec);
- bool canSpillDoubleReg(RegRecord* physRegRecord, LsraLocation refLocation, unsigned* recentAssignedRefWeight);
+ bool canSpillDoubleReg(RegRecord* physRegRecord,
+ LsraLocation refLocation,
+ BasicBlock::weight_t* recentAssignedRefWeight);
void unassignDoublePhysReg(RegRecord* doubleRegRecord);
#endif
void updateAssignedInterval(RegRecord* reg, Interval* interval, RegisterType regType);
bool canRestorePreviousInterval(RegRecord* regRec, Interval* assignedInterval);
bool isAssignedToInterval(Interval* interval, RegRecord* regRec);
bool isRefPositionActive(RefPosition* refPosition, LsraLocation refLocation);
- bool canSpillReg(RegRecord* physRegRecord, LsraLocation refLocation, unsigned* recentAssignedRefWeight);
+ bool canSpillReg(RegRecord* physRegRecord, LsraLocation refLocation, BasicBlock::weight_t* recentAssignedRefWeight);
bool isRegInUse(RegRecord* regRec, RefPosition* refPosition);
// insert refpositions representing prolog zero-inits which will be added later
void associateRefPosWithInterval(RefPosition* rp);
- unsigned getWeight(RefPosition* refPos);
+ BasicBlock::weight_t getWeight(RefPosition* refPos);
/*****************************************************************************
* Register management
// arguments to calls. We undo promotion unless we see enough non-call uses.
//
const unsigned totalAppearances = varDsc->lvRefCnt(RCS_EARLY);
- const unsigned callAppearances = varDsc->lvRefCntWtd(RCS_EARLY);
+ const unsigned callAppearances = (unsigned)varDsc->lvRefCntWtd(RCS_EARLY);
assert(totalAppearances >= callAppearances);
const unsigned nonCallAppearances = totalAppearances - callAppearances;
if (IS_CSE_INDEX(tree->gtCSEnum))
{
- unsigned CSEnum = GET_CSE_INDEX(tree->gtCSEnum);
- unsigned CseAvailBit = genCSEnum2bit(CSEnum) * 2;
- unsigned cseAvailCrossCallBit = CseAvailBit + 1;
- CSEdsc* desc = optCSEfindDsc(CSEnum);
- unsigned stmw = block->getBBWeight(this);
+ unsigned CSEnum = GET_CSE_INDEX(tree->gtCSEnum);
+ unsigned CseAvailBit = genCSEnum2bit(CSEnum) * 2;
+ unsigned cseAvailCrossCallBit = CseAvailBit + 1;
+ CSEdsc* desc = optCSEfindDsc(CSEnum);
+ BasicBlock::weight_t stmw = block->getBBWeight(this);
isUse = BitVecOps::IsMember(cseLivenessTraits, available_cses, CseAvailBit);
isDef = !isUse; // If is isn't a CSE use, it is a CSE def
Compiler* m_pCompiler;
unsigned m_addCSEcount;
- unsigned aggressiveRefCnt;
- unsigned moderateRefCnt;
+ BasicBlock::weight_t aggressiveRefCnt;
+ BasicBlock::weight_t moderateRefCnt;
unsigned enregCount; // count of the number of predicted enregistered variables
bool largeFrame;
bool hugeFrame;
if (m_pCompiler->verbose)
{
printf("\n");
- printf("Aggressive CSE Promotion cutoff is %u\n", aggressiveRefCnt);
- printf("Moderate CSE Promotion cutoff is %u\n", moderateRefCnt);
+ printf("Aggressive CSE Promotion cutoff is %f\n", aggressiveRefCnt);
+ printf("Moderate CSE Promotion cutoff is %f\n", moderateRefCnt);
printf("enregCount is %u\n", enregCount);
printf("Framesize estimate is 0x%04X\n", frameSize);
printf("We have a %s frame\n", hugeFrame ? "huge" : (largeFrame ? "large" : "small"));
Compiler::CSEdsc* dsc = sortTab[cnt];
GenTree* expr = dsc->csdTree;
- unsigned def;
- unsigned use;
- unsigned cost;
+ BasicBlock::weight_t def;
+ BasicBlock::weight_t use;
+ unsigned cost;
if (CodeOptKind() == Compiler::SMALL_CODE)
{
if (!Compiler::Is_Shared_Const_CSE(dsc->csdHashKey))
{
- printf("CSE #%02u, {$%-3x, $%-3x} useCnt=%d: [def=%3u, use=%3u, cost=%3u%s]\n :: ",
+ printf("CSE #%02u, {$%-3x, $%-3x} useCnt=%d: [def=%3f, use=%3f, cost=%3u%s]\n :: ",
dsc->csdIndex, dsc->csdHashKey, dsc->defExcSetPromise, dsc->csdUseCount, def, use, cost,
dsc->csdLiveAcrossCall ? ", call" : " ");
}
else
{
size_t kVal = Compiler::Decode_Shared_Const_CSE_Value(dsc->csdHashKey);
- printf("CSE #%02u, {K_%p} useCnt=%d: [def=%3u, use=%3u, cost=%3u%s]\n :: ", dsc->csdIndex,
+ printf("CSE #%02u, {K_%p} useCnt=%d: [def=%3f, use=%3f, cost=%3u%s]\n :: ", dsc->csdIndex,
dspPtr(kVal), dsc->csdUseCount, def, use, cost,
dsc->csdLiveAcrossCall ? ", call" : " ");
}
CSE_Heuristic* m_context;
Compiler::CSEdsc* m_CseDsc;
- unsigned m_cseIndex;
- unsigned m_defCount;
- unsigned m_useCount;
- unsigned m_Cost;
- unsigned m_Size;
+ unsigned m_cseIndex;
+ BasicBlock::weight_t m_defCount;
+ BasicBlock::weight_t m_useCount;
+ unsigned m_Cost;
+ unsigned m_Size;
// When this Candidate is successfully promoted to a CSE we record
// the following information about what category was used when promoting it.
{
return m_cseIndex;
}
- unsigned DefCount()
+ BasicBlock::weight_t DefCount()
{
return m_defCount;
}
- unsigned UseCount()
+ BasicBlock::weight_t UseCount()
{
return m_useCount;
}
unsigned cse_def_cost;
unsigned cse_use_cost;
- unsigned no_cse_cost = 0;
- unsigned yes_cse_cost = 0;
- unsigned extra_yes_cost = 0;
- unsigned extra_no_cost = 0;
+ BasicBlock::weight_t no_cse_cost = 0;
+ BasicBlock::weight_t yes_cse_cost = 0;
+ unsigned extra_yes_cost = 0;
+ unsigned extra_no_cost = 0;
// The 'cseRefCnt' is the RefCnt that we will have if we promote this CSE into a new LclVar
// Each CSE Def will contain two Refs and each CSE Use will have one Ref of this new LclVar
- unsigned cseRefCnt = (candidate->DefCount() * 2) + candidate->UseCount();
+ BasicBlock::weight_t cseRefCnt = (candidate->DefCount() * 2) + candidate->UseCount();
bool canEnregister = true;
unsigned slotCount = 1;
#ifdef DEBUG
if (m_pCompiler->verbose)
{
- printf("Aggressive CSE Promotion (%u >= %u)\n", cseRefCnt, aggressiveRefCnt);
+ printf("Aggressive CSE Promotion (%f >= %f)\n", cseRefCnt, aggressiveRefCnt);
}
#endif
// With aggressive promotion we expect that the candidate will be enregistered
#ifdef DEBUG
if (m_pCompiler->verbose)
{
- printf("Aggressive CSE Promotion (%u >= %u)\n", cseRefCnt, aggressiveRefCnt);
+ printf("Aggressive CSE Promotion (%f >= %f)\n", cseRefCnt, aggressiveRefCnt);
}
#endif
// With aggressive promotion we expect that the candidate will be enregistered
#ifdef DEBUG
if (m_pCompiler->verbose)
{
- printf("Moderate CSE Promotion (CSE never live at call) (%u >= %u)\n", cseRefCnt,
+ printf("Moderate CSE Promotion (CSE never live at call) (%f >= %f)\n", cseRefCnt,
moderateRefCnt);
}
#endif
#ifdef DEBUG
if (m_pCompiler->verbose)
{
- printf("Moderate CSE Promotion (%s) (%u >= %u)\n",
+ printf("Moderate CSE Promotion (%s) (%f >= %f)\n",
candidate->LiveAcrossCall() ? "CSE is live across a call" : "not enregisterable",
cseRefCnt, moderateRefCnt);
}
#ifdef DEBUG
if (m_pCompiler->verbose)
{
- printf("Conservative CSE Promotion (%s) (%u < %u)\n",
+ printf("Conservative CSE Promotion (%s) (%f < %f)\n",
candidate->LiveAcrossCall() ? "CSE is live across a call" : "not enregisterable",
cseRefCnt, moderateRefCnt);
}
#ifdef DEBUG
if (m_pCompiler->verbose)
{
- printf("Conservative CSE Promotion (%u < %u)\n", cseRefCnt, moderateRefCnt);
+ printf("Conservative CSE Promotion (%f < %f)\n", cseRefCnt, moderateRefCnt);
}
#endif
cse_def_cost = 2;
if ((enregCount < (CNT_CALLEE_ENREG * 3 / 2)) || varTypeIsFloating(candidate->Expr()->TypeGet()))
{
// Extra cost in case we have to spill/restore a caller saved register
- extra_yes_cost = BB_UNITY_WEIGHT;
+ extra_yes_cost = BB_UNITY_WEIGHT_UNSIGNED;
if (cseRefCnt < moderateRefCnt) // If Conservative CSE promotion
{
cse_use_cost += 2;
}
- extra_yes_cost = (BB_UNITY_WEIGHT * spillSimdRegInProlog) * 3;
+ extra_yes_cost = (BB_UNITY_WEIGHT_UNSIGNED * spillSimdRegInProlog) * 3;
}
#endif // FEATURE_SIMD
}
#ifdef DEBUG
if (m_pCompiler->verbose)
{
- printf("cseRefCnt=%d, aggressiveRefCnt=%d, moderateRefCnt=%d\n", cseRefCnt, aggressiveRefCnt,
+ printf("cseRefCnt=%f, aggressiveRefCnt=%f, moderateRefCnt=%f\n", cseRefCnt, aggressiveRefCnt,
moderateRefCnt);
- printf("defCnt=%d, useCnt=%d, cost=%d, size=%d%s\n", candidate->DefCount(), candidate->UseCount(),
+ printf("defCnt=%f, useCnt=%f, cost=%d, size=%d%s\n", candidate->DefCount(), candidate->UseCount(),
candidate->Cost(), candidate->Size(), candidate->LiveAcrossCall() ? ", LiveAcrossCall" : "");
printf("def_cost=%d, use_cost=%d, extra_no_cost=%d, extra_yes_cost=%d\n", cse_def_cost, cse_use_cost,
extra_no_cost, extra_yes_cost);
- printf("CSE cost savings check (%u >= %u) %s\n", no_cse_cost, yes_cse_cost,
+ printf("CSE cost savings check (%f >= %f) %s\n", no_cse_cost, yes_cse_cost,
(no_cse_cost >= yes_cse_cost) ? "passes" : "fails");
}
#endif // DEBUG
/* In stress mode we will make some extra CSEs */
if (no_cse_cost > 0)
{
- int percentage = (no_cse_cost * 100) / yes_cse_cost;
+ int percentage = (int)((no_cse_cost * 100) / yes_cse_cost);
if (m_pCompiler->compStressCompile(Compiler::STRESS_MAKE_CSE, percentage))
{
// It will also put cse0 into SSA if there is just one def.
void PerformCSE(CSE_Candidate* successfulCandidate)
{
- unsigned cseRefCnt = (successfulCandidate->DefCount() * 2) + successfulCandidate->UseCount();
+ BasicBlock::weight_t cseRefCnt = (successfulCandidate->DefCount() * 2) + successfulCandidate->UseCount();
if (successfulCandidate->LiveAcrossCall() != 0)
{
// As we introduce new LclVars for these CSE we slightly
// increase the cutoffs for aggressive and moderate CSE's
//
- int incr = BB_UNITY_WEIGHT;
+ BasicBlock::weight_t incr = BB_UNITY_WEIGHT;
if (cseRefCnt > aggressiveRefCnt)
{
Thus we increase each block by 7 times the weight of
the loop header block,
if the loops are all properly formed gives us:
- (assuming that BB_LOOP_WEIGHT is 8)
+ (assuming that BB_LOOP_WEIGHT_SCALE is 8)
1 -- non loop basic block
8 -- single loop nesting
{
noway_assert(curBlk->bbWeight > BB_ZERO_WEIGHT);
- unsigned weight;
+ BasicBlock::weight_t weight;
if (curBlk->hasProfileWeight())
{
{
if (dominates)
{
- weight = curBlk->bbWeight * BB_LOOP_WEIGHT;
+ weight = curBlk->bbWeight * BB_LOOP_WEIGHT_SCALE;
}
else
{
- weight = curBlk->bbWeight * (BB_LOOP_WEIGHT / 2);
+ weight = curBlk->bbWeight * (BB_LOOP_WEIGHT_SCALE / 2);
}
//
//
if (!curBlk->isRunRarely() && fgReachable(curBlk, begBlk) && fgReachable(begBlk, curBlk))
{
- unsigned weight = curBlk->bbWeight;
+ BasicBlock::weight_t weight = curBlk->bbWeight;
// Don't unmark blocks that are set to BB_MAX_WEIGHT
// Don't unmark blocks when we are using profile weights
{
/* Merging of blocks can disturb the Dominates
information (see RAID #46649) */
- if (weight < BB_LOOP_WEIGHT)
+ if (weight < BB_LOOP_WEIGHT_SCALE)
{
weight *= 2;
}
weight = BB_MAX_WEIGHT;
}
- assert(weight >= BB_LOOP_WEIGHT);
+ assert(weight >= BB_LOOP_WEIGHT_SCALE);
- curBlk->modifyBBWeight(weight / BB_LOOP_WEIGHT);
+ curBlk->modifyBBWeight(weight / BB_LOOP_WEIGHT_SCALE);
}
#ifdef DEBUG
goto DONE_LOOP;
}
// Block weight should no longer have the loop multiplier
- newBlock->modifyBBWeight(newBlock->bbWeight / BB_LOOP_WEIGHT);
+ newBlock->modifyBBWeight(newBlock->bbWeight / BB_LOOP_WEIGHT_SCALE);
// Jump dests are set in a post-pass; make sure CloneBlockState hasn't tried to set them.
assert(newBlock->bbJumpDest == nullptr);
gtPrepareCost(condTree);
unsigned estDupCostSz = condTree->GetCostSz();
- double loopIterations = (double)BB_LOOP_WEIGHT;
+ double loopIterations = (double)BB_LOOP_WEIGHT_SCALE;
bool allProfileWeightsAreValid = false;
BasicBlock::weight_t weightBlock = block->bbWeight;
optLoopTable[loopInd].lpEntry->bbNum, optLoopTable[loopInd].lpBottom->bbNum);
// Determine the depth of the loop, so we can properly weight blocks added (outside the cloned loop blocks).
- unsigned depth = optLoopDepth(loopInd);
- unsigned ambientWeight = 1;
+ unsigned depth = optLoopDepth(loopInd);
+ BasicBlock::weight_t ambientWeight = 1;
for (unsigned j = 0; j < depth; j++)
{
- unsigned lastWeight = ambientWeight;
- ambientWeight *= BB_LOOP_WEIGHT;
- // If the multiplication overflowed, stick at max.
- // (Strictly speaking, a multiplication could overflow and still have a result
- // that is >= lastWeight...but if so, the original weight must be pretty large,
- // and it got bigger, so that's OK.)
- if (ambientWeight < lastWeight)
- {
- ambientWeight = BB_MAX_WEIGHT;
- break;
- }
+ BasicBlock::weight_t lastWeight = ambientWeight;
+ ambientWeight *= BB_LOOP_WEIGHT_SCALE;
+ assert(ambientWeight > lastWeight);
}
// If we're in a non-natural loop, the ambient weight might be higher than we computed above.
return curCond;
}
-void Compiler::optEnsureUniqueHead(unsigned loopInd, unsigned ambientWeight)
+void Compiler::optEnsureUniqueHead(unsigned loopInd, BasicBlock::weight_t ambientWeight)
{
BasicBlock* h = optLoopTable[loopInd].lpHead;
BasicBlock* t = optLoopTable[loopInd].lpTop;
while (!blocks->Empty())
{
- BasicBlock* block = blocks->Pop();
- unsigned blockWeight = block->getBBWeight(this);
+ BasicBlock* block = blocks->Pop();
+ BasicBlock::weight_t blockWeight = block->getBBWeight(this);
JITDUMP(" optHoistLoopBlocks " FMT_BB " (weight=%6s) of loop L%02u <" FMT_BB ".." FMT_BB
">, firstBlock is %s\n",
if (allValidProfileWeights)
{
- double loopEnteredCount;
- double loopSkippedCount;
+ BasicBlock::weight_t loopEnteredCount;
+ BasicBlock::weight_t loopSkippedCount;
if (fgHaveValidEdgeWeights)
{
noway_assert(edgeToNext != nullptr);
noway_assert(edgeToJump != nullptr);
- loopEnteredCount =
- ((double)edgeToNext->edgeWeightMin() + (double)edgeToNext->edgeWeightMax()) / 2.0;
- loopSkippedCount =
- ((double)edgeToJump->edgeWeightMin() + (double)edgeToJump->edgeWeightMax()) / 2.0;
+ loopEnteredCount = (edgeToNext->edgeWeightMin() + edgeToNext->edgeWeightMax()) / 2.0f;
+ loopSkippedCount = (edgeToJump->edgeWeightMin() + edgeToJump->edgeWeightMax()) / 2.0f;
}
else
{
- loopEnteredCount = (double)head->bbNext->bbWeight;
- loopSkippedCount = (double)head->bbJumpDest->bbWeight;
+ loopEnteredCount = head->bbNext->bbWeight;
+ loopSkippedCount = head->bbJumpDest->bbWeight;
}
- double loopTakenRatio = loopEnteredCount / (loopEnteredCount + loopSkippedCount);
+ BasicBlock::weight_t loopTakenRatio = loopEnteredCount / (loopEnteredCount + loopSkippedCount);
// Calculate a good approximation of the preHead's block weight
- unsigned preHeadWeight = (unsigned)(((double)head->bbWeight * loopTakenRatio) + 0.5);
+            BasicBlock::weight_t preHeadWeight = head->bbWeight * loopTakenRatio;
preHead->setBBWeight(max(preHeadWeight, 1));
noway_assert(!preHead->isRunRarely());
}
// Otherwise, we compare the weighted ref count of ebp-enregistered variables against double the
// ref count for double-aligned values.
//
-bool Compiler::shouldDoubleAlign(
- unsigned refCntStk, unsigned refCntEBP, unsigned refCntWtdEBP, unsigned refCntStkParam, unsigned refCntWtdStkDbl)
+bool Compiler::shouldDoubleAlign(unsigned refCntStk,
+ unsigned refCntEBP,
+ BasicBlock::weight_t refCntWtdEBP,
+ unsigned refCntStkParam,
+ BasicBlock::weight_t refCntWtdStkDbl)
{
bool doDoubleAlign = false;
const unsigned DBL_ALIGN_SETUP_SIZE = 7;
JITDUMP("\nDouble alignment:\n");
JITDUMP(" Bytes that could be saved by not using EBP frame: %i\n", bytesUsed);
- JITDUMP(" Sum of weighted ref counts for EBP enregistered variables: %i\n", refCntWtdEBP);
- JITDUMP(" Sum of weighted ref counts for weighted stack based doubles: %i\n", refCntWtdStkDbl);
+ JITDUMP(" Sum of weighted ref counts for EBP enregistered variables: %f\n", refCntWtdEBP);
+ JITDUMP(" Sum of weighted ref counts for weighted stack based doubles: %f\n", refCntWtdStkDbl);
- if (bytesUsed > ((refCntWtdStkDbl * misaligned_weight) / BB_UNITY_WEIGHT))
+ if (((BasicBlock::weight_t)bytesUsed) > ((refCntWtdStkDbl * misaligned_weight) / BB_UNITY_WEIGHT))
{
JITDUMP(" Predicting not to double-align ESP to save %d bytes of code.\n", bytesUsed);
}
return temp;
}
-const char* refCntWtd2str(unsigned refCntWtd)
+const char* refCntWtd2str(BasicBlock::weight_t refCntWtd)
{
const int bufSize = 17;
static char num1[bufSize];
}
else
{
- unsigned valueInt = refCntWtd / BB_UNITY_WEIGHT;
- unsigned valueFrac = refCntWtd % BB_UNITY_WEIGHT;
+ float scaledWeight = refCntWtd / BB_UNITY_WEIGHT;
+ float intPart = (float)floor(scaledWeight);
+ bool isLarge = intPart > 1e9;
+    bool                 isSmall      = (scaledWeight < 1e-2) && (scaledWeight != 0);
- if (valueFrac == 0)
+ // Use g format for high dynamic range counts.
+ //
+ if (isLarge || isSmall)
{
- sprintf_s(temp, bufSize, "%u ", valueInt);
+ sprintf_s(temp, bufSize, "%.2g", scaledWeight);
}
else
{
- sprintf_s(temp, bufSize, "%u.%02u", valueInt, (valueFrac * 100 / BB_UNITY_WEIGHT));
+ if (intPart == scaledWeight)
+ {
+ sprintf_s(temp, bufSize, "%lld ", (long long)intPart);
+ }
+ else
+ {
+ sprintf_s(temp, bufSize, "%.2f", scaledWeight);
+ }
}
}
return temp;
return count;
}
+//------------------------------------------------------------------------
+// CountDigits: count the digits needed to print 'num' in the given base.
+//
+// Arguments:
+//    num  - the (non-negative) float value to measure
+//    base - numeric base, in [2, 16] (default 10)
+//
+// Return Value:
+//    Number of digits in the integer part of 'num' (at least 1).
+//
+unsigned CountDigits(float num, unsigned base /* = 10 */)
+{
+    assert(2 <= base && base <= 16); // sanity check
+
+    // Block weights can be infinite (see FloatingPointUtils::infinite_float),
+    // and infinity divided by 'base' stays infinite, so the loop below would
+    // never terminate. Clamp to the largest finite value first. (A NaN input
+    // compares false against 'base' and simply yields a count of 1.)
+    if (num > FLT_MAX)
+    {
+        num = FLT_MAX;
+    }
+
+    unsigned count = 1;
+    while (num >= base)
+    {
+        num /= base;
+        ++count;
+    }
+    return count;
+}
+
#endif // DEBUG
double FloatingPointUtils::convertUInt64ToDouble(unsigned __int64 uIntVal)
}
//------------------------------------------------------------------------
+// infinite_float: return an infinite float value
+//
+// Returns:
+// Infinite float value.
+//
+// Notes:
+// This is the predefined constant HUGE_VALF on many platforms.
+//
+float FloatingPointUtils::infinite_float()
+{
+    // Build +infinity from its IEEE-754 bit pattern. Use memcpy rather than
+    // dereferencing a reinterpret_cast'ed pointer: casting int32_t* to float*
+    // and reading through it violates strict aliasing (undefined behavior);
+    // memcpy is the well-defined type-punning idiom and compiles to the same
+    // single move on all major compilers.
+    int32_t bits = 0x7F800000;
+    float   result;
+    memcpy(&result, &bits, sizeof(result));
+    return result;
+}
+
+//------------------------------------------------------------------------
// hasPreciseReciprocal: check double for precise reciprocal. E.g. 2.0 <--> 0.5
//
// Arguments:
* Used when outputting strings.
*/
unsigned CountDigits(unsigned num, unsigned base = 10);
+unsigned CountDigits(float num, unsigned base = 10);
#endif // DEBUG
static bool hasPreciseReciprocal(double x);
static bool hasPreciseReciprocal(float x);
+
+ static float infinite_float();
};
// The CLR requires that critical section locks be initialized via its ClrCreateCriticalSection API...but