hashDsc->csdHashKey = key;
hashDsc->csdIndex = 0;
- hashDsc->csdLiveAcrossCall = 0;
+ hashDsc->csdLiveAcrossCall = false;
hashDsc->csdDefCount = 0;
hashDsc->csdUseCount = 0;
hashDsc->csdDefWtCnt = 0;
unsigned aggressiveRefCnt;
unsigned moderateRefCnt;
- unsigned enregCount; // count of the number of enregisterable variables
+ unsigned enregCount; // count of the number of predicted enregistered variables
bool largeFrame;
bool hugeFrame;
Compiler::codeOptimize codeOptKind;
sortTab = nullptr;
sortSiz = 0;
-#ifdef _TARGET_XARCH_
- if (m_pCompiler->compLongUsed)
- {
- enregCount++;
- }
-#endif
-
unsigned frameSize = 0;
unsigned regAvailEstimate = ((CNT_CALLEE_ENREG * 3) + (CNT_CALLEE_TRASH * 2) + 1);
unsigned lclNum;
for (lclNum = 0, varDsc = m_pCompiler->lvaTable; lclNum < m_pCompiler->lvaCount; lclNum++, varDsc++)
{
+ // Locals with no references don't use any local stack frame slots
if (varDsc->lvRefCnt() == 0)
{
continue;
}
+ // Incoming stack arguments don't use any local stack frame slots
+ if (varDsc->lvIsParam && !varDsc->lvIsRegArg)
+ {
+ continue;
+ }
+
#if FEATURE_FIXED_OUT_ARGS
// Skip the OutgoingArgArea in computing frame size, since
// its size is not yet known and it doesn't affect local
// will consider this LclVar as being enregistered.
// Now we reduce the remaining regAvailEstimate by
// an appropriate amount.
+ //
if (varDsc->lvRefCnt() <= 2)
{
// a single use single def LclVar only uses 1
if (frameSize > 0x080)
{
// We likely have a large stack frame.
- // Thus we might need to use large displacements when loading or storing
- // to CSE LclVars that are not enregistered
+ //
+ // On XARCH stack frame displacements can either use a 1-byte or a 4-byte displacement
+ // with a large frame we will need to use some 4-byte displacements.
+ //
largeFrame = true;
break; // early out, we don't need to keep increasing frameSize
}
-#else // _TARGET_ARM_
+#elif _TARGET_ARM32
if (frameSize > 0x0400)
{
+ // We likely have a large stack frame.
+ //
+ // Thus we might need to use large displacements when loading or storing
+ // to CSE LclVars that are not enregistered
+ // On ARM32 this means using rsGetRsvdReg() to hold the large displacement
largeFrame = true;
}
if (frameSize > 0x10000)
{
hugeFrame = true;
- break;
+ break; // early out, we don't need to keep increasing frameSize
+ }
+#elif _TARGET_ARM64_
+ if (frameSize > 0x1000)
+ {
+ // We likely have a large stack frame.
+ //
+ // Thus we might need to use large displacements when loading or storing
+ // to CSE LclVars that are not enregistered
+ // On ARM64 this means using rsGetRsvdReg() to hold the large displacement
+ //
+ largeFrame = true;
+ break; // early out, we don't need to keep increasing frameSize
}
#endif
}
+ // Iterate over the sorted list of tracked local variables;
+ // these are the register candidates for LSRA.
+ // We normally visit the LclVars in order of their weighted ref counts
+ // and our heuristic assumes that the highest weighted ref count
+ // LclVars will be enregistered and that the lowest weighted ref count
+ // LclVars are likely to be allocated in the stack frame.
+ // The value of enregCount is incremented when we visit a LclVar
+ // that can be enregistered.
+ //
for (unsigned trackedIndex = 0; trackedIndex < m_pCompiler->lvaTrackedCount; trackedIndex++)
{
LclVarDsc* varDsc = m_pCompiler->lvaGetDescByTrackedIndex(trackedIndex);
var_types varTyp = varDsc->TypeGet();
- if (varDsc->lvDoNotEnregister)
+ // Locals with no references aren't enregistered
+ if (varDsc->lvRefCnt() == 0)
{
continue;
}
+ // Some LclVars always have stack homes
+ if ((varDsc->lvDoNotEnregister) || (varDsc->lvType == TYP_LCLBLK))
+ {
+ continue;
+ }
+
+ // The enregCount only tracks the uses of integer registers
+ //
+ // We could track floating point register usage separately
+ // but it isn't worth the additional complexity as floating point CSEs
+ // are rare and we typically have plenty of floating point registers available.
+ //
if (!varTypeIsFloating(varTyp))
{
- // TODO-1stClassStructs: Revisit this; it is here to duplicate previous behavior.
- // Note that this makes genTypeStSz return 1, but undoing it pessimizes some code.
- if (varTypeIsStruct(varTyp))
+ enregCount++; // The primitive types, including TYP_SIMD types use one register
+
+#ifndef _TARGET_64BIT_
+ if (varTyp == TYP_LONG)
{
- varTyp = TYP_STRUCT;
+ enregCount++; // on 32-bit targets longs use two registers
}
- enregCount += genTypeStSz(varTyp);
+#endif
}
- if ((aggressiveRefCnt == 0) && (enregCount > (CNT_CALLEE_ENREG * 3 / 2)))
+ // Set the cut off values to use for deciding when we want to use aggressive, moderate or conservative
+ //
+ // The values of aggressiveRefCnt and moderateRefCnt start off as zero and
+ // when enregCount reaches a certain value we assign the current LclVar
+ // (weighted) ref count to aggressiveRefCnt or moderateRefCnt.
+ //
+ const unsigned aggressiveEnregNum = (CNT_CALLEE_ENREG * 3 / 2);
+ const unsigned moderateEnregNum = ((CNT_CALLEE_ENREG * 3) + (CNT_CALLEE_TRASH * 2));
+ //
+ // On Windows x64 this yields:
+ // aggressiveEnregNum == 12 and moderateEnregNum == 38
+ // Thus we will typically set the cutoff values for
+ // aggressiveRefCnt based upon the weight of T13 (the 13th tracked LclVar)
+ // moderateRefCnt based upon the weight of T39 (the 39th tracked LclVar)
+ //
+ // For other architectures and platforms these values dynamically change
+ // based upon the number of callee saved and callee scratch registers.
+ //
+ if ((aggressiveRefCnt == 0) && (enregCount > aggressiveEnregNum))
{
if (CodeOptKind() == Compiler::SMALL_CODE)
{
- aggressiveRefCnt = varDsc->lvRefCnt() + BB_UNITY_WEIGHT;
+ aggressiveRefCnt = varDsc->lvRefCnt();
}
else
{
- aggressiveRefCnt = varDsc->lvRefCntWtd() + BB_UNITY_WEIGHT;
+ aggressiveRefCnt = varDsc->lvRefCntWtd();
}
+ aggressiveRefCnt += BB_UNITY_WEIGHT;
}
if ((moderateRefCnt == 0) && (enregCount > ((CNT_CALLEE_ENREG * 3) + (CNT_CALLEE_TRASH * 2))))
{
{
moderateRefCnt = varDsc->lvRefCntWtd();
}
+ moderateRefCnt += (BB_UNITY_WEIGHT / 2);
}
}
- unsigned mult = 3;
- // use smaller value for mult when enregCount is in [0..4]
- if (enregCount <= 4)
- {
- mult = (enregCount <= 2) ? 1 : 2;
- }
- aggressiveRefCnt = max(BB_UNITY_WEIGHT * mult, aggressiveRefCnt);
- moderateRefCnt = max((BB_UNITY_WEIGHT * mult) / 2, moderateRefCnt);
+ // The minimum value that we want to use for aggressiveRefCnt is BB_UNITY_WEIGHT * 2
+ // so increase it when we are below that value
+ //
+ aggressiveRefCnt = max(BB_UNITY_WEIGHT * 2, aggressiveRefCnt);
+
+ // The minimum value that we want to use for moderateRefCnt is BB_UNITY_WEIGHT
+ // so increase it when we are below that value
+ //
+ moderateRefCnt = max(BB_UNITY_WEIGHT, moderateRefCnt);
#ifdef DEBUG
if (m_pCompiler->verbose)
printf("\n");
printf("Aggressive CSE Promotion cutoff is %u\n", aggressiveRefCnt);
printf("Moderate CSE Promotion cutoff is %u\n", moderateRefCnt);
+ printf("enregCount is %u\n", enregCount);
printf("Framesize estimate is 0x%04X\n", frameSize);
printf("We have a %s frame\n", hugeFrame ? "huge" : (largeFrame ? "large" : "small"));
}
Compiler::CSEdsc* m_CseDsc;
unsigned m_cseIndex;
-
unsigned m_defCount;
unsigned m_useCount;
-
unsigned m_Cost;
unsigned m_Size;
+ // When this Candidate is successfully promoted to a CSE we record
+ // the following information about what category was used when promoting it.
+ //
+ // We will set m_Aggressive:
+ // When we believe that the CSE is very valuable in terms of weighted ref counts,
+ // such that it would always be enregistered by the register allocator.
+ //
+ // We will set m_Moderate:
+ // When we believe that the CSE is moderately valuable in terms of weighted ref counts,
+ // such that it is more likely than not to be enregistered by the register allocator
+ //
+ // We will set m_Conservative:
+ // When we didn't set m_Aggressive or m_Moderate.
+ // Such candidates typically are expensive to compute and thus are
+ // always profitable to promote even when they aren't enregistered.
+ //
+ // We will set m_StressCSE:
+ // When the candidate is only being promoted because of a Stress mode.
+ //
+ bool m_Aggressive;
+ bool m_Moderate;
+ bool m_Conservative;
+ bool m_StressCSE;
+
public:
- CSE_Candidate(CSE_Heuristic* context, Compiler::CSEdsc* cseDsc) : m_context(context), m_CseDsc(cseDsc)
+ CSE_Candidate(CSE_Heuristic* context, Compiler::CSEdsc* cseDsc)
+ : m_context(context)
+ , m_CseDsc(cseDsc)
+ , m_cseIndex(m_CseDsc->csdIndex)
+ , m_defCount(0)
+ , m_useCount(0)
+ , m_Cost(0)
+ , m_Size(0)
+ , m_Aggressive(false)
+ , m_Moderate(false)
+ , m_Conservative(false)
+ , m_StressCSE(false)
{
- m_cseIndex = m_CseDsc->csdIndex;
}
Compiler::CSEdsc* CseDsc()
bool LiveAcrossCall()
{
- // return (m_CseDsc->csdLiveAcrossCall != 0);
- return false; // The old behavior for now
+ return m_CseDsc->csdLiveAcrossCall;
+ }
+
+ void SetAggressive()
+ {
+ m_Aggressive = true;
+ }
+
+ bool IsAggressive()
+ {
+ return m_Aggressive;
+ }
+
+ void SetModerate()
+ {
+ m_Moderate = true;
+ }
+
+ bool IsModerate()
+ {
+ return m_Moderate;
+ }
+
+ void SetConservative()
+ {
+ m_Conservative = true;
+ }
+
+ bool IsConservative()
+ {
+ return m_Conservative;
+ }
+
+ void SetStressCSE()
+ {
+ m_StressCSE = true;
+ }
+
+ bool IsStressCSE()
+ {
+ return m_StressCSE;
}
void InitializeCounts()
if (stressResult != 0)
{
// Stress is enabled. Check whether to perform CSE or not.
- return (stressResult > 0);
+ if (stressResult > 0)
+ {
+ candidate->SetStressCSE();
+ return true;
+ }
}
if (m_pCompiler->optConfigDisableCSE2())
if (CodeOptKind() == Compiler::SMALL_CODE)
{
+ // Note that when optimizing for SMALL_CODE we set the cse_def_cost/cse_use_cost based
+ // upon the code size and we use unweighted ref counts instead of weighted ref counts.
+ // Also note that optimizing for SMALL_CODE is rare, we typically only optimize this way
+ // for class constructors, because we know that they will only run once.
+ //
if (cseRefCnt >= aggressiveRefCnt)
{
+ // Record that we are choosing to use the aggressive promotion rules
+ //
+ candidate->SetAggressive();
#ifdef DEBUG
if (m_pCompiler->verbose)
{
printf("Aggressive CSE Promotion (%u >= %u)\n", cseRefCnt, aggressiveRefCnt);
}
#endif
- cse_def_cost = slotCount;
- cse_use_cost = slotCount;
+ // With aggressive promotion we expect that the candidate will be enregistered
+ // so we set the use and def costs to their minimum values
+ //
+ cse_def_cost = 1;
+ cse_use_cost = 1;
+ // Check if this candidate is likely to live on the stack
+ //
if (candidate->LiveAcrossCall() || !canEnregister)
{
+ // Increase the costs when we have a large or huge frame
+ //
if (largeFrame)
{
cse_def_cost++;
}
}
}
- else if (largeFrame)
+ else // not aggressiveRefCnt
{
-#ifdef DEBUG
- if (m_pCompiler->verbose)
+ // Record that we are choosing to use the conservative promotion rules
+ //
+ candidate->SetConservative();
+ if (largeFrame)
{
- printf("Codesize CSE Promotion (%s frame)\n", hugeFrame ? "huge" : "large");
- }
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ printf("Codesize CSE Promotion (%s frame)\n", hugeFrame ? "huge" : "large");
+ }
#endif
#ifdef _TARGET_XARCH_
- /* The following formula is good choice when optimizing CSE for SMALL_CODE */
- cse_def_cost = 6; // mov [EBP-0x00001FC],reg
- cse_use_cost = 5; // [EBP-0x00001FC]
-#else // _TARGET_ARM_
- if (hugeFrame)
- {
- cse_def_cost = 10 + (2 * slotCount); // movw/movt r10 and str reg,[sp+r10]
- cse_use_cost = 10 + (2 * slotCount);
+ /* The following formula is good choice when optimizing CSE for SMALL_CODE */
+ cse_def_cost = 6; // mov [EBP-0x00001FC],reg
+ cse_use_cost = 5; // [EBP-0x00001FC]
+#else // _TARGET_ARM_
+ if (hugeFrame)
+ {
+ cse_def_cost = 10 + 2; // movw/movt r10 and str reg,[sp+r10]
+ cse_use_cost = 10 + 2;
+ }
+ else
+ {
+ cse_def_cost = 6 + 2; // movw r10 and str reg,[sp+r10]
+ cse_use_cost = 6 + 2;
+ }
+#endif
}
- else
+ else // small frame
{
- cse_def_cost = 6 + (2 * slotCount); // movw r10 and str reg,[sp+r10]
- cse_use_cost = 6 + (2 * slotCount);
- }
-#endif
- }
- else // small frame
- {
#ifdef DEBUG
- if (m_pCompiler->verbose)
- {
- printf("Codesize CSE Promotion (small frame)\n");
- }
+ if (m_pCompiler->verbose)
+ {
+ printf("Codesize CSE Promotion (small frame)\n");
+ }
#endif
#ifdef _TARGET_XARCH_
- /* The following formula is good choice when optimizing CSE for SMALL_CODE */
- cse_def_cost = 3 * slotCount; // mov [EBP-1C],reg
- cse_use_cost = 2 * slotCount; // [EBP-1C]
-#else // _TARGET_ARM_
- cse_def_cost = 2 * slotCount; // str reg,[sp+0x9c]
- cse_use_cost = 2 * slotCount; // ldr reg,[sp+0x9c]
+ /* The following formula is good choice when optimizing CSE for SMALL_CODE */
+ cse_def_cost = 3; // mov [EBP-1C],reg
+ cse_use_cost = 2; // [EBP-1C]
+
+#else // _TARGET_ARM_
+
+ cse_def_cost = 2; // str reg,[sp+0x9c]
+ cse_use_cost = 2; // ldr reg,[sp+0x9c]
#endif
+ }
+ }
+#ifdef _TARGET_AMD64_
+ if (varTypeIsFloating(candidate->Expr()->TypeGet()))
+ {
+ // floating point loads/store encode larger
+ cse_def_cost += 2;
+ cse_use_cost += 1;
}
+#endif // _TARGET_AMD64_
}
else // not SMALL_CODE ...
{
+ // Note that when optimizing for BLENDED_CODE or FAST_CODE we set cse_def_cost/cse_use_cost
+ // based upon the execution costs of the code and we use weighted ref counts.
+ //
if ((cseRefCnt >= aggressiveRefCnt) && canEnregister)
{
+ // Record that we are choosing to use the aggressive promotion rules
+ //
+ candidate->SetAggressive();
#ifdef DEBUG
if (m_pCompiler->verbose)
{
printf("Aggressive CSE Promotion (%u >= %u)\n", cseRefCnt, aggressiveRefCnt);
}
#endif
- cse_def_cost = slotCount;
- cse_use_cost = slotCount;
+ // With aggressive promotion we expect that the candidate will be enregistered
+ // so we set the use and def costs to their minimum values
+ //
+ cse_def_cost = 1;
+ cse_use_cost = 1;
}
else if (cseRefCnt >= moderateRefCnt)
{
-
+ // Record that we are choosing to use the moderate promotion rules
+ //
+ candidate->SetModerate();
if (!candidate->LiveAcrossCall() && canEnregister)
{
#ifdef DEBUG
moderateRefCnt);
}
#endif
- cse_def_cost = 2 * slotCount;
- cse_use_cost = 2 * slotCount;
- extra_yes_cost = BB_UNITY_WEIGHT * 2; // Extra cost in case we have to spill/restore a caller
- // saved register
+ cse_def_cost = 2;
+ if (canEnregister)
+ {
+ if (enregCount < (CNT_CALLEE_ENREG * 3 / 2))
+ {
+ cse_use_cost = 1;
+ }
+ else
+ {
+ cse_use_cost = 2;
+ }
+ }
+ else
+ {
+ cse_use_cost = 3;
+ }
}
}
else // Conservative CSE promotion
{
+ // Record that we are choosing to use the conservative promotion rules
+ //
+ candidate->SetConservative();
if (!candidate->LiveAcrossCall() && canEnregister)
{
#ifdef DEBUG
printf("Conservative CSE Promotion (%u < %u)\n", cseRefCnt, moderateRefCnt);
}
#endif
- cse_def_cost = 3 * slotCount;
- cse_use_cost = 3 * slotCount;
- extra_yes_cost = BB_UNITY_WEIGHT * 4; // Extra cost in case we have to spill/restore a caller
- // saved register
+ cse_def_cost = 2;
+ cse_use_cost = 3;
}
// If we have maxed out lvaTrackedCount then this CSE may end up as an untracked variable
if (m_pCompiler->lvaTrackedCount == lclMAX_TRACKED)
{
- cse_def_cost += slotCount;
- cse_use_cost += slotCount;
+ cse_def_cost += 1;
+ cse_use_cost += 1;
}
}
+ }
- if (largeFrame)
- {
- cse_def_cost++;
- cse_use_cost++;
- }
- if (hugeFrame)
+ if (slotCount > 1)
+ {
+ cse_def_cost *= slotCount;
+ cse_use_cost *= slotCount;
+ }
+
+ // If this CSE is live across a call then we may need to spill an additional caller save register
+ //
+ if (candidate->LiveAcrossCall())
+ {
+ // If we don't have a lot of variables to enregister or we have a floating point type
+ // then we will likely need to spill an additional caller save register.
+ //
+ if ((enregCount < (CNT_CALLEE_ENREG * 3 / 2)) || varTypeIsFloating(candidate->Expr()->TypeGet()))
{
- cse_def_cost++;
- cse_use_cost++;
+ // Extra cost in case we have to spill/restore a caller saved register
+ extra_yes_cost = BB_UNITY_WEIGHT;
+
+ if (cseRefCnt < moderateRefCnt) // If Conservative CSE promotion
+ {
+ extra_yes_cost *= 2; // full cost if we are being Conservative
+ }
}
}
printf("CSE cost savings check (%u >= %u) %s\n", no_cse_cost, yes_cse_cost,
(no_cse_cost >= yes_cse_cost) ? "passes" : "fails");
}
-#endif
+#endif // DEBUG
// Should we make this candidate into a CSE?
// Is the yes cost less than the no cost
}
}
+#ifdef DEBUG
+ // Setup the message arg for lvaGrabTemp()
+ //
+ const char* grabTempMessage = "CSE - unknown";
+
+ if (successfulCandidate->IsAggressive())
+ {
+ grabTempMessage = "CSE - aggressive";
+ }
+ else if (successfulCandidate->IsModerate())
+ {
+ grabTempMessage = "CSE - moderate";
+ }
+ else if (successfulCandidate->IsConservative())
+ {
+ grabTempMessage = "CSE - conservative";
+ }
+ else if (successfulCandidate->IsStressCSE())
+ {
+ grabTempMessage = "CSE - stress mode";
+ }
+#endif // DEBUG
+
/* Introduce a new temp for the CSE */
- // we will create a long lifetime temp for the new cse LclVar
- unsigned cseLclVarNum = m_pCompiler->lvaGrabTemp(false DEBUGARG("ValNumCSE"));
+ // we will create a long lifetime temp for the new CSE LclVar
+ unsigned cseLclVarNum = m_pCompiler->lvaGrabTemp(false DEBUGARG(grabTempMessage));
var_types cseLclVarTyp = genActualType(successfulCandidate->Expr()->TypeGet());
if (varTypeIsStruct(cseLclVarTyp))
{
if (dsc->csdDefCount == 1)
{
- JITDUMP("CSE #%02u is single-def, so associated cse temp V%02u will be in SSA\n", dsc->csdIndex,
+ JITDUMP("CSE #%02u is single-def, so associated CSE temp V%02u will be in SSA\n", dsc->csdIndex,
cseLclVarNum);
m_pCompiler->lvaTable[cseLclVarNum].lvInSsa = true;
noway_assert(asg->AsOp()->gtOp1->gtOper == GT_LCL_VAR);
- // Backpatch the SSA def, if we're putting this cse temp into ssa.
+ // Backpatch the SSA def, if we're putting this CSE temp into ssa.
asg->AsOp()->gtOp1->AsLclVar()->SetSsaNum(cseSsaNum);
if (cseSsaNum != SsaConfig::RESERVED_SSA_NUM)
noway_assert(link);
- // Mutate this link, thus replacing the old exp with the new cse representation
+ // Mutate this link, thus replacing the old exp with the new CSE representation
//
*link = cse;
m_pCompiler->gtDispTree(candidate.Expr());
printf("\n");
}
-#endif
+#endif // DEBUG
if ((dsc->csdDefCount <= 0) || (dsc->csdUseCount == 0))
{