for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
{
+ // The logic below is complex. Make sure we are not
+ // double-counting the initialization impact of any locals.
+ bool counted = false;
+
if (varDsc->lvIsParam)
{
continue;
// Var is on the stack at entry.
initStkLclCnt +=
roundUp(compiler->lvaLclSize(varNum), TARGET_POINTER_SIZE) / sizeof(int);
+ counted = true;
}
}
else
// Var is partially enregistered
noway_assert(genTypeSize(varDsc->TypeGet()) > sizeof(int) && varDsc->lvOtherReg == REG_STK);
initStkLclCnt += genTypeStSz(TYP_INT);
+ counted = true;
}
}
}
if ((!varDsc->lvTracked || (varDsc->lvType == TYP_STRUCT)) && varDsc->lvOnFrame &&
(!varDsc->lvIsTemp || varTypeIsGC(varDsc->TypeGet()) || (varDsc->lvStructGcCount > 0)))
{
+
varDsc->lvMustInit = true;
- initStkLclCnt += roundUp(compiler->lvaLclSize(varNum), TARGET_POINTER_SIZE) / sizeof(int);
+ if (!counted)
+ {
+ initStkLclCnt += roundUp(compiler->lvaLclSize(varNum), TARGET_POINTER_SIZE) / sizeof(int);
+ counted = true;
+ }
}
continue;
if (varDsc->lvMustInit && varDsc->lvOnFrame)
{
- initStkLclCnt += varDsc->lvStructGcCount;
+ if (!counted)
+ {
+ initStkLclCnt += varDsc->lvStructGcCount;
+ counted = true;
+ }
}
if ((compiler->lvaLclSize(varNum) > (3 * TARGET_POINTER_SIZE)) && (largeGcStructs <= 4))
}
}
- // After debugging this further it was found that this logic is incorrect:
- // it incorrectly assumes the stack slots are always 4 bytes (not necessarily the case)
- // and this also double counts variables (we saw this in the debugger) around line 4829.
- // Even though this doesn't pose a problem with correctness it will improperly decide to
- // zero init the stack using a block operation instead of a 'case by case' basis.
+ // Record number of 4 byte slots that need zeroing.
genInitStkLclCnt = initStkLclCnt;
- /* If we have more than 4 untracked locals, use block initialization */
- /* TODO-Review: If we have large structs, bias toward not using block initialization since
- we waste all the other slots. Really need to compute the correct
- and compare that against zeroing the slots individually */
+ // Decide if we will do block initialization in the prolog, or use
+ // a series of individual stores.
+ //
+ // Primary factor is the number of slots that need zeroing. We've
+ // been counting by sizeof(int) above. We assume for now we can
+ // only zero register width bytes per store.
+ //
+ // Current heuristic is to use block init when more than 4 stores
+ // are required.
+ //
+ // Secondary factor is the presence of large structs that
+ // potentially only need some fields set to zero. We likely don't
+ // model this very well, but have left the logic as is for now.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_64BIT_
+
+ genUseBlockInit = (genInitStkLclCnt > (largeGcStructs + 8));
+
+#else
genUseBlockInit = (genInitStkLclCnt > (largeGcStructs + 4));
+#endif // _TARGET_64BIT_
+
if (genUseBlockInit)
{
regMaskTP maskCalleeRegArgMask = intRegState.rsCalleeRegArgMaskLiveIn;
{
if (genInitStkLclCnt > 0)
{
- printf("Found %u lvMustInit stk vars, frame offsets %d through %d\n", genInitStkLclCnt, -untrLclLo,
- -untrLclHi);
+ printf("Found %u lvMustInit int-sized stack slots, frame offsets %d through %d\n", genInitStkLclCnt,
+ -untrLclLo, -untrLclHi);
}
}
#endif