unsigned slots = cpObjNode->gtSlots;
- // Temp register used to perform the sequence of loads and stores.
- regNumber tmpReg = cpObjNode->ExtractTempReg();
- regNumber tmpReg2 = (slots > 1) ? cpObjNode->GetSingleTempReg() : REG_NA;
- assert(genIsValidIntReg(tmpReg) && (tmpReg != REG_WRITE_BARRIER_SRC_BYREF) && (tmpReg != REG_WRITE_BARRIER_DST_BYREF));
- assert((slots == 1) || (genIsValidIntReg(tmpReg2) && (tmpReg2 != tmpReg) && (tmpReg2 != REG_WRITE_BARRIER_SRC_BYREF) && (tmpReg2 != REG_WRITE_BARRIER_DST_BYREF)));
+ // Temp register(s) used to perform the sequence of loads and stores.
+ regNumber tmpReg = cpObjNode->ExtractTempReg();
+ regNumber tmpReg2 = REG_NA;
- emitter* emit = getEmitter();
+ assert(genIsValidIntReg(tmpReg));
+ assert(tmpReg != REG_WRITE_BARRIER_SRC_BYREF);
+ assert(tmpReg != REG_WRITE_BARRIER_DST_BYREF);
+
+ if (slots > 1)
+ {
+ tmpReg2 = cpObjNode->GetSingleTempReg();
+ assert(tmpReg2 != tmpReg);
+ assert(genIsValidIntReg(tmpReg2));
+ assert(tmpReg2 != REG_WRITE_BARRIER_DST_BYREF);
+ assert(tmpReg2 != REG_WRITE_BARRIER_SRC_BYREF);
+ }
+
+ emitter* emit = getEmitter();
BYTE* gcPtrs = cpObjNode->gtGcPtrs;
else if (gcPtrs[i] == GCT_BYREF)
attr = EA_BYREF;
- if((i + 1 < slots) && (gcPtrs[i] == gcPtrs[i + 1]))
+ // Check if two or more remaining slots and use a ldp/stp sequence
+ // TODO-ARM64-CQ: Current limitations only allows using ldp/stp when both of the GC types match
+ if ((i + 1 < slots) && (gcPtrs[i] == gcPtrs[i + 1]))
{
- emit->emitIns_R_R_R_I(INS_ldp, attr, tmpReg, tmpReg2, REG_WRITE_BARRIER_SRC_BYREF, 2*TARGET_POINTER_SIZE,
- INS_OPTS_POST_INDEX);
- emit->emitIns_R_R_R_I(INS_stp, attr, tmpReg, tmpReg2, REG_WRITE_BARRIER_DST_BYREF, 2*TARGET_POINTER_SIZE,
- INS_OPTS_POST_INDEX);
- ++i;
+ emit->emitIns_R_R_R_I(INS_ldp, attr, tmpReg, tmpReg2, REG_WRITE_BARRIER_SRC_BYREF,
+ 2 * TARGET_POINTER_SIZE, INS_OPTS_POST_INDEX);
+ emit->emitIns_R_R_R_I(INS_stp, attr, tmpReg, tmpReg2, REG_WRITE_BARRIER_DST_BYREF,
+ 2 * TARGET_POINTER_SIZE, INS_OPTS_POST_INDEX);
+ ++i; // extra increment of i, since we are copying two items
}
else
{
switch (gcPtrs[i])
{
case TYPE_GC_NONE:
- if((i + 1 < slots) && (gcPtrs[i] == gcPtrs[i + 1]))
+ // Check if the next slot's type is also TYP_GC_NONE and use ldp/stp
+ if ((i + 1 < slots) && (gcPtrs[i + 1] == TYPE_GC_NONE))
{
- emit->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, tmpReg, tmpReg2, REG_WRITE_BARRIER_SRC_BYREF, 2*TARGET_POINTER_SIZE,
- INS_OPTS_POST_INDEX);
- emit->emitIns_R_R_R_I(INS_stp, EA_8BYTE, tmpReg, tmpReg2, REG_WRITE_BARRIER_DST_BYREF, 2*TARGET_POINTER_SIZE,
- INS_OPTS_POST_INDEX);
- ++i;
+ emit->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, tmpReg, tmpReg2, REG_WRITE_BARRIER_SRC_BYREF,
+ 2 * TARGET_POINTER_SIZE, INS_OPTS_POST_INDEX);
+ emit->emitIns_R_R_R_I(INS_stp, EA_8BYTE, tmpReg, tmpReg2, REG_WRITE_BARRIER_DST_BYREF,
+ 2 * TARGET_POINTER_SIZE, INS_OPTS_POST_INDEX);
+ ++i; // extra increment of i, since we are copying two items
}
else
{
if (size >= 2 * REGSIZE_BYTES)
{
- // Use ldp/stp to reduce code size and improve performance
+ // We will use ldp/stp to reduce code size and improve performance
+ // so we need to reserve an extra internal register
blkNode->gtLsraInfo.internalIntCount++;
}
- blkNode->gtLsraInfo.setInternalCandidates(l, RBM_ALLINT & ~(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF));
+ // We can't use the special Write Barrier registers, so exclude them from the mask
+ regMaskTP internalIntCandidates = RBM_ALLINT & ~(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF);
+ blkNode->gtLsraInfo.setInternalCandidates(l, internalIntCandidates);
+ // If we have a source address we want it in RBM_WRITE_BARRIER_DST_BYREF.
dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_WRITE_BARRIER_DST_BYREF);
+
// If we have a source address we want it in REG_WRITE_BARRIER_SRC_BYREF.
// Otherwise, if it is a local, codegen will put its address in REG_WRITE_BARRIER_SRC_BYREF,
// which is killed by a StoreObj (and thus needn't be reserved).
if (size >= 2 * REGSIZE_BYTES)
{
- // Use ldp/stp to reduce code size and improve performance
+ // We will use ldp/stp to reduce code size and improve performance
+ // so we need to reserve an extra internal register
internalIntCount++;
}