#endif
break;
- case GT_STORE_OBJ:
case GT_STORE_DYN_BLK:
case GT_STORE_BLK:
genCodeForStoreBlk(treeNode->AsBlk());
}
//------------------------------------------------------------------------
-// genCodeForStoreBlk: Produce code for a GT_STORE_OBJ/GT_STORE_DYN_BLK/GT_STORE_BLK node.
+// genCodeForStoreBlk: Produce code for a GT_STORE_DYN_BLK/GT_STORE_BLK node.
//
// Arguments:
// tree - the node
//
void CodeGen::genCodeForStoreBlk(GenTreeBlk* blkOp)
{
- assert(blkOp->OperIs(GT_STORE_OBJ, GT_STORE_DYN_BLK, GT_STORE_BLK));
-
- if (blkOp->OperIs(GT_STORE_OBJ))
- {
- assert(!blkOp->gtBlkOpGcUnsafe);
- assert(blkOp->OperIsCopyBlkOp());
- assert(blkOp->AsBlk()->GetLayout()->HasGCPtr());
- genCodeForCpObj(blkOp->AsBlk());
- return;
- }
+ assert(blkOp->OperIs(GT_STORE_DYN_BLK, GT_STORE_BLK));
bool isCopyBlk = blkOp->OperIsCopyBlkOp();
switch (blkOp->gtBlkOpKind)
{
+ case GenTreeBlk::BlkOpKindCpObjUnroll:
+ assert(!blkOp->gtBlkOpGcUnsafe);
+ genCodeForCpObj(blkOp->AsBlk());
+ break;
+
case GenTreeBlk::BlkOpKindHelper:
assert(!blkOp->gtBlkOpGcUnsafe);
if (isCopyBlk)
emit->emitIns_R_L(INS_ld_d, EA_PTRSIZE, genPendingCallLabel, targetReg);
break;
- case GT_STORE_OBJ:
case GT_STORE_DYN_BLK:
case GT_STORE_BLK:
genCodeForStoreBlk(treeNode->AsBlk());
}
//------------------------------------------------------------------------
-// genCodeForStoreBlk: Produce code for a GT_STORE_OBJ/GT_STORE_DYN_BLK/GT_STORE_BLK node.
+// genCodeForStoreBlk: Produce code for a GT_STORE_DYN_BLK/GT_STORE_BLK node.
//
// Arguments:
// tree - the node
//
void CodeGen::genCodeForStoreBlk(GenTreeBlk* blkOp)
{
- assert(blkOp->OperIs(GT_STORE_OBJ, GT_STORE_DYN_BLK, GT_STORE_BLK));
+ assert(blkOp->OperIs(GT_STORE_DYN_BLK, GT_STORE_BLK));
- if (blkOp->OperIs(GT_STORE_OBJ))
- {
- assert(!blkOp->gtBlkOpGcUnsafe);
- assert(blkOp->OperIsCopyBlkOp());
- assert(blkOp->AsBlk()->GetLayout()->HasGCPtr());
- genCodeForCpObj(blkOp->AsBlk());
- return;
- }
if (blkOp->gtBlkOpGcUnsafe)
{
GetEmitter()->emitDisableGC();
}
+
bool isCopyBlk = blkOp->OperIsCopyBlkOp();
switch (blkOp->gtBlkOpKind)
{
+ case GenTreeBlk::BlkOpKindCpObjUnroll:
+ assert(!blkOp->gtBlkOpGcUnsafe);
+ genCodeForCpObj(blkOp->AsBlk());
+ break;
+
case GenTreeBlk::BlkOpKindHelper:
if (isCopyBlk)
{
emit->emitIns_R_L(INS_ld, EA_PTRSIZE, genPendingCallLabel, targetReg);
break;
- case GT_STORE_OBJ:
case GT_STORE_DYN_BLK:
case GT_STORE_BLK:
genCodeForStoreBlk(treeNode->AsBlk());
}
//------------------------------------------------------------------------
-// genCodeForStoreBlk: Produce code for a GT_STORE_OBJ/GT_STORE_DYN_BLK/GT_STORE_BLK node.
+// genCodeForStoreBlk: Produce code for a GT_STORE_DYN_BLK/GT_STORE_BLK node.
//
// Arguments:
// tree - the node
//
void CodeGen::genCodeForStoreBlk(GenTreeBlk* blkOp)
{
- assert(blkOp->OperIs(GT_STORE_OBJ, GT_STORE_DYN_BLK, GT_STORE_BLK));
+ assert(blkOp->OperIs(GT_STORE_DYN_BLK, GT_STORE_BLK));
- if (blkOp->OperIs(GT_STORE_OBJ))
- {
- assert(!blkOp->gtBlkOpGcUnsafe);
- assert(blkOp->OperIsCopyBlkOp());
- assert(blkOp->AsBlk()->GetLayout()->HasGCPtr());
- genCodeForCpObj(blkOp->AsBlk());
- return;
- }
if (blkOp->gtBlkOpGcUnsafe)
{
GetEmitter()->emitDisableGC();
switch (blkOp->gtBlkOpKind)
{
+ case GenTreeBlk::BlkOpKindCpObjUnroll:
+ assert(!blkOp->gtBlkOpGcUnsafe);
+ genCodeForCpObj(blkOp->AsBlk());
+ break;
+
case GenTreeBlk::BlkOpKindHelper:
if (isCopyBlk)
{
emit->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, genPendingCallLabel, treeNode->GetRegNum());
break;
- case GT_STORE_OBJ:
case GT_STORE_DYN_BLK:
case GT_STORE_BLK:
genCodeForStoreBlk(treeNode->AsBlk());
void CodeGen::genCodeForStoreBlk(GenTreeBlk* storeBlkNode)
{
- assert(storeBlkNode->OperIs(GT_STORE_OBJ, GT_STORE_DYN_BLK, GT_STORE_BLK));
-
- if (storeBlkNode->OperIs(GT_STORE_OBJ))
- {
-#ifndef JIT32_GCENCODER
- assert(!storeBlkNode->gtBlkOpGcUnsafe);
-#endif
- assert(storeBlkNode->OperIsCopyBlkOp());
- assert(storeBlkNode->AsBlk()->GetLayout()->HasGCPtr());
- genCodeForCpObj(storeBlkNode->AsBlk());
- return;
- }
+ assert(storeBlkNode->OperIs(GT_STORE_DYN_BLK, GT_STORE_BLK));
bool isCopyBlk = storeBlkNode->OperIsCopyBlkOp();
switch (storeBlkNode->gtBlkOpKind)
{
+ case GenTreeBlk::BlkOpKindCpObjRepInstr:
+ case GenTreeBlk::BlkOpKindCpObjUnroll:
+#ifndef JIT32_GCENCODER
+ assert(!storeBlkNode->gtBlkOpGcUnsafe);
+#endif
+ genCodeForCpObj(storeBlkNode->AsBlk());
+ break;
+
#ifdef TARGET_AMD64
case GenTreeBlk::BlkOpKindHelper:
assert(!storeBlkNode->gtBlkOpGcUnsafe);
// GC pointers.
//
// Arguments:
-// cpObjNode - the GT_STORE_OBJ
+// cpObjNode - the GT_STORE_BLK node
//
// Notes:
// This will generate a sequence of movsp instructions for the cases of non-gc members.
}
break;
- case GT_STORE_OBJ:
- if (tree->AsBlk()->GetLayout()->HasGCPtr())
- {
- chars += printf("[BLK_HASGCPTR]");
- }
- FALLTHROUGH;
-
case GT_BLK:
case GT_STORE_BLK:
case GT_STORE_DYN_BLK:
break;
case GT_STORE_BLK:
- case GT_STORE_OBJ:
hash ^= PtrToUlong(tree->AsBlk()->GetLayout());
break;
case GT_CMPXCHG:
case GT_BLK:
case GT_STORE_BLK:
- case GT_STORE_OBJ:
case GT_STORE_DYN_BLK:
case GT_BOX:
case GT_ARR_INDEX:
// Handle the special cases.
// The following opers are in GTSTRUCT_N but no other place (namely, no subtypes).
- case GT_STORE_OBJ:
case GT_STORE_BLK:
case GT_BLK:
{
case GT_LEA:
case GT_BLK:
case GT_STORE_BLK:
- case GT_STORE_OBJ:
case GT_STORE_DYN_BLK:
case GT_IND:
{
ClassLayout* layout = nullptr;
- if (tree->OperIs(GT_BLK, GT_STORE_BLK, GT_STORE_OBJ))
+ if (tree->OperIs(GT_BLK, GT_STORE_BLK))
{
layout = tree->AsBlk()->GetLayout();
}
static bool OperIsStoreBlk(genTreeOps gtOper)
{
- return StaticOperIs(gtOper, GT_STORE_BLK, GT_STORE_OBJ, GT_STORE_DYN_BLK);
+ return StaticOperIs(gtOper, GT_STORE_BLK, GT_STORE_DYN_BLK);
}
bool OperIsStoreBlk() const
// OperIsIndir() returns true also for indirection nodes such as GT_BLK, etc. as well as GT_NULLCHECK.
static bool OperIsIndir(genTreeOps gtOper)
{
- static_assert_no_msg(
- AreContiguous(GT_IND, GT_STOREIND, GT_STORE_OBJ, GT_BLK, GT_STORE_BLK, GT_STORE_DYN_BLK, GT_NULLCHECK));
+ static_assert_no_msg(AreContiguous(GT_IND, GT_STOREIND, GT_BLK, GT_STORE_BLK, GT_STORE_DYN_BLK, GT_NULLCHECK));
return (GT_IND <= gtOper) && (gtOper <= GT_NULLCHECK);
}
enum
{
BlkOpKindInvalid,
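+ // Copy of a struct whose layout contains GC pointers; codegen dispatches this to genCodeForCpObj.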
+ BlkOpKindCpObjUnroll,
+#ifdef TARGET_XARCH
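+ // xarch-only cpobj variant: long runs of non-GC slots (CPOBJ_NONGC_SLOTS_LIMIT or more) are copied with rep movs.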
+ BlkOpKindCpObjRepInstr,
+#endif
#ifndef TARGET_X86
BlkOpKindHelper,
#endif
GTNODE(IND , GenTreeIndir ,0,GTK_UNOP) // Load indirection
GTNODE(STOREIND , GenTreeStoreInd ,0,GTK_BINOP|GTK_NOVALUE) // Store indirection
-GTNODE(STORE_OBJ , GenTreeBlk ,0,GTK_BINOP|GTK_EXOP|GTK_NOVALUE) // Store for struct objects with GC pointers
-GTNODE(BLK , GenTreeBlk ,0,GTK_UNOP|GTK_EXOP) // Block/struct object
-GTNODE(STORE_BLK , GenTreeBlk ,0,GTK_BINOP|GTK_EXOP|GTK_NOVALUE) // Block/struct object store
+GTNODE(BLK , GenTreeBlk ,0,GTK_UNOP|GTK_EXOP) // Struct load
+GTNODE(STORE_BLK , GenTreeBlk ,0,GTK_BINOP|GTK_EXOP|GTK_NOVALUE) // Struct store
GTNODE(STORE_DYN_BLK , GenTreeStoreDynBlk ,0,GTK_SPECIAL|GTK_NOVALUE) // Dynamically sized block store, with native uint size
GTNODE(NULLCHECK , GenTreeIndir ,0,GTK_UNOP|GTK_NOVALUE) // Null checks the source
GTSTRUCT_1(ClsVar , GT_CLS_VAR_ADDR)
GTSTRUCT_1(CmpXchg , GT_CMPXCHG)
GTSTRUCT_1(AddrMode , GT_LEA)
-GTSTRUCT_N(Blk , GT_BLK, GT_STORE_BLK, GT_STORE_OBJ, GT_STORE_DYN_BLK)
+GTSTRUCT_N(Blk , GT_BLK, GT_STORE_BLK, GT_STORE_DYN_BLK)
GTSTRUCT_1(StoreDynBlk , GT_STORE_DYN_BLK)
GTSTRUCT_1(Qmark , GT_QMARK)
GTSTRUCT_1(PhiArg , GT_PHI_ARG)
GTSTRUCT_1(Phi , GT_PHI)
GTSTRUCT_1(StoreInd , GT_STOREIND)
-GTSTRUCT_N(Indir , GT_STOREIND, GT_IND, GT_NULLCHECK, GT_BLK, GT_STORE_BLK, GT_STORE_OBJ, GT_STORE_DYN_BLK)
+GTSTRUCT_N(Indir , GT_STOREIND, GT_IND, GT_NULLCHECK, GT_BLK, GT_STORE_BLK, GT_STORE_DYN_BLK)
GTSTRUCT_N(Conditional , GT_SELECT)
#if FEATURE_ARG_SPLIT
GTSTRUCT_2_SPECIAL(PutArgStk, GT_PUTARG_STK, GT_PUTARG_SPLIT)
break;
case GT_STOREIND:
- case GT_STORE_OBJ:
case GT_STORE_BLK:
case GT_STORE_DYN_BLK:
case GT_MEMORYBARRIER: // Similar to Volatile indirections, we must handle this as a memory def.
if (isDeadStore)
{
LIR::Use addrUse;
- if (blockRange.TryGetUse(node, &addrUse) &&
- (addrUse.User()->OperIs(GT_STOREIND, GT_STORE_BLK, GT_STORE_OBJ)))
+ if (blockRange.TryGetUse(node, &addrUse) && (addrUse.User()->OperIs(GT_STOREIND, GT_STORE_BLK)))
{
GenTreeIndir* const store = addrUse.User()->AsIndir();
case GT_JMP:
case GT_STOREIND:
case GT_BOUNDS_CHECK:
- case GT_STORE_OBJ:
case GT_STORE_BLK:
case GT_STORE_DYN_BLK:
case GT_JCMP:
break;
case GT_STORE_BLK:
- case GT_STORE_OBJ:
if (node->AsBlk()->Data()->IsCall())
{
LowerStoreSingleRegCallStruct(node->AsBlk());
addr->gtFlags |= lclStore->gtFlags & (GTF_VAR_DEF | GTF_VAR_USEASG);
// Create the assignment node.
- lclStore->ChangeOper(GT_STORE_OBJ);
+ lclStore->ChangeOper(GT_STORE_BLK);
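+ // With GT_STORE_OBJ gone, GT_STORE_BLK is used whether or not the layout contains GC pointers.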
GenTreeBlk* objStore = lclStore->AsBlk();
objStore->gtFlags = GTF_ASG | GTF_IND_NONFAULTING | GTF_IND_TGT_NOT_HEAP;
objStore->Initialize(layout);
case GT_RETURN:
case GT_STORE_LCL_VAR:
case GT_STORE_BLK:
- case GT_STORE_OBJ:
// Leave as is, the user will handle it.
assert(user->TypeIs(origType) || varTypeIsSIMD(user->TypeGet()));
break;
// Other 64 bites ABI-s support passing 5, 6, 7 byte structs.
unreached();
#else // !WINDOWS_AMD64_ABI
- if (store->OperIs(GT_STORE_OBJ))
- {
- store->SetOper(GT_STORE_BLK);
- }
- store->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
-
+ store->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
GenTreeLclVar* spilledCall = SpillStructCallResult(call);
store->SetData(spilledCall);
LowerBlockStoreCommon(store);
}
else
{
- // If the `ADDR` node under `STORE_OBJ(dstAddr, IND(struct(ADDR))`
+ // If the `ADDR` node under `STORE_BLK(dstAddr, IND(struct(ADDR)))`
// is a complex one it could benefit from an `LEA` that is not contained.
const bool isContainable = false;
TryCreateAddrMode(ind->Addr(), isContainable, ind);
}
//------------------------------------------------------------------------
-// LowerBlockStoreCommon: a common logic to lower STORE_OBJ/BLK/DYN_BLK.
+// LowerBlockStoreCommon: common logic to lower STORE_BLK/DYN_BLK.
//
// Arguments:
// blkNode - the store blk/obj node we are lowering.
//
void Lowering::LowerBlockStoreCommon(GenTreeBlk* blkNode)
{
- assert(blkNode->OperIs(GT_STORE_BLK, GT_STORE_DYN_BLK, GT_STORE_OBJ));
+ assert(blkNode->OperIs(GT_STORE_BLK, GT_STORE_DYN_BLK));
// Lose the type information stored in the source - we no longer need it.
if (blkNode->Data()->OperIs(GT_BLK))
}
//------------------------------------------------------------------------
-// TryTransformStoreObjAsStoreInd: try to replace STORE_OBJ/BLK as STOREIND.
+// TryTransformStoreObjAsStoreInd: try to replace STORE_BLK with STOREIND.
//
// Arguments:
// blkNode - the store node.
// Notes:
// TODO-CQ: this method should do the transformation when possible
// and STOREIND should always generate better or the same code as
-// STORE_OBJ/BLK for the same copy.
+// STORE_BLK for the same copy.
//
bool Lowering::TryTransformStoreObjAsStoreInd(GenTreeBlk* blkNode)
{
- assert(blkNode->OperIs(GT_STORE_BLK, GT_STORE_DYN_BLK, GT_STORE_OBJ));
+ assert(blkNode->OperIs(GT_STORE_BLK, GT_STORE_DYN_BLK));
if (!comp->opts.OptimizationEnabled())
{
return false;
return false;
}
- ClassLayout* layout = blkNode->GetLayout();
- if (layout == nullptr)
- {
- return false;
- }
-
- var_types regType = layout->GetRegisterType();
+ var_types regType = blkNode->GetLayout()->GetRegisterType();
if (regType == TYP_UNDEF)
{
return false;
if (varTypeIsGC(regType))
{
// TODO-CQ: STOREIND does not try to contain src if we need a barrier,
- // STORE_OBJ generates better code currently.
+ // STORE_BLK generates better code currently.
return false;
}
return false;
}
- JITDUMP("Replacing STORE_OBJ with STOREIND for [%06u]\n", blkNode->gtTreeID);
+ JITDUMP("Replacing STORE_BLK with STOREIND for [%06u]\n", blkNode->gtTreeID);
blkNode->ChangeOper(GT_STOREIND);
blkNode->ChangeType(regType);
src = src->AsUnOp()->gtGetOp1();
}
- if (blkNode->OperIs(GT_STORE_OBJ))
- {
- blkNode->SetOper(GT_STORE_BLK);
- }
-
if (!blkNode->OperIs(GT_STORE_DYN_BLK) && (size <= comp->getUnrollThreshold(Compiler::UnrollKind::Memset)) &&
src->OperIs(GT_CNS_INT))
{
assert(src->OperIs(GT_IND, GT_LCL_VAR, GT_LCL_FLD));
src->SetContained();
- if (src->OperIs(GT_IND))
+ if (src->OperIs(GT_LCL_VAR))
{
- GenTree* srcAddr = src->AsIndir()->Addr();
- // TODO-Cleanup: Make sure that GT_IND lowering didn't mark the source address as contained.
- // Sometimes the GT_IND type is a non-struct type and then GT_IND lowering may contain the
- // address, not knowing that GT_IND is part of a block op that has containment restrictions.
- srcAddr->ClearContained();
- }
- else
- {
- if (src->OperIs(GT_LCL_VAR))
- {
- // TODO-1stClassStructs: for now we can't work with STORE_BLOCK source in register.
- const unsigned srcLclNum = src->AsLclVar()->GetLclNum();
- comp->lvaSetVarDoNotEnregister(srcLclNum DEBUGARG(DoNotEnregisterReason::BlockOp));
- }
+ // TODO-1stClassStructs: for now we can't work with STORE_BLOCK source in register.
+ const unsigned srcLclNum = src->AsLclVar()->GetLclNum();
+ comp->lvaSetVarDoNotEnregister(srcLclNum DEBUGARG(DoNotEnregisterReason::BlockOp));
}
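+ // A copy whose layout has GC pointers normally takes the cpobj path; if the destination is a
+ // small-enough local we instead unroll it as a plain copy and mark the block GC-unsafe.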
+ bool doCpObj = !blkNode->OperIs(GT_STORE_DYN_BLK) && blkNode->GetLayout()->HasGCPtr();
unsigned copyBlockUnrollLimit = comp->getUnrollThreshold(Compiler::UnrollKind::Memcpy);
- if (blkNode->OperIs(GT_STORE_OBJ))
+ if (doCpObj && isDstAddrLocal && (size <= copyBlockUnrollLimit))
{
- if (!blkNode->AsBlk()->GetLayout()->HasGCPtr())
- {
- blkNode->SetOper(GT_STORE_BLK);
- }
- else if (isDstAddrLocal && (size <= copyBlockUnrollLimit))
- {
- blkNode->SetOper(GT_STORE_BLK);
- blkNode->gtBlkOpGcUnsafe = true;
- }
+ doCpObj = false;
+ blkNode->gtBlkOpGcUnsafe = true;
}
- if (blkNode->OperIs(GT_STORE_OBJ))
+ if (doCpObj)
{
assert((dstAddr->TypeGet() == TYP_BYREF) || (dstAddr->TypeGet() == TYP_I_IMPL));
-
- blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
+ blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindCpObjUnroll;
}
else if (blkNode->OperIs(GT_STORE_BLK) && (size <= copyBlockUnrollLimit))
{
src->SetContained();
src = src->AsUnOp()->gtGetOp1();
}
- if (blkNode->OperIs(GT_STORE_OBJ))
- {
- blkNode->SetOper(GT_STORE_BLK);
- }
if (!blkNode->OperIs(GT_STORE_DYN_BLK) && (size <= comp->getUnrollThreshold(Compiler::UnrollKind::Memset)) &&
src->OperIs(GT_CNS_INT))
assert(src->OperIs(GT_IND, GT_LCL_VAR, GT_LCL_FLD));
src->SetContained();
- if (src->OperIs(GT_IND))
- {
- // TODO-Cleanup: Make sure that GT_IND lowering didn't mark the source address as contained.
- // Sometimes the GT_IND type is a non-struct type and then GT_IND lowering may contain the
- // address, not knowing that GT_IND is part of a block op that has containment restrictions.
- src->AsIndir()->Addr()->ClearContained();
- }
- else if (src->OperIs(GT_LCL_VAR))
+ if (src->OperIs(GT_LCL_VAR))
{
// TODO-1stClassStructs: for now we can't work with STORE_BLOCK source in register.
const unsigned srcLclNum = src->AsLclVar()->GetLclNum();
comp->lvaSetVarDoNotEnregister(srcLclNum DEBUGARG(DoNotEnregisterReason::BlockOp));
}
+ bool doCpObj = !blkNode->OperIs(GT_STORE_DYN_BLK) && blkNode->GetLayout()->HasGCPtr();
unsigned copyBlockUnrollLimit = comp->getUnrollThreshold(Compiler::UnrollKind::Memcpy);
- if (blkNode->OperIs(GT_STORE_OBJ))
+ if (doCpObj && dstAddr->OperIs(GT_LCL_ADDR) && (size <= copyBlockUnrollLimit))
{
- if (!blkNode->AsBlk()->GetLayout()->HasGCPtr())
- {
- blkNode->SetOper(GT_STORE_BLK);
- }
- else if (dstAddr->OperIs(GT_LCL_ADDR) && (size <= copyBlockUnrollLimit))
- {
- // If the size is small enough to unroll then we need to mark the block as non-interruptible
- // to actually allow unrolling. The generated code does not report GC references loaded in the
- // temporary register(s) used for copying.
- blkNode->SetOper(GT_STORE_BLK);
- blkNode->gtBlkOpGcUnsafe = true;
- }
+ // If the size is small enough to unroll then we need to mark the block as non-interruptible
+ // to actually allow unrolling. The generated code does not report GC references loaded in the
+ // temporary register(s) used for copying.
+ doCpObj = false;
+ blkNode->gtBlkOpGcUnsafe = true;
}
// CopyObj or CopyBlk
- if (blkNode->OperIs(GT_STORE_OBJ))
+ if (doCpObj)
{
assert((dstAddr->TypeGet() == TYP_BYREF) || (dstAddr->TypeGet() == TYP_I_IMPL));
-
- blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
+ blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindCpObjUnroll;
}
else if (blkNode->OperIs(GT_STORE_BLK) && (size <= copyBlockUnrollLimit))
{
src->SetContained();
src = src->AsUnOp()->gtGetOp1();
}
- if (blkNode->OperIs(GT_STORE_OBJ))
- {
- blkNode->SetOper(GT_STORE_BLK);
- }
if (!blkNode->OperIs(GT_STORE_DYN_BLK) && (size <= INITBLK_UNROLL_LIMIT) && src->OperIs(GT_CNS_INT))
{
assert(src->OperIs(GT_IND, GT_LCL_VAR, GT_LCL_FLD));
src->SetContained();
- if (src->OperIs(GT_IND))
- {
- // TODO-Cleanup: Make sure that GT_IND lowering didn't mark the source address as contained.
- // Sometimes the GT_IND type is a non-struct type and then GT_IND lowering may contain the
- // address, not knowing that GT_IND is part of a block op that has containment restrictions.
- src->AsIndir()->Addr()->ClearContained();
- }
- else if (src->OperIs(GT_LCL_VAR))
+ if (src->OperIs(GT_LCL_VAR))
{
// TODO-1stClassStructs: for now we can't work with STORE_BLOCK source in register.
const unsigned srcLclNum = src->AsLclVar()->GetLclNum();
comp->lvaSetVarDoNotEnregister(srcLclNum DEBUGARG(DoNotEnregisterReason::BlockOp));
}
- if (blkNode->OperIs(GT_STORE_OBJ))
+
+ bool doCpObj = !blkNode->OperIs(GT_STORE_DYN_BLK) && blkNode->GetLayout()->HasGCPtr();
+ if (doCpObj && dstAddr->OperIs(GT_LCL_ADDR) && (size <= CPBLK_UNROLL_LIMIT))
{
- if (!blkNode->AsBlk()->GetLayout()->HasGCPtr())
- {
- blkNode->SetOper(GT_STORE_BLK);
- }
- else if (dstAddr->OperIs(GT_LCL_ADDR) && (size <= CPBLK_UNROLL_LIMIT))
- {
- // If the size is small enough to unroll then we need to mark the block as non-interruptible
- // to actually allow unrolling. The generated code does not report GC references loaded in the
- // temporary register(s) used for copying.
- blkNode->SetOper(GT_STORE_BLK);
- blkNode->gtBlkOpGcUnsafe = true;
- }
+ // If the size is small enough to unroll then we need to mark the block as non-interruptible
+ // to actually allow unrolling. The generated code does not report GC references loaded in the
+ // temporary register(s) used for copying.
+ doCpObj = false;
+ blkNode->gtBlkOpGcUnsafe = true;
}
// CopyObj or CopyBlk
- if (blkNode->OperIs(GT_STORE_OBJ))
+ if (doCpObj)
{
assert((dstAddr->TypeGet() == TYP_BYREF) || (dstAddr->TypeGet() == TYP_I_IMPL));
-
- blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
+ blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindCpObjUnroll;
}
- ////////////////////////////////////////////////////////////////////////////////////////////////////////
else if (blkNode->OperIs(GT_STORE_BLK) && (size <= CPBLK_UNROLL_LIMIT))
{
blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
src = src->AsUnOp()->gtGetOp1();
}
- if (blkNode->OperIs(GT_STORE_OBJ))
- {
- blkNode->SetOper(GT_STORE_BLK);
- }
-
if (!blkNode->OperIs(GT_STORE_DYN_BLK) && (size <= comp->getUnrollThreshold(Compiler::UnrollKind::Memset)))
{
if (!src->OperIs(GT_CNS_INT))
assert(src->OperIs(GT_IND, GT_LCL_VAR, GT_LCL_FLD));
src->SetContained();
- if (src->OperIs(GT_IND))
- {
- // TODO-Cleanup: Make sure that GT_IND lowering didn't mark the source address as contained.
- // Sometimes the GT_IND type is a non-struct type and then GT_IND lowering may contain the
- // address, not knowing that GT_IND is part of a block op that has containment restrictions.
- src->AsIndir()->Addr()->ClearContained();
- }
- else if (src->OperIs(GT_LCL_VAR))
+ if (src->OperIs(GT_LCL_VAR))
{
// TODO-1stClassStructs: for now we can't work with STORE_BLOCK source in register.
const unsigned srcLclNum = src->AsLclVar()->GetLclNum();
comp->lvaSetVarDoNotEnregister(srcLclNum DEBUGARG(DoNotEnregisterReason::StoreBlkSrc));
}
- if (blkNode->OperIs(GT_STORE_OBJ))
- {
- if (!blkNode->AsBlk()->GetLayout()->HasGCPtr())
- {
- blkNode->SetOper(GT_STORE_BLK);
- }
+ ClassLayout* layout = blkNode->GetLayout();
+ bool doCpObj = !blkNode->OperIs(GT_STORE_DYN_BLK) && layout->HasGCPtr();
+
#ifndef JIT32_GCENCODER
- else if (dstAddr->OperIs(GT_LCL_ADDR) &&
- (size <= comp->getUnrollThreshold(Compiler::UnrollKind::Memcpy, false)))
- {
- // If the size is small enough to unroll then we need to mark the block as non-interruptible
- // to actually allow unrolling. The generated code does not report GC references loaded in the
- // temporary register(s) used for copying.
- // This is not supported for the JIT32_GCENCODER.
- blkNode->SetOper(GT_STORE_BLK);
- blkNode->gtBlkOpGcUnsafe = true;
- }
-#endif
+ if (doCpObj && dstAddr->OperIs(GT_LCL_ADDR) &&
+ (size <= comp->getUnrollThreshold(Compiler::UnrollKind::Memcpy, false)))
+ {
+ // If the size is small enough to unroll then we need to mark the block as non-interruptible
+ // to actually allow unrolling. The generated code does not report GC references loaded in the
+ // temporary register(s) used for copying. This is not supported for the JIT32_GCENCODER.
+ doCpObj = false;
+ blkNode->gtBlkOpGcUnsafe = true;
}
+#endif
- if (blkNode->OperIs(GT_STORE_OBJ))
+ if (doCpObj)
{
assert((dstAddr->TypeGet() == TYP_BYREF) || (dstAddr->TypeGet() == TYP_I_IMPL));
if (dstAddr->OperIs(GT_LCL_ADDR))
{
// If the destination is on the stack then no write barriers are needed.
- nonGCSlots = blkNode->GetLayout()->GetSlotCount();
+ nonGCSlots = layout->GetSlotCount();
}
else
{
// Otherwise a write barrier is needed for every GC pointer in the layout
// so we need to check if there's a long enough sequence of non-GC slots.
- ClassLayout* layout = blkNode->GetLayout();
- unsigned slots = layout->GetSlotCount();
+ unsigned slots = layout->GetSlotCount();
for (unsigned i = 0; i < slots; i++)
{
if (layout->IsGCPtr(i))
if (nonGCSlots >= CPOBJ_NONGC_SLOTS_LIMIT)
{
- blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindRepInstr;
+ blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindCpObjRepInstr;
}
else
{
- blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
+ blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindCpObjUnroll;
}
}
else if (blkNode->OperIs(GT_STORE_BLK) &&
- (size <= comp->getUnrollThreshold(Compiler::UnrollKind::Memcpy, !blkNode->GetLayout()->HasGCPtr())))
+ (size <= comp->getUnrollThreshold(Compiler::UnrollKind::Memcpy, !layout->HasGCPtr())))
{
blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
}
}
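+ // All paths above are expected to have picked a concrete gtBlkOpKind, including the cpobj cases.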
+ assert(blkNode->gtBlkOpKind != GenTreeBlk::BlkOpKindInvalid);
+
#ifndef TARGET_X86
if ((MIN_ARG_AREA_FOR_CALL > 0) && (blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindHelper))
{
break;
case GT_STORE_BLK:
- case GT_STORE_OBJ:
case GT_STORE_DYN_BLK:
srcCount = BuildBlockStore(tree->AsBlk());
break;
break;
case GT_STORE_BLK:
- case GT_STORE_OBJ:
case GT_STORE_DYN_BLK:
srcCount = BuildBlockStore(tree->AsBlk());
break;
srcAddrOrFill = src->AsIndir()->Addr();
}
- if (blkNode->OperIs(GT_STORE_OBJ))
+ switch (blkNode->gtBlkOpKind)
{
- // We don't need to materialize the struct size but we still need
- // a temporary register to perform the sequence of loads and stores.
- // We can't use the special Write Barrier registers, so exclude them from the mask
- regMaskTP internalIntCandidates =
- allRegs(TYP_INT) & ~(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF);
- buildInternalIntRegisterDefForNode(blkNode, internalIntCandidates);
-
- if (size >= 2 * REGSIZE_BYTES)
+ case GenTreeBlk::BlkOpKindCpObjUnroll:
{
- // We will use ldp/stp to reduce code size and improve performance
- // so we need to reserve an extra internal register
+ // We don't need to materialize the struct size but we still need
+ // a temporary register to perform the sequence of loads and stores.
+ // We can't use the special Write Barrier registers, so exclude them from the mask
+ regMaskTP internalIntCandidates =
+ allRegs(TYP_INT) & ~(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF);
buildInternalIntRegisterDefForNode(blkNode, internalIntCandidates);
- }
- // If we have a dest address we want it in RBM_WRITE_BARRIER_DST_BYREF.
- dstAddrRegMask = RBM_WRITE_BARRIER_DST_BYREF;
+ if (size >= 2 * REGSIZE_BYTES)
+ {
+ // We will use ldp/stp to reduce code size and improve performance
+ // so we need to reserve an extra internal register
+ buildInternalIntRegisterDefForNode(blkNode, internalIntCandidates);
+ }
+
+ // If we have a dest address we want it in RBM_WRITE_BARRIER_DST_BYREF.
+ dstAddrRegMask = RBM_WRITE_BARRIER_DST_BYREF;
- // If we have a source address we want it in REG_WRITE_BARRIER_SRC_BYREF.
- // Otherwise, if it is a local, codegen will put its address in REG_WRITE_BARRIER_SRC_BYREF,
- // which is killed by a StoreObj (and thus needn't be reserved).
- if (srcAddrOrFill != nullptr)
- {
- assert(!srcAddrOrFill->isContained());
- srcRegMask = RBM_WRITE_BARRIER_SRC_BYREF;
+ // If we have a source address we want it in REG_WRITE_BARRIER_SRC_BYREF.
+ // Otherwise, if it is a local, codegen will put its address in REG_WRITE_BARRIER_SRC_BYREF,
+ // which is killed by a StoreObj (and thus needn't be reserved).
+ if (srcAddrOrFill != nullptr)
+ {
+ assert(!srcAddrOrFill->isContained());
+ srcRegMask = RBM_WRITE_BARRIER_SRC_BYREF;
+ }
}
- }
- else
- {
- switch (blkNode->gtBlkOpKind)
+ break;
+
+ case GenTreeBlk::BlkOpKindUnroll:
{
- case GenTreeBlk::BlkOpKindUnroll:
- {
- buildInternalIntRegisterDefForNode(blkNode);
+ buildInternalIntRegisterDefForNode(blkNode);
#ifdef TARGET_ARM64
- const bool canUseLoadStorePairIntRegsInstrs = (size >= 2 * REGSIZE_BYTES);
+ const bool canUseLoadStorePairIntRegsInstrs = (size >= 2 * REGSIZE_BYTES);
- if (canUseLoadStorePairIntRegsInstrs)
- {
- // CodeGen can use ldp/stp instructions sequence.
- buildInternalIntRegisterDefForNode(blkNode);
- }
+ if (canUseLoadStorePairIntRegsInstrs)
+ {
+ // CodeGen can use ldp/stp instructions sequence.
+ buildInternalIntRegisterDefForNode(blkNode);
+ }
- const bool isSrcAddrLocal = src->OperIs(GT_LCL_VAR, GT_LCL_FLD) ||
- ((srcAddrOrFill != nullptr) && srcAddrOrFill->OperIs(GT_LCL_ADDR));
- const bool isDstAddrLocal = dstAddr->OperIs(GT_LCL_ADDR);
+ const bool isSrcAddrLocal = src->OperIs(GT_LCL_VAR, GT_LCL_FLD) ||
+ ((srcAddrOrFill != nullptr) && srcAddrOrFill->OperIs(GT_LCL_ADDR));
+ const bool isDstAddrLocal = dstAddr->OperIs(GT_LCL_ADDR);
- // CodeGen can use 16-byte SIMD ldp/stp for larger block sizes.
- // This is the case, when both registers are either sp or fp.
- bool canUse16ByteWideInstrs = (size >= 2 * FP_REGSIZE_BYTES);
+ // CodeGen can use 16-byte SIMD ldp/stp for larger block sizes.
+ // This is the case when both registers are either sp or fp.
+ bool canUse16ByteWideInstrs = (size >= 2 * FP_REGSIZE_BYTES);
- // Note that the SIMD registers allocation is speculative - LSRA doesn't know at this point
- // whether CodeGen will use SIMD registers (i.e. if such instruction sequence will be more optimal).
- // Therefore, it must allocate an additional integer register anyway.
- if (canUse16ByteWideInstrs)
- {
- buildInternalFloatRegisterDefForNode(blkNode, internalFloatRegCandidates());
- buildInternalFloatRegisterDefForNode(blkNode, internalFloatRegCandidates());
- }
+ // Note that the SIMD register allocation is speculative - LSRA doesn't know at this point
+ // whether CodeGen will use SIMD registers (i.e. if such an instruction sequence will be more optimal).
+ // Therefore, it must allocate an additional integer register anyway.
+ if (canUse16ByteWideInstrs)
+ {
+ buildInternalFloatRegisterDefForNode(blkNode, internalFloatRegCandidates());
+ buildInternalFloatRegisterDefForNode(blkNode, internalFloatRegCandidates());
+ }
- const bool srcAddrMayNeedReg =
- isSrcAddrLocal || ((srcAddrOrFill != nullptr) && srcAddrOrFill->isContained());
- const bool dstAddrMayNeedReg = isDstAddrLocal || dstAddr->isContained();
+ const bool srcAddrMayNeedReg =
+ isSrcAddrLocal || ((srcAddrOrFill != nullptr) && srcAddrOrFill->isContained());
+ const bool dstAddrMayNeedReg = isDstAddrLocal || dstAddr->isContained();
- // The following allocates an additional integer register in a case
- // when a load instruction and a store instruction cannot be encoded using offset
- // from a corresponding base register.
- if (srcAddrMayNeedReg && dstAddrMayNeedReg)
- {
- buildInternalIntRegisterDefForNode(blkNode);
- }
-#endif
+ // The following allocates an additional integer register in a case
+ // when a load instruction and a store instruction cannot be encoded using offset
+ // from a corresponding base register.
+ if (srcAddrMayNeedReg && dstAddrMayNeedReg)
+ {
+ buildInternalIntRegisterDefForNode(blkNode);
}
- break;
+#endif
+ }
+ break;
- case GenTreeBlk::BlkOpKindUnrollMemmove:
- {
+ case GenTreeBlk::BlkOpKindUnrollMemmove:
+ {
#ifdef TARGET_ARM64
- // Prepare SIMD/GPR registers needed to perform an unrolled memmove. The idea that
- // we can ignore the fact that src and dst might overlap if we save the whole src
- // to temp regs in advance.
+ // Prepare SIMD/GPR registers needed to perform an unrolled memmove. The idea is that
+ // we can ignore the fact that src and dst might overlap if we save the whole src
+ // to temp regs in advance.
- // Lowering was expected to get rid of memmove in case of zero
- assert(size > 0);
+ // Lowering was expected to get rid of memmove in case of zero
+ assert(size > 0);
- const unsigned simdSize = FP_REGSIZE_BYTES;
- if (size >= simdSize)
- {
- unsigned simdRegs = size / simdSize;
- if ((size % simdSize) != 0)
- {
- // TODO-CQ: Consider using GPR load/store here if the reminder is 1,2,4 or 8
- simdRegs++;
- }
- for (unsigned i = 0; i < simdRegs; i++)
- {
- // It's too late to revert the unrolling so we hope we'll have enough SIMD regs
- // no more than MaxInternalCount. Currently, it's controlled by getUnrollThreshold(memmove)
- buildInternalFloatRegisterDefForNode(blkNode, internalFloatRegCandidates());
- }
- }
- else if (isPow2(size))
+ const unsigned simdSize = FP_REGSIZE_BYTES;
+ if (size >= simdSize)
+ {
+ unsigned simdRegs = size / simdSize;
+ if ((size % simdSize) != 0)
{
- // Single GPR for 1,2,4,8
- buildInternalIntRegisterDefForNode(blkNode, availableIntRegs);
+ // TODO-CQ: Consider using GPR load/store here if the remainder is 1,2,4 or 8
+ simdRegs++;
}
- else
+ for (unsigned i = 0; i < simdRegs; i++)
{
- // Any size from 3 to 15 can be handled via two GPRs
- buildInternalIntRegisterDefForNode(blkNode, availableIntRegs);
- buildInternalIntRegisterDefForNode(blkNode, availableIntRegs);
+ // It's too late to revert the unrolling, so we count on needing no more than MaxInternalCount
+ // SIMD regs. Currently, that count is controlled by getUnrollThreshold(memmove)
+ buildInternalFloatRegisterDefForNode(blkNode, internalFloatRegCandidates());
}
+ }
+ else if (isPow2(size))
+ {
+ // Single GPR for 1,2,4,8
+ buildInternalIntRegisterDefForNode(blkNode, availableIntRegs);
+ }
+ else
+ {
+ // Any size from 3 to 15 can be handled via two GPRs
+ buildInternalIntRegisterDefForNode(blkNode, availableIntRegs);
+ buildInternalIntRegisterDefForNode(blkNode, availableIntRegs);
+ }
#else // TARGET_ARM64
- unreached();
+ unreached();
#endif
+ }
+ break;
+
+ case GenTreeBlk::BlkOpKindHelper:
+ dstAddrRegMask = RBM_ARG_0;
+ if (srcAddrOrFill != nullptr)
+ {
+ assert(!srcAddrOrFill->isContained());
+ srcRegMask = RBM_ARG_1;
}
+ sizeRegMask = RBM_ARG_2;
break;
- case GenTreeBlk::BlkOpKindHelper:
- dstAddrRegMask = RBM_ARG_0;
- if (srcAddrOrFill != nullptr)
- {
- assert(!srcAddrOrFill->isContained());
- srcRegMask = RBM_ARG_1;
- }
- sizeRegMask = RBM_ARG_2;
- break;
-
- default:
- unreached();
- }
+ default:
+ unreached();
}
}
assert(blkNode->OperIsStore());
regMaskTP killMask = RBM_NONE;
- if ((blkNode->OperGet() == GT_STORE_OBJ) && blkNode->OperIsCopyBlkOp())
+ bool isCopyBlk = varTypeIsStruct(blkNode->Data());
+ switch (blkNode->gtBlkOpKind)
{
- assert(blkNode->AsBlk()->GetLayout()->HasGCPtr());
- killMask = compiler->compHelperCallKillSet(CORINFO_HELP_ASSIGN_BYREF);
- }
- else
- {
- bool isCopyBlk = varTypeIsStruct(blkNode->Data());
- switch (blkNode->gtBlkOpKind)
- {
+ case GenTreeBlk::BlkOpKindCpObjUnroll:
+#ifdef TARGET_XARCH
+ case GenTreeBlk::BlkOpKindCpObjRepInstr:
+#endif // TARGET_XARCH
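+ // The cpobj sequences may call CORINFO_HELP_ASSIGN_BYREF for the GC slots, so conservatively
+ // treat that helper's kill set as killed here.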
+ assert(isCopyBlk && blkNode->AsBlk()->GetLayout()->HasGCPtr());
+ killMask = compiler->compHelperCallKillSet(CORINFO_HELP_ASSIGN_BYREF);
+ break;
+
#ifndef TARGET_X86
- case GenTreeBlk::BlkOpKindHelper:
- if (isCopyBlk)
- {
- killMask = compiler->compHelperCallKillSet(CORINFO_HELP_MEMCPY);
- }
- else
- {
- killMask = compiler->compHelperCallKillSet(CORINFO_HELP_MEMSET);
- }
- break;
+ case GenTreeBlk::BlkOpKindHelper:
+ if (isCopyBlk)
+ {
+ killMask = compiler->compHelperCallKillSet(CORINFO_HELP_MEMCPY);
+ }
+ else
+ {
+ killMask = compiler->compHelperCallKillSet(CORINFO_HELP_MEMSET);
+ }
+ break;
#endif
#ifdef TARGET_XARCH
- case GenTreeBlk::BlkOpKindRepInstr:
- if (isCopyBlk)
- {
- // rep movs kills RCX, RDI and RSI
- killMask = RBM_RCX | RBM_RDI | RBM_RSI;
- }
- else
- {
- // rep stos kills RCX and RDI.
- // (Note that the Data() node, if not constant, will be assigned to
- // RCX, but it's find that this kills it, as the value is not available
- // after this node in any case.)
- killMask = RBM_RDI | RBM_RCX;
- }
- break;
+ case GenTreeBlk::BlkOpKindRepInstr:
+ if (isCopyBlk)
+ {
+ // rep movs kills RCX, RDI and RSI
+ killMask = RBM_RCX | RBM_RDI | RBM_RSI;
+ }
+ else
+ {
+ // rep stos kills RCX and RDI.
+ // (Note that the Data() node, if not constant, will be assigned to
+ // RCX, but it's fine that this kills it, as the value is not available
+ // after this node in any case.)
+ killMask = RBM_RDI | RBM_RCX;
+ }
+ break;
#endif
- case GenTreeBlk::BlkOpKindUnrollMemmove:
- case GenTreeBlk::BlkOpKindUnroll:
- case GenTreeBlk::BlkOpKindInvalid:
- // for these 'gtBlkOpKind' kinds, we leave 'killMask' = RBM_NONE
- break;
- }
+ case GenTreeBlk::BlkOpKindUnrollMemmove:
+ case GenTreeBlk::BlkOpKindUnroll:
+ case GenTreeBlk::BlkOpKindInvalid:
+ // for these 'gtBlkOpKind' kinds, we leave 'killMask' = RBM_NONE
+ break;
}
+
return killMask;
}
killMask = getKillSetForModDiv(tree->AsOp());
break;
- case GT_STORE_OBJ:
case GT_STORE_BLK:
case GT_STORE_DYN_BLK:
killMask = getKillSetForBlockStore(tree->AsBlk());
break;
case GT_STORE_BLK:
- case GT_STORE_OBJ:
case GT_STORE_DYN_BLK:
srcCount = BuildBlockStore(tree->AsBlk());
break;
srcAddrOrFill = src->AsIndir()->Addr();
}
- if (blkNode->OperIs(GT_STORE_OBJ))
+ switch (blkNode->gtBlkOpKind)
{
- // We don't need to materialize the struct size but we still need
- // a temporary register to perform the sequence of loads and stores.
- // We can't use the special Write Barrier registers, so exclude them from the mask
- regMaskTP internalIntCandidates =
- allRegs(TYP_INT) & ~(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF);
- buildInternalIntRegisterDefForNode(blkNode, internalIntCandidates);
-
- if (size >= 2 * REGSIZE_BYTES)
+ case GenTreeBlk::BlkOpKindCpObjUnroll:
{
- // TODO-LoongArch64: We will use ld/st paired to reduce code size and improve performance
- // so we need to reserve an extra internal register.
+ // We don't need to materialize the struct size but we still need
+ // a temporary register to perform the sequence of loads and stores.
+ // We can't use the special Write Barrier registers, so exclude them from the mask
+ regMaskTP internalIntCandidates =
+ allRegs(TYP_INT) & ~(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF);
buildInternalIntRegisterDefForNode(blkNode, internalIntCandidates);
- }
- // If we have a dest address we want it in RBM_WRITE_BARRIER_DST_BYREF.
- dstAddrRegMask = RBM_WRITE_BARRIER_DST_BYREF;
+ if (size >= 2 * REGSIZE_BYTES)
+ {
+ // TODO-LoongArch64: We will use ld/st paired to reduce code size and improve performance
+ // so we need to reserve an extra internal register.
+ buildInternalIntRegisterDefForNode(blkNode, internalIntCandidates);
+ }
- // If we have a source address we want it in REG_WRITE_BARRIER_SRC_BYREF.
- // Otherwise, if it is a local, codegen will put its address in REG_WRITE_BARRIER_SRC_BYREF,
- // which is killed by a StoreObj (and thus needn't be reserved).
- if (srcAddrOrFill != nullptr)
- {
- assert(!srcAddrOrFill->isContained());
- srcRegMask = RBM_WRITE_BARRIER_SRC_BYREF;
+ // If we have a dest address we want it in RBM_WRITE_BARRIER_DST_BYREF.
+ dstAddrRegMask = RBM_WRITE_BARRIER_DST_BYREF;
+
+ // If we have a source address we want it in REG_WRITE_BARRIER_SRC_BYREF.
+ // Otherwise, if it is a local, codegen will put its address in REG_WRITE_BARRIER_SRC_BYREF,
+ // which is killed by a StoreObj (and thus needn't be reserved).
+ if (srcAddrOrFill != nullptr)
+ {
+ assert(!srcAddrOrFill->isContained());
+ srcRegMask = RBM_WRITE_BARRIER_SRC_BYREF;
+ }
}
- }
- else
- {
- switch (blkNode->gtBlkOpKind)
- {
- case GenTreeBlk::BlkOpKindUnroll:
- buildInternalIntRegisterDefForNode(blkNode);
- break;
+ break;
- case GenTreeBlk::BlkOpKindHelper:
- dstAddrRegMask = RBM_ARG_0;
- if (srcAddrOrFill != nullptr)
- {
- assert(!srcAddrOrFill->isContained());
- srcRegMask = RBM_ARG_1;
- }
- sizeRegMask = RBM_ARG_2;
- break;
+ case GenTreeBlk::BlkOpKindUnroll:
+ buildInternalIntRegisterDefForNode(blkNode);
+ break;
- default:
- unreached();
- }
+ case GenTreeBlk::BlkOpKindHelper:
+ dstAddrRegMask = RBM_ARG_0;
+ if (srcAddrOrFill != nullptr)
+ {
+ assert(!srcAddrOrFill->isContained());
+ srcRegMask = RBM_ARG_1;
+ }
+ sizeRegMask = RBM_ARG_2;
+ break;
+
+ default:
+ unreached();
}
}
break;
case GT_STORE_BLK:
- case GT_STORE_OBJ:
case GT_STORE_DYN_BLK:
srcCount = BuildBlockStore(tree->AsBlk());
break;
srcAddrOrFill = src->AsIndir()->Addr();
}
- if (blkNode->OperIs(GT_STORE_OBJ))
+ switch (blkNode->gtBlkOpKind)
{
- // We don't need to materialize the struct size but we still need
- // a temporary register to perform the sequence of loads and stores.
- // We can't use the special Write Barrier registers, so exclude them from the mask
- regMaskTP internalIntCandidates =
- allRegs(TYP_INT) & ~(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF);
- buildInternalIntRegisterDefForNode(blkNode, internalIntCandidates);
-
- if (size >= 2 * REGSIZE_BYTES)
+ case GenTreeBlk::BlkOpKindCpObjUnroll:
{
- // TODO-RISCV64: We will use ld/st paired to reduce code size and improve performance
- // so we need to reserve an extra internal register.
+ // We don't need to materialize the struct size but we still need
+ // a temporary register to perform the sequence of loads and stores.
+ // We can't use the special Write Barrier registers, so exclude them from the mask
+ regMaskTP internalIntCandidates =
+ allRegs(TYP_INT) & ~(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF);
buildInternalIntRegisterDefForNode(blkNode, internalIntCandidates);
- }
- // If we have a dest address we want it in RBM_WRITE_BARRIER_DST_BYREF.
- dstAddrRegMask = RBM_WRITE_BARRIER_DST_BYREF;
+ if (size >= 2 * REGSIZE_BYTES)
+ {
+ // TODO-RISCV64: We will use ld/st paired to reduce code size and improve performance
+ // so we need to reserve an extra internal register.
+ buildInternalIntRegisterDefForNode(blkNode, internalIntCandidates);
+ }
- // If we have a source address we want it in REG_WRITE_BARRIER_SRC_BYREF.
- // Otherwise, if it is a local, codegen will put its address in REG_WRITE_BARRIER_SRC_BYREF,
- // which is killed by a StoreObj (and thus needn't be reserved).
- if (srcAddrOrFill != nullptr)
- {
- assert(!srcAddrOrFill->isContained());
- srcRegMask = RBM_WRITE_BARRIER_SRC_BYREF;
+ // If we have a dest address we want it in RBM_WRITE_BARRIER_DST_BYREF.
+ dstAddrRegMask = RBM_WRITE_BARRIER_DST_BYREF;
+
+ // If we have a source address we want it in REG_WRITE_BARRIER_SRC_BYREF.
+ // Otherwise, if it is a local, codegen will put its address in REG_WRITE_BARRIER_SRC_BYREF,
+ // which is killed by a StoreObj (and thus needn't be reserved).
+ if (srcAddrOrFill != nullptr)
+ {
+ assert(!srcAddrOrFill->isContained());
+ srcRegMask = RBM_WRITE_BARRIER_SRC_BYREF;
+ }
}
- }
- else
- {
- switch (blkNode->gtBlkOpKind)
- {
- case GenTreeBlk::BlkOpKindUnroll:
- buildInternalIntRegisterDefForNode(blkNode);
- break;
+ break;
- case GenTreeBlk::BlkOpKindHelper:
- dstAddrRegMask = RBM_ARG_0;
- if (srcAddrOrFill != nullptr)
- {
- assert(!srcAddrOrFill->isContained());
- srcRegMask = RBM_ARG_1;
- }
- sizeRegMask = RBM_ARG_2;
- break;
+ case GenTreeBlk::BlkOpKindUnroll:
+ buildInternalIntRegisterDefForNode(blkNode);
+ break;
- default:
- unreached();
- }
+ case GenTreeBlk::BlkOpKindHelper:
+ dstAddrRegMask = RBM_ARG_0;
+ if (srcAddrOrFill != nullptr)
+ {
+ assert(!srcAddrOrFill->isContained());
+ srcRegMask = RBM_ARG_1;
+ }
+ sizeRegMask = RBM_ARG_2;
+ break;
+
+ default:
+ unreached();
}
}
#endif // FEATURE_PUT_STRUCT_ARG_STK
case GT_STORE_BLK:
- case GT_STORE_OBJ:
case GT_STORE_DYN_BLK:
srcCount = BuildBlockStore(tree->AsBlk());
break;
case GT_STOREIND:
case GT_ARR_INDEX:
case GT_STORE_BLK:
- case GT_STORE_OBJ:
case GT_SWITCH_TABLE:
case GT_LOCKADD:
#ifdef TARGET_X86
srcAddrOrFill = src->AsIndir()->Addr();
}
- if (blkNode->OperIs(GT_STORE_OBJ))
+ switch (blkNode->gtBlkOpKind)
{
- if (blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindRepInstr)
- {
+ case GenTreeBlk::BlkOpKindCpObjRepInstr:
// We need the size of the contiguous Non-GC-region to be in RCX to call rep movsq.
sizeRegMask = RBM_RCX;
- }
+ FALLTHROUGH;
- // The srcAddr must be in a register. If it was under a GT_IND, we need to subsume all of its
- // sources.
- dstAddrRegMask = RBM_RDI;
- srcRegMask = RBM_RSI;
- }
- else
- {
- switch (blkNode->gtBlkOpKind)
- {
- case GenTreeBlk::BlkOpKindUnroll:
- if ((size % XMM_REGSIZE_BYTES) != 0)
- {
- regMaskTP regMask = availableIntRegs;
-#ifdef TARGET_X86
- if ((size & 1) != 0)
- {
- // We'll need to store a byte so a byte register is needed on x86.
- regMask = allByteRegs();
- internalIsByte = true;
- }
-#endif
- internalIntDef = buildInternalIntRegisterDefForNode(blkNode, regMask);
- }
+ case GenTreeBlk::BlkOpKindCpObjUnroll:
+ // The srcAddr must be in a register. If it was under a GT_IND, we need to subsume all of its sources.
+ dstAddrRegMask = RBM_RDI;
+ srcRegMask = RBM_RSI;
+ break;
- if (size >= XMM_REGSIZE_BYTES)
+ case GenTreeBlk::BlkOpKindUnroll:
+ if ((size % XMM_REGSIZE_BYTES) != 0)
+ {
+ regMaskTP regMask = availableIntRegs;
+#ifdef TARGET_X86
+ if ((size & 1) != 0)
{
- buildInternalFloatRegisterDefForNode(blkNode, internalFloatRegCandidates());
- SetContainsAVXFlags(size);
+ // We'll need to store a byte so a byte register is needed on x86.
+ regMask = allByteRegs();
+ internalIsByte = true;
}
- break;
+#endif
+ internalIntDef = buildInternalIntRegisterDefForNode(blkNode, regMask);
+ }
- case GenTreeBlk::BlkOpKindUnrollMemmove:
+ if (size >= XMM_REGSIZE_BYTES)
{
- // Prepare SIMD/GPR registers needed to perform an unrolled memmove. The idea that
- // we can ignore the fact that src and dst might overlap if we save the whole src
- // to temp regs in advance, e.g. for memmove(dst: rcx, src: rax, len: 120):
- //
- // vmovdqu ymm0, ymmword ptr[rax + 0]
- // vmovdqu ymm1, ymmword ptr[rax + 32]
- // vmovdqu ymm2, ymmword ptr[rax + 64]
- // vmovdqu ymm3, ymmword ptr[rax + 88]
- // vmovdqu ymmword ptr[rcx + 0], ymm0
- // vmovdqu ymmword ptr[rcx + 32], ymm1
- // vmovdqu ymmword ptr[rcx + 64], ymm2
- // vmovdqu ymmword ptr[rcx + 88], ymm3
- //
-
- // Not yet finished for x86
- assert(TARGET_POINTER_SIZE == 8);
-
- // Lowering was expected to get rid of memmove in case of zero
- assert(size > 0);
-
- unsigned simdSize = compiler->roundDownSIMDSize(size);
- if (size <= ZMM_RECOMMENDED_THRESHOLD)
- {
- // Only use ZMM for large data due to possible CPU throttle issues
- simdSize = min(YMM_REGSIZE_BYTES, compiler->roundDownSIMDSize(size));
- }
+ buildInternalFloatRegisterDefForNode(blkNode, internalFloatRegCandidates());
+ SetContainsAVXFlags(size);
+ }
+ break;
- if ((size >= simdSize) && (simdSize > 0))
- {
- unsigned simdRegs = size / simdSize;
- if ((size % simdSize) != 0)
- {
- // TODO-CQ: Consider using GPR load/store here if the reminder is 1,2,4 or 8
- // especially if we enable AVX-512
- simdRegs++;
- }
- for (unsigned i = 0; i < simdRegs; i++)
- {
- // It's too late to revert the unrolling so we hope we'll have enough SIMD regs
- // no more than MaxInternalCount. Currently, it's controlled by getUnrollThreshold(memmove)
- buildInternalFloatRegisterDefForNode(blkNode, internalFloatRegCandidates());
- }
- SetContainsAVXFlags();
- }
- else if (isPow2(size))
+ case GenTreeBlk::BlkOpKindUnrollMemmove:
+ {
+ // Prepare SIMD/GPR registers needed to perform an unrolled memmove. The idea is that
+ // we can ignore the fact that src and dst might overlap if we save the whole src
+ // to temp regs in advance, e.g. for memmove(dst: rcx, src: rax, len: 120):
+ //
+ // vmovdqu ymm0, ymmword ptr[rax + 0]
+ // vmovdqu ymm1, ymmword ptr[rax + 32]
+ // vmovdqu ymm2, ymmword ptr[rax + 64]
+ // vmovdqu ymm3, ymmword ptr[rax + 88]
+ // vmovdqu ymmword ptr[rcx + 0], ymm0
+ // vmovdqu ymmword ptr[rcx + 32], ymm1
+ // vmovdqu ymmword ptr[rcx + 64], ymm2
+ // vmovdqu ymmword ptr[rcx + 88], ymm3
+ //
+
+ // Not yet finished for x86
+ assert(TARGET_POINTER_SIZE == 8);
+
+ // Lowering was expected to get rid of memmove in case of zero
+ assert(size > 0);
+
+ unsigned simdSize = compiler->roundDownSIMDSize(size);
+ if (size <= ZMM_RECOMMENDED_THRESHOLD)
+ {
+ // Only use ZMM for large data due to possible CPU throttle issues
+ simdSize = min(YMM_REGSIZE_BYTES, compiler->roundDownSIMDSize(size));
+ }
+
+ if ((size >= simdSize) && (simdSize > 0))
+ {
+ unsigned simdRegs = size / simdSize;
+ if ((size % simdSize) != 0)
{
- // Single GPR for 1,2,4,8
- buildInternalIntRegisterDefForNode(blkNode, availableIntRegs);
+ // TODO-CQ: Consider using GPR load/store here if the remainder is 1,2,4 or 8
+ // especially if we enable AVX-512
+ simdRegs++;
}
- else
+ for (unsigned i = 0; i < simdRegs; i++)
{
- // Any size from 3 to 15 can be handled via two GPRs
- buildInternalIntRegisterDefForNode(blkNode, availableIntRegs);
- buildInternalIntRegisterDefForNode(blkNode, availableIntRegs);
+ // It's too late to revert the unrolling, so we count on needing no more than MaxInternalCount
+ // SIMD regs. Currently, that count is controlled by getUnrollThreshold(memmove)
+ buildInternalFloatRegisterDefForNode(blkNode, internalFloatRegCandidates());
}
+ SetContainsAVXFlags();
}
- break;
+ else if (isPow2(size))
+ {
+ // Single GPR for 1,2,4,8
+ buildInternalIntRegisterDefForNode(blkNode, availableIntRegs);
+ }
+ else
+ {
+ // Any size from 3 to 15 can be handled via two GPRs
+ buildInternalIntRegisterDefForNode(blkNode, availableIntRegs);
+ buildInternalIntRegisterDefForNode(blkNode, availableIntRegs);
+ }
+ }
+ break;
- case GenTreeBlk::BlkOpKindRepInstr:
- dstAddrRegMask = RBM_RDI;
- srcRegMask = RBM_RSI;
- sizeRegMask = RBM_RCX;
- break;
+ case GenTreeBlk::BlkOpKindRepInstr:
+ dstAddrRegMask = RBM_RDI;
+ srcRegMask = RBM_RSI;
+ sizeRegMask = RBM_RCX;
+ break;
#ifdef TARGET_AMD64
- case GenTreeBlk::BlkOpKindHelper:
- dstAddrRegMask = RBM_ARG_0;
- srcRegMask = RBM_ARG_1;
- sizeRegMask = RBM_ARG_2;
- break;
+ case GenTreeBlk::BlkOpKindHelper:
+ dstAddrRegMask = RBM_ARG_0;
+ srcRegMask = RBM_ARG_1;
+ sizeRegMask = RBM_ARG_2;
+ break;
#endif
- default:
- unreached();
- }
+ default:
+ unreached();
}
if ((srcAddrOrFill == nullptr) && (srcRegMask != RBM_NONE))
case GT_BLK:
{
assert(varTypeIsStruct(location));
- GenTreeBlk* storeBlk = location->AsBlk();
- genTreeOps storeOper = location->AsBlk()->GetLayout()->HasGCPtr() ? GT_STORE_OBJ : GT_STORE_BLK;
+ JITDUMP("Rewriting GT_ASG(%s(X), Y) to STORE_BLK(X,Y):\n", GenTree::OpName(location->gtOper));
- JITDUMP("Rewriting GT_ASG(%s(X), Y) to %s(X,Y):\n", GenTree::OpName(location->gtOper),
- GenTree::OpName(storeOper));
- storeBlk->SetOperRaw(storeOper);
+ GenTreeBlk* storeBlk = location->AsBlk();
+ storeBlk->SetOperRaw(GT_STORE_BLK);
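+ // STORE_BLK now also covers the GC-pointer case that previously became GT_STORE_OBJ here.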
storeBlk->gtFlags &= ~GTF_DONT_CSE;
storeBlk->gtFlags |= (assignment->gtFlags & (GTF_ALL_EFFECT | GTF_DONT_CSE));
storeBlk->AsBlk()->Data() = value;