IF_DEF(SWR_CNS, IS_SF_WR, CNS ) // write [stk], const
IF_DEF(SRW_CNS, IS_SF_RW, CNS ) // r/w [stk], const
+IF_DEF(SWR_RRD_CNS, IS_AM_WR|IS_R1_RD, AMD_CNS) // write [stk], read reg, const
+
IF_DEF(SRW_SHF, IS_SF_RW, CNS ) // shift [stk], const
//----------------------------------------------------------------------------
emitAdjustStackDepthPushPop(ins);
}
+//------------------------------------------------------------------------
+// emitIns_S_R_I: emits the code for an instruction that takes a stack operand,
+// a register operand, and an immediate.
+//
+// Arguments:
+// ins - The instruction being emitted
+// attr - The emit attribute
+// varNum - The varNum of the stack operand
+// offs - The offset for the stack operand
+// reg - The register operand
+// ival - The immediate value
+//
+void emitter::emitIns_S_R_I(instruction ins, emitAttr attr, int varNum, int offs, regNumber reg, int ival)
+{
+ // This is only used for INS_vextracti128 and INS_vextractf128, and for these 'ival' must be 0 or 1.
+ assert(ins == INS_vextracti128 || ins == INS_vextractf128);
+ assert((ival == 0) || (ival == 1));
+ // An 'AmdCns' instrDesc is used (despite the stack operand) so that the immediate 'ival'
+ // can be stored on the descriptor; the stack reference itself is recorded via iiaLclVar below.
+ instrDesc* id = emitNewInstrAmdCns(attr, 0, ival);
+
+ id->idIns(ins);
+ id->idInsFmt(IF_SWR_RRD_CNS);
+ id->idReg1(reg);
+ id->idAddr()->iiaLclVar.initLclVarAddr(varNum, offs);
+#ifdef DEBUG
+ id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
+#endif
+
+ // Estimate the encoding size for this stack-based instruction (includes the immediate).
+ UNATIVE_OFFSET sz = emitInsSizeSV(id, insCodeMR(ins), varNum, offs, ival);
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
void emitter::emitIns_AR_R_I(instruction ins, emitAttr attr, regNumber base, int disp, regNumber ireg, int ival)
{
assert(ins == INS_vextracti128 || ins == INS_vextractf128);
}
break;
+ case IF_SWR_RRD_CNS:
+ assert(ins == INS_vextracti128 || ins == INS_vextractf128);
+ assert(UseVEXEncoding());
+ emitGetInsAmdCns(id, &cnsVal);
+
+ printf("%s", sstr);
+
+ emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
+ id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
+
+ printf(", %s", emitRegName(id->idReg1(), attr));
+
+ val = cnsVal.cnsVal;
+ printf(", ");
+
+ if (cnsVal.cnsReloc)
+ {
+ emitDispReloc(val);
+ }
+ else
+ {
+ goto PRINT_CONSTANT;
+ }
+ break;
+
case IF_RRD_SRD:
case IF_RWR_SRD:
case IF_RRW_SRD:
sz = emitSizeOfInsDsc(id);
break;
+ case IF_SWR_RRD_CNS:
+ assert(ins == INS_vextracti128 || ins == INS_vextractf128);
+ assert(UseVEXEncoding());
+ emitGetInsAmdCns(id, &cnsVal);
+ code = insCodeMR(ins);
+ dst = emitOutputSV(dst, id, insCodeMR(ins), &cnsVal);
+ sz = emitSizeOfInsDsc(id);
+ break;
+
case IF_RRW_SRD_CNS:
case IF_RWR_SRD_CNS:
assert(IsSSEOrAVXInstruction(ins));
instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, GenTreeIndir* indir, int ival, insFormat fmt);
void emitIns_R_R_AR_I(
instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber base, int offs, int ival);
+void emitIns_S_R_I(instruction ins, emitAttr attr, int varNum, int offs, regNumber reg, int ival);
+
void emitIns_AR_R_I(instruction ins, emitAttr attr, regNumber base, int disp, regNumber ireg, int ival);
void emitIns_R_R_C_I(
}
}
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+ // Create Intervals to use for the save & restore of the upper halves of large vector lclVars.
+ if (enregisterLocalVars)
+ {
+ VarSetOps::Iter largeVectorVarsIter(compiler, largeVectorVars);
+ unsigned largeVectorVarIndex = 0;
+ while (largeVectorVarsIter.NextElem(&largeVectorVarIndex))
+ {
+ makeUpperVectorInterval(largeVectorVarIndex);
+ }
+ }
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+
#if DOUBLE_ALIGN
if (checkDoubleAlign)
{
//
void LinearScan::setIntervalAsSpilled(Interval* interval)
{
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+ if (interval->isUpperVector)
+ {
+ assert(interval->relatedInterval->isLocalVar);
+ interval->isSpilled = true;
+ // Now we need to mark the local as spilled also, even if the lower half is never spilled,
+ // as this will use the upper part of its home location.
+ interval = interval->relatedInterval;
+ }
+#endif
if (interval->isLocalVar)
{
unsigned varIndex = interval->getVarIndex(compiler);
{
if (isAssignedToInterval(assignedInterval, regRecord))
{
- // Only localVars or constants should be assigned to registers at block boundaries.
+ // Only localVars, constants or vector upper halves should be assigned to registers at block boundaries.
if (!assignedInterval->isLocalVar)
{
- assert(assignedInterval->isConstant);
+ assert(assignedInterval->isConstant || assignedInterval->IsUpperVector());
// Don't need to update the VarToRegMap.
inVarToRegMap = nullptr;
}
if (assignedInterval != nullptr)
{
- assert(assignedInterval->isLocalVar || assignedInterval->isConstant);
+ assert(assignedInterval->isLocalVar || assignedInterval->isConstant ||
+ assignedInterval->IsUpperVector());
if (!assignedInterval->isConstant && assignedInterval->assignedReg == physRegRecord)
{
{
unassignPhysReg(physRegRecord, nullptr);
}
- inVarToRegMap[assignedInterval->getVarIndex(compiler)] = REG_STK;
+ if (!assignedInterval->IsUpperVector())
+ {
+ inVarToRegMap[assignedInterval->getVarIndex(compiler)] = REG_STK;
+ }
}
else
{
{
outVarToRegMap[varIndex] = REG_STK;
}
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+ // Restore any partially-spilled large vector locals.
+ if (varTypeNeedsPartialCalleeSave(interval->registerType) && interval->isPartiallySpilled)
+ {
+ Interval* upperInterval = getUpperVectorInterval(varIndex);
+ if (interval->isActive && allocationPassComplete)
+ {
+ insertUpperVectorRestore(nullptr, upperInterval, currentBlock);
+ }
+ upperInterval->isActive = false;
+ interval->isPartiallySpilled = false;
+ }
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
}
INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_END_BB));
}
}
}
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+ if (enregisterLocalVars)
+ {
+ VarSetOps::Iter largeVectorVarsIter(compiler, largeVectorVars);
+ unsigned largeVectorVarIndex = 0;
+ while (largeVectorVarsIter.NextElem(&largeVectorVarIndex))
+ {
+ Interval* lclVarInterval = getIntervalForLocalVar(largeVectorVarIndex);
+ lclVarInterval->isPartiallySpilled = false;
+ }
+ }
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+
for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
{
getRegisterRecord(reg)->recentRefPosition = nullptr;
}
}
#ifdef FEATURE_SIMD
- else if (refType == RefTypeUpperVectorSaveDef || refType == RefTypeUpperVectorSaveUse)
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+ else if (currentInterval->isUpperVector)
{
- if (currentInterval->isInternal)
+ // This is a save or restore of the upper half of a large vector lclVar.
+ Interval* lclVarInterval = currentInterval->relatedInterval;
+ assert(lclVarInterval->isLocalVar);
+ if (refType == RefTypeUpperVectorSave)
{
- // This is the lclVar case. This internal interval is what will hold the upper half.
- Interval* lclVarInterval = currentInterval->relatedInterval;
- assert(lclVarInterval->isLocalVar);
- if (lclVarInterval->physReg == REG_NA)
+ if ((lclVarInterval->physReg == REG_NA) ||
+ (lclVarInterval->isPartiallySpilled && (currentInterval->physReg == REG_STK)))
{
allocate = false;
}
+ else
+ {
+ lclVarInterval->isPartiallySpilled = true;
+ }
}
- else
+ else if (refType == RefTypeUpperVectorRestore)
{
- assert(!currentInterval->isLocalVar);
- // Note that this case looks a lot like the case below, but in this case we need to spill
- // at the previous RefPosition.
- if (assignedRegister != REG_NA)
+ assert(currentInterval->isUpperVector);
+ if (lclVarInterval->isPartiallySpilled)
{
- unassignPhysReg(getRegisterRecord(assignedRegister), currentInterval->firstRefPosition);
- INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, currentInterval));
+ lclVarInterval->isPartiallySpilled = false;
}
- currentRefPosition->registerAssignment = RBM_NONE;
- continue;
+ else
+ {
+ allocate = false;
+ }
+ }
+ }
+ else if (refType == RefTypeUpperVectorSave)
+ {
+ assert(!currentInterval->isLocalVar);
+ // Note that this case looks a lot like the case below, but in this case we need to spill
+ // at the previous RefPosition.
+ // We may want to consider allocating two callee-save registers for this case, but it happens rarely
+ // enough that it may not warrant the additional complexity.
+ if (assignedRegister != REG_NA)
+ {
+ unassignPhysReg(getRegisterRecord(assignedRegister), currentInterval->firstRefPosition);
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, currentInterval));
}
+ currentRefPosition->registerAssignment = RBM_NONE;
+ continue;
}
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
#endif // FEATURE_SIMD
if (allocate == false)
// then find a register to spill
if (assignedRegister == REG_NA)
{
-#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
- if (refType == RefTypeUpperVectorSaveDef)
- {
- // TODO-CQ: Determine whether copying to two integer callee-save registers would be profitable.
- // TODO CQ: Save the value directly to memory, #18144.
- // TODO-ARM64-CQ: Determine whether copying to one integer callee-save registers would be
- // profitable.
-
- // SaveDef position occurs after the Use of args and at the same location as Kill/Def
- // positions of a call node. But SaveDef position cannot use any of the arg regs as
- // they are needed for call node.
- currentRefPosition->registerAssignment =
- (allRegs(TYP_FLOAT) & RBM_FLT_CALLEE_TRASH & ~RBM_FLTARG_REGS);
- assignedRegister = tryAllocateFreeReg(currentInterval, currentRefPosition);
-
- // There MUST be caller-save registers available, because they have all just been killed.
- // Amd64 Windows: xmm4-xmm5 are guaranteed to be available as xmm0-xmm3 are used for passing args.
- // Amd64 Unix: xmm8-xmm15 are guaranteed to be available as xmm0-xmm7 are used for passing args.
- // X86 RyuJIT Windows: xmm4-xmm7 are guanrateed to be available.
- assert(assignedRegister != REG_NA);
-
- // Now, spill it.
- // Note:
- // i) The reason we have to spill is that SaveDef position is allocated after the Kill positions
- // of the call node are processed. Since callee-trash registers are killed by call node
- // we explicitly spill and unassign the register.
- // ii) These will look a bit backward in the dump, but it's a pain to dump the alloc before the
- // spill).
- unassignPhysReg(getRegisterRecord(assignedRegister), currentRefPosition);
- INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_ALLOC_REG, currentInterval, assignedRegister));
-
- // Now set assignedRegister to REG_NA again so that we don't re-activate it.
- assignedRegister = REG_NA;
+ bool isAllocatable = currentRefPosition->IsActualRef() || currentRefPosition->RegOptional();
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE && defined(_TARGET_ARM64_)
+ if (currentInterval->isUpperVector)
+ {
+ // On Arm64, we can't save the upper half to memory without a register.
+ isAllocatable = true;
+ assert(!currentRefPosition->RegOptional());
}
- else
-#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
- if (currentRefPosition->RequiresRegister() || currentRefPosition->RegOptional())
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE && _TARGET_ARM64_
+ if (isAllocatable)
{
if (allocateReg)
{
#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
//------------------------------------------------------------------------
-// insertUpperVectorSaveAndReload: Insert code to save and restore the upper half of a vector that lives
-// in a callee-save register at the point of a kill (the upper half is
-// not preserved).
+// insertUpperVectorSave: Insert code to save the upper half of a vector that lives
+// in a callee-save register at the point of a kill (the upper half is
+// not preserved).
//
// Arguments:
-// tree - This is the node around which we will insert the Save & Reload.
+// tree - This is the node before which we will insert the Save.
// It will be a call or some node that turns into a call.
-// refPosition - The RefTypeUpperVectorSaveDef RefPosition.
+// refPosition - The RefTypeUpperVectorSave RefPosition.
+// upperInterval - The Interval for the upper half of the large vector lclVar.
+// block - the BasicBlock containing the call.
//
-void LinearScan::insertUpperVectorSaveAndReload(GenTree* tree, RefPosition* refPosition, BasicBlock* block)
+void LinearScan::insertUpperVectorSave(GenTree* tree,
+ RefPosition* refPosition,
+ Interval* upperInterval,
+ BasicBlock* block)
{
- Interval* lclVarInterval = refPosition->getInterval()->relatedInterval;
+ Interval* lclVarInterval = upperInterval->relatedInterval;
assert(lclVarInterval->isLocalVar == true);
- LclVarDsc* varDsc = compiler->lvaTable + lclVarInterval->varNum;
- assert(varTypeNeedsPartialCalleeSave(varDsc->lvType));
regNumber lclVarReg = lclVarInterval->physReg;
if (lclVarReg == REG_NA)
{
return;
}
+ Interval* upperVectorInterval = refPosition->getInterval();
+ assert(upperVectorInterval->relatedInterval == lclVarInterval);
+ LclVarDsc* varDsc = compiler->lvaTable + lclVarInterval->varNum;
+ assert(varTypeNeedsPartialCalleeSave(varDsc->lvType));
assert((genRegMask(lclVarReg) & RBM_FLT_CALLEE_SAVED) != RBM_NONE);
- regNumber spillReg = refPosition->assignedReg();
- bool spillToMem = refPosition->spillAfter;
+ // On Arm64, we must always have a register to save the upper half,
+ // while on x86 we can spill directly to memory.
+ regNumber spillReg = refPosition->assignedReg();
+#ifdef _TARGET_ARM64_
+ bool spillToMem = refPosition->spillAfter;
+ assert(spillReg != REG_NA);
+#else
+ bool spillToMem = (spillReg == REG_NA);
+ assert(!refPosition->spillAfter);
+#endif
LIR::Range& blockRange = LIR::AsRange(block);
- // First, insert the save before the call.
+ // Insert the save before the call.
GenTree* saveLcl = compiler->gtNewLclvNode(lclVarInterval->varNum, varDsc->lvType);
saveLcl->gtRegNum = lclVarReg;
if (spillToMem)
{
simdNode->gtFlags |= GTF_SPILL;
+ upperVectorInterval->physReg = REG_STK;
+ }
+ else
+ {
+ assert((genRegMask(spillReg) & RBM_FLT_CALLEE_SAVED) != RBM_NONE);
+ upperVectorInterval->physReg = spillReg;
}
blockRange.InsertBefore(tree, LIR::SeqTree(compiler, simdNode));
+}
- // Now insert the restore after the call.
+//------------------------------------------------------------------------
+// insertUpperVectorRestore: Insert code to restore the upper half of a vector that has been partially spilled.
+//
+// Arguments:
+// tree - This is the node for which we will insert the Restore.
+// If non-null, it will be a use of the large vector lclVar.
+// If null, the Restore will be added to the end of the block.
+// upperVectorInterval - The Interval for the upper vector for the lclVar.
+// block - the BasicBlock into which we will be inserting the code.
+//
+// Notes:
+// In the case where 'tree' is non-null, we will insert the restore just prior to
+// its use, in order to ensure the proper ordering.
+//
+void LinearScan::insertUpperVectorRestore(GenTree* tree, Interval* upperVectorInterval, BasicBlock* block)
+{
+ Interval* lclVarInterval = upperVectorInterval->relatedInterval;
+ assert(lclVarInterval->isLocalVar == true);
+ regNumber lclVarReg = lclVarInterval->physReg;
- GenTree* restoreLcl = compiler->gtNewLclvNode(lclVarInterval->varNum, varDsc->lvType);
+ // We should not call this method if the lclVar is not in a register (we should have simply marked the entire
+ // lclVar as spilled).
+ assert(lclVarReg != REG_NA);
+ LclVarDsc* varDsc = compiler->lvaTable + lclVarInterval->varNum;
+ assert(varTypeNeedsPartialCalleeSave(varDsc->lvType));
+
+ GenTree* restoreLcl = nullptr;
+ restoreLcl = compiler->gtNewLclvNode(lclVarInterval->varNum, varDsc->lvType);
 restoreLcl->gtRegNum = lclVarReg;
 SetLsraAdded(restoreLcl);
- simdNode = new (compiler, GT_SIMD) GenTreeSIMD(varDsc->lvType, restoreLcl, nullptr, SIMDIntrinsicUpperRestore,
- varDsc->lvBaseType, genTypeSize(varDsc->lvType));
- simdNode->gtRegNum = spillReg;
+ GenTreeSIMD* simdNode =
+ new (compiler, GT_SIMD) GenTreeSIMD(varDsc->lvType, restoreLcl, nullptr, SIMDIntrinsicUpperRestore,
+ varDsc->lvBaseType, genTypeSize(varDsc->lvType));
+
+ regNumber restoreReg = upperVectorInterval->physReg;
 SetLsraAdded(simdNode);
- if (spillToMem)
+
+ if (upperVectorInterval->physReg == REG_NA)
 {
+ // We need a stack location for this.
+ assert(lclVarInterval->isSpilled);
 simdNode->gtFlags |= GTF_SPILLED;
+#ifdef _TARGET_AMD64_
+ // On x64 the restore can consume the saved value directly from memory, so no register is needed.
+ simdNode->gtFlags |= GTF_NOREG_AT_USE;
+#else
+ assert(!"We require a register for Arm64 UpperVectorRestore");
+#endif
 }
+ simdNode->gtRegNum = restoreReg;
- blockRange.InsertAfter(tree, LIR::SeqTree(compiler, simdNode));
+ LIR::Range& blockRange = LIR::AsRange(block);
+ JITDUMP("Adding UpperVectorRestore ");
+ if (tree != nullptr)
+ {
+ JITDUMP("after:\n");
+ DISPTREE(tree);
+ LIR::Use treeUse;
+ bool foundUse = blockRange.TryGetUse(tree, &treeUse);
+ assert(foundUse);
+ // We need to insert the restore prior to the use, not (necessarily) immediately after the lclVar.
+ blockRange.InsertBefore(treeUse.User(), LIR::SeqTree(compiler, simdNode));
+ }
+ else
+ {
+ // No use node: place the restore at the end of the block, but before any terminating branch.
+ JITDUMP("at end of BB%02u:\n", block->bbNum);
+ if (block->bbJumpKind == BBJ_COND || block->bbJumpKind == BBJ_SWITCH)
+ {
+ noway_assert(!blockRange.IsEmpty());
+
+ GenTree* branch = blockRange.LastNode();
+ assert(branch->OperIsConditionalJump() || branch->OperGet() == GT_SWITCH_TABLE ||
+ branch->OperGet() == GT_SWITCH);
+
+ blockRange.InsertBefore(branch, LIR::SeqTree(compiler, simdNode));
+ }
+ else
+ {
+ assert(block->bbJumpKind == BBJ_NONE || block->bbJumpKind == BBJ_ALWAYS);
+ blockRange.InsertAtEnd(LIR::SeqTree(compiler, simdNode));
+ }
+ }
+ DISPTREE(simdNode);
+ JITDUMP("\n");
}
#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
{
RefType refType = refPosition->refType;
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+ if ((refType == RefTypeUpperVectorSave) || (refType == RefTypeUpperVectorRestore))
+ {
+ Interval* interval = refPosition->getInterval();
+ // If this is not an 'upperVector', it must be a tree temp that has been already
+ // (fully) spilled.
+ if (!interval->isUpperVector)
+ {
+ assert(interval->firstRefPosition->spillAfter);
+ }
+ else
+ {
+ // The UpperVector RefPositions spill to the localVar's home location.
+ Interval* lclVarInterval = interval->relatedInterval;
+ assert(lclVarInterval->isSpilled || (!refPosition->spillAfter && !refPosition->reload));
+ }
+ return;
+ }
+#endif // !FEATURE_PARTIAL_SIMD_CALLEE_SAVE
if (refPosition->spillAfter || refPosition->reload ||
(refPosition->RegOptional() && refPosition->assignedReg() == REG_NA))
{
var_types typ;
#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
- if ((refType == RefTypeUpperVectorSaveDef) || (refType == RefTypeUpperVectorSaveUse))
+ if (refType == RefTypeUpperVectorSave)
{
typ = LargeVectorSaveType;
}
switch (currentRefPosition->refType)
{
-#ifdef FEATURE_SIMD
- case RefTypeUpperVectorSaveUse:
- case RefTypeUpperVectorSaveDef:
-#endif // FEATURE_SIMD
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+ case RefTypeUpperVectorSave:
+ case RefTypeUpperVectorRestore:
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
case RefTypeUse:
case RefTypeDef:
// These are the ones we're interested in
GenTree* treeNode = currentRefPosition->treeNode;
#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
- if (currentRefPosition->refType == RefTypeUpperVectorSaveDef)
+ if (currentRefPosition->refType == RefTypeUpperVectorSave)
{
- // The treeNode must be non-null. It is a call or an instruction that becomes a call.
+ // The treeNode is a call or something that might become one.
noway_assert(treeNode != nullptr);
-
- // If the associated interval is internal, this must be a RefPosition for a LargeVectorType LocalVar.
+ // If the associated interval is an UpperVector, this must be a RefPosition for a LargeVectorType
+ // LocalVar.
// Otherwise, this is a non-lclVar interval that has been spilled, and we don't need to do anything.
- if (currentRefPosition->getInterval()->isInternal)
+ Interval* interval = currentRefPosition->getInterval();
+ if (interval->isUpperVector)
{
- // If the LocalVar is in a callee-save register, we are going to spill its upper half around the
- // call.
- // If we have allocated a register to spill it to, we will use that; otherwise, we will spill it
- // to the stack. We can use as a temp register any non-arg caller-save register.
- currentRefPosition->referent->recentRefPosition = currentRefPosition;
- insertUpperVectorSaveAndReload(treeNode, currentRefPosition, block);
+ Interval* localVarInterval = interval->relatedInterval;
+ if ((localVarInterval->physReg != REG_NA) && !localVarInterval->isPartiallySpilled)
+ {
+ // If the localVar is in a register, it must be a callee-save register (otherwise it would have
+ // already been spilled).
+ assert(localVarInterval->assignedReg->isCalleeSave);
+ // If we have allocated a register to spill it to, we will use that; otherwise, we will spill it
+ // to the stack. We can use as a temp register any non-arg caller-save register.
+ currentRefPosition->referent->recentRefPosition = currentRefPosition;
+ insertUpperVectorSave(treeNode, currentRefPosition, currentRefPosition->getInterval(), block);
+ localVarInterval->isPartiallySpilled = true;
+ }
}
else
{
assert(!currentRefPosition->getInterval()->isLocalVar);
assert(currentRefPosition->getInterval()->firstRefPosition->spillAfter);
}
+ continue;
}
- else if (currentRefPosition->refType == RefTypeUpperVectorSaveUse)
+ else if (currentRefPosition->refType == RefTypeUpperVectorRestore)
{
- continue;
+ // The treeNode is a use of the large vector lclVar.
+ noway_assert(treeNode != nullptr);
+ // Since we don't do partial restores of tree temp intervals, this must be an upperVector.
+ Interval* interval = currentRefPosition->getInterval();
+ Interval* localVarInterval = interval->relatedInterval;
+ assert(interval->isUpperVector && (localVarInterval != nullptr));
+ if (localVarInterval->physReg != REG_NA)
+ {
+ assert(localVarInterval->isPartiallySpilled);
+ assert((localVarInterval->assignedReg != nullptr) &&
+ (localVarInterval->assignedReg->regNum == localVarInterval->physReg) &&
+ (localVarInterval->assignedReg->assignedInterval == localVarInterval));
+ insertUpperVectorRestore(treeNode, interval, block);
+ }
+ localVarInterval->isPartiallySpilled = false;
}
#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
if (!currentRefPosition->getInterval()->isLocalVar)
{
while ((nextRefPosition != nullptr) &&
- (nextRefPosition->refType == RefTypeUpperVectorSaveDef))
+ (nextRefPosition->refType == RefTypeUpperVectorSave))
{
nextRefPosition = nextRefPosition->nextRefPosition;
}
{
printf(" (V%02u)", varNum);
}
+ else if (IsUpperVector())
+ {
+ assert(relatedInterval != nullptr);
+ printf(" (U%02u)", relatedInterval->varNum);
+ }
+ printf(" %s", varTypeName(registerType));
if (isInternal)
{
printf(" (INTERNAL)");
{
printf(" V%02u", varNum);
}
- if (isInternal)
+ else if (IsUpperVector())
+ {
+ assert(relatedInterval != nullptr);
+ printf(" (U%02u)", relatedInterval->varNum);
+ }
+ else if (isInternal)
{
printf(" internal");
}
printf("<V%02u/L%u>", varNum, intervalIndex);
return;
}
+ else if (IsUpperVector())
+ {
+ assert(relatedInterval != nullptr);
+ printf(" (U%02u)", relatedInterval->varNum);
+ }
char intervalTypeChar = 'I';
if (isInternal)
{
printf("PtArg %-4s ", getRegName(reg));
break;
+ case LSRA_EVENT_UPPER_VECTOR_SAVE:
+ dumpRefPositionShort(activeRefPosition, currentBlock);
+ printf("UVSav %-4s ", getRegName(reg));
+ break;
+
+ case LSRA_EVENT_UPPER_VECTOR_RESTORE:
+ dumpRefPositionShort(activeRefPosition, currentBlock);
+ printf("UVRes %-4s ", getRegName(reg));
+ dumpRegRecords();
+ break;
+
// We currently don't dump anything for these events.
case LSRA_EVENT_DEFUSE_FIXED_DELAY_USE:
case LSRA_EVENT_SPILL_EXTENDED_LIFETIME:
{
printf(intervalNameFormat, 'V', interval->varNum);
}
+ else if (interval->IsUpperVector())
+ {
+ printf(intervalNameFormat, 'U', interval->relatedInterval->varNum);
+ }
else if (interval->isConstant)
{
printf(intervalNameFormat, 'C', interval->intervalIndex);
dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, nullptr, regRecord->regNum, currentBlock);
break;
- case RefTypeUpperVectorSaveDef:
- case RefTypeUpperVectorSaveUse:
+ case RefTypeUpperVectorSave:
+ dumpLsraAllocationEvent(LSRA_EVENT_UPPER_VECTOR_SAVE, nullptr, REG_NA, currentBlock);
+ break;
+
+ case RefTypeUpperVectorRestore:
+ dumpLsraAllocationEvent(LSRA_EVENT_UPPER_VECTOR_RESTORE, nullptr, REG_NA, currentBlock);
+ break;
+
case RefTypeDef:
case RefTypeUse:
case RefTypeParamDef:
void insertCopyOrReload(BasicBlock* block, GenTree* tree, unsigned multiRegIdx, RefPosition* refPosition);
#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
- // Insert code to save and restore the upper half of a vector that lives
- // in a callee-save register at the point of a call (the upper half is
- // not preserved).
- void insertUpperVectorSaveAndReload(GenTree* tree, RefPosition* refPosition, BasicBlock* block);
+ void makeUpperVectorInterval(unsigned varIndex);
+ Interval* getUpperVectorInterval(unsigned varIndex);
+ // Save the upper half of a vector that lives in a callee-save register at the point of a call.
+ void insertUpperVectorSave(GenTree* tree, RefPosition* refPosition, Interval* lclVarInterval, BasicBlock* block);
+ // Restore the upper half of a vector that's been partially spilled prior to a use in 'tree'.
+ void insertUpperVectorRestore(GenTree* tree, Interval* interval, BasicBlock* block);
#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
// resolve along one block-block edge
void buildRefPositionsForNode(GenTree* tree, BasicBlock* block, LsraLocation loc);
#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
- void buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation currentLoc, VARSET_VALARG_TP liveLargeVectors);
- void buildUpperVectorRestoreRefPositions(GenTree* tree, LsraLocation currentLoc, VARSET_VALARG_TP liveLargeVectors);
+ void buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation currentLoc, regMaskTP fpCalleeKillSet);
#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
#if defined(UNIX_AMD64_ABI)
LSRA_EVENT_START_BB, LSRA_EVENT_END_BB,
// Miscellaneous
- LSRA_EVENT_FREE_REGS,
+ LSRA_EVENT_FREE_REGS, LSRA_EVENT_UPPER_VECTOR_SAVE, LSRA_EVENT_UPPER_VECTOR_RESTORE,
// Characteristics of the current RefPosition
LSRA_EVENT_INCREMENT_RANGE_END, // ???
, isSpecialPutArg(false)
, preferCalleeSave(false)
, isConstant(false)
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+ , isUpperVector(false)
+ , isPartiallySpilled(false)
+#endif
, physReg(REG_COUNT)
#ifdef DEBUG
, intervalIndex(0)
// able to reuse a constant that's already in a register.
bool isConstant : 1;
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+ // True if this is a special interval for saving the upper half of a large vector.
+ bool isUpperVector : 1;
+ // This is a convenience method to avoid ifdef's everywhere this is used.
+ bool IsUpperVector() const
+ {
+ return isUpperVector;
+ }
+
+ // True if this interval has been partially spilled
+ bool isPartiallySpilled : 1;
+#else
+ // Stub for when partial SIMD callee-save is not supported, so callers need no #ifdef.
+ bool IsUpperVector() const
+ {
+ return false;
+ }
+#endif
+
// The register to which it is currently assigned.
regNumber physReg;
 bool RequiresRegister()
 {
+ // Note: upper-vector save/restore RefPositions are no longer unconditionally register-requiring;
+ // they are now handled specially during allocation rather than here.
- return (IsActualRef()
-#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
- || refType == RefTypeUpperVectorSaveDef || refType == RefTypeUpperVectorSaveUse
-#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
- ) &&
- !RegOptional();
+ return IsActualRef() && !RegOptional();
 }
void setRegOptional(bool val)
// memberName - enum member name
// memberValue - enum member value
// shortName - short name string
-// DEF_REFTYPE(memberName , memberValue , shortName )
- DEF_REFTYPE(RefTypeInvalid , 0x00 , "Invl" )
- DEF_REFTYPE(RefTypeDef , 0x01 , "Def " )
- DEF_REFTYPE(RefTypeUse , 0x02 , "Use " )
- DEF_REFTYPE(RefTypeKill , 0x04 , "Kill" )
- DEF_REFTYPE(RefTypeBB , 0x08 , "BB " )
- DEF_REFTYPE(RefTypeFixedReg , 0x10 , "Fixd" )
- DEF_REFTYPE(RefTypeExpUse , (0x20 | RefTypeUse), "ExpU" )
- DEF_REFTYPE(RefTypeParamDef , (0x10 | RefTypeDef), "Parm" )
- DEF_REFTYPE(RefTypeDummyDef , (0x20 | RefTypeDef), "DDef" )
- DEF_REFTYPE(RefTypeZeroInit , (0x30 | RefTypeDef), "Zero" )
- DEF_REFTYPE(RefTypeUpperVectorSaveDef, (0x40 | RefTypeDef), "UVSv" )
- DEF_REFTYPE(RefTypeUpperVectorSaveUse, (0x40 | RefTypeUse), "UVRs" )
- DEF_REFTYPE(RefTypeKillGCRefs , 0x80 , "KlGC" )
+// DEF_REFTYPE(memberName , memberValue , shortName )
+ DEF_REFTYPE(RefTypeInvalid , 0x00 , "Invl" )
+ DEF_REFTYPE(RefTypeDef , 0x01 , "Def " )
+ DEF_REFTYPE(RefTypeUse , 0x02 , "Use " )
+ DEF_REFTYPE(RefTypeKill , 0x04 , "Kill" )
+ DEF_REFTYPE(RefTypeBB , 0x08 , "BB " )
+ DEF_REFTYPE(RefTypeFixedReg , 0x10 , "Fixd" )
+ DEF_REFTYPE(RefTypeExpUse , (0x20 | RefTypeUse), "ExpU" )
+ DEF_REFTYPE(RefTypeParamDef , (0x10 | RefTypeDef), "Parm" )
+ DEF_REFTYPE(RefTypeDummyDef , (0x20 | RefTypeDef), "DDef" )
+ DEF_REFTYPE(RefTypeZeroInit , (0x30 | RefTypeDef), "Zero" )
+ DEF_REFTYPE(RefTypeUpperVectorSave , (0x40 | RefTypeDef), "UVSv" )
+ DEF_REFTYPE(RefTypeUpperVectorRestore , (0x40 | RefTypeUse), "UVRs" )
+ DEF_REFTYPE(RefTypeKillGCRefs , 0x80 , "KlGC" )
// clang-format on
#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
 //------------------------------------------------------------------------
-// buildUpperVectorSaveRefPositions - Create special RefPositions for saving
-// the upper half of a set of large vectors.
+// makeUpperVectorInterval - Create an Interval for saving and restoring
+// the upper half of a large vector.
 //
 // Arguments:
-// tree - The current node being handled.
-// currentLoc - The location of the current node.
-// liveLargeVectors - The set of large vector lclVars live across 'tree'.
+// varIndex - The tracked index for a large vector lclVar.
 //
-// Notes:
-// At this time we create these RefPositions for any large vectors that are live across this node,
-// though we don't yet know which, if any, will be in a callee-save register at this point.
-// (If they're in a caller-save register, they will simply be fully spilled.)
-// This method is called after all the use and kill RefPositions are created for 'tree' but before
-// any definitions.
-//
-void LinearScan::buildUpperVectorSaveRefPositions(GenTree* tree,
- LsraLocation currentLoc,
- VARSET_VALARG_TP liveLargeVectors)
+void LinearScan::makeUpperVectorInterval(unsigned varIndex)
 {
- if (enregisterLocalVars && !VarSetOps::IsEmpty(compiler, largeVectorVars))
+ Interval* lclVarInterval = getIntervalForLocalVar(varIndex);
+ assert(varTypeNeedsPartialCalleeSave(lclVarInterval->registerType));
+ Interval* newInt = newInterval(LargeVectorSaveType);
+ // Link the upper-vector interval back to its lclVar interval so the save/restore
+ // insertion code can locate the lclVar (and its home location) from the upper interval.
+ newInt->relatedInterval = lclVarInterval;
+ newInt->isUpperVector = true;
+}
+
+//------------------------------------------------------------------------
+// getUpperVectorInterval - Get the Interval for saving and restoring
+// the upper half of a large vector.
+//
+// Arguments:
+// varIndex - The tracked index for a large vector lclVar.
+//
+Interval* LinearScan::getUpperVectorInterval(unsigned varIndex)
+{
+ // TODO-Throughput: Consider creating a map from varIndex to upperVector interval.
+ for (Interval& interval : intervals)
 {
- VarSetOps::Iter iter(compiler, liveLargeVectors);
- unsigned varIndex = 0;
- while (iter.NextElem(&varIndex))
+ if (interval.isLocalVar)
 {
- Interval* varInterval = getIntervalForLocalVar(varIndex);
- Interval* tempInterval = newInterval(varInterval->registerType);
- tempInterval->isInternal = true;
- RefPosition* pos =
- newRefPosition(tempInterval, currentLoc, RefTypeUpperVectorSaveDef, tree, RBM_FLT_CALLEE_SAVED);
- // We are going to save the existing relatedInterval of varInterval on tempInterval, so that we can set
- // the tempInterval as the relatedInterval of varInterval, so that we can build the corresponding
- // RefTypeUpperVectorSaveUse RefPosition. We will then restore the relatedInterval onto varInterval,
- // and set varInterval as the relatedInterval of tempInterval.
- tempInterval->relatedInterval = varInterval->relatedInterval;
- varInterval->relatedInterval = tempInterval;
- }
- }
- // For any non-lclVar large vector intervals that are live at this point (i.e. tree temps in the DefList),
- // we will also create a RefTypeUpperVectorSaveDef. For now these will all be spilled at this point,
- // as we don't currently have a mechanism to communicate any non-lclVar intervals that need to be restored.
- // TODO-CQ: Consider reworking this as part of addressing GitHub Issues #18144 and #17481.
- for (RefInfoListNode *listNode = defList.Begin(), *end = defList.End(); listNode != end;
- listNode = listNode->Next())
- {
- var_types treeType = listNode->treeNode->TypeGet();
- if (varTypeIsSIMD(treeType) && varTypeNeedsPartialCalleeSave(treeType))
+ continue;
+ }
+ // Every non-lclVar interval here should be an upper-vector interval created by
+ // makeUpperVectorInterval; its relatedInterval identifies the owning lclVar.
+ noway_assert(interval.isUpperVector);
+ if (interval.relatedInterval->getVarIndex(compiler) == varIndex)
 {
- RefPosition* pos = newRefPosition(listNode->ref->getInterval(), currentLoc, RefTypeUpperVectorSaveDef, tree,
- RBM_FLT_CALLEE_SAVED);
+ return &interval;
 }
 }
+ unreached();
}
-// buildUpperVectorRestoreRefPositions - Create special RefPositions for restoring
-// the upper half of a set of large vectors.
+//------------------------------------------------------------------------
+// buildUpperVectorSaveRefPositions - Create special RefPositions for saving
+// the upper half of a set of large vectors.
//
// Arguments:
-// tree - The current node being handled.
-// currentLoc - The location of the current node.
-// liveLargeVectors - The set of large vector lclVars live across 'tree'.
+// tree - The current node being handled
+// currentLoc - The location of the current node
+// fpCalleeKillSet - The set of registers killed by this node.
//
-// Notes:
-// This is called after all the RefPositions for 'tree' have been created, since the restore,
-// if needed, will be inserted after the call.
+// Notes:
+//    This is called by BuildDefsWithKills for any node that kills registers in the
+//    RBM_FLT_CALLEE_TRASH set. We actually need to find any calls that kill the upper-half
+//    of the callee-save vector registers.
+//    But we will use as a proxy any node that kills floating point registers.
+//    (Note that some calls are masquerading as other nodes at this point so we can't just
+//    check for calls.)
//
-void LinearScan::buildUpperVectorRestoreRefPositions(GenTree* tree,
- LsraLocation currentLoc,
- VARSET_VALARG_TP liveLargeVectors)
+void LinearScan::buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation currentLoc, regMaskTP fpCalleeKillSet)
{
- assert(enregisterLocalVars);
- if (!VarSetOps::IsEmpty(compiler, liveLargeVectors))
+ if (enregisterLocalVars && !VarSetOps::IsEmpty(compiler, largeVectorVars))
{
+ assert((fpCalleeKillSet & RBM_FLT_CALLEE_TRASH) != RBM_NONE);
+
+ // We only need to save the upper half of any large vector vars that are currently live.
+ VARSET_TP liveLargeVectors(VarSetOps::Intersection(compiler, currentLiveVars, largeVectorVars));
VarSetOps::Iter iter(compiler, liveLargeVectors);
unsigned varIndex = 0;
while (iter.NextElem(&varIndex))
{
- Interval* varInterval = getIntervalForLocalVar(varIndex);
- Interval* tempInterval = varInterval->relatedInterval;
- assert(tempInterval->isInternal == true);
- RefPosition* pos =
- newRefPosition(tempInterval, currentLoc, RefTypeUpperVectorSaveUse, tree, RBM_FLT_CALLEE_SAVED);
- // Restore the relatedInterval onto varInterval, and set varInterval as the relatedInterval
- // of tempInterval.
- varInterval->relatedInterval = tempInterval->relatedInterval;
- tempInterval->relatedInterval = varInterval;
+ Interval* varInterval = getIntervalForLocalVar(varIndex);
+ if (!varInterval->isPartiallySpilled)
+ {
+ Interval* upperVectorInterval = getUpperVectorInterval(varIndex);
+ RefPosition* pos =
+ newRefPosition(upperVectorInterval, currentLoc, RefTypeUpperVectorSave, tree, RBM_FLT_CALLEE_SAVED);
+ varInterval->isPartiallySpilled = true;
+#ifdef _TARGET_XARCH_
+ pos->regOptional = true;
+#endif
+ }
+ }
+ }
+ // For any non-lclVar intervals that are live at this point (i.e. in the DefList), we will also create
+ // a RefTypeUpperVectorSave. For now these will all be spilled at this point, as we don't currently
+ // have a mechanism to communicate any non-lclVar intervals that need to be restored.
+ // TODO-CQ: We could consider adding such a mechanism, but it's unclear whether this rare
+ // case of a large vector temp live across a call is worth the added complexity.
+ for (RefInfoListNode *listNode = defList.Begin(), *end = defList.End(); listNode != end;
+ listNode = listNode->Next())
+ {
+ if (varTypeNeedsPartialCalleeSave(listNode->treeNode->TypeGet()))
+ {
+ RefPosition* pos = newRefPosition(listNode->ref->getInterval(), currentLoc, RefTypeUpperVectorSave, tree,
+ RBM_FLT_CALLEE_SAVED);
}
}
}
{
minRegCountForRef++;
}
-#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
- else if (newRefPosition->refType == RefTypeUpperVectorSaveDef)
- {
- // TODO-Cleanup: won't need a register once #18144 is fixed.
- minRegCountForRef += 1;
- }
-#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
newRefPosition->minRegCandidateCount = minRegCountForRef;
if (newRefPosition->IsActualRef() && doReverseCallerCallee())
currentLoc += 2;
}
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+ // At the end of each block, we'll restore any upperVectors, so reset isPartiallySpilled on all of them.
+ if (enregisterLocalVars)
+ {
+ VarSetOps::Iter largeVectorVarsIter(compiler, largeVectorVars);
+ unsigned largeVectorVarIndex = 0;
+ while (largeVectorVarsIter.NextElem(&largeVectorVarIndex))
+ {
+ Interval* lclVarInterval = getIntervalForLocalVar(largeVectorVarIndex);
+ lclVarInterval->isPartiallySpilled = false;
+ }
+ }
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+
// Note: the visited set is cleared in LinearScan::doLinearScan()
markBlockVisited(block);
if (!defList.IsEmpty())
setTgtPref(interval, tgtPrefUse);
setTgtPref(interval, tgtPrefUse2);
#endif // _TARGET_XARCH_
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+ assert(!interval->isPartiallySpilled);
+#endif
return defRefPosition;
}
//
void LinearScan::BuildDefsWithKills(GenTree* tree, int dstCount, regMaskTP dstCandidates, regMaskTP killMask)
{
- // Generate Kill RefPositions
assert(killMask == getKillSetForNode(tree));
-#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
- VARSET_TP liveLargeVectorVars(VarSetOps::UninitVal());
- bool doLargeVectorRestore = false;
-#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
// Call this even when killMask is RBM_NONE, as we have to check for some special cases
buildKillPositionsForNode(tree, currentLoc + 1, killMask);
//
if ((killMask & RBM_FLT_CALLEE_TRASH) != RBM_NONE)
{
- if (enregisterLocalVars)
- {
- VarSetOps::AssignNoCopy(compiler, liveLargeVectorVars,
- VarSetOps::Intersection(compiler, currentLiveVars, largeVectorVars));
- }
- buildUpperVectorSaveRefPositions(tree, currentLoc, liveLargeVectorVars);
- doLargeVectorRestore = (enregisterLocalVars && !VarSetOps::IsEmpty(compiler, liveLargeVectorVars));
+ buildUpperVectorSaveRefPositions(tree, currentLoc + 1, killMask);
}
#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
}
// Now, create the Def(s)
BuildDefs(tree, dstCount, dstCandidates);
-
-#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
- // Finally, generate the UpperVectorRestores
- if (doLargeVectorRestore)
- {
- buildUpperVectorRestoreRefPositions(tree, currentLoc, liveLargeVectorVars);
- }
-#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
}
//------------------------------------------------------------------------
unsigned varIndex = interval->getVarIndex(compiler);
VarSetOps::RemoveElemD(compiler, currentLiveVars, varIndex);
}
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+ if (interval->isPartiallySpilled)
+ {
+ unsigned varIndex = interval->getVarIndex(compiler);
+ Interval* upperVectorInterval = getUpperVectorInterval(varIndex);
+ RefPosition* pos =
+ newRefPosition(upperVectorInterval, currentLoc, RefTypeUpperVectorRestore, operand, RBM_NONE);
+ interval->isPartiallySpilled = false;
+#ifdef _TARGET_XARCH_
+ pos->regOptional = true;
+#endif
+ }
+#endif
}
else
{
// The upper half of all AVX registers is volatile, even the callee-save registers.
// When a 32-byte SIMD value is live across a call, the register allocator will use this intrinsic
// to cause the upper half to be saved. It will first attempt to find another, unused, callee-save
-// register. If such a register cannot be found, it will save it to an available caller-save register.
-// In that case, this node will be marked GTF_SPILL, which will cause genProduceReg to save the 16 byte
-// value to the stack. (Note that if there are no caller-save registers available, the entire 32 byte
+// register. If such a register cannot be found, it will save the upper half to the upper half
+// of the localVar's home location.
+// (Note that it remains possible that the entire 32 byte
// value will be spilled to the stack.)
//
void CodeGen::genSIMDIntrinsicUpperSave(GenTreeSIMD* simdNode)
regNumber targetReg = simdNode->gtRegNum;
regNumber op1Reg = genConsumeReg(op1);
assert(op1Reg != REG_NA);
- assert(targetReg != REG_NA);
- getEmitter()->emitIns_R_R_I(INS_vextractf128, EA_32BYTE, targetReg, op1Reg, 0x01);
-
- genProduceReg(simdNode);
+ if (targetReg != REG_NA)
+ {
+ getEmitter()->emitIns_R_R_I(INS_vextractf128, EA_32BYTE, targetReg, op1Reg, 0x01);
+ genProduceReg(simdNode);
+ }
+ else
+ {
+ // The localVar must have a stack home.
+ unsigned varNum = op1->AsLclVarCommon()->gtLclNum;
+ LclVarDsc* varDsc = compiler->lvaGetDesc(varNum);
+ assert(varDsc->lvOnFrame);
+ // We want to store this to the upper 16 bytes of this localVar's home.
+ int offs = 16;
+
+ getEmitter()->emitIns_S_R_I(INS_vextractf128, EA_32BYTE, varNum, offs, op1Reg, 0x01);
+ }
}
//-----------------------------------------------------------------------------
// For consistency with genSIMDIntrinsicUpperSave, and to ensure that lclVar nodes always
// have their home register, this node has its targetReg on the lclVar child, and its source
// on the simdNode.
-// Regarding spill, please see the note above on genSIMDIntrinsicUpperSave. If we have spilled
-// an upper-half to a caller save register, this node will be marked GTF_SPILLED. However, unlike
-// most spill scenarios, the saved tree will be different from the restored tree, but the spill
-// restore logic, which is triggered by the call to genConsumeReg, requires us to provide the
-// spilled tree (saveNode) in order to perform the reload. We can easily find that tree,
-// as it is in the spill descriptor for the register from which it was saved.
//
void CodeGen::genSIMDIntrinsicUpperRestore(GenTreeSIMD* simdNode)
{
regNumber lclVarReg = genConsumeReg(op1);
assert(lclVarReg != REG_NA);
assert(srcReg != REG_NA);
- if (simdNode->gtFlags & GTF_SPILLED)
+ if (srcReg != REG_STK)
+ {
+ getEmitter()->emitIns_R_R_I(INS_vinsertf128, EA_32BYTE, lclVarReg, srcReg, 0x01);
+ }
+ else
{
- GenTree* saveNode = regSet.rsSpillDesc[srcReg]->spillTree;
- noway_assert(saveNode != nullptr && (saveNode->gtRegNum == srcReg));
- genConsumeReg(saveNode);
+ // The localVar must have a stack home.
+ unsigned varNum = op1->AsLclVarCommon()->gtLclNum;
+ LclVarDsc* varDsc = compiler->lvaGetDesc(varNum);
+ assert(varDsc->lvOnFrame);
+ // We will load this from the upper 16 bytes of this localVar's home.
+ int offs = 16;
+ getEmitter()->emitIns_R_S_I(INS_vinsertf128, EA_32BYTE, lclVarReg, varNum, offs, 0x01);
}
- getEmitter()->emitIns_R_R_I(INS_vinsertf128, EA_32BYTE, lclVarReg, srcReg, 0x01);
}
//------------------------------------------------------------------------
--- /dev/null
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+
+public class GitHub_18144
+{
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ static void dummy(Vector256<byte> v1, Vector256<byte> v2, Vector256<byte> v3, Vector256<byte> v4,
+ Vector256<byte> v5, Vector256<byte> v6, Vector256<byte> v7, Vector256<byte> v8)
+ {
+ }
+
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ static void DoThis()
+ {
+ var vA = Vector256.Create((byte)0xa);
+ var vB = Vector256.Create((byte)0xb);
+ var vC = Vector256.Create((byte)0xc);
+ var vD = Vector256.Create((byte)0xd);
+ var vE = Vector256.Create((byte)0xe);
+ var vF = Vector256.Create((byte)0xf);
+ var vG = Vector256.Create((byte)0x8);
+ var vH = Vector256.Create((byte)0x9);
+ dummy(vA, vB, vC, vD, vE, vF, vG, vH);
+ }
+
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ static void DoThat() { }
+
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ static void dummy128(Vector128<byte> v1, Vector128<byte> v2, Vector128<byte> v3, Vector128<byte> v4,
+ Vector128<byte> v5, Vector128<byte> v6, Vector128<byte> v7, Vector128<byte> v8)
+ {
+ }
+
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ static void DoThis128()
+ {
+ var vA = Vector128.Create((byte)0xa);
+ var vB = Vector128.Create((byte)0xb);
+ var vC = Vector128.Create((byte)0xc);
+ var vD = Vector128.Create((byte)0xd);
+ var vE = Vector128.Create((byte)0xe);
+ var vF = Vector128.Create((byte)0xf);
+ var vG = Vector128.Create((byte)0x8);
+ var vH = Vector128.Create((byte)0x9);
+ dummy128(vA, vB, vC, vD, vE, vF, vG, vH);
+ }
+
+ static int Main(string[] args)
+ {
+ int returnVal = 100;
+
+ var xA = Vector256<byte>.Zero;
+ var xB = Vector256<byte>.Zero;
+ var xC = Vector256<byte>.Zero;
+ var xD = Vector256<byte>.Zero;
+ var xE = Vector256<byte>.Zero;
+ var xF = Vector256<byte>.Zero;
+ var xG = Vector256<byte>.Zero;
+ var xH = Vector256<byte>.Zero;
+
+ DoThis();
+ DoThat();
+
+ Console.WriteLine("{0} {1} {2} {3} {4} {5} {6} {7}", xA, xB, xC, xD, xE, xF, xG, xH);
+ if (!xA.Equals(Vector256<byte>.Zero) || !xB.Equals(Vector256<byte>.Zero) || !xC.Equals(Vector256<byte>.Zero) || !xD.Equals(Vector256<byte>.Zero) ||
+ !xE.Equals(Vector256<byte>.Zero) || !xF.Equals(Vector256<byte>.Zero) || !xG.Equals(Vector256<byte>.Zero) || !xH.Equals(Vector256<byte>.Zero))
+ {
+ returnVal = -1;
+ }
+
+ var vA = Vector128<byte>.Zero;
+ var vB = Vector128<byte>.Zero;
+ var vC = Vector128<byte>.Zero;
+ var vD = Vector128<byte>.Zero;
+ var vE = Vector128<byte>.Zero;
+ var vF = Vector128<byte>.Zero;
+ var vG = Vector128<byte>.Zero;
+ var vH = Vector128<byte>.Zero;
+
+ DoThis128();
+ DoThat();
+
+ Console.WriteLine("{0} {1} {2} {3} {4} {5} {6} {7}", vA, vB, vC, vD, vE, vF, vG, vH);
+ if (!vA.Equals(Vector128<byte>.Zero) || !vB.Equals(Vector128<byte>.Zero) || !vC.Equals(Vector128<byte>.Zero) || !vD.Equals(Vector128<byte>.Zero) ||
+ !vE.Equals(Vector128<byte>.Zero) || !vF.Equals(Vector128<byte>.Zero) || !vG.Equals(Vector128<byte>.Zero) || !vH.Equals(Vector128<byte>.Zero))
+ {
+ returnVal = -1;
+ }
+
+ return returnVal;
+ }
+}
--- /dev/null
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.props))\dir.props" />
+ <PropertyGroup>
+ <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+ <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+ <SchemaVersion>2.0</SchemaVersion>
+ <ProjectGuid>{2649FAFE-07BF-4F93-8120-BA9A69285ABB}</ProjectGuid>
+ <OutputType>Exe</OutputType>
+ <ProjectTypeGuids>{786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids>
+ <SolutionDir Condition="$(SolutionDir) == '' Or $(SolutionDir) == '*Undefined*'">..\..\</SolutionDir>
+ </PropertyGroup>
+ <!-- Default configurations to help VS understand the configurations -->
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "></PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' "></PropertyGroup>
+ <PropertyGroup>
+ <DebugType>None</DebugType>
+ <Optimize>True</Optimize>
+ </PropertyGroup>
+ <ItemGroup>
+ <CodeAnalysisDependentAssemblyPaths Condition=" '$(VS100COMNTOOLS)' != '' " Include="$(VS100COMNTOOLS)..\IDE\PrivateAssemblies">
+ <Visible>False</Visible>
+ </CodeAnalysisDependentAssemblyPaths>
+ </ItemGroup>
+ <ItemGroup>
+ <Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" />
+ </ItemGroup>
+ <ItemGroup>
+ <Compile Include="$(MSBuildProjectName).cs" />
+ </ItemGroup>
+ <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.targets))\dir.targets" />
+ <PropertyGroup Condition=" '$(MsBuildProjectDirOverride)' != '' "></PropertyGroup>
+</Project>