case NI_Vector256_GetElement:
case NI_Vector512_GetElement:
{
- LowerHWIntrinsicGetElement(node);
-
- if ((node->GetHWIntrinsicId() == NI_Vector128_GetElement) ||
- (node->GetHWIntrinsicId() == NI_Vector256_GetElement) ||
- (node->GetHWIntrinsicId() == NI_Vector512_GetElement))
- {
- // Most NI_Vector*_GetElement intrinsics are lowered to
- // alternative nodes, such as the Extract intrinsics,
- // which are themselves lowered.
- //
- // However, certain types may not have a direct equivalent
- // in which case we specially handle them directly as GetElement
- // and want to do the relevant containment checks.
- ContainCheckHWIntrinsic(node);
- }
-
- return node->gtNext;
+ return LowerHWIntrinsicGetElement(node);
}
case NI_Vector256_GetUpper:
case NI_Vector256_ToScalar:
case NI_Vector512_ToScalar:
{
- LowerHWIntrinsicToScalar(node);
- break;
+ return LowerHWIntrinsicToScalar(node);
}
case NI_SSE41_Extract:
// Arguments:
// node - The hardware intrinsic node.
//
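+// Return Value:
+//    The next node to lower.
+//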
-void Lowering::LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node)
+GenTree* Lowering::LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node)
{
NamedIntrinsic intrinsicId = node->GetHWIntrinsicId();
var_types simdType = node->gtType;
}
node->ResetHWIntrinsicId(intrinsicId, op1);
- LowerNode(node);
-
- return;
+ return LowerNode(node);
}
+ uint32_t elemSize = genTypeSize(simdBaseType);
+ uint32_t count = simdSize / elemSize;
+
if (op1->OperIs(GT_IND))
{
- // If the vector is already in memory, we force its
- // addr to be evaluated into a reg. This would allow
- // us to generate [regBase] or [regBase + offset] or
- // [regBase + sizeOf(simdBaseType) * regIndex] to access
- // the required vector element directly from memory.
- //
- // TODO-CQ-XARCH: If addr of GT_IND is GT_LEA, we
- // might be able update GT_LEA to fold the regIndex
- // or offset in some cases. Instead with this
- // approach we always evaluate GT_LEA into a reg.
- // Ideally, we should be able to lower GetItem intrinsic
- // into GT_IND(newAddr) where newAddr combines
- // the addr of the vector with the given index.
- op1->gtFlags |= GTF_IND_REQ_ADDR_IN_REG;
+ // We want to optimize GetElement down to an Indir where possible as
+ // this unlocks additional containment opportunities for various nodes
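+ // (e.g. GetElement(Ind<simd16>(addr), 1) with float elements becomes Ind<float>(lea [addr + 4]))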
+
+ var_types newType;
+ GenTree* newBase;
+ GenTree* newIndex;
+ uint32_t newScale;
+ int32_t newOffset;
+
+ GenTreeIndir* indir = op1->AsIndir();
+ GenTree* addr = indir->Addr();
+
+ newType = simdBaseType;
+
+ if (addr->OperIsAddrMode())
+ {
+ // We have an existing addressing mode, so we want to try to
+ // combine with it where possible and keep things to a single LEA
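+ // (e.g. prefer a single lea [rbx + 0x18] over an lea whose base is another lea)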
+
+ GenTreeAddrMode* addrMode = addr->AsAddrMode();
+
+ newBase = addrMode->Base();
+ newIndex = addrMode->Index();
+ newScale = addrMode->GetScale();
+ newOffset = addrMode->Offset();
+
+ if (op2->OperIsConst() && (newOffset < (INT32_MAX - static_cast<int>(simdSize))))
+ {
+ // op2 is a constant, so mask it, scale it by the element size,
+ // and fold it into the existing offset
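+ // (e.g. float elements: lea [rbx + 0x10] with op2 == 2 becomes lea [rbx + 0x18])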
+
+ BlockRange().Remove(addrMode);
+ BlockRange().Remove(op2);
+
+ int32_t addOffset = (static_cast<uint8_t>(op2->AsIntCon()->IconValue()) % count);
+ addOffset *= static_cast<int32_t>(elemSize);
+
+ newOffset += addOffset;
+ }
+ else if (newIndex == nullptr)
+ {
+ // op2 is not a constant and the addressing mode has no
+ // existing index, so use op2 as the index with the element size as the scale
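+ // (e.g. float elements: lea [rbx + 0x10] becomes lea [rbx + 4*i + 0x10])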
+
+ BlockRange().Remove(addrMode);
+
+ newIndex = op2;
+ newScale = elemSize;
+ }
+ else if (addrMode->GetScale() == elemSize)
+ {
+ // op2 is not a constant but the addressing mode already has an
+ // index with a matching scale, so add our index to the existing one
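+ // (e.g. float elements: lea [rbx + rcx*4] with index i becomes lea [rbx + (rcx + i)*4])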
+
+ BlockRange().Remove(addrMode);
+
+ newIndex = comp->gtNewOperNode(GT_ADD, TYP_I_IMPL, newIndex, op2);
+ BlockRange().InsertBefore(node, newIndex);
+
+ LowerNode(newIndex);
+ }
+ else
+ {
+ // op2 is not a constant but the addressing mode is already
+ // complex, so build a new addressing mode with the previous one as our base
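+ // (e.g. float elements: lea [rbx + rcx*8 + 0x10] cannot absorb a 4-scaled index,
+ // so it becomes the base of a new lea [prev + i*4])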
+
+ newBase = addrMode;
+ newIndex = op2;
+ newScale = elemSize;
+ newOffset = 0;
+ }
+ }
+ else if (op2->OperIsConst())
+ {
+ // We don't have an addressing mode, so build one with the old addr
+ // as the base and an offset computed from the op2 constant and the element size
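+ // (e.g. float elements with op2 == 3: Ind(addr) becomes Ind(lea [addr + 0xC]))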
+
+ BlockRange().Remove(op2);
+
+ newBase = addr;
+ newIndex = nullptr;
+ newScale = 0;
+ newOffset = (static_cast<uint8_t>(op2->AsIntCon()->IconValue()) % count);
+ newOffset *= static_cast<int32_t>(elemSize);
+ }
+ else
+ {
+ // We don't have an addressing mode, so build one with the old addr
+ // as the base and the index set to op2
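+ // (e.g. float elements: Ind(addr) becomes Ind(lea [addr + i*4]))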
+
+ newBase = addr;
+ newIndex = op2;
+ newScale = elemSize;
+ newOffset = 0;
+ }
+
+ if (newBase != nullptr)
+ {
+ newBase->ClearContained();
+ }
+
+ if (newIndex != nullptr)
+ {
+ newIndex->ClearContained();
+ }
+
+ GenTreeAddrMode* newAddr =
+ new (comp, GT_LEA) GenTreeAddrMode(addr->TypeGet(), newBase, newIndex, newScale, newOffset);
+ BlockRange().InsertBefore(node, newAddr);
+
+ GenTreeIndir* newIndir = comp->gtNewIndir(newType, newAddr, (indir->gtFlags & GTF_IND_FLAGS));
+ BlockRange().InsertBefore(node, newIndir);
+
+ LIR::Use use;
+ if (BlockRange().TryGetUse(node, &use))
+ {
+ use.ReplaceWith(newIndir);
+ }
+
+ BlockRange().Remove(op1);
+ BlockRange().Remove(node);
+
+ assert(newAddr->gtNext == newIndir);
+ return LowerNode(newAddr);
}
if (!op2->OperIsConst())
{
// We will specially handle GetElement in codegen when op2 isn't a constant
- return;
+ ContainCheckHWIntrinsic(node);
+ return node->gtNext;
}
// We should have a bounds check inserted for any index outside the allowed range
// but we need to generate some code anyways, and so we'll simply mask here for simplicity.
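+ // (e.g. Vector128<float> has count == 4, so a constant index of 5 masks to 1)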
- ssize_t count = simdSize / genTypeSize(simdBaseType);
- ssize_t imm8 = static_cast<uint8_t>(op2->AsIntCon()->IconValue()) % count;
- ssize_t simd16Cnt = 16 / genTypeSize(simdBaseType);
- ssize_t simd16Idx = imm8 / simd16Cnt;
+ uint32_t imm8 = static_cast<uint8_t>(op2->AsIntCon()->IconValue()) % count;
+ uint32_t simd16Cnt = 16 / elemSize;
+ uint32_t simd16Idx = imm8 / simd16Cnt;
- assert(0 <= imm8 && imm8 < count);
+ assert(imm8 < count); // imm8 is unsigned, so the lower bound check is implicit
- if (IsContainableMemoryOp(op1) && IsSafeToContainMem(node, op1))
+ if (IsContainableMemoryOp(op1))
{
- // We will specially handle GetElement in codegen when op1 is already in memory
- op2->AsIntCon()->SetIconValue(imm8);
- return;
+ // We will specially handle GetElement when op1 is already in memory
+
+ if (op1->OperIs(GT_LCL_VAR, GT_LCL_FLD))
+ {
+ // We want to optimize GetElement down to a LclFld where possible as
+ // this unlocks additional containment opportunities for various nodes
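+ // (e.g. GetElement(LclVar<simd16> V02, 3) with float elements becomes LclFld float V02 [+12])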
+
+ GenTreeLclVarCommon* lclVar = op1->AsLclVarCommon();
+ uint32_t lclOffs = lclVar->GetLclOffs() + (imm8 * elemSize);
+ LclVarDsc* lclDsc = comp->lvaGetDesc(lclVar);
+
+ if (lclDsc->lvDoNotEnregister && (lclOffs <= 0xFFFF) && ((lclOffs + elemSize) <= lclDsc->lvExactSize()))
+ {
+ GenTree* lclFld =
+ comp->gtNewLclFldNode(lclVar->GetLclNum(), simdBaseType, static_cast<uint16_t>(lclOffs));
+ BlockRange().InsertBefore(node, lclFld);
+
+ LIR::Use use;
+ if (BlockRange().TryGetUse(node, &use))
+ {
+ use.ReplaceWith(lclFld);
+ }
+
+ BlockRange().Remove(op1);
+ BlockRange().Remove(op2);
+ BlockRange().Remove(node);
+
+ return LowerNode(lclFld);
+ }
+ }
+
+ if (IsSafeToContainMem(node, op1))
+ {
+ // Handle other cases in codegen
+
+ op2->AsIntCon()->SetIconValue(imm8);
+ ContainCheckHWIntrinsic(node);
+
+ return node->gtNext;
+ }
}
switch (simdBaseType)
node->SetSimdSize(16);
node->ResetHWIntrinsicId(NI_Vector128_ToScalar, op1);
- LowerNode(node);
- return;
+ return LowerNode(node);
}
else
{
node->ResetHWIntrinsicId(resIntrinsic, op1, op2);
}
+ GenTree* next = node->gtNext;
+
if (node->GetHWIntrinsicId() != intrinsicId)
{
- LowerNode(node);
+ next = LowerNode(node);
+ }
+ else
+ {
+ ContainCheckHWIntrinsic(node);
}
if ((simdBaseType == TYP_BYTE) || (simdBaseType == TYP_SHORT))
{
use.ReplaceWith(cast);
}
- LowerNode(cast);
+ next = LowerNode(cast);
}
+
+ return next;
}
//----------------------------------------------------------------------------------------------
// Arguments:
// node - The hardware intrinsic node.
//
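+// Return Value:
+//    The next node to lower.
+//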
-void Lowering::LowerHWIntrinsicToScalar(GenTreeHWIntrinsic* node)
+GenTree* Lowering::LowerHWIntrinsicToScalar(GenTreeHWIntrinsic* node)
{
NamedIntrinsic intrinsicId = node->GetHWIntrinsicId();
CorInfoType simdBaseJitType = node->GetSimdBaseJitType();
GenTree* op1 = node->Op(1);
- if (IsContainableMemoryOp(op1) && IsSafeToContainMem(node, op1))
+ if (IsContainableMemoryOp(op1))
{
- // We will specially handle ToScalar in codegen when op1 is already in memory
- return;
+ // We will specially handle ToScalar when op1 is already in memory
+
+ if (op1->OperIs(GT_IND))
+ {
+ // We want to optimize ToScalar down to an Indir where possible as
+ // this unlocks additional containment opportunities for various nodes
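+ // (e.g. ToScalar(Ind<simd16>(addr)) becomes Ind<float>(addr) for float elements)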
+
+ GenTreeIndir* indir = op1->AsIndir();
+
+ GenTreeIndir* newIndir = comp->gtNewIndir(simdBaseType, indir->Addr(), (indir->gtFlags & GTF_IND_FLAGS));
+ BlockRange().InsertBefore(node, newIndir);
+
+ LIR::Use use;
+ if (BlockRange().TryGetUse(node, &use))
+ {
+ use.ReplaceWith(newIndir);
+ }
+
+ BlockRange().Remove(op1);
+ BlockRange().Remove(node);
+
+ return LowerNode(newIndir);
+ }
+
+ if (op1->OperIs(GT_LCL_VAR, GT_LCL_FLD))
+ {
+ uint32_t elemSize = genTypeSize(simdBaseType);
+
+ // We want to optimize ToScalar down to a LclFld where possible as
+ // this unlocks additional containment opportunities for various nodes
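+ // (e.g. ToScalar(LclVar<simd16> V02) with float elements becomes LclFld float V02 [+0])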
+
+ GenTreeLclVarCommon* lclVar = op1->AsLclVarCommon();
+ uint32_t lclOffs = lclVar->GetLclOffs(); // ToScalar always reads element 0
+ LclVarDsc* lclDsc = comp->lvaGetDesc(lclVar);
+
+ if (lclDsc->lvDoNotEnregister && (lclOffs <= 0xFFFF) && ((lclOffs + elemSize) <= lclDsc->lvExactSize()))
+ {
+ GenTree* lclFld = comp->gtNewLclFldNode(lclVar->GetLclNum(), simdBaseType, static_cast<uint16_t>(lclOffs));
+ BlockRange().InsertBefore(node, lclFld);
+
+ LIR::Use use;
+ if (BlockRange().TryGetUse(node, &use))
+ {
+ use.ReplaceWith(lclFld);
+ }
+
+ BlockRange().Remove(op1);
+ BlockRange().Remove(node);
+
+ return LowerNode(lclFld);
+ }
+ }
+
+ if (IsSafeToContainMem(node, op1))
+ {
+ // Handle other cases in codegen
+ ContainCheckHWIntrinsic(node);
+ return node->gtNext;
+ }
}
switch (simdBaseType)
case TYP_DOUBLE:
{
ContainCheckHWIntrinsic(node);
- return;
+ return node->gtNext;
}
default:
}
}
- LowerNode(node);
+ GenTree* next = LowerNode(node);
if (genTypeSize(simdBaseType) < 4)
{
{
use.ReplaceWith(cast);
}
- LowerNode(cast);
+ next = LowerNode(cast);
}
+
+ return next;
}
//----------------------------------------------------------------------------------------------
case NI_Vector256_GetElement:
case NI_Vector512_GetElement:
{
- if (op1->OperIs(GT_IND))
- {
- assert((op1->gtFlags & GTF_IND_REQ_ADDR_IN_REG) != 0);
- op1->AsIndir()->Addr()->ClearContained();
- }
-
if (op2->OperIsConst())
{
MakeSrcContained(node, op2);
if (IsContainableMemoryOp(op1) && IsSafeToContainMem(node, op1))
{
MakeSrcContained(node, op1);
-
- if (op1->OperIs(GT_IND))
- {
- op1->AsIndir()->Addr()->ClearContained();
- }
}
break;
}