ValueNumPair vnPair = tree->gtVNPair;
ValueNum vnCns = vnStore->VNConservativeNormalValue(vnPair);
- // Check if node evaluates to a constant or Vector.Zero.
- if (!vnStore->IsVNConstant(vnCns) && !vnStore->IsVNVectorZero(vnCns))
+ // Check if node evaluates to a constant
+ if (!vnStore->IsVNConstant(vnCns))
{
return nullptr;
}
}
break;
-#if FEATURE_HW_INTRINSICS
+#if FEATURE_SIMD
case TYP_SIMD8:
+ {
+ simd8_t value = vnStore->ConstantValue<simd8_t>(vnCns);
+
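+ // Value numbering tracks only the vector's bits, not its base type, so
+ // CORINFO_TYPE_FLOAT is used as a stand-in here and for the other simd widths below.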
+ GenTreeVecCon* vecCon = gtNewVconNode(tree->TypeGet(), CORINFO_TYPE_FLOAT);
+ vecCon->gtSimd8Val = value;
+
+ conValTree = vecCon;
+ break;
+ }
+
case TYP_SIMD12:
+ {
+ simd12_t value = vnStore->ConstantValue<simd12_t>(vnCns);
+
+ GenTreeVecCon* vecCon = gtNewVconNode(tree->TypeGet(), CORINFO_TYPE_FLOAT);
+ vecCon->gtSimd12Val = value;
+
+ conValTree = vecCon;
+ break;
+ }
+
case TYP_SIMD16:
+ {
+ simd16_t value = vnStore->ConstantValue<simd16_t>(vnCns);
+
+ GenTreeVecCon* vecCon = gtNewVconNode(tree->TypeGet(), CORINFO_TYPE_FLOAT);
+ vecCon->gtSimd16Val = value;
+
+ conValTree = vecCon;
+ break;
+ }
+
case TYP_SIMD32:
{
- assert(vnStore->IsVNVectorZero(vnCns));
- VNSimdTypeInfo vnInfo = vnStore->GetVectorZeroSimdTypeOfVN(vnCns);
- assert(vnInfo.m_simdBaseJitType != CORINFO_TYPE_UNDEF);
- assert(vnInfo.m_simdSize != 0);
- assert(getSIMDTypeForSize(vnInfo.m_simdSize) == vnStore->TypeOfVN(vnCns));
- conValTree = gtNewSimdZeroNode(tree->TypeGet(), vnInfo.m_simdBaseJitType, vnInfo.m_simdSize, true);
+ simd32_t value = vnStore->ConstantValue<simd32_t>(vnCns);
+
+ GenTreeVecCon* vecCon = gtNewVconNode(tree->TypeGet(), CORINFO_TYPE_FLOAT);
+ vecCon->gtSimd32Val = value;
+
+ conValTree = vecCon;
+ break;
}
- break;
-#endif
+#endif // FEATURE_SIMD
case TYP_BYREF:
// Do not support const byref optimization.
//
GenTree* Compiler::optExtractSideEffListFromConst(GenTree* tree)
{
- assert(vnStore->IsVNConstant(vnStore->VNConservativeNormalValue(tree->gtVNPair)) ||
- vnStore->IsVNVectorZero(vnStore->VNConservativeNormalValue(tree->gtVNPair)));
+ assert(vnStore->IsVNConstant(vnStore->VNConservativeNormalValue(tree->gtVNPair)));
GenTree* sideEffList = nullptr;
<Type Name="GenTreeStrCon">
<DisplayString>CNS_STR</DisplayString>
</Type>
+ <Type Name="GenTreeVecCon">
+ <DisplayString>CNS_VEC</DisplayString>
+ </Type>
<Type Name="GenTreeLngCon">
<DisplayString>{gtTreeID, d}: [[LngCon={((GenTreeLngCon*)this)->gtLconVal, l}]</DisplayString>
</Type>
}
break;
+ case GT_CNS_VEC:
+ {
+ unreached();
+ }
+
default:
unreached();
}
}
break;
+ case GT_CNS_VEC:
+ {
+ GenTreeVecCon* vecCon = tree->AsVecCon();
+
+ emitter* emit = GetEmitter();
+ emitAttr attr = emitTypeSize(targetType);
+
+ switch (tree->TypeGet())
+ {
+#if defined(FEATURE_SIMD)
+ case TYP_LONG:
+ case TYP_DOUBLE:
+ case TYP_SIMD8:
+ {
+ // TODO-1stClassStructs: do not retype SIMD nodes
+
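+ // mvni/movi with an immediate of 0 materialize all-bits-set/zero directly,
+ // avoiding a constant-pool load.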
+ if (vecCon->IsAllBitsSet())
+ {
+ emit->emitIns_R_I(INS_mvni, attr, targetReg, 0, INS_OPTS_2S);
+ }
+ else if (vecCon->IsZero())
+ {
+ emit->emitIns_R_I(INS_movi, attr, targetReg, 0, INS_OPTS_2S);
+ }
+ else
+ {
+ // Get a temp integer register to compute long address.
+ regNumber addrReg = tree->GetSingleTempReg();
+
+ simd8_t constValue = vecCon->gtSimd8Val;
+ CORINFO_FIELD_HANDLE hnd = emit->emitSimd8Const(constValue);
+
+ emit->emitIns_R_C(INS_ldr, attr, targetReg, addrReg, hnd, 0);
+ }
+ break;
+ }
+
+ case TYP_SIMD12:
+ case TYP_SIMD16:
+ {
+ if (vecCon->IsAllBitsSet())
+ {
+ emit->emitIns_R_I(INS_mvni, attr, targetReg, 0, INS_OPTS_4S);
+ }
+ else if (vecCon->IsZero())
+ {
+ emit->emitIns_R_I(INS_movi, attr, targetReg, 0, INS_OPTS_4S);
+ }
+ else
+ {
+ // Get a temp integer register to compute long address.
+ regNumber addrReg = tree->GetSingleTempReg();
+
+ simd16_t constValue = vecCon->gtSimd16Val;
+ CORINFO_FIELD_HANDLE hnd = emit->emitSimd16Const(constValue);
+
+ emit->emitIns_R_C(INS_ldr, attr, targetReg, addrReg, hnd, 0);
+ }
+ break;
+ }
+#endif // FEATURE_SIMD
+
+ default:
+ {
+ unreached();
+ }
+ }
+
+ break;
+ }
+
default:
unreached();
}
}
else if (data->isContained())
{
- assert(data->OperIs(GT_BITCAST));
- const GenTree* bitcastSrc = data->AsUnOp()->gtGetOp1();
- assert(!bitcastSrc->isContained());
- dataReg = bitcastSrc->GetRegNum();
+ if (data->IsCnsVec())
+ {
+ assert(data->AsVecCon()->IsZero());
+ dataReg = REG_ZR;
+ }
+ else
+ {
+ assert(data->OperIs(GT_BITCAST));
+ const GenTree* bitcastSrc = data->AsUnOp()->gtGetOp1();
+ assert(!bitcastSrc->isContained());
+ dataReg = bitcastSrc->GetRegNum();
+ }
}
else
{
if (data->isContained())
{
// This is only possible for a zero-init or bitcast.
- const bool zeroInit = (data->IsIntegralConst(0) || data->IsSIMDZero());
+ const bool zeroInit = (data->IsIntegralConst(0) || data->IsVectorZero());
assert(zeroInit || data->OperIs(GT_BITCAST));
if (zeroInit && varTypeIsSIMD(targetType))
assert(!op1->isContained());
assert(op1Type == op2Type);
- if (op2->IsFPZero())
+ if (op2->IsFloatPositiveZero())
{
assert(op2->isContained());
emit->emitIns_R_F(INS_fcmp, cmpSize, op1->GetRegNum(), 0.0);
if (op1->isContained())
{
// This is only possible for a zero-init.
- assert(op1->IsIntegralConst(0) || op1->IsSIMDZero());
+ assert(op1->IsIntegralConst(0) || op1->IsVectorZero());
// store lower 8 bytes
GetEmitter()->emitIns_S_R(ins_Store(TYP_DOUBLE), EA_8BYTE, REG_ZR, varNum, offs);
case GT_CNS_INT:
case GT_CNS_DBL:
+ case GT_CNS_VEC:
genSetRegToConst(targetReg, targetType, treeNode);
genProduceReg(treeNode);
break;
#ifdef FEATURE_SIMD
// (In)Equality operation that produces bool result, when compared
// against Vector zero, marks its Vector Zero operand as contained.
- assert(tree->OperIsLeaf() || tree->IsSIMDZero() || tree->IsVectorZero());
+ assert(tree->OperIsLeaf() || tree->IsVectorZero());
#else
assert(tree->OperIsLeaf());
#endif
/***********************************************************************************
*
* Generate code to set a register 'targetReg' of type 'targetType' to the constant
- * specified by the constant (GT_CNS_INT or GT_CNS_DBL) in 'tree'. This does not call
- * genProduceReg() on the target register.
+ * specified by the constant (GT_CNS_INT, GT_CNS_DBL, or GT_CNS_VEC) in 'tree'. This
+ * does not call genProduceReg() on the target register.
*/
void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTree* tree)
{
}
break;
+ case GT_CNS_VEC:
+ {
+ GenTreeVecCon* vecCon = tree->AsVecCon();
+
+ emitter* emit = GetEmitter();
+ emitAttr attr = emitTypeSize(targetType);
+
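+ // Compare-equal of a register with itself yields all-bits-set without a memory load.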
+ if (vecCon->IsAllBitsSet())
+ {
+#if defined(FEATURE_SIMD)
+ emit->emitIns_SIMD_R_R_R(INS_pcmpeqd, attr, targetReg, targetReg, targetReg);
+#else
+ emit->emitIns_R_R(INS_pcmpeqd, attr, targetReg, targetReg);
+#endif // FEATURE_SIMD
+ break;
+ }
+
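+ // Xor of a register with itself yields zero without a memory load.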
+ if (vecCon->IsZero())
+ {
+#if defined(FEATURE_SIMD)
+ emit->emitIns_SIMD_R_R_R(INS_xorps, attr, targetReg, targetReg, targetReg);
+#else
+ emit->emitIns_R_R(INS_xorps, attr, targetReg, targetReg);
+#endif // FEATURE_SIMD
+ break;
+ }
+
+ switch (tree->TypeGet())
+ {
+#if defined(FEATURE_SIMD)
+ case TYP_LONG:
+ case TYP_DOUBLE:
+ case TYP_SIMD8:
+ {
+ // TODO-1stClassStructs: do not retype SIMD nodes
+
+ simd8_t constValue = vecCon->gtSimd8Val;
+ CORINFO_FIELD_HANDLE hnd = emit->emitSimd8Const(constValue);
+
+ emit->emitIns_R_C(ins_Load(targetType), attr, targetReg, hnd, 0);
+ break;
+ }
+
+ case TYP_SIMD12:
+ case TYP_SIMD16:
+ {
+ simd16_t constValue = vecCon->gtSimd16Val;
+ CORINFO_FIELD_HANDLE hnd = emit->emitSimd16Const(constValue);
+
+ emit->emitIns_R_C(ins_Load(targetType), attr, targetReg, hnd, 0);
+ break;
+ }
+
+ case TYP_SIMD32:
+ {
+ simd32_t constValue = vecCon->gtSimd32Val;
+ CORINFO_FIELD_HANDLE hnd = emit->emitSimd32Const(constValue);
+
+ emit->emitIns_R_C(ins_Load(targetType), attr, targetReg, hnd, 0);
+ break;
+ }
+#endif // FEATURE_SIMD
+
+ default:
+ {
+ unreached();
+ }
+ }
+
+ break;
+ }
+
default:
unreached();
}
genProduceReg(treeNode);
break;
+ case GT_CNS_VEC:
+ genSetRegToConst(targetReg, targetType, treeNode);
+ genProduceReg(treeNode);
+ break;
+
case GT_NOT:
case GT_NEG:
genCodeForNegNot(treeNode);
// zero in the target register, because an xor is smaller than a copy. Note that we could
// potentially handle this in the register allocator, but we can't always catch it there
// because the target may not have a register allocated for it yet.
- if (op1->isUsedFromReg() && (op1->GetRegNum() != targetReg) && (op1->IsIntegralConst(0) || op1->IsFPZero()))
+ if (op1->isUsedFromReg() && (op1->GetRegNum() != targetReg) &&
+ (op1->IsIntegralConst(0) || op1->IsFloatPositiveZero()))
{
op1->SetRegNum(REG_NA);
op1->ResetReuseRegVal();
GenTree* gtNewSconNode(int CPX, CORINFO_MODULE_HANDLE scpHandle);
+ GenTreeVecCon* gtNewVconNode(var_types type, CorInfoType simdBaseJitType);
+
+ GenTree* gtNewAllBitsSetConNode(var_types type);
+ GenTree* gtNewAllBitsSetConNode(var_types type, CorInfoType simdBaseJitType);
+
GenTree* gtNewZeroConNode(var_types type);
+ GenTree* gtNewZeroConNode(var_types type, CorInfoType simdBaseJitType);
GenTree* gtNewOneConNode(var_types type);
GenTreeLclVar* gtNewStoreLclVar(unsigned dstLclNum, GenTree* src);
-#ifdef FEATURE_SIMD
- GenTree* gtNewSIMDVectorZero(var_types simdType, CorInfoType simdBaseJitType, unsigned simdSize);
-#endif
-
GenTree* gtNewBlkOpNode(GenTree* dst, GenTree* srcOrFillVal, bool isVolatile, bool isCopyBlock);
GenTree* gtNewPutArgReg(var_types type, GenTree* arg, regNumber argReg);
unsigned simdSize,
bool isSimdAsHWIntrinsic);
- GenTree* gtNewSimdZeroNode(var_types type,
- CorInfoType simdBaseJitType,
- unsigned simdSize,
- bool isSimdAsHWIntrinsic);
-
GenTreeHWIntrinsic* gtNewScalarHWIntrinsicNode(var_types type, NamedIntrinsic hwIntrinsicID);
GenTreeHWIntrinsic* gtNewScalarHWIntrinsicNode(var_types type, GenTree* op1, NamedIntrinsic hwIntrinsicID);
GenTreeHWIntrinsic* gtNewScalarHWIntrinsicNode(var_types type,
SIMDHandlesCache* m_simdHandleCache;
- // Get an appropriate "zero" for the given type and class handle.
- GenTree* gtGetSIMDZero(var_types simdType, CorInfoType simdBaseJitType, CORINFO_CLASS_HANDLE simdHandle);
-
// Get the handle for a SIMD type.
CORINFO_CLASS_HANDLE gtGetStructHandleForSIMD(var_types simdType, CorInfoType simdBaseJitType)
{
clsHnd = gtGetStructHandleForHWSIMD(simdType, simdBaseJitType);
}
- assert(clsHnd != NO_CLASS_HANDLE);
return clsHnd;
}
#endif // FEATURE_HW_INTRINSICS
case GT_CNS_LNG:
case GT_CNS_DBL:
case GT_CNS_STR:
+ case GT_CNS_VEC:
case GT_MEMORYBARRIER:
case GT_JMP:
case GT_JCC:
case GT_CNS_LNG:
case GT_CNS_DBL:
case GT_CNS_STR:
+ case GT_CNS_VEC:
case GT_MEMORYBARRIER:
case GT_JMP:
case GT_JCC:
return emitComp->eeFindJitDataOffs(cnum);
}
+//------------------------------------------------------------------------
+// emitSimd8Const: Create a simd8 data section constant.
+//
+// Arguments:
+// constValue - constant value
+//
+// Return Value:
+// A field handle representing the data offset to access the constant.
+//
+CORINFO_FIELD_HANDLE emitter::emitSimd8Const(simd8_t constValue)
+{
+ // Access to inline data is 'abstracted' by a special type of static member
+ // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference
+ // to constant data, not a real static field.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if defined(FEATURE_SIMD)
+ unsigned cnsSize = 8;
+ unsigned cnsAlign = cnsSize;
+
+#ifdef TARGET_XARCH
+ if (emitComp->compCodeOpt() == Compiler::SMALL_CODE)
+ {
+ cnsAlign = dataSection::MIN_DATA_ALIGN;
+ }
+#endif // TARGET_XARCH
+
+ UNATIVE_OFFSET cnum = emitDataConst(&constValue, cnsSize, cnsAlign, TYP_SIMD8);
+ return emitComp->eeFindJitDataOffs(cnum);
+#else
+ unreached();
+#endif // !FEATURE_SIMD
+}
+
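+//------------------------------------------------------------------------
+// emitSimd16Const: Create a simd16 data section constant.
+//
+// Arguments:
+// constValue - constant value
+//
+// Return Value:
+// A field handle representing the data offset to access the constant.
+//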
+CORINFO_FIELD_HANDLE emitter::emitSimd16Const(simd16_t constValue)
+{
+ // Access to inline data is 'abstracted' by a special type of static member
+ // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference
+ // to constant data, not a real static field.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if defined(FEATURE_SIMD)
+ unsigned cnsSize = 16;
+ unsigned cnsAlign = cnsSize;
+
+#ifdef TARGET_XARCH
+ if (emitComp->compCodeOpt() == Compiler::SMALL_CODE)
+ {
+ cnsAlign = dataSection::MIN_DATA_ALIGN;
+ }
+#endif // TARGET_XARCH
+
+ UNATIVE_OFFSET cnum = emitDataConst(&constValue, cnsSize, cnsAlign, TYP_SIMD16);
+ return emitComp->eeFindJitDataOffs(cnum);
+#else
+ unreached();
+#endif // !FEATURE_SIMD
+}
+
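+//------------------------------------------------------------------------
+// emitSimd32Const: Create a simd32 data section constant.
+//
+// Arguments:
+// constValue - constant value
+//
+// Return Value:
+// A field handle representing the data offset to access the constant.
+//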
+CORINFO_FIELD_HANDLE emitter::emitSimd32Const(simd32_t constValue)
+{
+ // Access to inline data is 'abstracted' by a special type of static member
+ // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference
+ // to constant data, not a real static field.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if defined(FEATURE_SIMD)
+ unsigned cnsSize = 32;
+ unsigned cnsAlign = cnsSize;
+
+#ifdef TARGET_XARCH
+ if (emitComp->compCodeOpt() == Compiler::SMALL_CODE)
+ {
+ cnsAlign = dataSection::MIN_DATA_ALIGN;
+ }
+#endif // TARGET_XARCH
+
+ UNATIVE_OFFSET cnum = emitDataConst(&constValue, cnsSize, cnsAlign, TYP_SIMD32);
+ return emitComp->eeFindJitDataOffs(cnum);
+#else
+ unreached();
+#endif // !FEATURE_SIMD
+}
+
/*****************************************************************************
*
* Output the given data section at the specified address.
private:
CORINFO_FIELD_HANDLE emitFltOrDblConst(double constValue, emitAttr attr);
+ CORINFO_FIELD_HANDLE emitSimd8Const(simd8_t constValue);
+ CORINFO_FIELD_HANDLE emitSimd16Const(simd16_t constValue);
+ CORINFO_FIELD_HANDLE emitSimd32Const(simd32_t constValue);
regNumber emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src);
regNumber emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src1, GenTree* src2);
void emitInsLoadInd(instruction ins, emitAttr attr, regNumber dstReg, GenTreeIndir* mem);
pushedStack.PushConstant();
// TODO: check if it's a loop condition - we unroll such loops.
break;
- case NI_Vector256_get_Zero:
- case NI_Vector256_get_AllBitsSet:
- foldableIntrinsic = true;
- pushedStack.PushUnknown();
- break;
#elif defined(TARGET_ARM64) && defined(FEATURE_HW_INTRINSICS)
case NI_Vector64_get_Count:
case NI_Vector128_get_Count:
foldableIntrinsic = true;
pushedStack.PushConstant();
break;
- case NI_Vector128_get_Zero:
- case NI_Vector128_get_AllBitsSet:
- foldableIntrinsic = true;
- pushedStack.PushUnknown();
- break;
#endif
default:
*/
GenTree* Compiler::fgGetStructAsStructPtr(GenTree* tree)
{
- noway_assert(tree->OperIs(GT_LCL_VAR, GT_FIELD, GT_IND, GT_BLK, GT_OBJ, GT_COMMA) || tree->OperIsSIMD() ||
- tree->OperIsHWIntrinsic());
+ noway_assert(tree->OperIs(GT_LCL_VAR, GT_FIELD, GT_IND, GT_BLK, GT_OBJ, GT_COMMA) ||
+ tree->OperIsSimdOrHWintrinsic() || tree->IsCnsVec());
// GT_CALL, cannot get address of call.
// GT_MKREFANY, inlining should've been aborted due to mkrefany opcode.
// GT_RET_EXPR, cannot happen after fgUpdateInlineReturnExpressionPlaceHolder
}
GenTree::s_gtNodeSizes[GT_CALL] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_CNS_VEC] = TREE_NODE_SZ_LARGE;
GenTree::s_gtNodeSizes[GT_CAST] = TREE_NODE_SZ_LARGE;
GenTree::s_gtNodeSizes[GT_FTN_ADDR] = TREE_NODE_SZ_LARGE;
GenTree::s_gtNodeSizes[GT_BOX] = TREE_NODE_SZ_LARGE;
static_assert_no_msg(sizeof(GenTreeLngCon) <= TREE_NODE_SZ_SMALL);
static_assert_no_msg(sizeof(GenTreeDblCon) <= TREE_NODE_SZ_SMALL);
static_assert_no_msg(sizeof(GenTreeStrCon) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeVecCon) <= TREE_NODE_SZ_LARGE); // *** large node
static_assert_no_msg(sizeof(GenTreeLclVarCommon) <= TREE_NODE_SZ_SMALL);
static_assert_no_msg(sizeof(GenTreeLclVar) <= TREE_NODE_SZ_SMALL);
static_assert_no_msg(sizeof(GenTreeLclFld) <= TREE_NODE_SZ_SMALL);
}
break;
+ case GT_CNS_VEC:
+ {
+ if (GenTreeVecCon::Equals(op1->AsVecCon(), op2->AsVecCon()))
+ {
+ return true;
+ }
+ break;
+ }
+
#if 0
// TODO-CQ: Enable this in the future
case GT_CNS_LNG:
add = tree->AsStrCon()->gtSconCPX;
break;
+ case GT_CNS_VEC:
+ {
+ GenTreeVecCon* vecCon = tree->AsVecCon();
+ add = 0;
+
+ switch (vecCon->TypeGet())
+ {
+#if defined(FEATURE_SIMD)
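+ // The cases below deliberately fall through: each wider type hashes its upper
+ // lanes, then cascades into the narrower cases for the lanes they share.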
+ case TYP_SIMD32:
+ {
+ add = genTreeHashAdd(ulo32(add), vecCon->gtSimd32Val.u32[7]);
+ add = genTreeHashAdd(ulo32(add), vecCon->gtSimd32Val.u32[6]);
+ add = genTreeHashAdd(ulo32(add), vecCon->gtSimd32Val.u32[5]);
+ add = genTreeHashAdd(ulo32(add), vecCon->gtSimd32Val.u32[4]);
+ FALLTHROUGH;
+ }
+
+ case TYP_SIMD16:
+ {
+ add = genTreeHashAdd(ulo32(add), vecCon->gtSimd16Val.u32[3]);
+ FALLTHROUGH;
+ }
+
+ case TYP_SIMD12:
+ {
+ add = genTreeHashAdd(ulo32(add), vecCon->gtSimd12Val.u32[2]);
+ FALLTHROUGH;
+ }
+
+ case TYP_SIMD8:
+ case TYP_DOUBLE:
+ case TYP_LONG:
+ {
+ // TODO-1stClassStructs: do not retype SIMD nodes
+ add = genTreeHashAdd(ulo32(add), vecCon->gtSimd8Val.u32[1]);
+ add = genTreeHashAdd(ulo32(add), vecCon->gtSimd8Val.u32[0]);
+ break;
+ }
+#endif // FEATURE_SIMD
+
+ default:
+ {
+ unreached();
+ }
+ }
+
+ add = genTreeHashAdd(ulo32(add), vecCon->GetSimdBaseType());
+ add = genTreeHashAdd(ulo32(add), vecCon->GetSimdSize());
+ break;
+ }
+
case GT_JMP:
add = tree->AsVal()->gtVal1;
break;
#if defined(TARGET_XARCH)
/* We use fldz to load +0.0; all other floating point */
/* constants are loaded using an indirection */
- if ((*((__int64*)&(tree->AsDblCon()->gtDconVal)) == 0) ||
- (*((__int64*)&(tree->AsDblCon()->gtDconVal)) == I64(0x3ff0000000000000)))
+ if (tree->IsFloatPositiveZero())
{
costEx = 1;
costSz = 1;
costSz = 2 + 8;
}
#elif defined(TARGET_ARM64)
- if ((*((__int64*)&(tree->AsDblCon()->gtDconVal)) == 0) ||
- emitter::emitIns_valid_imm_for_fmov(tree->AsDblCon()->gtDconVal))
+ if (tree->IsFloatPositiveZero() || emitter::emitIns_valid_imm_for_fmov(tree->AsDblCon()->gtDconVal))
{
costEx = 1;
costSz = 1;
}
break;
+ case GT_CNS_VEC:
+ {
+ costEx = IND_COST_EX;
+ costSz = 4;
+ level = 0;
+ break;
+ }
+
case GT_LCL_VAR:
level = 1;
if (gtIsLikelyRegVar(tree))
case GT_CNS_LNG:
case GT_CNS_DBL:
case GT_CNS_STR:
+ case GT_CNS_VEC:
case GT_MEMORYBARRIER:
case GT_JMP:
case GT_JCC:
return node;
}
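+//------------------------------------------------------------------------
+// gtNewVconNode: Create a new vector constant (GT_CNS_VEC) node.
+//
+// Arguments:
+// type - The simd type of the constant
+// simdBaseJitType - The base JIT type of the vector's elements
+//
+// Return Value:
+// The created GenTreeVecCon node.
+//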
+GenTreeVecCon* Compiler::gtNewVconNode(var_types type, CorInfoType simdBaseJitType)
+{
+ GenTreeVecCon* vecCon = new (this, GT_CNS_VEC) GenTreeVecCon(type, simdBaseJitType, genTypeSize(type));
+ return vecCon;
+}
+
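+//------------------------------------------------------------------------
+// gtNewAllBitsSetConNode: Create an integral constant node where all bits are set.
+//
+// Arguments:
+// type - The type of the node (TYP_INT or TYP_LONG)
+//
+// Return Value:
+// The created constant node.
+//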
+GenTree* Compiler::gtNewAllBitsSetConNode(var_types type)
+{
+ GenTree* allBitsSet;
+
+ switch (type)
+ {
+ case TYP_INT:
+ allBitsSet = gtNewIconNode(-1);
+ break;
+
+ case TYP_LONG:
+ allBitsSet = gtNewLconNode(-1);
+ break;
+
+ default:
+ noway_assert(!"Bad type in gtNewAllBitsSetConNode");
+ allBitsSet = nullptr;
+ break;
+ }
+
+ return allBitsSet;
+}
+
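+//------------------------------------------------------------------------
+// gtNewAllBitsSetConNode: Create a vector constant (GT_CNS_VEC) node where all bits are set.
+//
+// Arguments:
+// type - The simd type of the node
+// simdBaseJitType - The base JIT type of the vector's elements
+//
+// Return Value:
+// The created GenTreeVecCon node.
+//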
+GenTree* Compiler::gtNewAllBitsSetConNode(var_types type, CorInfoType simdBaseJitType)
+{
+ assert(varTypeIsSIMD(type));
+ assert(simdBaseJitType != CORINFO_TYPE_UNDEF);
+
+ GenTreeVecCon* vecCon = gtNewVconNode(type, simdBaseJitType);
+
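+ // Set all lanes of the widest union member; the smaller simd sizes overlap it
+ // in the union, so every type gets all bits set.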
+ vecCon->gtSimd32Val.i64[0] = -1;
+ vecCon->gtSimd32Val.i64[1] = -1;
+ vecCon->gtSimd32Val.i64[2] = -1;
+ vecCon->gtSimd32Val.i64[3] = -1;
+
+ return vecCon;
+}
+
GenTree* Compiler::gtNewZeroConNode(var_types type)
{
GenTree* zero;
return zero;
}
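+//------------------------------------------------------------------------
+// gtNewZeroConNode: Create a zero vector constant (GT_CNS_VEC) node.
+//
+// Arguments:
+// type - The simd type of the node
+// simdBaseJitType - The base JIT type of the vector's elements
+//
+// Return Value:
+// The created GenTreeVecCon node.
+//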
+GenTree* Compiler::gtNewZeroConNode(var_types type, CorInfoType simdBaseJitType)
+{
+ assert(varTypeIsSIMD(type));
+ assert(simdBaseJitType != CORINFO_TYPE_UNDEF);
+
+ GenTreeVecCon* vecCon = gtNewVconNode(type, simdBaseJitType);
+ vecCon->gtSimd32Val = {};
+ return vecCon;
+}
+
GenTree* Compiler::gtNewOneConNode(var_types type)
{
GenTree* one;
return store;
}
-#ifdef FEATURE_SIMD
-//---------------------------------------------------------------------
-// gtNewSIMDVectorZero: create a GT_SIMD node for Vector<T>.Zero
-//
-// Arguments:
-// simdType - simd vector type
-// simdBaseJitType - element type of vector
-// simdSize - size of vector in bytes
-GenTree* Compiler::gtNewSIMDVectorZero(var_types simdType, CorInfoType simdBaseJitType, unsigned simdSize)
-{
- var_types simdBaseType = genActualType(JitType2PreciseVarType(simdBaseJitType));
- GenTree* initVal = gtNewZeroConNode(simdBaseType);
- initVal->gtType = simdBaseType;
- return gtNewSIMDNode(simdType, initVal, SIMDIntrinsicInit, simdBaseJitType, simdSize);
-}
-#endif // FEATURE_SIMD
-
GenTreeCall* Compiler::gtNewIndCallNode(GenTree* addr, var_types type, const DebugInfo& di)
{
return gtNewCallNode(CT_INDIRECT, (CORINFO_METHOD_HANDLE)addr, type, di);
copy = gtNewLconNode(tree->AsLngCon()->gtLconVal);
break;
+ case GT_CNS_DBL:
+ {
+ copy = gtNewDconNode(tree->AsDblCon()->gtDconVal, tree->TypeGet());
+ break;
+ }
+
+ case GT_CNS_VEC:
+ {
+ GenTreeVecCon* vecCon = gtNewVconNode(tree->TypeGet(), tree->AsVecCon()->GetSimdBaseJitType());
+ vecCon->gtSimd32Val = tree->AsVecCon()->gtSimd32Val;
+ copy = vecCon;
+ break;
+ }
+
case GT_LCL_VAR:
copy = gtNewLclvNode(tree->AsLclVarCommon()->GetLclNum(),
tree->TypeGet() DEBUGARG(tree->AsLclVar()->gtLclILoffs));
goto DONE;
case GT_CNS_DBL:
- copy = gtNewDconNode(tree->AsDblCon()->gtDconVal);
- copy->gtType = tree->gtType; // keep the same type
+ {
+ copy = gtNewDconNode(tree->AsDblCon()->gtDconVal, tree->TypeGet());
goto DONE;
+ }
case GT_CNS_STR:
copy = gtNewSconNode(tree->AsStrCon()->gtSconCPX, tree->AsStrCon()->gtScpHnd);
goto DONE;
+ case GT_CNS_VEC:
+ {
+ GenTreeVecCon* vecCon = gtNewVconNode(tree->TypeGet(), tree->AsVecCon()->GetSimdBaseJitType());
+ vecCon->gtSimd32Val = tree->AsVecCon()->gtSimd32Val;
+ copy = vecCon;
+ goto DONE;
+ }
+
case GT_LCL_VAR:
if (tree->AsLclVarCommon()->GetLclNum() == varNum)
case GT_CNS_LNG:
case GT_CNS_DBL:
case GT_CNS_STR:
+ case GT_CNS_VEC:
case GT_MEMORYBARRIER:
case GT_JMP:
case GT_JCC:
printf(" %#.17g", tree->AsDblCon()->gtDconVal);
}
break;
+
case GT_CNS_STR:
printf("<string constant>");
break;
+
+ case GT_CNS_VEC:
+ {
+ GenTreeVecCon* vecCon = tree->AsVecCon();
+
+ switch (vecCon->TypeGet())
+ {
+#if defined(FEATURE_SIMD)
+ case TYP_LONG:
+ case TYP_DOUBLE:
+ case TYP_SIMD8:
+ {
+ // TODO-1stClassStructs: do not retype SIMD nodes
+ simd8_t simdVal = vecCon->gtSimd8Val;
+ printf("<0x%08x, 0x%08x>", simdVal.u32[0], simdVal.u32[1]);
+ break;
+ }
+
+ case TYP_SIMD12:
+ {
+ simd12_t simdVal = vecCon->gtSimd12Val;
+ printf("<0x%08x, 0x%08x, 0x%08x>", simdVal.u32[0], simdVal.u32[1], simdVal.u32[2]);
+ break;
+ }
+
+ case TYP_SIMD16:
+ {
+ simd16_t simdVal = vecCon->gtSimd16Val;
+ printf("<0x%08x, 0x%08x, 0x%08x, 0x%08x>", simdVal.u32[0], simdVal.u32[1], simdVal.u32[2],
+ simdVal.u32[3]);
+ break;
+ }
+
+ case TYP_SIMD32:
+ {
+ simd32_t simdVal = vecCon->gtSimd32Val;
+ printf("<0x%016llx, 0x%016llx, 0x%016llx, 0x%016llx>", simdVal.u64[0], simdVal.u64[1],
+ simdVal.u64[2], simdVal.u64[3]);
+ break;
+ }
+#endif // FEATURE_SIMD
+
+ default:
+ {
+ unreached();
+ }
+ }
+ break;
+ }
+
default:
assert(!"unexpected constant node");
}
if (dstTyp == TYP_UNDEF)
{
varDsc->lvType = dstTyp = genActualType(valTyp);
+ }
+
#if FEATURE_SIMD
- if (varTypeIsSIMD(dstTyp))
- {
- varDsc->lvSIMDType = 1;
- }
-#endif
+ if (varTypeIsSIMD(dstTyp))
+ {
+ varDsc->lvSIMDType = 1;
}
+#endif
#ifdef DEBUG
// Make sure the actual types match.
}
#endif // TARGET_X86
+#if defined(FEATURE_HW_INTRINSICS)
+//----------------------------------------------------------------------------------------------
+// IsHWIntrinsicCreateConstant: Determines if a HWIntrinsic node represents a vector constant
+//
+// Arguments:
+// node - The node to check
+// simd32Val - The vector constant being constructed
+//
+// Returns:
+// true if node represents a constant; otherwise, false
+bool GenTreeVecCon::IsHWIntrinsicCreateConstant(GenTreeHWIntrinsic* node, simd32_t& simd32Val)
+{
+ var_types simdType = node->TypeGet();
+ var_types simdBaseType = node->GetSimdBaseType();
+ unsigned simdSize = node->GetSimdSize();
+
+ size_t argCnt = node->GetOperandCount();
+ size_t cnsArgCnt = 0;
+
+ switch (node->GetHWIntrinsicId())
+ {
+ case NI_Vector128_Create:
+#if defined(TARGET_XARCH)
+ case NI_Vector128_CreateScalarUnsafe:
+ case NI_Vector256_Create:
+ case NI_Vector256_CreateScalarUnsafe:
+#elif defined(TARGET_ARM64)
+ case NI_Vector64_Create:
+#endif
+ {
+ // These intrinsics are meant to set the same value to every element.
+ if ((argCnt == 1) && HandleArgForHWIntrinsicCreate(node->Op(1), 0, simd32Val, simdBaseType))
+ {
+ // Now assign the rest of the arguments.
+ for (unsigned i = 1; i < simdSize / genTypeSize(simdBaseType); i++)
+ {
+ HandleArgForHWIntrinsicCreate(node->Op(1), i, simd32Val, simdBaseType);
+ }
+
+ cnsArgCnt = 1;
+ }
+ else
+ {
+ for (unsigned i = 1; i <= argCnt; i++)
+ {
+ if (HandleArgForHWIntrinsicCreate(node->Op(i), i - 1, simd32Val, simdBaseType))
+ {
+ cnsArgCnt++;
+ }
+ }
+ }
+
+ assert((argCnt == 1) || (argCnt == (simdSize / genTypeSize(simdBaseType))));
+ return argCnt == cnsArgCnt;
+ }
+
+ default:
+ {
+ return false;
+ }
+ }
+}
+
+//----------------------------------------------------------------------------------------------
+// HandleArgForHWIntrinsicCreate: Processes an argument for the GenTreeVecCon::IsHWIntrinsicCreateConstant method
+//
+// Arguments:
+// arg - The argument to process
+// argIdx - The index of the argument being processed
+// simd32Val - The vector constant being constructed
+// baseType - The base type of the vector constant
+//
+// Returns:
+// true if arg was a constant; otherwise, false
+bool GenTreeVecCon::HandleArgForHWIntrinsicCreate(GenTree* arg, int argIdx, simd32_t& simd32Val, var_types baseType)
+{
+ switch (baseType)
+ {
+ case TYP_BYTE:
+ case TYP_UBYTE:
+ {
+ if (arg->IsCnsIntOrI())
+ {
+ simd32Val.i8[argIdx] = static_cast<int8_t>(arg->AsIntCon()->gtIconVal);
+ return true;
+ }
+ else
+ {
+ // We expect the constant to have already been zeroed
+ assert(simd32Val.i8[argIdx] == 0);
+ }
+ break;
+ }
+
+ case TYP_SHORT:
+ case TYP_USHORT:
+ {
+ if (arg->IsCnsIntOrI())
+ {
+ simd32Val.i16[argIdx] = static_cast<int16_t>(arg->AsIntCon()->gtIconVal);
+ return true;
+ }
+ else
+ {
+ // We expect the constant to have already been zeroed
+ assert(simd32Val.i16[argIdx] == 0);
+ }
+ break;
+ }
+
+ case TYP_INT:
+ case TYP_UINT:
+ {
+ if (arg->IsCnsIntOrI())
+ {
+ simd32Val.i32[argIdx] = static_cast<int32_t>(arg->AsIntCon()->gtIconVal);
+ return true;
+ }
+ else
+ {
+ // We expect the constant to have already been zeroed
+ assert(simd32Val.i32[argIdx] == 0);
+ }
+ break;
+ }
+
+ case TYP_LONG:
+ case TYP_ULONG:
+ {
+#if defined(TARGET_64BIT)
+ if (arg->IsCnsIntOrI())
+ {
+ simd32Val.i64[argIdx] = static_cast<int64_t>(arg->AsIntCon()->gtIconVal);
+ return true;
+ }
+#else
+ if (arg->OperIsLong() && arg->AsOp()->gtOp1->IsCnsIntOrI() && arg->AsOp()->gtOp2->IsCnsIntOrI())
+ {
+ // 32-bit targets will decompose GT_CNS_LNG into two GT_CNS_INT
+ // We need to reconstruct the 64-bit value in order to handle this
+
+ INT64 gtLconVal = arg->AsOp()->gtOp2->AsIntCon()->gtIconVal;
+ gtLconVal <<= 32;
+ // Zero-extend the low half so its sign bit cannot leak into the high half.
+ gtLconVal |= static_cast<uint32_t>(arg->AsOp()->gtOp1->AsIntCon()->gtIconVal);
+
+ simd32Val.i64[argIdx] = gtLconVal;
+ return true;
+ }
+#endif // TARGET_64BIT
+ else
+ {
+ // We expect the constant to have already been zeroed
+ assert(simd32Val.i64[argIdx] == 0);
+ }
+ break;
+ }
+
+ case TYP_FLOAT:
+ {
+ if (arg->IsCnsFltOrDbl())
+ {
+ simd32Val.f32[argIdx] = static_cast<float>(arg->AsDblCon()->gtDconVal);
+ return true;
+ }
+ else
+ {
+ // We expect the constant to have already been zeroed
+ // We check against the i32, rather than f32, to account for -0.0
+ assert(simd32Val.i32[argIdx] == 0);
+ }
+ break;
+ }
+
+ case TYP_DOUBLE:
+ {
+ if (arg->IsCnsFltOrDbl())
+ {
+ simd32Val.f64[argIdx] = static_cast<double>(arg->AsDblCon()->gtDconVal);
+ return true;
+ }
+ else
+ {
+ // We expect the constant to have already been zeroed
+ // We check against the i64, rather than f64, to account for -0.0
+ assert(simd32Val.i64[argIdx] == 0);
+ }
+ break;
+ }
+
+ default:
+ {
+ unreached();
+ }
+ }
+
+ return false;
+}
+#endif // FEATURE_HW_INTRINSICS
+
+//----------------------------------------------------------------------------------------------
+// GetSimdBaseType: Gets the var_types for the SimdBaseJitType of a GenTreeVecCon node
+//
+// Returns:
+// the var_types for the SimdBaseJitType of a GenTreeVecCon
+var_types GenTreeVecCon::GetSimdBaseType() const
+{
+ CorInfoType simdBaseJitType = GetSimdBaseJitType();
+
+ if (simdBaseJitType == CORINFO_TYPE_UNDEF)
+ {
+ return TYP_UNKNOWN;
+ }
+ return JitType2PreciseVarType(simdBaseJitType);
+}
+
//------------------------------------------------------------------------
// IsFieldAddr: Is "this" a static or class field address?
//
return fieldTyp != TYP_REF;
}
-#ifdef FEATURE_SIMD
-//------------------------------------------------------------------------
-// gtGetSIMDZero: Get a zero value of the appropriate SIMD type.
-//
-// Arguments:
-// var_types - The simdType
-// simdBaseJitType - The SIMD base JIT type we need
-// simdHandle - The handle for the SIMD type
-//
-// Return Value:
-// A node generating the appropriate Zero, if we are able to discern it,
-// otherwise null (note that this shouldn't happen, but callers should
-// be tolerant of this case).
-
-GenTree* Compiler::gtGetSIMDZero(var_types simdType, CorInfoType simdBaseJitType, CORINFO_CLASS_HANDLE simdHandle)
-{
- bool found = false;
- bool isHWSIMD = true;
- noway_assert(m_simdHandleCache != nullptr);
-
- // First, determine whether this is Vector<T>.
- if (simdType == getSIMDVectorType())
- {
- switch (simdBaseJitType)
- {
- case CORINFO_TYPE_FLOAT:
- found = (simdHandle == m_simdHandleCache->SIMDFloatHandle);
- break;
- case CORINFO_TYPE_DOUBLE:
- found = (simdHandle == m_simdHandleCache->SIMDDoubleHandle);
- break;
- case CORINFO_TYPE_INT:
- found = (simdHandle == m_simdHandleCache->SIMDIntHandle);
- break;
- case CORINFO_TYPE_USHORT:
- found = (simdHandle == m_simdHandleCache->SIMDUShortHandle);
- break;
- case CORINFO_TYPE_UBYTE:
- found = (simdHandle == m_simdHandleCache->SIMDUByteHandle);
- break;
- case CORINFO_TYPE_SHORT:
- found = (simdHandle == m_simdHandleCache->SIMDShortHandle);
- break;
- case CORINFO_TYPE_BYTE:
- found = (simdHandle == m_simdHandleCache->SIMDByteHandle);
- break;
- case CORINFO_TYPE_LONG:
- found = (simdHandle == m_simdHandleCache->SIMDLongHandle);
- break;
- case CORINFO_TYPE_UINT:
- found = (simdHandle == m_simdHandleCache->SIMDUIntHandle);
- break;
- case CORINFO_TYPE_ULONG:
- found = (simdHandle == m_simdHandleCache->SIMDULongHandle);
- break;
- case CORINFO_TYPE_NATIVEINT:
- found = (simdHandle == m_simdHandleCache->SIMDNIntHandle);
- break;
- case CORINFO_TYPE_NATIVEUINT:
- found = (simdHandle == m_simdHandleCache->SIMDNUIntHandle);
- break;
- default:
- break;
- }
- if (found)
- {
- isHWSIMD = false;
- }
- }
-
- if (!found)
- {
- // We must still have isHWSIMD set to true, and the only non-HW types left are the fixed types.
- switch (simdType)
- {
- case TYP_SIMD8:
- switch (simdBaseJitType)
- {
- case CORINFO_TYPE_FLOAT:
- if (simdHandle == m_simdHandleCache->SIMDVector2Handle)
- {
- isHWSIMD = false;
- }
-#if defined(TARGET_ARM64) && defined(FEATURE_HW_INTRINSICS)
- else
- {
- assert(simdHandle == m_simdHandleCache->Vector64FloatHandle);
- }
- break;
- case CORINFO_TYPE_INT:
- assert(simdHandle == m_simdHandleCache->Vector64IntHandle);
- break;
- case CORINFO_TYPE_USHORT:
- assert(simdHandle == m_simdHandleCache->Vector64UShortHandle);
- break;
- case CORINFO_TYPE_UBYTE:
- assert(simdHandle == m_simdHandleCache->Vector64UByteHandle);
- break;
- case CORINFO_TYPE_SHORT:
- assert(simdHandle == m_simdHandleCache->Vector64ShortHandle);
- break;
- case CORINFO_TYPE_BYTE:
- assert(simdHandle == m_simdHandleCache->Vector64ByteHandle);
- break;
- case CORINFO_TYPE_UINT:
- assert(simdHandle == m_simdHandleCache->Vector64UIntHandle);
-#endif // defined(TARGET_ARM64) && defined(FEATURE_HW_INTRINSICS)
- break;
- default:
- break;
- }
- break;
-
- case TYP_SIMD12:
- assert((simdBaseJitType == CORINFO_TYPE_FLOAT) && (simdHandle == m_simdHandleCache->SIMDVector3Handle));
- isHWSIMD = false;
- break;
-
- case TYP_SIMD16:
- switch (simdBaseJitType)
- {
- case CORINFO_TYPE_FLOAT:
- if (simdHandle == m_simdHandleCache->SIMDVector4Handle)
- {
- isHWSIMD = false;
- }
-#if defined(FEATURE_HW_INTRINSICS)
- else
- {
- assert(simdHandle == m_simdHandleCache->Vector128FloatHandle);
- }
- break;
- case CORINFO_TYPE_DOUBLE:
- assert(simdHandle == m_simdHandleCache->Vector128DoubleHandle);
- break;
- case CORINFO_TYPE_INT:
- assert(simdHandle == m_simdHandleCache->Vector128IntHandle);
- break;
- case CORINFO_TYPE_USHORT:
- assert(simdHandle == m_simdHandleCache->Vector128UShortHandle);
- break;
- case CORINFO_TYPE_UBYTE:
- assert(simdHandle == m_simdHandleCache->Vector128UByteHandle);
- break;
- case CORINFO_TYPE_SHORT:
- assert(simdHandle == m_simdHandleCache->Vector128ShortHandle);
- break;
- case CORINFO_TYPE_BYTE:
- assert(simdHandle == m_simdHandleCache->Vector128ByteHandle);
- break;
- case CORINFO_TYPE_LONG:
- assert(simdHandle == m_simdHandleCache->Vector128LongHandle);
- break;
- case CORINFO_TYPE_UINT:
- assert(simdHandle == m_simdHandleCache->Vector128UIntHandle);
- break;
- case CORINFO_TYPE_ULONG:
- assert(simdHandle == m_simdHandleCache->Vector128ULongHandle);
- break;
- case CORINFO_TYPE_NATIVEINT:
- assert(simdHandle == m_simdHandleCache->Vector128NIntHandle);
- break;
- case CORINFO_TYPE_NATIVEUINT:
- assert(simdHandle == m_simdHandleCache->Vector128NUIntHandle);
- break;
-#endif // defined(FEATURE_HW_INTRINSICS)
-
- default:
- break;
- }
- break;
-
-#if defined(TARGET_XARCH) && defined(FEATURE_HW_INTRINSICS)
- case TYP_SIMD32:
- switch (simdBaseJitType)
- {
- case CORINFO_TYPE_FLOAT:
- assert(simdHandle == m_simdHandleCache->Vector256FloatHandle);
- break;
- case CORINFO_TYPE_DOUBLE:
- assert(simdHandle == m_simdHandleCache->Vector256DoubleHandle);
- break;
- case CORINFO_TYPE_INT:
- assert(simdHandle == m_simdHandleCache->Vector256IntHandle);
- break;
- case CORINFO_TYPE_USHORT:
- assert(simdHandle == m_simdHandleCache->Vector256UShortHandle);
- break;
- case CORINFO_TYPE_UBYTE:
- assert(simdHandle == m_simdHandleCache->Vector256UByteHandle);
- break;
- case CORINFO_TYPE_SHORT:
- assert(simdHandle == m_simdHandleCache->Vector256ShortHandle);
- break;
- case CORINFO_TYPE_BYTE:
- assert(simdHandle == m_simdHandleCache->Vector256ByteHandle);
- break;
- case CORINFO_TYPE_LONG:
- assert(simdHandle == m_simdHandleCache->Vector256LongHandle);
- break;
- case CORINFO_TYPE_UINT:
- assert(simdHandle == m_simdHandleCache->Vector256UIntHandle);
- break;
- case CORINFO_TYPE_ULONG:
- assert(simdHandle == m_simdHandleCache->Vector256ULongHandle);
- break;
- case CORINFO_TYPE_NATIVEINT:
- assert(simdHandle == m_simdHandleCache->Vector256NIntHandle);
- break;
- case CORINFO_TYPE_NATIVEUINT:
- assert(simdHandle == m_simdHandleCache->Vector256NUIntHandle);
- break;
- default:
- break;
- }
- break;
-#endif // TARGET_XARCH && FEATURE_HW_INTRINSICS
- default:
- break;
- }
- }
-
- unsigned size = genTypeSize(simdType);
- if (isHWSIMD)
- {
-#if defined(FEATURE_HW_INTRINSICS)
- return gtNewSimdZeroNode(simdType, simdBaseJitType, size, /* isSimdAsHWIntrinsic */ false);
-#else
- JITDUMP("Coudn't find the matching SIMD type for %s<%s> in gtGetSIMDZero\n", varTypeName(simdType),
- varTypeName(JitType2PreciseVarType(simdBaseJitType)));
-
- return nullptr;
-#endif // FEATURE_HW_INTRINSICS
- }
- else
- {
- return gtNewSIMDVectorZero(simdType, simdBaseJitType, size);
- }
-}
-#endif // FEATURE_SIMD
-
CORINFO_CLASS_HANDLE Compiler::gtGetStructHandleIfPresent(GenTree* tree)
{
CORINFO_CLASS_HANDLE structHnd = NO_CLASS_HANDLE;
}
break;
#endif
+ case GT_CNS_VEC:
+ {
+#if defined(FEATURE_HW_INTRINSICS)
+ structHnd = gtGetStructHandleForHWSIMD(tree->gtType, tree->AsVecCon()->GetSimdBaseJitType());
+#endif // FEATURE_HW_INTRINSICS
+
+#if defined(FEATURE_SIMD)
+ if (structHnd == NO_CLASS_HANDLE)
+ {
+ structHnd = gtGetStructHandleForSIMD(tree->gtType, tree->AsVecCon()->GetSimdBaseJitType());
+ }
+#endif // FEATURE_SIMD
break;
+ }
}
// TODO-1stClassStructs: add a check that `structHnd != NO_CLASS_HANDLE`,
// nowadays it won't work because the right part of an ASG could have struct type without a handle
return false;
}
}
-#elif TARGET_ARM64
- switch (AsHWIntrinsic()->GetHWIntrinsicId())
- {
- case NI_Vector64_get_Zero:
- case NI_Vector128_get_Zero:
- {
- return true;
- }
-
- default:
- {
- return false;
- }
- }
#else
return false;
#endif // TARGET_XARCH
else
{
GenTree* tmp;
- CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSIMD(type, simdBaseJitType);
+ CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSimdOrHW(type, simdBaseJitType, isSimdAsHWIntrinsic);
GenTree* op1Dup1;
op1 = impCloneExpr(op1, &op1Dup1, clsHnd, (unsigned)CHECK_SPILL_ALL,
nullptr DEBUGARG("Clone op1 for vector abs"));
// op1 = op1 < Zero
- tmp = gtNewSimdZeroNode(type, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
+ tmp = gtNewZeroConNode(type, simdBaseJitType);
op1 = gtNewSimdCmpOpNode(GT_LT, type, op1, tmp, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
// tmp = Zero - op1Dup1
- tmp = gtNewSimdZeroNode(type, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
+ tmp = gtNewZeroConNode(type, simdBaseJitType);
tmp = gtNewSimdBinOpNode(GT_SUB, type, tmp, op1Dup1, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
// result = ConditionalSelect(op1, tmp, op1Dup2)
}
NamedIntrinsic intrinsic = NI_Illegal;
- CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSIMD(type, simdBaseJitType);
+ CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSimdOrHW(type, simdBaseJitType, isSimdAsHWIntrinsic);
switch (op)
{
assert(varTypeIsArithmetic(simdBaseType));
NamedIntrinsic intrinsic = NI_Illegal;
- CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSIMD(type, simdBaseJitType);
+ CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSimdOrHW(type, simdBaseJitType, isSimdAsHWIntrinsic);
switch (op)
{
// We want to generate a comparison along the lines of
// GT_XX(op1, op2).As<T, TInteger>() == Vector128<TInteger>.AllBitsSet
- NamedIntrinsic getAllBitsSet = NI_Illegal;
-
if (simdSize == 32)
{
// TODO-XArch-CQ: It's a non-trivial amount of work to support these
// other things, inverting the comparison and potentially support for a
// new Avx.TestNotZ intrinsic to ensure the codegen remains efficient.
assert(compIsaSupportedDebugOnly(InstructionSet_AVX2));
-
- intrinsic = NI_Vector256_op_Equality;
- getAllBitsSet = NI_Vector256_get_AllBitsSet;
+ intrinsic = NI_Vector256_op_Equality;
}
else
{
- intrinsic = NI_Vector128_op_Equality;
- getAllBitsSet = NI_Vector128_get_AllBitsSet;
+ intrinsic = NI_Vector128_op_Equality;
}
op1 = gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize,
simdBaseJitType = CORINFO_TYPE_LONG;
}
- op2 = gtNewSimdHWIntrinsicNode(simdType, getAllBitsSet, simdBaseJitType, simdSize);
+ op2 = gtNewAllBitsSetConNode(simdType, simdBaseJitType);
break;
}
#elif defined(TARGET_ARM64)
// We want to generate a comparison along the lines of
// GT_XX(op1, op2).As<T, TInteger>() == Vector128<TInteger>.AllBitsSet
- NamedIntrinsic getAllBitsSet = NI_Illegal;
-
if (simdSize == 8)
{
- intrinsic = NI_Vector64_op_Equality;
- getAllBitsSet = NI_Vector64_get_AllBitsSet;
+ intrinsic = NI_Vector64_op_Equality;
}
else
{
- intrinsic = NI_Vector128_op_Equality;
- getAllBitsSet = NI_Vector128_get_AllBitsSet;
+ intrinsic = NI_Vector128_op_Equality;
}
op1 = gtNewSimdCmpOpNode(op, simdType, op1, op2, simdBaseJitType, simdSize,
simdBaseJitType = CORINFO_TYPE_LONG;
}
- op2 = gtNewSimdHWIntrinsicNode(simdType, getAllBitsSet, simdBaseJitType, simdSize);
+ op2 = gtNewAllBitsSetConNode(simdType, simdBaseJitType);
break;
}
#else
simdBaseJitType = CORINFO_TYPE_LONG;
}
- op2 = gtNewSimdZeroNode(simdType, simdBaseJitType, simdSize, /* isSimdAsHWIntrinsic */ false);
+ op2 = gtNewZeroConNode(simdType, simdBaseJitType);
break;
}
simdBaseJitType = CORINFO_TYPE_LONG;
}
- op2 = gtNewSimdZeroNode(simdType, simdBaseJitType, simdSize, /* isSimdAsHWIntrinsic */ false);
+ op2 = gtNewZeroConNode(simdType, simdBaseJitType);
break;
}
assert(varTypeIsArithmetic(simdBaseType));
NamedIntrinsic intrinsic = NI_Illegal;
- CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSIMD(type, simdBaseJitType);
+ CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSimdOrHW(type, simdBaseJitType, isSimdAsHWIntrinsic);
#if defined(TARGET_XARCH)
if (simdSize == 32)
assert(varTypeIsArithmetic(simdBaseType));
NamedIntrinsic intrinsic = NI_Illegal;
- CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSIMD(type, simdBaseJitType);
+ CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSimdOrHW(type, simdBaseJitType, isSimdAsHWIntrinsic);
#if defined(TARGET_XARCH)
if (simdSize == 32)
// code formatting, its too long to reasonably display here.
CorInfoType opBaseJitType = (simdBaseType == TYP_BYTE) ? CORINFO_TYPE_SHORT : CORINFO_TYPE_USHORT;
- CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSIMD(type, opBaseJitType);
+ CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSimdOrHW(type, opBaseJitType, isSimdAsHWIntrinsic);
tmp1 = gtNewSimdHWIntrinsicNode(type, gtNewIconNode(0x00FF), NI_Vector256_Create, opBaseJitType,
simdSize, isSimdAsHWIntrinsic);
// return Avx2.Permute4x64(tmp4.AsUInt64(), SHUFFLE_WYZX).As<T>();
CorInfoType opBaseJitType = (simdBaseType == TYP_SHORT) ? CORINFO_TYPE_INT : CORINFO_TYPE_UINT;
- CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSIMD(type, opBaseJitType);
+ CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSimdOrHW(type, opBaseJitType, isSimdAsHWIntrinsic);
tmp1 = gtNewSimdHWIntrinsicNode(type, gtNewIconNode(0x0000FFFF), NI_Vector256_Create, opBaseJitType,
simdSize, isSimdAsHWIntrinsic);
// return Avx2.Permute4x64(tmp3.AsUInt64(), SHUFFLE_WYZX).AsUInt32();
CorInfoType opBaseJitType = (simdBaseType == TYP_INT) ? CORINFO_TYPE_LONG : CORINFO_TYPE_ULONG;
- CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSIMD(type, opBaseJitType);
+ CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSimdOrHW(type, opBaseJitType, isSimdAsHWIntrinsic);
GenTree* op1Dup;
op1 = impCloneExpr(op1, &op1Dup, clsHnd, (unsigned)CHECK_SPILL_ALL,
// return Sse2.PackUnsignedSaturate(tmp1, tmp2).As<T>();
CorInfoType opBaseJitType = (simdBaseType == TYP_BYTE) ? CORINFO_TYPE_SHORT : CORINFO_TYPE_USHORT;
- CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSIMD(type, opBaseJitType);
+ CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSimdOrHW(type, opBaseJitType, isSimdAsHWIntrinsic);
tmp1 = gtNewSimdHWIntrinsicNode(type, gtNewIconNode(0x00FF), NI_Vector128_Create, opBaseJitType,
simdSize, isSimdAsHWIntrinsic);
// ...
CorInfoType opBaseJitType = (simdBaseType == TYP_SHORT) ? CORINFO_TYPE_INT : CORINFO_TYPE_UINT;
- CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSIMD(type, opBaseJitType);
+ CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSimdOrHW(type, opBaseJitType, isSimdAsHWIntrinsic);
if (compOpportunisticallyDependsOn(InstructionSet_SSE41))
{
tmp2 = gtNewSimdHWIntrinsicNode(type, op1Dup, op2Dup, NI_SSE2_UnpackHigh, simdBaseJitType, simdSize,
isSimdAsHWIntrinsic);
- clsHnd = gtGetStructHandleForSIMD(type, simdBaseJitType);
+ clsHnd = gtGetStructHandleForSimdOrHW(type, simdBaseJitType, isSimdAsHWIntrinsic);
GenTree* tmp1Dup;
tmp1 = impCloneExpr(tmp1, &tmp1Dup, clsHnd, (unsigned)CHECK_SPILL_ALL,
// return Sse2.UnpackLow(tmp1, tmp2).As<T>();
CorInfoType opBaseJitType = (simdBaseType == TYP_INT) ? CORINFO_TYPE_LONG : CORINFO_TYPE_ULONG;
- CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSIMD(type, opBaseJitType);
+ CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSimdOrHW(type, opBaseJitType, isSimdAsHWIntrinsic);
GenTree* op1Dup;
op1 = impCloneExpr(op1, &op1Dup, clsHnd, (unsigned)CHECK_SPILL_ALL,
{
// AllBitsSet represents indices that are always "out of range" which means zero should be
// selected for every element. We can special-case this down to just returning a zero node
- return gtNewSimdZeroNode(type, simdBaseJitType, simdSize, /* isSimdAsHWIntrinsic */ false);
+ return gtNewZeroConNode(type, simdBaseJitType);
}
if (op2->IsVectorZero())
size_t elementCount = simdSize / elementSize;
#if defined(TARGET_XARCH)
- uint8_t control = 0;
- bool crossLane = false;
- bool needsZero = varTypeIsSmallInt(simdBaseType);
- uint64_t value = 0;
- uint8_t vecCns[32] = {};
- uint8_t mskCns[32] = {};
+ uint8_t control = 0;
+ bool crossLane = false;
+ bool needsZero = varTypeIsSmallInt(simdBaseType);
+ uint64_t value = 0;
+ simd32_t vecCns = {};
+ simd32_t mskCns = {};
for (size_t index = 0; index < elementCount; index++)
{
- value = op2->GetIntegralVectorConstElement(index);
+ value = op2->GetIntegralVectorConstElement(index, simdBaseType);
if (value < elementCount)
{
for (uint32_t i = 0; i < elementSize; i++)
{
- vecCns[(index * elementSize) + i] = (uint8_t)((value * elementSize) + i);
+ vecCns.u8[(index * elementSize) + i] = (uint8_t)((value * elementSize) + i);
// When Ssse3 is not supported, we need to adjust the constant to be AllBitsSet
// so that we can emit a ConditionalSelect(op2, retNode, zeroNode).
- mskCns[(index * elementSize) + i] = 0xFF;
+ mskCns.u8[(index * elementSize) + i] = 0xFF;
}
}
else
for (uint32_t i = 0; i < elementSize; i++)
{
- vecCns[(index * elementSize) + i] = 0xFF;
+ vecCns.u8[(index * elementSize) + i] = 0xFF;
// When Ssse3 is not supported, we need to adjust the constant to be Zero
// so that we can emit a ConditionalSelect(op2, retNode, zeroNode).
- mskCns[(index * elementSize) + i] = 0x00;
+ mskCns.u8[(index * elementSize) + i] = 0x00;
}
}
}
GenTree* op1Lower = gtNewSimdHWIntrinsicNode(type, op1, NI_Vector256_GetLower, simdBaseJitType, simdSize,
isSimdAsHWIntrinsic);
- IntrinsicNodeBuilder nodeBuilder1(getAllocator(CMK_ASTNode), 16);
-
- for (uint32_t i = 0; i < 16; i++)
- {
- nodeBuilder1.AddOperand(i, gtNewIconNode(vecCns[i]));
- }
+ op2 = gtNewVconNode(TYP_SIMD16, simdBaseJitType);
+ op2->AsVecCon()->gtSimd16Val = vecCns.v128[0];
- op2 = gtNewSimdHWIntrinsicNode(type, std::move(nodeBuilder1), NI_Vector128_Create, simdBaseJitType, 16,
- isSimdAsHWIntrinsic);
-
- op1Lower = gtNewSimdHWIntrinsicNode(type, op1Lower, op2, NI_SSSE3_Shuffle, simdBaseJitType, 16,
+ op1Lower = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1Lower, op2, NI_SSSE3_Shuffle, simdBaseJitType, 16,
isSimdAsHWIntrinsic);
GenTree* op1Upper = gtNewSimdHWIntrinsicNode(type, op1Dup, gtNewIconNode(1), NI_AVX_ExtractVector128,
simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
- IntrinsicNodeBuilder nodeBuilder2(getAllocator(CMK_ASTNode), 16);
-
- for (uint32_t i = 0; i < 16; i++)
- {
- nodeBuilder2.AddOperand(i, gtNewIconNode(vecCns[16 + i]));
- }
-
- op2 = gtNewSimdHWIntrinsicNode(type, std::move(nodeBuilder2), NI_Vector128_Create, simdBaseJitType, 16,
- isSimdAsHWIntrinsic);
+ op2 = gtNewVconNode(TYP_SIMD16, simdBaseJitType);
+ op2->AsVecCon()->gtSimd16Val = vecCns.v128[1];
- op1Upper = gtNewSimdHWIntrinsicNode(type, op1Upper, op2, NI_SSSE3_Shuffle, simdBaseJitType, 16,
+ op1Upper = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1Upper, op2, NI_SSSE3_Shuffle, simdBaseJitType, 16,
isSimdAsHWIntrinsic);
return gtNewSimdHWIntrinsicNode(type, op1Lower, op1Upper, gtNewIconNode(1), NI_AVX_InsertVector128,
if (elementSize == 4)
{
- IntrinsicNodeBuilder nodeBuilder(getAllocator(CMK_ASTNode), elementCount);
-
for (uint32_t i = 0; i < elementCount; i++)
{
- uint8_t value = (uint8_t)(vecCns[i * elementSize] / elementSize);
- nodeBuilder.AddOperand(i, gtNewIconNode(value));
+ vecCns.u32[i] = (uint8_t)(vecCns.u8[i * elementSize] / elementSize);
}
- CorInfoType indicesJitType = varTypeIsUnsigned(simdBaseType) ? CORINFO_TYPE_UINT : CORINFO_TYPE_INT;
-
- op2 = gtNewSimdHWIntrinsicNode(type, std::move(nodeBuilder), NI_Vector256_Create, indicesJitType, simdSize,
- isSimdAsHWIntrinsic);
+ op2 = gtNewVconNode(type, simdBaseJitType);
+ op2->AsVecCon()->gtSimd32Val = vecCns;
// swap the operands to match the encoding requirements
retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX2_PermuteVar8x32, simdBaseJitType, simdSize,
{
simdBaseJitType = varTypeIsUnsigned(simdBaseType) ? CORINFO_TYPE_UBYTE : CORINFO_TYPE_BYTE;
- IntrinsicNodeBuilder nodeBuilder(getAllocator(CMK_ASTNode), simdSize);
-
- for (uint32_t i = 0; i < simdSize; i++)
- {
- nodeBuilder.AddOperand(i, gtNewIconNode(vecCns[i]));
- }
-
- op2 = gtNewSimdHWIntrinsicNode(type, std::move(nodeBuilder), NI_Vector128_Create, simdBaseJitType, simdSize,
- isSimdAsHWIntrinsic);
+ op2 = gtNewVconNode(type, simdBaseJitType);
+ op2->AsVecCon()->gtSimd16Val = vecCns.v128[0];
return gtNewSimdHWIntrinsicNode(type, op1, op2, NI_SSSE3_Shuffle, simdBaseJitType, simdSize,
isSimdAsHWIntrinsic);
{
assert(!compIsaSupportedDebugOnly(InstructionSet_SSSE3));
- IntrinsicNodeBuilder nodeBuilder(getAllocator(CMK_ASTNode), simdSize);
-
- for (uint32_t i = 0; i < simdSize; i++)
- {
- nodeBuilder.AddOperand(i, gtNewIconNode(mskCns[i]));
- }
+ op2 = gtNewVconNode(type, simdBaseJitType);
+ op2->AsVecCon()->gtSimd16Val = mskCns.v128[0];
- op2 = gtNewSimdHWIntrinsicNode(type, std::move(nodeBuilder), NI_Vector128_Create, simdBaseJitType, simdSize,
- isSimdAsHWIntrinsic);
-
- GenTree* zero = gtNewSimdZeroNode(type, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
+ GenTree* zero = gtNewZeroConNode(type, simdBaseJitType);
retNode = gtNewSimdCndSelNode(type, op2, retNode, zero, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
}
return retNode;
#elif defined(TARGET_ARM64)
- uint64_t value = 0;
- uint8_t vecCns[16] = {};
+ uint64_t value = 0;
+ simd16_t vecCns = {};
for (size_t index = 0; index < elementCount; index++)
{
- value = op2->GetIntegralVectorConstElement(index);
+ value = op2->GetIntegralVectorConstElement(index, simdBaseType);
if (value < elementCount)
{
for (uint32_t i = 0; i < elementSize; i++)
{
- vecCns[(index * elementSize) + i] = (uint8_t)((value * elementSize) + i);
+ vecCns.u8[(index * elementSize) + i] = (uint8_t)((value * elementSize) + i);
}
}
else
{
for (uint32_t i = 0; i < elementSize; i++)
{
- vecCns[(index * elementSize) + i] = 0xFF;
+ vecCns.u8[(index * elementSize) + i] = 0xFF;
}
}
}
- NamedIntrinsic createIntrinsic = NI_Vector64_Create;
NamedIntrinsic lookupIntrinsic = NI_AdvSimd_VectorTableLookup;
if (simdSize == 16)
{
- createIntrinsic = NI_Vector128_Create;
lookupIntrinsic = NI_AdvSimd_Arm64_VectorTableLookup;
op1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, NI_Vector64_ToVector128, simdBaseJitType, simdSize,
isSimdAsHWIntrinsic);
}
- IntrinsicNodeBuilder nodeBuilder(getAllocator(CMK_ASTNode), simdSize);
-
- for (uint32_t i = 0; i < simdSize; i++)
- {
- nodeBuilder.AddOperand(i, gtNewIconNode(vecCns[i]));
- }
-
// VectorTableLookup is only valid on byte/sbyte
simdBaseJitType = varTypeIsUnsigned(simdBaseType) ? CORINFO_TYPE_UBYTE : CORINFO_TYPE_BYTE;
- op2 = gtNewSimdHWIntrinsicNode(type, std::move(nodeBuilder), createIntrinsic, simdBaseJitType, simdSize,
- isSimdAsHWIntrinsic);
+ op2 = gtNewVconNode(type, simdBaseJitType);
+ op2->AsVecCon()->gtSimd16Val = vecCns;
return gtNewSimdHWIntrinsicNode(type, op1, op2, lookupIntrinsic, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
#else
NamedIntrinsic intrinsic = NI_Illegal;
GenTree* tmp = nullptr;
- CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSIMD(simdType, simdBaseJitType);
+ CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSimdOrHW(simdType, simdBaseJitType, isSimdAsHWIntrinsic);
#if defined(TARGET_XARCH)
assert(!varTypeIsByte(simdBaseType) && !varTypeIsLong(simdBaseType));
assert(compIsaSupportedDebugOnly(InstructionSet_AVX));
assert(varTypeIsFloating(simdBaseType) || compIsaSupportedDebugOnly(InstructionSet_AVX2));
}
- op2 = gtNewSimdZeroNode(type, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
+ op2 = gtNewZeroConNode(type, simdBaseJitType);
// Zero - op1
return gtNewSimdBinOpNode(GT_SUB, type, op2, op1, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
case GT_NOT:
{
assert((simdSize != 32) || compIsaSupportedDebugOnly(InstructionSet_AVX));
-
- intrinsic = (simdSize == 32) ? NI_Vector256_get_AllBitsSet : NI_Vector128_get_AllBitsSet;
- op2 = gtNewSimdHWIntrinsicNode(type, intrinsic, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
-
- // op1 ^ AllBitsSet
+ op2 = gtNewAllBitsSetConNode(type, simdBaseJitType);
return gtNewSimdBinOpNode(GT_XOR, type, op1, op2, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
}
#elif defined(TARGET_ARM64)
else
{
// Zero - op1
- op2 = gtNewSimdZeroNode(type, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
+ op2 = gtNewZeroConNode(type, simdBaseJitType);
return gtNewSimdBinOpNode(GT_SUB, type, op2, op1, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
}
}
}
else
{
- tmp1 = gtNewSimdZeroNode(type, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
+ tmp1 = gtNewZeroConNode(type, simdBaseJitType);
if (varTypeIsSigned(simdBaseType))
{
- CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSIMD(type, simdBaseJitType);
+ CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSimdOrHW(type, simdBaseJitType, isSimdAsHWIntrinsic);
GenTree* op1Dup;
op1 = impCloneExpr(op1, &op1Dup, clsHnd, (unsigned)CHECK_SPILL_ALL,
else if (varTypeIsFloating(simdBaseType))
{
assert(simdBaseType == TYP_FLOAT);
- CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSIMD(type, simdBaseJitType);
+ CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSimdOrHW(type, simdBaseJitType, isSimdAsHWIntrinsic);
GenTree* op1Dup;
op1 = impCloneExpr(op1, &op1Dup, clsHnd, (unsigned)CHECK_SPILL_ALL,
}
else
{
- tmp1 = gtNewSimdZeroNode(type, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
+ tmp1 = gtNewZeroConNode(type, simdBaseJitType);
if (varTypeIsSigned(simdBaseType))
{
- CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSIMD(type, simdBaseJitType);
+ CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSimdOrHW(type, simdBaseJitType, isSimdAsHWIntrinsic);
GenTree* op1Dup;
op1 = impCloneExpr(op1, &op1Dup, clsHnd, (unsigned)CHECK_SPILL_ALL,
assert(intrinsic != NI_Illegal);
tmp1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, intrinsic, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
- zero = gtNewSimdZeroNode(TYP_SIMD16, simdBaseJitType, 16, isSimdAsHWIntrinsic);
+ zero = gtNewZeroConNode(TYP_SIMD16, simdBaseJitType);
tmp1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, tmp1, zero, gtNewIconNode(index), NI_AdvSimd_ExtractVector128,
simdBaseJitType, 16, isSimdAsHWIntrinsic);
return gtNewSimdHWIntrinsicNode(type, tmp1, NI_Vector128_GetLower, simdBaseJitType, simdSize,
return gtNewSimdHWIntrinsicNode(type, op1, op2, op3, hwIntrinsicID, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
}
-GenTree* Compiler::gtNewSimdZeroNode(var_types type,
- CorInfoType simdBaseJitType,
- unsigned simdSize,
- bool isSimdAsHWIntrinsic)
-{
- assert(IsBaselineSimdIsaSupportedDebugOnly());
-
- assert(varTypeIsSIMD(type));
- assert(getSIMDTypeForSize(simdSize) == type);
-
- var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType);
- assert(varTypeIsArithmetic(simdBaseType));
-
- NamedIntrinsic intrinsic = NI_Illegal;
-
-#if defined(TARGET_XARCH)
- intrinsic = (simdSize == 32) ? NI_Vector256_get_Zero : NI_Vector128_get_Zero;
-#elif defined(TARGET_ARM64)
- intrinsic = (simdSize > 8) ? NI_Vector128_get_Zero : NI_Vector64_get_Zero;
-#else
-#error Unsupported platform
-#endif // !TARGET_XARCH && !TARGET_ARM64
-
- return gtNewSimdHWIntrinsicNode(type, intrinsic, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
-}
-
GenTreeHWIntrinsic* Compiler::gtNewScalarHWIntrinsicNode(var_types type, NamedIntrinsic hwIntrinsicID)
{
return new (this, GT_HWINTRINSIC) GenTreeHWIntrinsic(type, getAllocator(CMK_ASTNode), hwIntrinsicID,
GTF_IND_FLAGS = GTF_IND_VOLATILE | GTF_IND_NONFAULTING | GTF_IND_TLS_REF | GTF_IND_UNALIGNED | GTF_IND_INVARIANT |
GTF_IND_NONNULL | GTF_IND_TGT_NOT_HEAP | GTF_IND_TGT_HEAP
#if defined(TARGET_XARCH)
- | GTF_IND_DONT_EXTEND
+ | GTF_IND_DONT_EXTEND
#endif // TARGET_XARCH
,
bool isContainedFltOrDblImmed() const
{
- return isContained() && (OperGet() == GT_CNS_DBL);
+ return isContained() && OperIs(GT_CNS_DBL);
+ }
+
+ bool isContainedVecImmed() const
+ {
+ return isContained() && OperIs(GT_CNS_VEC);
}
bool isLclField() const
bool isUsedFromMemory() const
{
- return ((isContained() && (isMemoryOp() || (OperGet() == GT_LCL_VAR) || (OperGet() == GT_CNS_DBL))) ||
+ return ((isContained() && (isMemoryOp() || OperIs(GT_LCL_VAR, GT_CNS_DBL, GT_CNS_VEC))) ||
isUsedFromSpillTemp());
}
if (gtType == TYP_VOID)
{
// These are the only operators which can produce either VOID or non-VOID results.
- assert(OperIs(GT_NOP, GT_CALL, GT_COMMA) || OperIsCompare() || OperIsLong() || OperIsSIMD() ||
- OperIsHWIntrinsic());
+ assert(OperIs(GT_NOP, GT_CALL, GT_COMMA) || OperIsCompare() || OperIsLong() || OperIsSimdOrHWintrinsic() ||
+ IsCnsVec());
return false;
}
static bool OperIsConst(genTreeOps gtOper)
{
- static_assert_no_msg(AreContiguous(GT_CNS_INT, GT_CNS_LNG, GT_CNS_DBL, GT_CNS_STR));
- return (GT_CNS_INT <= gtOper) && (gtOper <= GT_CNS_STR);
+ static_assert_no_msg(AreContiguous(GT_CNS_INT, GT_CNS_LNG, GT_CNS_DBL, GT_CNS_STR, GT_CNS_VEC));
+ return (GT_CNS_INT <= gtOper) && (gtOper <= GT_CNS_VEC);
}
bool OperIsConst() const
bool IsValidCallArgument();
#endif // DEBUG
- inline bool IsFPZero() const;
inline bool IsIntegralConst(ssize_t constVal) const;
- inline bool IsIntegralConstVector(ssize_t constVal) const;
- inline bool IsSIMDZero() const;
inline bool IsFloatPositiveZero() const;
inline bool IsVectorZero() const;
inline bool IsVectorAllBitsSet() const;
inline bool IsVectorConst();
- inline uint64_t GetIntegralVectorConstElement(size_t index);
+ inline uint64_t GetIntegralVectorConstElement(size_t index, var_types simdBaseType);
inline bool IsBoxedValue();
inline bool IsCnsNonZeroFltOrDbl() const;
+ inline bool IsCnsVec() const;
+
bool IsIconHandle() const
{
return (gtOper == GT_CNS_INT) && ((gtFlags & GTF_ICON_HDL_MASK) != 0);
#endif
};
+// GenTreeVecCon -- vector constant (GT_CNS_VEC)
+//
+struct GenTreeVecCon : public GenTree
+{
+ union {
+ simd8_t gtSimd8Val;
+ simd12_t gtSimd12Val;
+ simd16_t gtSimd16Val;
+ simd32_t gtSimd32Val;
+ };
+
+private:
+ // TODO-1stClassStructs: Tracking the size and base type should be unnecessary since the
+ // size should come from `gtType` and the base-type handle should be looked up at callsites where required
+
+ unsigned char gtSimdBaseJitType; // SIMD vector base JIT type
+ unsigned char gtSimdSize; // SIMD vector size in bytes
+
+public:
+ CorInfoType GetSimdBaseJitType() const
+ {
+ return (CorInfoType)gtSimdBaseJitType;
+ }
+
+ void SetSimdBaseJitType(CorInfoType simdBaseJitType)
+ {
+ gtSimdBaseJitType = (unsigned char)simdBaseJitType;
+ assert(gtSimdBaseJitType == simdBaseJitType);
+ }
+
+ var_types GetSimdBaseType() const;
+
+ unsigned char GetSimdSize() const
+ {
+ return gtSimdSize;
+ }
+
+ void SetSimdSize(unsigned simdSize)
+ {
+ gtSimdSize = (unsigned char)simdSize;
+ assert(gtSimdSize == simdSize);
+ }
+
+#if defined(FEATURE_HW_INTRINSICS)
+ static bool IsHWIntrinsicCreateConstant(GenTreeHWIntrinsic* node, simd32_t& simd32Val);
+
+ static bool HandleArgForHWIntrinsicCreate(GenTree* arg, int argIdx, simd32_t& simd32Val, var_types baseType);
+#endif // FEATURE_HW_INTRINSICS
+
+ bool IsAllBitsSet() const
+ {
+ switch (gtType)
+ {
+#if defined(FEATURE_SIMD)
+ case TYP_LONG:
+ case TYP_DOUBLE:
+ case TYP_SIMD8:
+ {
+ // TODO-1stClassStructs: do not retype SIMD nodes
+ return (gtSimd8Val.u64[0] == 0xFFFFFFFFFFFFFFFF);
+ }
+
+ case TYP_SIMD12:
+ {
+ return (gtSimd12Val.u32[0] == 0xFFFFFFFF) && (gtSimd12Val.u32[1] == 0xFFFFFFFF) &&
+ (gtSimd12Val.u32[2] == 0xFFFFFFFF);
+ }
+
+ case TYP_SIMD16:
+ {
+ return (gtSimd16Val.u64[0] == 0xFFFFFFFFFFFFFFFF) && (gtSimd16Val.u64[1] == 0xFFFFFFFFFFFFFFFF);
+ }
+
+ case TYP_SIMD32:
+ {
+ return (gtSimd32Val.u64[0] == 0xFFFFFFFFFFFFFFFF) && (gtSimd32Val.u64[1] == 0xFFFFFFFFFFFFFFFF) &&
+ (gtSimd32Val.u64[2] == 0xFFFFFFFFFFFFFFFF) && (gtSimd32Val.u64[3] == 0xFFFFFFFFFFFFFFFF);
+ }
+#endif // FEATURE_SIMD
+
+ default:
+ {
+ unreached();
+ }
+ }
+ }
+
+ static bool Equals(const GenTreeVecCon* left, const GenTreeVecCon* right)
+ {
+ var_types gtType = left->TypeGet();
+
+ if (gtType != right->TypeGet())
+ {
+ return false;
+ }
+
+ switch (gtType)
+ {
+#if defined(FEATURE_SIMD)
+ case TYP_LONG:
+ case TYP_DOUBLE:
+ case TYP_SIMD8:
+ {
+ // TODO-1stClassStructs: do not retype SIMD nodes
+ return (left->gtSimd8Val.u64[0] == right->gtSimd8Val.u64[0]);
+ }
+
+ case TYP_SIMD12:
+ {
+ return (left->gtSimd12Val.u32[0] == right->gtSimd12Val.u32[0]) &&
+ (left->gtSimd12Val.u32[1] == right->gtSimd12Val.u32[1]) &&
+ (left->gtSimd12Val.u32[2] == right->gtSimd12Val.u32[2]);
+ }
+
+ case TYP_SIMD16:
+ {
+ return (left->gtSimd16Val.u64[0] == right->gtSimd16Val.u64[0]) &&
+ (left->gtSimd16Val.u64[1] == right->gtSimd16Val.u64[1]);
+ }
+
+ case TYP_SIMD32:
+ {
+ return (left->gtSimd32Val.u64[0] == right->gtSimd32Val.u64[0]) &&
+ (left->gtSimd32Val.u64[1] == right->gtSimd32Val.u64[1]) &&
+ (left->gtSimd32Val.u64[2] == right->gtSimd32Val.u64[2]) &&
+ (left->gtSimd32Val.u64[3] == right->gtSimd32Val.u64[3]);
+ }
+#endif // FEATURE_SIMD
+
+ default:
+ {
+ unreached();
+ }
+ }
+ }
+
+ bool IsZero() const
+ {
+ switch (gtType)
+ {
+#if defined(FEATURE_SIMD)
+ case TYP_LONG:
+ case TYP_DOUBLE:
+ case TYP_SIMD8:
+ {
+ // TODO-1stClassStructs: do not retype SIMD nodes
+ return (gtSimd8Val.u64[0] == 0x0000000000000000);
+ }
+
+ case TYP_SIMD12:
+ {
+ return (gtSimd12Val.u32[0] == 0x00000000) && (gtSimd12Val.u32[1] == 0x00000000) &&
+ (gtSimd12Val.u32[2] == 0x00000000);
+ }
+
+ case TYP_SIMD16:
+ {
+ return (gtSimd16Val.u64[0] == 0x0000000000000000) && (gtSimd16Val.u64[1] == 0x0000000000000000);
+ }
+
+ case TYP_SIMD32:
+ {
+ return (gtSimd32Val.u64[0] == 0x0000000000000000) && (gtSimd32Val.u64[1] == 0x0000000000000000) &&
+ (gtSimd32Val.u64[2] == 0x0000000000000000) && (gtSimd32Val.u64[3] == 0x0000000000000000);
+ }
+#endif // FEATURE_SIMD
+
+ default:
+ {
+ unreached();
+ }
+ }
+ }
+
+ GenTreeVecCon(var_types type, CorInfoType simdBaseJitType, unsigned simdSize)
+ : GenTree(GT_CNS_VEC, type)
+ , gtSimdBaseJitType((unsigned char)simdBaseJitType)
+ , gtSimdSize((unsigned char)simdSize)
+ {
+ assert(varTypeIsSIMD(type));
+ assert(gtSimdBaseJitType == simdBaseJitType);
+ assert(gtSimdSize == simdSize);
+ }
+
+#if DEBUGGABLE_GENTREE
+ GenTreeVecCon() : GenTree()
+ {
+ }
+#endif
+};
+
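A minimal usage sketch (assuming a Compiler context where gtNewVconNode is in scope; the lane values are arbitrary): allocate the node, then write its lanes through the union directly:

    GenTreeVecCon* vecCon = gtNewVconNode(TYP_SIMD16, CORINFO_TYPE_INT);

    // Fill the four 32-bit lanes of the 16-byte constant.
    for (int i = 0; i < 4; i++)
    {
        vecCon->gtSimd16Val.u32[i] = 42;
    }

    assert(!vecCon->IsZero() && !vecCon->IsAllBitsSet());

This is the same pattern the importer uses elsewhere in this change when folding Vector*.Create calls whose arguments are all constant.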
// Common supertype of LCL_VAR, LCL_FLD, REG_VAR, PHI_ARG
// This inherits from UnOp because lclvar stores are Unops
struct GenTreeLclVarCommon : public GenTreeUnOp
{
src = AsBlk()->Data()->gtSkipReloadOrCopy();
}
- return src->OperIsInitVal() || src->OperIsConst();
+ return src->OperIsInitVal() || src->IsIntegralConst();
}
inline bool GenTree::OperIsCopyBlkOp()
}
//------------------------------------------------------------------------
-// IsFPZero: Checks whether this is a floating point constant with value 0.0
-//
-// Return Value:
-// Returns true iff the tree is an GT_CNS_DBL, with value of 0.0.
-
-inline bool GenTree::IsFPZero() const
-{
- if ((gtOper == GT_CNS_DBL) && (AsDblCon()->gtDconVal == 0.0))
- {
- return true;
- }
- return false;
-}
-
-//------------------------------------------------------------------------
// IsIntegralConst: Checks whether this is a constant node with the given value
//
// Arguments:
}
//-------------------------------------------------------------------
-// IsIntegralConstVector: returns true if this is an SIMD vector
-// with all its elements equal to an integral constant.
-//
-// Arguments:
-// constVal - const value of vector element
-//
-// Returns:
-// True if this represents an integral const SIMD vector.
-//
-inline bool GenTree::IsIntegralConstVector(ssize_t constVal) const
-{
-#ifdef FEATURE_SIMD
- // SIMDIntrinsicInit intrinsic with a const value as initializer
- // represents a const vector.
- if ((gtOper == GT_SIMD) && (AsSIMD()->GetSIMDIntrinsicId() == SIMDIntrinsicInit) &&
- AsSIMD()->Op(1)->IsIntegralConst(constVal))
- {
- assert(varTypeIsIntegral(AsSIMD()->GetSimdBaseType()));
- assert(AsSIMD()->GetOperandCount() == 1);
- return true;
- }
-#endif // FEATURE_SIMD
-
-#ifdef FEATURE_HW_INTRINSICS
- if (gtOper == GT_HWINTRINSIC)
- {
- const GenTreeHWIntrinsic* node = AsHWIntrinsic();
-
- if (!varTypeIsIntegral(node->GetSimdBaseType()))
- {
- // Can't be an integral constant
- return false;
- }
-
- NamedIntrinsic intrinsicId = node->GetHWIntrinsicId();
-
- if ((node->GetOperandCount() == 0) && (constVal == 0))
- {
-#if defined(TARGET_XARCH)
- return (intrinsicId == NI_Vector128_get_Zero) || (intrinsicId == NI_Vector256_get_Zero);
-#elif defined(TARGET_ARM64)
- return (intrinsicId == NI_Vector64_get_Zero) || (intrinsicId == NI_Vector128_get_Zero);
-#endif // !TARGET_XARCH && !TARGET_ARM64
- }
- else if ((node->GetOperandCount() == 1) && node->Op(1)->IsIntegralConst(constVal))
- {
-#if defined(TARGET_XARCH)
- return (intrinsicId == NI_Vector128_Create) || (intrinsicId == NI_Vector256_Create);
-#elif defined(TARGET_ARM64)
- return (intrinsicId == NI_Vector64_Create) || (intrinsicId == NI_Vector128_Create);
-#endif // !TARGET_XARCH && !TARGET_ARM64
- }
- }
-#endif // FEATURE_HW_INTRINSICS
-
- return false;
-}
-
-//-------------------------------------------------------------------
-// IsSIMDZero: returns true if this is an SIMD vector with all its
-// elements equal to zero.
-//
-// Returns:
-// True if this represents an integral const SIMD vector.
-//
-inline bool GenTree::IsSIMDZero() const
-{
-#ifdef FEATURE_SIMD
- if ((gtOper == GT_SIMD) && (AsSIMD()->GetSIMDIntrinsicId() == SIMDIntrinsicInit))
- {
- return (AsSIMD()->Op(1)->IsIntegralConst(0) || AsSIMD()->Op(1)->IsFPZero());
- }
-#endif
-
- return false;
-}
-
-//-------------------------------------------------------------------
// IsFloatPositiveZero: returns true if this is exactly a const float value of positive zero (+0.0)
//
// Returns:
}
//-------------------------------------------------------------------
-// IsVectorZero: returns true if this node is a HWIntrinsic that is Vector*_get_Zero.
+// IsVectorZero: returns true if this node is a vector constant with all bits zero.
//
// Returns:
-// True if this represents a HWIntrinsic node that is Vector*_get_Zero.
+// True if this node is a vector constant with all bits zero.
//
-// TODO: We already have IsSIMDZero() and IsIntegralConstVector(0),
-// however, IsSIMDZero() does not cover hardware intrinsics, and IsIntegralConstVector(0) does not cover floating
-// point. In order to not risk adverse behaviour by modifying those, this function 'IsVectorZero' was introduced.
-// At some point, it makes sense to normalize this logic to be a single function call rather than have several
-// separate ones; preferably this one.
inline bool GenTree::IsVectorZero() const
{
-#ifdef FEATURE_HW_INTRINSICS
- if (gtOper == GT_HWINTRINSIC)
- {
- const GenTreeHWIntrinsic* node = AsHWIntrinsic();
- const NamedIntrinsic intrinsicId = node->GetHWIntrinsicId();
-
-#if defined(TARGET_XARCH)
- return (intrinsicId == NI_Vector128_get_Zero) || (intrinsicId == NI_Vector256_get_Zero);
-#elif defined(TARGET_ARM64)
- return (intrinsicId == NI_Vector64_get_Zero) || (intrinsicId == NI_Vector128_get_Zero);
-#endif // !TARGET_XARCH && !TARGET_ARM64
- }
-#endif // FEATURE_HW_INTRINSICS
-
- return false;
+ return IsCnsVec() && AsVecCon()->IsZero();
}
//-------------------------------------------------------------------
-// IsVectorAllBitsSet: returns true if this node is a HWIntrinsic that is Vector*_get_AllBitsSet.
+// IsVectorAllBitsSet: returns true if this node is a vector constant with all bits set.
//
// Returns:
-// True if this represents a HWIntrinsic node that is Vector*_get_AllBitsSet.
+// True if this node is a vector constant with all bits set.
//
inline bool GenTree::IsVectorAllBitsSet() const
{
-#ifdef FEATURE_HW_INTRINSICS
- if (gtOper == GT_HWINTRINSIC)
+#ifdef FEATURE_SIMD
+ if (OperIs(GT_CNS_VEC))
{
- const GenTreeHWIntrinsic* node = AsHWIntrinsic();
- const NamedIntrinsic intrinsicId = node->GetHWIntrinsicId();
-
-#if defined(TARGET_XARCH)
- return (intrinsicId == NI_Vector128_get_AllBitsSet) || (intrinsicId == NI_Vector256_get_AllBitsSet);
-#elif defined(TARGET_ARM64)
- return (intrinsicId == NI_Vector64_get_AllBitsSet) || (intrinsicId == NI_Vector128_get_AllBitsSet);
-#endif // !TARGET_XARCH && !TARGET_ARM64
+ return AsVecCon()->IsAllBitsSet();
}
-#endif // FEATURE_HW_INTRINSICS
+#endif // FEATURE_SIMD
return false;
}
//
inline bool GenTree::IsVectorConst()
{
-#ifdef FEATURE_HW_INTRINSICS
- if (gtOper == GT_HWINTRINSIC)
+#ifdef FEATURE_SIMD
+ if (OperIs(GT_CNS_VEC))
{
- const GenTreeHWIntrinsic* node = AsHWIntrinsic();
- const NamedIntrinsic intrinsicId = node->GetHWIntrinsicId();
-
-#if defined(TARGET_XARCH)
- if ((intrinsicId == NI_Vector128_Create) || (intrinsicId == NI_Vector256_Create))
- {
- for (GenTree* arg : Operands())
- {
- if (!arg->IsIntegralConst() && !arg->IsCnsFltOrDbl())
- {
- return false;
- }
- }
-
- return true;
- }
-#elif defined(TARGET_ARM64)
- if ((intrinsicId == NI_Vector64_Create) || (intrinsicId == NI_Vector128_Create))
- {
- for (GenTree* arg : Operands())
- {
- if (!arg->IsIntegralConst() && !arg->IsCnsFltOrDbl())
- {
- return false;
- }
- }
-
- return true;
- }
-#endif // !TARGET_XARCH && !TARGET_ARM64
-
- return IsVectorZero() || IsVectorAllBitsSet();
+ return true;
}
-#endif // FEATURE_HW_INTRINSICS
+#endif // FEATURE_SIMD
return false;
}
// Returns:
// The value of a given element in an integral vector constant
//
-inline uint64_t GenTree::GetIntegralVectorConstElement(size_t index)
+inline uint64_t GenTree::GetIntegralVectorConstElement(size_t index, var_types simdBaseType)
{
#ifdef FEATURE_HW_INTRINSICS
- if (gtOper == GT_HWINTRINSIC)
+ if (IsCnsVec())
{
- const GenTreeHWIntrinsic* node = AsHWIntrinsic();
- const NamedIntrinsic intrinsicId = node->GetHWIntrinsicId();
- size_t operandsCount = node->GetOperandCount();
-
- CorInfoType simdBaseJitType = node->GetSimdBaseJitType();
- var_types simdBaseType = node->GetSimdBaseType();
+ const GenTreeVecCon* node = AsVecCon();
-#if defined(TARGET_XARCH)
- if ((intrinsicId == NI_Vector128_Create) || (intrinsicId == NI_Vector256_Create))
- {
- return (uint64_t)node->Op(index + 1)->AsIntConCommon()->IntegralValue();
- }
-#elif defined(TARGET_ARM64)
- if ((intrinsicId == NI_Vector64_Create) || (intrinsicId == NI_Vector128_Create))
+ switch (simdBaseType)
{
- return (uint64_t)node->Op(index + 1)->AsIntConCommon()->IntegralValue();
- }
-#endif // !TARGET_XARCH && !TARGET_ARM64
+ case TYP_BYTE:
+ {
+ return node->gtSimd32Val.i8[index];
+ }
- if (IsVectorZero())
- {
- return 0;
- }
+ case TYP_UBYTE:
+ {
+ return node->gtSimd32Val.u8[index];
+ }
- if (IsVectorAllBitsSet())
- {
- switch (simdBaseType)
+ case TYP_SHORT:
{
- case TYP_BYTE:
- case TYP_UBYTE:
- {
- return 0xFF;
- }
+ return node->gtSimd32Val.i16[index];
+ }
- case TYP_SHORT:
- case TYP_USHORT:
- {
- return 0xFFFF;
- }
+ case TYP_USHORT:
+ {
+ return node->gtSimd32Val.u16[index];
+ }
- case TYP_INT:
- case TYP_UINT:
- {
- return 0xFFFFFFFF;
- }
+ case TYP_INT:
+ case TYP_FLOAT:
+ {
+ return node->gtSimd32Val.i32[index];
+ }
- case TYP_LONG:
- case TYP_ULONG:
- {
- return 0xFFFFFFFFFFFFFFFF;
- }
+ case TYP_UINT:
+ {
+ return node->gtSimd32Val.u32[index];
+ }
- default:
- {
- unreached();
- }
+ case TYP_LONG:
+ case TYP_DOUBLE:
+ {
+ return node->gtSimd32Val.i64[index];
+ }
+
+ case TYP_ULONG:
+ {
+ return node->gtSimd32Val.u64[index];
+ }
+
+ default:
+ {
+ unreached();
}
}
}
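A sketch of a typical consumer (it mirrors the Vector*.Shuffle handling later in this change): walk each lane of a constant index vector and reject out-of-range values:

    size_t elementCount = simdSize / genTypeSize(simdBaseType);

    for (size_t index = 0; index < elementCount; index++)
    {
        uint64_t value = indices->GetIntegralVectorConstElement(index, simdBaseType);

        if (value >= elementCount)
        {
            // Out-of-range shuffle index; give up on the optimization.
            break;
        }
    }

The new simdBaseType parameter tells the accessor how to partition the raw constant bits into lanes, since the node may have been created with a different base type than the one the consumer cares about.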
inline bool GenTree::IsCnsFltOrDbl() const
{
- return OperGet() == GT_CNS_DBL;
+ return OperIs(GT_CNS_DBL);
}
inline bool GenTree::IsCnsNonZeroFltOrDbl() const
{
- if (OperGet() == GT_CNS_DBL)
+ if (IsCnsFltOrDbl())
{
double constValue = AsDblCon()->gtDconVal;
return *(__int64*)&constValue != 0;
return false;
}
+inline bool GenTree::IsCnsVec() const
+{
+ return OperIs(GT_CNS_VEC);
+}
+
inline bool GenTree::IsHelperCall()
{
return OperGet() == GT_CALL && AsCall()->gtCallType == CT_HELPER;
GTNODE(CNS_LNG , GenTreeLngCon ,0,GTK_LEAF)
GTNODE(CNS_DBL , GenTreeDblCon ,0,GTK_LEAF)
GTNODE(CNS_STR , GenTreeStrCon ,0,GTK_LEAF)
+GTNODE(CNS_VEC , GenTreeVecCon ,0,GTK_LEAF)
//-----------------------------------------------------------------------------
// Unary operators (1 operand):
GTSTRUCT_1(LngCon , GT_CNS_LNG)
GTSTRUCT_1(DblCon , GT_CNS_DBL)
GTSTRUCT_1(StrCon , GT_CNS_STR)
+GTSTRUCT_1(VecCon , GT_CNS_VEC)
GTSTRUCT_N(LclVarCommon, GT_LCL_VAR, GT_LCL_FLD, GT_PHI_ARG, GT_STORE_LCL_VAR, GT_STORE_LCL_FLD, GT_LCL_VAR_ADDR, GT_LCL_FLD_ADDR)
GTSTRUCT_3(LclVar , GT_LCL_VAR, GT_LCL_VAR_ADDR, GT_STORE_LCL_VAR)
GTSTRUCT_3(LclFld , GT_LCL_FLD, GT_STORE_LCL_FLD, GT_LCL_FLD_ADDR)
case NI_Vector64_Create:
case NI_Vector128_Create:
{
- // We shouldn't handle this as an intrinsic if the
- // respective ISAs have been disabled by the user.
+ uint32_t simdLength = getSIMDVectorLength(simdSize, simdBaseType);
+ assert((sig->numArgs == 1) || (sig->numArgs == simdLength));
+
+ bool isConstant = true;
+
+ if (varTypeIsFloating(simdBaseType))
+ {
+ for (uint32_t index = 0; index < sig->numArgs; index++)
+ {
+ GenTree* arg = impStackTop(index).val;
+
+ if (!arg->IsCnsFltOrDbl())
+ {
+ isConstant = false;
+ break;
+ }
+ }
+ }
+ else
+ {
+ assert(varTypeIsIntegral(simdBaseType));
+
+ for (uint32_t index = 0; index < sig->numArgs; index++)
+ {
+ GenTree* arg = impStackTop(index).val;
+
+ if (!arg->IsIntegralConst())
+ {
+ isConstant = false;
+ break;
+ }
+ }
+ }
+
+ if (isConstant)
+ {
+ // Some of the below code assumes 8- or 16-byte SIMD types
+ assert((simdSize == 8) || (simdSize == 16));
+
+ // For create intrinsics that take 1 operand, we broadcast the value.
+ //
+ // This happens even for CreateScalarUnsafe since the upper bits are
+ // considered non-deterministic and we can therefore set them to anything.
+ //
+ // We do this as it simplifies the logic and allows certain code paths to
+ // have better codegen, such as for 0, AllBitsSet, or certain small constants.
+
+ GenTreeVecCon* vecCon = gtNewVconNode(retType, simdBaseJitType);
+
+ switch (simdBaseType)
+ {
+ case TYP_BYTE:
+ case TYP_UBYTE:
+ {
+ uint8_t cnsVal = 0;
+
+ for (uint32_t index = 0; index < sig->numArgs; index++)
+ {
+ cnsVal = static_cast<uint8_t>(impPopStack().val->AsIntConCommon()->IntegralValue());
+ vecCon->gtSimd16Val.u8[simdLength - 1 - index] = cnsVal;
+ }
+
+ if (sig->numArgs == 1)
+ {
+ for (uint32_t index = 0; index < (simdLength - 1); index++)
+ {
+ vecCon->gtSimd16Val.u8[index] = cnsVal;
+ }
+ }
+ break;
+ }
+
+ case TYP_SHORT:
+ case TYP_USHORT:
+ {
+ uint16_t cnsVal = 0;
+
+ for (uint32_t index = 0; index < sig->numArgs; index++)
+ {
+ cnsVal = static_cast<uint16_t>(impPopStack().val->AsIntConCommon()->IntegralValue());
+ vecCon->gtSimd16Val.u16[simdLength - 1 - index] = cnsVal;
+ }
+
+ if (sig->numArgs == 1)
+ {
+ for (uint32_t index = 0; index < (simdLength - 1); index++)
+ {
+ vecCon->gtSimd16Val.u16[index] = cnsVal;
+ }
+ }
+ break;
+ }
+
+ case TYP_INT:
+ case TYP_UINT:
+ {
+ uint32_t cnsVal = 0;
+
+ for (uint32_t index = 0; index < sig->numArgs; index++)
+ {
+ cnsVal = static_cast<uint32_t>(impPopStack().val->AsIntConCommon()->IntegralValue());
+ vecCon->gtSimd16Val.u32[simdLength - 1 - index] = cnsVal;
+ }
+
+ if (sig->numArgs == 1)
+ {
+ for (uint32_t index = 0; index < (simdLength - 1); index++)
+ {
+ vecCon->gtSimd16Val.u32[index] = cnsVal;
+ }
+ }
+ break;
+ }
+
+ case TYP_LONG:
+ case TYP_ULONG:
+ {
+ uint64_t cnsVal = 0;
+
+ for (uint32_t index = 0; index < sig->numArgs; index++)
+ {
+ cnsVal = static_cast<uint64_t>(impPopStack().val->AsIntConCommon()->IntegralValue());
+ vecCon->gtSimd16Val.u64[simdLength - 1 - index] = cnsVal;
+ }
+
+ if (sig->numArgs == 1)
+ {
+ for (uint32_t index = 0; index < (simdLength - 1); index++)
+ {
+ vecCon->gtSimd16Val.u64[index] = cnsVal;
+ }
+ }
+ break;
+ }
+
+ case TYP_FLOAT:
+ {
+ float cnsVal = 0;
+
+ for (uint32_t index = 0; index < sig->numArgs; index++)
+ {
+ cnsVal = static_cast<float>(impPopStack().val->AsDblCon()->gtDconVal);
+ vecCon->gtSimd16Val.f32[simdLength - 1 - index] = cnsVal;
+ }
+
+ if (sig->numArgs == 1)
+ {
+ for (uint32_t index = 0; index < (simdLength - 1); index++)
+ {
+ vecCon->gtSimd16Val.f32[index] = cnsVal;
+ }
+ }
+ break;
+ }
+
+ case TYP_DOUBLE:
+ {
+ double cnsVal = 0;
+
+ for (uint32_t index = 0; index < sig->numArgs; index++)
+ {
+ cnsVal = static_cast<double>(impPopStack().val->AsDblCon()->gtDconVal);
+ vecCon->gtSimd16Val.f64[simdLength - 1 - index] = cnsVal;
+ }
+
+ if (sig->numArgs == 1)
+ {
+ for (uint32_t index = 0; index < (simdLength - 1); index++)
+ {
+ vecCon->gtSimd16Val.f64[index] = cnsVal;
+ }
+ }
+ break;
+ }
+
+ default:
+ {
+ unreached();
+ }
+ }
+
+ retNode = vecCon;
+ break;
+ }
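A worked example with a hypothetical input: importing Vector128.Create((byte)5) takes the sig->numArgs == 1 path with simdLength == 16. The pop loop writes the constant into the last lane (simdLength - 1 - 0), and the broadcast loop then copies it into the remaining lanes:

    vecCon->gtSimd16Val.u8[15] = 5;        // written by the pop loop
    for (uint32_t index = 0; index < 15; index++)
    {
        vecCon->gtSimd16Val.u8[index] = 5; // broadcast into lanes 0..14
    }

The net effect is a single CNS_VEC node with every byte lane equal to 5, replacing a Create intrinsic plus its operand.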
IntrinsicNodeBuilder nodeBuilder(getAllocator(CMK_ASTNode), sig->numArgs);
/* isSimdAsHWIntrinsic */ false);
op1 = gtNewCastNode(TYP_INT, op1, /* isUnsigned */ true, TYP_INT);
- GenTree* zero = gtNewSimdZeroNode(simdType, simdBaseJitType, simdSize, /* isSimdAsHWIntrinsic */ false);
+ GenTree* zero = gtNewZeroConNode(simdType, simdBaseJitType);
ssize_t index = 8 / genTypeSize(simdBaseType);
op2 = gtNewSimdHWIntrinsicNode(simdType, op2, zero, gtNewIconNode(index), NI_AdvSimd_ExtractVector128,
case NI_Vector64_get_AllBitsSet:
case NI_Vector128_get_AllBitsSet:
{
- assert(!sig->hasThis());
- assert(numArgs == 0);
-
- retNode = gtNewSimdHWIntrinsicNode(retType, intrinsic, simdBaseJitType, simdSize);
+ assert(sig->numArgs == 0);
+ retNode = gtNewAllBitsSetConNode(retType, simdBaseJitType);
break;
}
case NI_Vector128_get_Zero:
{
assert(sig->numArgs == 0);
- retNode = gtNewSimdZeroNode(retType, simdBaseJitType, simdSize, /* isSimdAsHWIntrinsic */ false);
+ retNode = gtNewZeroConNode(retType, simdBaseJitType);
break;
}
// AdvSimd.ExtractVector128(vector, Vector128<T>.Zero, 8 / sizeof(T)).GetLower();
assert(numArgs == 1);
op1 = impPopStack().val;
- GenTree* zero = gtNewSimdHWIntrinsicNode(retType, NI_Vector128_get_Zero, simdBaseJitType, simdSize);
+ GenTree* zero = gtNewZeroConNode(retType, simdBaseJitType);
ssize_t index = 8 / genTypeSize(simdBaseType);
retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, zero, gtNewIconNode(index), NI_AdvSimd_ExtractVector128,
break;
}
- size_t elementSize = genTypeSize(simdBaseType);
- size_t elementCount = simdSize / elementSize;
-
- if (genTypeSize(indices->AsHWIntrinsic()->GetSimdBaseType()) != elementSize)
- {
- // TODO-ARM64-CQ: Handling reinterpreted vector constants is a bit more complex
- break;
- }
-
if (sig->numArgs == 2)
{
op2 = impSIMDPopStack(retType);
break;
}
- // mvni doesn't support the range of element types, so hard code the 'opts' value.
- case NI_Vector64_get_Zero:
- case NI_Vector64_get_AllBitsSet:
- GetEmitter()->emitIns_R_I(ins, emitSize, targetReg, 0, INS_OPTS_2S);
- break;
-
- case NI_Vector128_get_Zero:
- case NI_Vector128_get_AllBitsSet:
- GetEmitter()->emitIns_R_I(ins, emitSize, targetReg, 0, INS_OPTS_4S);
- break;
-
case NI_AdvSimd_DuplicateToVector64:
case NI_AdvSimd_DuplicateToVector128:
case NI_AdvSimd_Arm64_DuplicateToVector64:
break;
}
- case NI_Vector128_get_Zero:
- case NI_Vector256_get_Zero:
- {
- emit->emitIns_SIMD_R_R_R(ins, attr, targetReg, targetReg, targetReg);
- break;
- }
-
- case NI_Vector128_get_AllBitsSet:
- if (varTypeIsFloating(baseType) && compiler->compOpportunisticallyDependsOn(InstructionSet_AVX))
- {
- // The following corresponds to vcmptrueps pseudo-op and not available without VEX prefix.
- emit->emitIns_SIMD_R_R_R_I(ins, attr, targetReg, targetReg, targetReg, 15);
- }
- else
- {
- emit->emitIns_SIMD_R_R_R(INS_pcmpeqd, attr, targetReg, targetReg, targetReg);
- }
- break;
-
- case NI_Vector256_get_AllBitsSet:
- if (varTypeIsIntegral(baseType) && compiler->compOpportunisticallyDependsOn(InstructionSet_AVX2))
- {
- emit->emitIns_SIMD_R_R_R(ins, attr, targetReg, targetReg, targetReg);
- }
- else
- {
- assert(compiler->compIsaSupportedDebugOnly(InstructionSet_AVX));
- // The following corresponds to vcmptrueps pseudo-op.
- emit->emitIns_SIMD_R_R_R_I(INS_cmpps, attr, targetReg, targetReg, targetReg, 15);
- }
- break;
-
default:
{
unreached();
HARDWARE_INTRINSIC(Vector64, EqualsAny, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector64, ExtractMostSignificantBits, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector64, Floor, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
-HARDWARE_INTRINSIC(Vector64, get_AllBitsSet, 8, 0, {INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni}, HW_Category_Helper, HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport)
+HARDWARE_INTRINSIC(Vector64, get_AllBitsSet, 8, 0, {INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni}, HW_Category_Helper, HW_Flag_NoCodeGen|HW_Flag_SpecialImport)
HARDWARE_INTRINSIC(Vector64, get_Count, 8, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoCodeGen|HW_Flag_SpecialImport)
-HARDWARE_INTRINSIC(Vector64, get_Zero, 8, 0, {INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi}, HW_Category_Helper, HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport)
+HARDWARE_INTRINSIC(Vector64, get_Zero, 8, 0, {INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi}, HW_Category_Helper, HW_Flag_NoCodeGen|HW_Flag_SpecialImport)
HARDWARE_INTRINSIC(Vector64, GetElement, 8, 2, {INS_smov, INS_umov, INS_smov, INS_umov, INS_smov, INS_umov, INS_umov, INS_umov, INS_dup, INS_dup}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector64, GreaterThan, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector64, GreaterThanAll, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector128, EqualsAny, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector128, ExtractMostSignificantBits, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector128, Floor, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
-HARDWARE_INTRINSIC(Vector128, get_AllBitsSet, 16, 0, {INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni}, HW_Category_Helper, HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport)
+HARDWARE_INTRINSIC(Vector128, get_AllBitsSet, 16, 0, {INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni}, HW_Category_Helper, HW_Flag_NoCodeGen|HW_Flag_SpecialImport)
HARDWARE_INTRINSIC(Vector128, get_Count, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoCodeGen|HW_Flag_SpecialImport)
-HARDWARE_INTRINSIC(Vector128, get_Zero, 16, 0, {INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi}, HW_Category_Helper, HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport)
+HARDWARE_INTRINSIC(Vector128, get_Zero, 16, 0, {INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi}, HW_Category_Helper, HW_Flag_NoCodeGen|HW_Flag_SpecialImport)
HARDWARE_INTRINSIC(Vector128, GetElement, 16, 2, {INS_smov, INS_umov, INS_smov, INS_umov, INS_smov, INS_umov, INS_umov, INS_umov, INS_dup, INS_dup}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector128, GetLower, 16, 1, {INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov}, HW_Category_SIMD, HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Vector128, GetUpper, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport)
HARDWARE_INTRINSIC(Vector128, EqualsAny, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector128, ExtractMostSignificantBits, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector128, Floor, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
-HARDWARE_INTRINSIC(Vector128, get_AllBitsSet, 16, 0, {INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_cmpps, INS_cmpps}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_ReturnsPerElementMask)
+HARDWARE_INTRINSIC(Vector128, get_AllBitsSet, 16, 0, {INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_cmpps, INS_cmpps}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_ReturnsPerElementMask)
HARDWARE_INTRINSIC(Vector128, get_Count, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
-HARDWARE_INTRINSIC(Vector128, get_Zero, 16, 0, {INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_ReturnsPerElementMask)
+HARDWARE_INTRINSIC(Vector128, get_Zero, 16, 0, {INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_ReturnsPerElementMask)
HARDWARE_INTRINSIC(Vector128, GetElement, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector128, GreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_ReturnsPerElementMask)
HARDWARE_INTRINSIC(Vector128, GreaterThanAll, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector256, EqualsAny, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector256, ExtractMostSignificantBits, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector256, Floor, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
-HARDWARE_INTRINSIC(Vector256, get_AllBitsSet, 32, 0, {INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_cmpps, INS_cmpps}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_ReturnsPerElementMask)
+HARDWARE_INTRINSIC(Vector256, get_AllBitsSet, 32, 0, {INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_cmpps, INS_cmpps}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_ReturnsPerElementMask)
HARDWARE_INTRINSIC(Vector256, get_Count, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
-HARDWARE_INTRINSIC(Vector256, get_Zero, 32, 0, {INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_ReturnsPerElementMask)
+HARDWARE_INTRINSIC(Vector256, get_Zero, 32, 0, {INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_ReturnsPerElementMask)
HARDWARE_INTRINSIC(Vector256, GetElement, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector256, GetLower, 32, 1, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector256, GreaterThan, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_ReturnsPerElementMask)
case NI_Vector128_Create:
case NI_Vector256_Create:
+ case NI_Vector128_CreateScalarUnsafe:
+ case NI_Vector256_CreateScalarUnsafe:
{
+ uint32_t simdLength = getSIMDVectorLength(simdSize, simdBaseType);
+ assert((sig->numArgs == 1) || (sig->numArgs == simdLength));
+
+ bool isConstant = true;
+
+ if (varTypeIsFloating(simdBaseType))
+ {
+ for (uint32_t index = 0; index < sig->numArgs; index++)
+ {
+ GenTree* arg = impStackTop(index).val;
+
+ if (!arg->IsCnsFltOrDbl())
+ {
+ isConstant = false;
+ break;
+ }
+ }
+ }
+ else
+ {
+ assert(varTypeIsIntegral(simdBaseType));
+
+ for (uint32_t index = 0; index < sig->numArgs; index++)
+ {
+ GenTree* arg = impStackTop(index).val;
+
+ if (!arg->IsIntegralConst())
+ {
+ isConstant = false;
+ break;
+ }
+ }
+ }
+
+ if (isConstant)
+ {
+ // Some of the below code assumes 16- or 32-byte SIMD types
+ assert((simdSize == 16) || (simdSize == 32));
+
+ // For create intrinsics that take 1 operand, we broadcast the value.
+ //
+ // This happens even for CreateScalarUnsafe since the upper bits are
+ // considered non-deterministic and we can therefore set them to anything.
+ //
+ // We do this as it simplifies the logic and allows certain code paths to
+ // have better codegen, such as for 0, AllBitsSet, or certain small constants.
+
+ GenTreeVecCon* vecCon = gtNewVconNode(retType, simdBaseJitType);
+
+ switch (simdBaseType)
+ {
+ case TYP_BYTE:
+ case TYP_UBYTE:
+ {
+ uint8_t cnsVal = 0;
+
+ for (uint32_t index = 0; index < sig->numArgs; index++)
+ {
+ cnsVal = static_cast<uint8_t>(impPopStack().val->AsIntConCommon()->IntegralValue());
+ vecCon->gtSimd32Val.u8[simdLength - 1 - index] = cnsVal;
+ }
+
+ if (sig->numArgs == 1)
+ {
+ for (uint32_t index = 0; index < (simdLength - 1); index++)
+ {
+ vecCon->gtSimd32Val.u8[index] = cnsVal;
+ }
+ }
+ break;
+ }
+
+ case TYP_SHORT:
+ case TYP_USHORT:
+ {
+ uint16_t cnsVal = 0;
+
+ for (uint32_t index = 0; index < sig->numArgs; index++)
+ {
+ cnsVal = static_cast<uint16_t>(impPopStack().val->AsIntConCommon()->IntegralValue());
+ vecCon->gtSimd32Val.u16[simdLength - 1 - index] = cnsVal;
+ }
+
+ if (sig->numArgs == 1)
+ {
+ for (uint32_t index = 0; index < (simdLength - 1); index++)
+ {
+ vecCon->gtSimd32Val.u16[index] = cnsVal;
+ }
+ }
+ break;
+ }
+
+ case TYP_INT:
+ case TYP_UINT:
+ {
+ uint32_t cnsVal = 0;
+
+ for (uint32_t index = 0; index < sig->numArgs; index++)
+ {
+ cnsVal = static_cast<uint32_t>(impPopStack().val->AsIntConCommon()->IntegralValue());
+ vecCon->gtSimd32Val.u32[simdLength - 1 - index] = cnsVal;
+ }
+
+ if (sig->numArgs == 1)
+ {
+ for (uint32_t index = 0; index < (simdLength - 1); index++)
+ {
+ vecCon->gtSimd32Val.u32[index] = cnsVal;
+ }
+ }
+ break;
+ }
+
+ case TYP_LONG:
+ case TYP_ULONG:
+ {
+ uint64_t cnsVal = 0;
+
+ for (uint32_t index = 0; index < sig->numArgs; index++)
+ {
+ cnsVal = static_cast<uint64_t>(impPopStack().val->AsIntConCommon()->IntegralValue());
+ vecCon->gtSimd32Val.u64[simdLength - 1 - index] = cnsVal;
+ }
+
+ if (sig->numArgs == 1)
+ {
+ for (uint32_t index = 0; index < (simdLength - 1); index++)
+ {
+ vecCon->gtSimd32Val.u64[index] = cnsVal;
+ }
+ }
+ break;
+ }
+
+ case TYP_FLOAT:
+ {
+ float cnsVal = 0;
+
+ for (uint32_t index = 0; index < sig->numArgs; index++)
+ {
+ cnsVal = static_cast<float>(impPopStack().val->AsDblCon()->gtDconVal);
+ vecCon->gtSimd32Val.f32[simdLength - 1 - index] = cnsVal;
+ }
+
+ if (sig->numArgs == 1)
+ {
+ for (uint32_t index = 0; index < (simdLength - 1); index++)
+ {
+ vecCon->gtSimd32Val.f32[index] = cnsVal;
+ }
+ }
+ break;
+ }
+
+ case TYP_DOUBLE:
+ {
+ double cnsVal = 0;
+
+ for (uint32_t index = 0; index < sig->numArgs; index++)
+ {
+ cnsVal = static_cast<double>(impPopStack().val->AsDblCon()->gtDconVal);
+ vecCon->gtSimd32Val.f64[simdLength - 1 - index] = cnsVal;
+ }
+
+ if (sig->numArgs == 1)
+ {
+ for (uint32_t index = 0; index < (simdLength - 1); index++)
+ {
+ vecCon->gtSimd32Val.f64[index] = cnsVal;
+ }
+ }
+ break;
+ }
+
+ default:
+ {
+ unreached();
+ }
+ }
+
+ retNode = vecCon;
+ break;
+ }
+
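On xarch the same folding also covers CreateScalarUnsafe. A sketch with a hypothetical input: because the upper lanes of CreateScalarUnsafe are non-deterministic, Vector256.CreateScalarUnsafe(1.0f) may legally be folded by broadcasting the single constant across all eight float lanes:

    vecCon->gtSimd32Val.f32[7] = 1.0f;         // written by the pop loop
    for (uint32_t index = 0; index < 7; index++)
    {
        vecCon->gtSimd32Val.f32[index] = 1.0f; // any value is legal here
    }

Broadcasting rather than zero-filling keeps one code path for Create and CreateScalarUnsafe and enables the cheaper encodings called out in the comment above (zero, AllBitsSet, small constants).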
#if defined(TARGET_X86)
if (varTypeIsLong(simdBaseType))
{
break;
}
- case NI_Vector128_CreateScalarUnsafe:
- case NI_Vector256_CreateScalarUnsafe:
- {
- assert(sig->numArgs == 1);
-
-#ifdef TARGET_X86
- if (varTypeIsLong(simdBaseType))
- {
- // TODO-XARCH-CQ: It may be beneficial to emit the movq
- // instruction, which takes a 64-bit memory address and
- // works on 32-bit x86 systems.
- break;
- }
-#endif // TARGET_X86
-
- op1 = impPopStack().val;
- retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize);
- break;
- }
-
case NI_Vector128_Divide:
case NI_Vector256_Divide:
case NI_Vector128_op_Division:
case NI_Vector256_get_AllBitsSet:
{
assert(sig->numArgs == 0);
- retNode = gtNewSimdHWIntrinsicNode(retType, intrinsic, simdBaseJitType, simdSize);
+ retNode = gtNewAllBitsSetConNode(retType, simdBaseJitType);
break;
}
case NI_Vector256_get_Zero:
{
assert(sig->numArgs == 0);
- retNode = gtNewSimdZeroNode(retType, simdBaseJitType, simdSize, /* isSimdAsHWIntrinsic */ false);
+ retNode = gtNewZeroConNode(retType, simdBaseJitType);
break;
}
case NI_Vector256_Shuffle:
{
assert((sig->numArgs == 2) || (sig->numArgs == 3));
+ assert((simdSize == 16) || (simdSize == 32));
GenTree* indices = impStackTop(0).val;
size_t elementSize = genTypeSize(simdBaseType);
size_t elementCount = simdSize / elementSize;
- if (genTypeSize(indices->AsHWIntrinsic()->GetSimdBaseType()) != elementSize)
- {
- // TODO-XARCH-CQ: Handling reinterpreted vector constants is a bit more complex
- break;
- }
-
if (simdSize == 32)
{
if (!compExactlyDependsOn(InstructionSet_AVX2))
for (size_t index = 0; index < elementCount; index++)
{
- uint64_t value = indices->GetIntegralVectorConstElement(index);
+ uint64_t value = indices->GetIntegralVectorConstElement(index, simdBaseType);
if (value >= elementCount)
{
case GT_CNS_LNG:
case GT_CNS_DBL:
case GT_CNS_STR:
+ case GT_CNS_VEC:
case GT_LCL_VAR:
table->val = gtCloneExpr(tree);
break;
#endif // FEATURE_HW_INTRINSICS
{
assert(src->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_FIELD, GT_IND, GT_OBJ, GT_CALL, GT_MKREFANY, GT_RET_EXPR,
- GT_COMMA) ||
+ GT_COMMA, GT_CNS_VEC) ||
((src->TypeGet() != TYP_STRUCT) && src->OperIsSIMD()));
}
#endif // DEBUG
return (structVal->AsObj()->Addr());
}
else if (oper == GT_CALL || oper == GT_RET_EXPR || oper == GT_OBJ || oper == GT_MKREFANY ||
- structVal->OperIsSimdOrHWintrinsic())
+ structVal->OperIsSimdOrHWintrinsic() || structVal->IsCnsVec())
{
unsigned tmpNum = lvaGrabTemp(true DEBUGARG("struct address for call/obj"));
alreadyNormalized = true;
break;
+ case GT_CNS_VEC:
+ {
+ assert(varTypeIsSIMD(structVal) && (structVal->gtType == structType));
+ break;
+ }
+
#ifdef FEATURE_SIMD
case GT_SIMD:
assert(varTypeIsSIMD(structVal) && (structVal->gtType == structType));
}
#ifdef FEATURE_SIMD
- if (blockNode->OperIsSimdOrHWintrinsic())
+ if (blockNode->OperIsSimdOrHWintrinsic() || blockNode->IsCnsVec())
{
parent->AsOp()->gtOp2 = impNormStructVal(blockNode, structHnd, curLevel, forceNormalization);
alreadyNormalized = true;
int simdSize;
var_types simdType;
- NamedIntrinsic niZero;
NamedIntrinsic niEquals;
GenTree* cnsVec1 = nullptr;
simdSize = 32;
simdType = TYP_SIMD32;
- niZero = NI_Vector256_get_Zero;
niEquals = NI_Vector256_op_Equality;
// Special case: use a single vector for Length == 16
simdSize = 16;
simdType = TYP_SIMD16;
- niZero = NI_Vector128_get_Zero;
niEquals = NI_Vector128_op_Equality;
// Special case: use a single vector for Length == 8
return nullptr;
}
- GenTree* zero = gtNewSimdHWIntrinsicNode(simdType, niZero, baseType, simdSize);
+ GenTree* zero = gtNewZeroConNode(simdType, baseType);
GenTree* offset1 = gtNewIconNode(dataOffset, TYP_I_IMPL);
GenTree* offset2 = gtNewIconNode(dataOffset + len * sizeof(USHORT) - simdSize, TYP_I_IMPL);
assert(op->isContainedIntOrIImmed());
return OperandDesc(op->AsIntCon()->IconValue(), op->AsIntCon()->ImmedValNeedsReloc(compiler));
+ case GT_CNS_VEC:
+ {
+ switch (op->TypeGet())
+ {
+#if defined(FEATURE_SIMD)
+ case TYP_LONG:
+ case TYP_DOUBLE:
+ case TYP_SIMD8:
+ {
+ // TODO-1stClassStructs: do not retype SIMD nodes
+ simd8_t constValue = op->AsVecCon()->gtSimd8Val;
+ return OperandDesc(emit->emitSimd8Const(constValue));
+ }
+
+ case TYP_SIMD12:
+ case TYP_SIMD16:
+ {
+ simd16_t constValue = op->AsVecCon()->gtSimd16Val;
+ return OperandDesc(emit->emitSimd16Const(constValue));
+ }
+
+ case TYP_SIMD32:
+ {
+ simd32_t constValue = op->AsVecCon()->gtSimd32Val;
+ return OperandDesc(emit->emitSimd32Const(constValue));
+ }
+#endif // FEATURE_SIMD
+
+ default:
+ {
+ unreached();
+ }
+ }
+ }
+
default:
unreached();
}
case GT_CNS_LNG:
case GT_CNS_DBL:
case GT_CNS_STR:
+ case GT_CNS_VEC:
case GT_CLS_VAR_ADDR:
case GT_PHYSREG:
// These are all side-effect-free leaf nodes.
#ifdef FEATURE_HW_INTRINSICS
case GT_HWINTRINSIC:
- LowerHWIntrinsic(node->AsHWIntrinsic());
- break;
+ return LowerHWIntrinsic(node->AsHWIntrinsic());
#endif // FEATURE_HW_INTRINSICS
case GT_LCL_FLD:
else if (src->OperIs(GT_CNS_INT))
{
assert(src->IsIntegralConst(0) && "expected an INIT_VAL for non-zero init.");
+
#ifdef FEATURE_SIMD
if (varTypeIsSIMD(lclRegType))
{
- CorInfoType simdBaseJitType = comp->getBaseJitTypeOfSIMDLocal(lclStore);
- if (simdBaseJitType == CORINFO_TYPE_UNDEF)
- {
- // Lie about the type if we don't know/have it.
- simdBaseJitType = CORINFO_TYPE_FLOAT;
- }
- GenTreeSIMD* simdTree =
- comp->gtNewSIMDNode(lclRegType, src, SIMDIntrinsicInit, simdBaseJitType, varDsc->lvExactSize);
- BlockRange().InsertAfter(src, simdTree);
- LowerSIMD(simdTree);
- src = simdTree;
- lclStore->gtOp1 = src;
- convertToStoreObj = false;
+ GenTree* zeroCon = comp->gtNewZeroConNode(lclRegType, CORINFO_TYPE_FLOAT);
+
+ BlockRange().InsertAfter(src, zeroCon);
+ BlockRange().Remove(src);
+
+ src = zeroCon;
+ lclStore->gtOp1 = src;
}
- else
#endif // FEATURE_SIMD
- {
- convertToStoreObj = false;
- }
+
+ convertToStoreObj = false;
}
else if (src->OperIs(GT_LCL_VAR))
{
void LowerSIMD(GenTreeSIMD* simdNode);
#endif // FEATURE_SIMD
#ifdef FEATURE_HW_INTRINSICS
- void LowerHWIntrinsic(GenTreeHWIntrinsic* node);
+ GenTree* LowerHWIntrinsic(GenTreeHWIntrinsic* node);
void LowerHWIntrinsicCC(GenTreeHWIntrinsic* node, NamedIntrinsic newIntrinsicId, GenCondition condition);
+ GenTree* LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp);
void LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* node);
- void LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp);
- void LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node);
- void LowerHWIntrinsicDot(GenTreeHWIntrinsic* node);
+ GenTree* LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node);
+ GenTree* LowerHWIntrinsicDot(GenTreeHWIntrinsic* node);
#if defined(TARGET_XARCH)
void LowerFusedMultiplyAdd(GenTreeHWIntrinsic* node);
void LowerHWIntrinsicToScalar(GenTreeHWIntrinsic* node);
void LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node);
- void LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node);
+ GenTree* LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node);
GenTree* TryLowerAndOpToResetLowestSetBit(GenTreeOp* andNode);
GenTree* TryLowerAndOpToExtractLowestSetBit(GenTreeOp* andNode);
GenTree* TryLowerAndOpToAndNot(GenTreeOp* andNode);
GenTree* LowerModPow2(GenTree* node);
GenTree* LowerAddForPossibleContainment(GenTreeOp* node);
#endif // !TARGET_XARCH && !TARGET_ARM64
-
- union VectorConstant {
- int8_t i8[32];
- uint8_t u8[32];
- int16_t i16[16];
- uint16_t u16[16];
- int32_t i32[8];
- uint32_t u32[8];
- int64_t i64[4];
- uint64_t u64[4];
- float f32[8];
- double f64[4];
- };
-
- //----------------------------------------------------------------------------------------------
- // VectorConstantIsBroadcastedI64: Check N i64 elements in a constant vector for equality
- //
- // Arguments:
- // vecCns - Constant vector
- // count - Amount of i64 components to compare
- //
- // Returns:
- // true if N i64 elements of the given vector are equal
- static bool VectorConstantIsBroadcastedI64(VectorConstant& vecCns, int count)
- {
- assert(count >= 1 && count <= 4);
- for (int i = 1; i < count; i++)
- {
- if (vecCns.i64[i] != vecCns.i64[0])
- {
- return false;
- }
- }
- return true;
- }
-
- //----------------------------------------------------------------------------------------------
- // ProcessArgForHWIntrinsicCreate: Processes an argument for the Lowering::LowerHWIntrinsicCreate method
- //
- // Arguments:
- // arg - The argument to process
- // argIdx - The index of the argument being processed
- // vecCns - The vector constant being constructed
- // baseType - The base type of the vector constant
- //
- // Returns:
- // true if arg was a constant; otherwise, false
- static bool HandleArgForHWIntrinsicCreate(GenTree* arg, int argIdx, VectorConstant& vecCns, var_types baseType)
- {
- switch (baseType)
- {
- case TYP_BYTE:
- case TYP_UBYTE:
- {
- if (arg->IsCnsIntOrI())
- {
- vecCns.i8[argIdx] = static_cast<int8_t>(arg->AsIntCon()->gtIconVal);
- return true;
- }
- else
- {
- // We expect the VectorConstant to have been already zeroed
- assert(vecCns.i8[argIdx] == 0);
- }
- break;
- }
-
- case TYP_SHORT:
- case TYP_USHORT:
- {
- if (arg->IsCnsIntOrI())
- {
- vecCns.i16[argIdx] = static_cast<int16_t>(arg->AsIntCon()->gtIconVal);
- return true;
- }
- else
- {
- // We expect the VectorConstant to have been already zeroed
- assert(vecCns.i16[argIdx] == 0);
- }
- break;
- }
-
- case TYP_INT:
- case TYP_UINT:
- {
- if (arg->IsCnsIntOrI())
- {
- vecCns.i32[argIdx] = static_cast<int32_t>(arg->AsIntCon()->gtIconVal);
- return true;
- }
- else
- {
- // We expect the VectorConstant to have been already zeroed
- assert(vecCns.i32[argIdx] == 0);
- }
- break;
- }
-
- case TYP_LONG:
- case TYP_ULONG:
- {
-#if defined(TARGET_64BIT)
- if (arg->IsCnsIntOrI())
- {
- vecCns.i64[argIdx] = static_cast<int64_t>(arg->AsIntCon()->gtIconVal);
- return true;
- }
-#else
- if (arg->OperIsLong() && arg->AsOp()->gtOp1->IsCnsIntOrI() && arg->AsOp()->gtOp2->IsCnsIntOrI())
- {
- // 32-bit targets will decompose GT_CNS_LNG into two GT_CNS_INT
- // We need to reconstruct the 64-bit value in order to handle this
-
- INT64 gtLconVal = arg->AsOp()->gtOp2->AsIntCon()->gtIconVal;
- gtLconVal <<= 32;
- gtLconVal |= arg->AsOp()->gtOp1->AsIntCon()->gtIconVal;
-
- vecCns.i64[argIdx] = gtLconVal;
- return true;
- }
-#endif // TARGET_64BIT
- else
- {
- // We expect the VectorConstant to have been already zeroed
- assert(vecCns.i64[argIdx] == 0);
- }
- break;
- }
-
- case TYP_FLOAT:
- {
- if (arg->IsCnsFltOrDbl())
- {
- vecCns.f32[argIdx] = static_cast<float>(arg->AsDblCon()->gtDconVal);
- return true;
- }
- else
- {
- // We expect the VectorConstant to have been already zeroed
- // We check against the i32, rather than f32, to account for -0.0
- assert(vecCns.i32[argIdx] == 0);
- }
- break;
- }
-
- case TYP_DOUBLE:
- {
- if (arg->IsCnsFltOrDbl())
- {
- vecCns.f64[argIdx] = static_cast<double>(arg->AsDblCon()->gtDconVal);
- return true;
- }
- else
- {
- // We expect the VectorConstant to have been already zeroed
- // We check against the i64, rather than f64, to account for -0.0
- assert(vecCns.i64[argIdx] == 0);
- }
- break;
- }
-
- default:
- {
- unreached();
- }
- }
-
- return false;
- }
#endif // FEATURE_HW_INTRINSICS
//----------------------------------------------------------------------------------------------
if (!varTypeIsFloating(parentNode->TypeGet()))
{
#ifdef TARGET_ARM64
- if (parentNode->OperIsCompare() && childNode->IsFPZero())
+ if (parentNode->OperIsCompare() && childNode->IsFloatPositiveZero())
{
// Contain 0.0 constant in fcmp on arm64
// TODO: Enable for arm too (vcmp)
// Arguments:
// node - The hardware intrinsic node.
//
-void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
+GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
{
assert(node->TypeGet() != TYP_SIMD32);
// that the node is modified to either not be a HWIntrinsic node or that it is no longer
// the same intrinsic as when it came in.
- LowerHWIntrinsicCreate(node);
- assert(!node->OperIsHWIntrinsic() || (node->GetHWIntrinsicId() != intrinsicId));
- LowerNode(node);
- return;
+ return LowerHWIntrinsicCreate(node);
}
case NI_Vector64_Dot:
case NI_Vector128_Dot:
{
- LowerHWIntrinsicDot(node);
- return;
+ return LowerHWIntrinsicDot(node);
}
case NI_Vector64_op_Equality:
case NI_Vector128_op_Equality:
{
- LowerHWIntrinsicCmpOp(node, GT_EQ);
- return;
+ return LowerHWIntrinsicCmpOp(node, GT_EQ);
}
case NI_Vector64_op_Inequality:
case NI_Vector128_op_Inequality:
{
- LowerHWIntrinsicCmpOp(node, GT_NE);
- return;
+ return LowerHWIntrinsicCmpOp(node, GT_NE);
}
case NI_AdvSimd_FusedMultiplyAddScalar:
}
ContainCheckHWIntrinsic(node);
+ return node->gtNext;
}
//----------------------------------------------------------------------------------------------
// node - The hardware intrinsic node.
// cmpOp - The comparison operation, currently must be GT_EQ or GT_NE
//
-void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp)
+GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp)
{
NamedIntrinsic intrinsicId = node->GetHWIntrinsicId();
CorInfoType simdBaseJitType = node->GetSimdBaseJitType();
node->gtType = TYP_VOID;
node->ClearUnusedValue();
LowerNode(node);
- return;
+ return node->gtNext;
}
NamedIntrinsic cmpIntrinsic;
node->ClearUnusedValue();
LowerNode(node);
+ return node->gtNext;
}
//----------------------------------------------------------------------------------------------
// Arguments:
// node - The hardware intrinsic node.
//
-void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
+GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
{
NamedIntrinsic intrinsicId = node->GetHWIntrinsicId();
var_types simdType = node->TypeGet();
CorInfoType simdBaseJitType = node->GetSimdBaseJitType();
var_types simdBaseType = node->GetSimdBaseType();
unsigned simdSize = node->GetSimdSize();
- VectorConstant vecCns = {};
+ simd32_t simd32Val = {};
if ((simdSize == 8) && (simdType == TYP_DOUBLE))
{
assert(varTypeIsArithmetic(simdBaseType));
assert(simdSize != 0);
- size_t argCnt = node->GetOperandCount();
- size_t cnsArgCnt = 0;
-
- // These intrinsics are meant to set the same value to every element.
- if ((argCnt == 1) && HandleArgForHWIntrinsicCreate(node->Op(1), 0, vecCns, simdBaseType))
- {
- // Now assign the rest of the arguments.
- for (unsigned i = 1; i < simdSize / genTypeSize(simdBaseType); i++)
- {
- HandleArgForHWIntrinsicCreate(node->Op(1), i, vecCns, simdBaseType);
- }
-
- cnsArgCnt = 1;
- }
- else
- {
- for (unsigned i = 1; i <= argCnt; i++)
- {
- if (HandleArgForHWIntrinsicCreate(node->Op(i), i - 1, vecCns, simdBaseType))
- {
- cnsArgCnt++;
- }
- }
- }
- assert((argCnt == 1) || (argCnt == (simdSize / genTypeSize(simdBaseType))));
+ bool isConstant = GenTreeVecCon::IsHWIntrinsicCreateConstant(node, simd32Val);
+ size_t argCnt = node->GetOperandCount();
// Check if we have a cast that we can remove. Note that "IsValidConstForMovImm"
// will reset Op(1) if it finds such a cast, so we do not need to handle it here.
// TODO-Casts: why are casts from constants checked for here?
- if ((argCnt == cnsArgCnt) && (argCnt == 1) && IsValidConstForMovImm(node))
+ if (isConstant && (argCnt == 1) && IsValidConstForMovImm(node))
{
- // Set the cnsArgCnt to zero so we get lowered to a DuplicateToVector
+ // Set isConstant to false so we get lowered to a DuplicateToVector
// intrinsic, which will itself mark the node as contained.
- cnsArgCnt = 0;
+ isConstant = false;
}
- if (argCnt == cnsArgCnt)
+ if (isConstant)
{
+ assert((simdSize == 8) || (simdSize == 12) || (simdSize == 16));
+
for (GenTree* arg : node->Operands())
{
BlockRange().Remove(arg);
}
- assert((simdSize == 8) || (simdSize == 16));
+ GenTreeVecCon* vecCon = comp->gtNewVconNode(simdType, simdBaseJitType);
- if (VectorConstantIsBroadcastedI64(vecCns, simdSize / 8))
- {
- // If we are a single constant or if all parts are the same, we might be able to optimize
- // this even further for certain values, such as Zero or AllBitsSet.
+ vecCon->gtSimd32Val = simd32Val;
+ BlockRange().InsertBefore(node, vecCon);
- if (vecCns.i64[0] == 0)
- {
- node->ResetHWIntrinsicId((simdSize == 8) ? NI_Vector64_get_Zero : NI_Vector128_get_Zero);
- return;
- }
- else if (vecCns.i64[0] == -1)
- {
- node->ResetHWIntrinsicId((simdSize == 8) ? NI_Vector64_get_AllBitsSet : NI_Vector128_get_AllBitsSet);
- return;
- }
+ LIR::Use use;
+ if (BlockRange().TryGetUse(node, &use))
+ {
+ use.ReplaceWith(vecCon);
+ }
+ else
+ {
+ vecCon->SetUnusedValue();
}
- unsigned cnsSize = (simdSize == 12) ? 16 : simdSize;
- unsigned cnsAlign = cnsSize;
- var_types dataType = Compiler::getSIMDTypeForSize(simdSize);
-
- UNATIVE_OFFSET cnum = comp->GetEmitter()->emitDataConst(&vecCns, cnsSize, cnsAlign, dataType);
- CORINFO_FIELD_HANDLE hnd = comp->eeFindJitDataOffs(cnum);
- GenTree* clsVarAddr = new (comp, GT_CLS_VAR_ADDR) GenTreeClsVar(TYP_I_IMPL, hnd);
- BlockRange().InsertBefore(node, clsVarAddr);
-
- node->ChangeOper(GT_IND);
- node->AsOp()->gtOp1 = clsVarAddr;
-
- // TODO-ARM64-CQ: We should be able to modify at least the paths that use Insert to trivially support partial
- // vector constants. With this, we can create a constant if say 50% of the inputs are also constant and just
- // insert the non-constant values which should still allow some gains.
+ BlockRange().Remove(node);
- return;
+ return LowerNode(vecCon);
}
else if (argCnt == 1)
{
node->ChangeHWIntrinsicId((simdType == TYP_SIMD8) ? NI_AdvSimd_DuplicateToVector64
: NI_AdvSimd_DuplicateToVector128);
}
- return;
+
+ return LowerNode(node);
}
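Schematically (a sketch, not actual JIT dump syntax), the fully-constant path rewrites the LIR range in place:

    // before:
    //   t1 = CNS_INT 5
    //   t2 = HWINTRINSIC simd16 Create(t1)
    //   ...  = use(t2)
    //
    // after:
    //   t3 = CNS_VEC simd16 <5, 5, 5, 5>
    //   ...  = use(t3)

Each operand is removed from the range, the new CNS_VEC is inserted before the Create node, any use is redirected via LIR::Use::ReplaceWith (or the constant is marked unused), and lowering resumes at the constant so it gets its own containment checks.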
// We have the following (where simd is simd8 or simd16):
BlockRange().InsertBefore(opN, idx);
node->ResetHWIntrinsicId(NI_AdvSimd_Insert, comp, tmp1, idx, opN);
+
+ return LowerNode(node);
}
//----------------------------------------------------------------------------------------------
// Arguments:
// node - The hardware intrinsic node.
//
-void Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node)
+GenTree* Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node)
{
NamedIntrinsic intrinsicId = node->GetHWIntrinsicId();
CorInfoType simdBaseJitType = node->GetSimdBaseJitType();
// return tmp2.ToScalar();
node->ResetHWIntrinsicId((simdSize == 8) ? NI_Vector64_ToScalar : NI_Vector128_ToScalar, tmp2);
+
LowerNode(node);
- return;
+ return node->gtNext;
}
#endif // FEATURE_HW_INTRINSICS
const LclVarDsc* varDsc = comp->lvaGetDesc(storeLoc);
#ifdef FEATURE_SIMD
- if (varTypeIsSIMD(storeLoc))
+ if (storeLoc->TypeIs(TYP_SIMD8, TYP_SIMD12))
{
// If this is a store to memory, we can initialize a zero vector in memory from REG_ZR.
- if ((op1->IsIntegralConst(0) || op1->IsSIMDZero()) && varDsc->lvDoNotEnregister)
+ if ((op1->IsIntegralConst(0) || op1->IsVectorZero()) && varDsc->lvDoNotEnregister)
{
MakeSrcContained(storeLoc, op1);
- if (op1->IsSIMDZero())
- {
- MakeSrcContained(op1, op1->AsSIMD()->Op(1));
- }
}
return;
}
const LclVarDsc* varDsc = comp->lvaGetDesc(storeLoc);
#ifdef FEATURE_SIMD
- if (varTypeIsSIMD(storeLoc))
+ if (storeLoc->TypeIs(TYP_SIMD8, TYP_SIMD12))
{
// If this is a store to memory, we can initialize a zero vector in memory from REG_ZR.
- if ((op1->IsIntegralConst(0) || op1->IsSIMDZero()) && varDsc->lvDoNotEnregister)
+ if ((op1->IsIntegralConst(0) || op1->IsVectorZero()) && varDsc->lvDoNotEnregister)
{
// For an InitBlk we want op1 to be contained
MakeSrcContained(storeLoc, op1);
- if (op1->IsSIMDZero())
- {
- MakeSrcContained(op1, op1->gtGetOp1());
- }
}
return;
}
// Arguments:
// node - The hardware intrinsic node.
//
-void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
+GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
{
if (node->TypeGet() == TYP_SIMD12)
{
// it into 2x Vector128.Create intrinsics which themselves are also lowered into other
// intrinsics that are not Vector*.Create
- LowerHWIntrinsicCreate(node);
- assert(!node->OperIsHWIntrinsic() || (node->GetHWIntrinsicId() != intrinsicId));
- LowerNode(node);
- return;
+ return LowerHWIntrinsicCreate(node);
}
case NI_Vector128_Dot:
case NI_Vector256_Dot:
{
- LowerHWIntrinsicDot(node);
- return;
+ return LowerHWIntrinsicDot(node);
}
case NI_Vector128_GetElement:
// However, certain types may not have a direct equivalent
// in which case we specially handle them directly as GetElement
// and want to do the relevant containment checks.
- break;
+ ContainCheckHWIntrinsic(node);
}
- return;
+
+ return node->gtNext;
}
case NI_Vector128_WithElement:
case NI_Vector256_WithElement:
{
- LowerHWIntrinsicWithElement(node);
- return;
+ return LowerHWIntrinsicWithElement(node);
}
case NI_Vector128_op_Equality:
case NI_Vector256_op_Equality:
{
- LowerHWIntrinsicCmpOp(node, GT_EQ);
- return;
+ return LowerHWIntrinsicCmpOp(node, GT_EQ);
}
case NI_Vector128_op_Inequality:
case NI_Vector256_op_Inequality:
{
- LowerHWIntrinsicCmpOp(node, GT_NE);
- return;
+ return LowerHWIntrinsicCmpOp(node, GT_NE);
}
case NI_Vector128_ToScalar:
}
ContainCheckHWIntrinsic(node);
+ return node->gtNext;
}
//----------------------------------------------------------------------------------------------
// node - The hardware intrinsic node.
// cmpOp - The comparison operation, currently must be GT_EQ or GT_NE
//
-void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp)
+GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp)
{
NamedIntrinsic intrinsicId = node->GetHWIntrinsicId();
CorInfoType simdBaseJitType = node->GetSimdBaseJitType();
GenCondition cmpCnd = (cmpOp == GT_EQ) ? GenCondition::EQ : GenCondition::NE;
- if (op2->IsIntegralConstVector(0) && comp->compOpportunisticallyDependsOn(InstructionSet_SSE41))
+ if (!varTypeIsFloating(simdBaseType) && op2->IsVectorZero() &&
+ comp->compOpportunisticallyDependsOn(InstructionSet_SSE41))
{
// On SSE4.1 or higher we can optimize comparisons against zero to
// just use PTEST. We can't support it for floating-point, however,
node->Op(1) = op1;
BlockRange().Remove(op2);
- if (op2->AsMultiOp()->GetOperandCount() == 1)
- {
- // Some zero vectors are Create/Initialization nodes with a constant zero operand
- // We should also remove this to avoid dead code
- assert(op2->AsMultiOp()->Op(1)->IsIntegralConst(0));
- BlockRange().Remove(op2->AsMultiOp()->Op(1));
- }
-
LIR::Use op1Use(BlockRange(), &node->Op(1), node);
ReplaceWithLclVar(op1Use);
op1 = node->Op(1);
LowerHWIntrinsicCC(node, NI_SSE41_PTEST, cmpCnd);
}
- return;
+ return node->gtNext;
}
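// For intuition (illustrative x64 sequence, not asserted output of this change):
// PTEST sets ZF when the AND of its operands is all-zero, so "v == Vector128<int>.Zero"
// can lower to
//   ptest xmm0, xmm0
//   sete  al
// instead of a compare/movemask/compare-immediate chain.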
NamedIntrinsic cmpIntrinsic;
node->gtType = TYP_VOID;
node->ClearUnusedValue();
- LowerNode(node);
+ return LowerNode(node);
}
//----------------------------------------------------------------------------------------------
// Arguments:
// node - The hardware intrinsic node.
//
-void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
+GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
{
NamedIntrinsic intrinsicId = node->GetHWIntrinsicId();
var_types simdType = node->gtType;
CorInfoType simdBaseJitType = node->GetSimdBaseJitType();
var_types simdBaseType = node->GetSimdBaseType();
unsigned simdSize = node->GetSimdSize();
- VectorConstant vecCns = {};
+ simd32_t simd32Val = {};
if ((simdSize == 8) && (simdType == TYP_DOUBLE))
{
GenTree* tmp2 = nullptr;
GenTree* tmp3 = nullptr;
- size_t argCnt = node->GetOperandCount();
- size_t cnsArgCnt = 0;
+ bool isConstant = GenTreeVecCon::IsHWIntrinsicCreateConstant(node, simd32Val);
+ size_t argCnt = node->GetOperandCount();
- // These intrinsics are meant to set the same value to every element.
- if ((argCnt == 1) && HandleArgForHWIntrinsicCreate(node->Op(1), 0, vecCns, simdBaseType))
- {
- // Now assign the rest of the arguments.
- for (unsigned i = 1; i < simdSize / genTypeSize(simdBaseType); i++)
- {
- HandleArgForHWIntrinsicCreate(node->Op(1), i, vecCns, simdBaseType);
- }
-
- cnsArgCnt = 1;
- }
- else
+ if (isConstant)
{
- for (unsigned i = 1; i <= argCnt; i++)
- {
- if (HandleArgForHWIntrinsicCreate(node->Op(i), i - 1, vecCns, simdBaseType))
- {
- cnsArgCnt++;
- }
- }
- }
- assert((argCnt == 1) || (argCnt == (simdSize / genTypeSize(simdBaseType))));
+ assert((simdSize == 8) || (simdSize == 12) || (simdSize == 16) || (simdSize == 32));
- if (argCnt == cnsArgCnt)
- {
for (GenTree* arg : node->Operands())
{
#if !defined(TARGET_64BIT)
BlockRange().Remove(arg);
}
- assert((simdSize == 8) || (simdSize == 12) || (simdSize == 16) || (simdSize == 32));
+ GenTreeVecCon* vecCon = comp->gtNewVconNode(simdType, simdBaseJitType);
- if (((simdSize == 16) || (simdSize == 32)) && VectorConstantIsBroadcastedI64(vecCns, simdSize / 8))
- {
- // If we are a single constant or if all parts are the same, we might be able to optimize
- // this even further for certain values, such as Zero or AllBitsSet.
+ vecCon->gtSimd32Val = simd32Val;
+ BlockRange().InsertBefore(node, vecCon);
- if (vecCns.i64[0] == 0)
- {
- node->ResetHWIntrinsicId((simdSize == 16) ? NI_Vector128_get_Zero : NI_Vector256_get_Zero);
- return;
- }
- else if (vecCns.i64[0] == -1)
- {
- node->ResetHWIntrinsicId((simdSize == 16) ? NI_Vector128_get_AllBitsSet : NI_Vector256_get_AllBitsSet);
- return;
- }
+ LIR::Use use;
+ if (BlockRange().TryGetUse(node, &use))
+ {
+ use.ReplaceWith(vecCon);
+ }
+ else
+ {
+ vecCon->SetUnusedValue();
}
- unsigned cnsSize = (simdSize != 12) ? simdSize : 16;
- unsigned cnsAlign =
- (comp->compCodeOpt() != Compiler::SMALL_CODE) ? cnsSize : emitter::dataSection::MIN_DATA_ALIGN;
- var_types dataType = Compiler::getSIMDTypeForSize(simdSize);
-
- UNATIVE_OFFSET cnum = comp->GetEmitter()->emitDataConst(&vecCns, cnsSize, cnsAlign, dataType);
- CORINFO_FIELD_HANDLE hnd = comp->eeFindJitDataOffs(cnum);
- GenTree* clsVarAddr = new (comp, GT_CLS_VAR_ADDR) GenTreeClsVar(TYP_I_IMPL, hnd);
- BlockRange().InsertBefore(node, clsVarAddr);
-
- node->ChangeOper(GT_IND);
- node->AsOp()->gtOp1 = clsVarAddr;
-
- // TODO-XARCH-CQ: We should be able to modify at least the paths that use Insert to trivially support partial
- // vector constants. With this, we can create a constant if say 50% of the inputs are also constant and just
- // insert the non-constant values which should still allow some gains.
+ BlockRange().Remove(node);
- return;
+ return LowerNode(vecCon);
}
else if (argCnt == 1)
{
LowerNode(tmp1);
node->ResetHWIntrinsicId(NI_AVX2_BroadcastScalarToVector256, tmp1);
- return;
+
+ return LowerNode(node);
}
assert(comp->compIsaSupportedDebugOnly(InstructionSet_AVX));
tmp1 = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, NI_Vector128_Create, simdBaseJitType, 16);
BlockRange().InsertAfter(op1, tmp1);
- LowerNode(tmp1);
node->Op(1) = tmp1;
+ LowerNode(tmp1);
+
LIR::Use tmp1Use(BlockRange(), &node->Op(1), node);
ReplaceWithLclVar(tmp1Use);
tmp1 = node->Op(1);
tmp3 =
comp->gtNewSimdHWIntrinsicNode(TYP_SIMD32, tmp2, NI_Vector128_ToVector256Unsafe, simdBaseJitType, 16);
BlockRange().InsertAfter(tmp2, tmp3);
- LowerNode(tmp3);
idx = comp->gtNewIconNode(0x01, TYP_INT);
BlockRange().InsertAfter(tmp3, idx);
node->ResetHWIntrinsicId(NI_AVX_InsertVector128, comp, tmp3, tmp1, idx);
- return;
+ LowerNode(tmp3);
+
+ return LowerNode(node);
}
// We will be constructing the following parts:
// return Avx2.BroadcastScalarToVector128(tmp1);
node->ChangeHWIntrinsicId(NI_AVX2_BroadcastScalarToVector128, tmp1);
- return;
+
+ return LowerNode(node);
}
switch (simdBaseType)
{
// We will be constructing the following parts:
// ...
- // tmp2 = HWINTRINSIC simd16 ubyte get_Zero
+ // tmp2 = CNS_VEC simd16 0
// /--* tmp1 simd16
// +--* tmp2 simd16
// node = * HWINTRINSIC simd16 ubyte Shuffle
// var tmp2 = Vector128<byte>.Zero;
// return Ssse3.Shuffle(tmp1, tmp2);
- tmp2 =
- comp->gtNewSimdHWIntrinsicNode(simdType, NI_Vector128_get_Zero, CORINFO_TYPE_UBYTE, simdSize);
+ tmp2 = comp->gtNewZeroConNode(simdType, simdBaseJitType);
BlockRange().InsertAfter(tmp1, tmp2);
LowerNode(tmp2);
}
}
- return;
+ return LowerNode(node);
}
GenTree* op2 = node->Op(2);
GenTree* lo = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, node->GetOperandArray(), halfArgCnt,
NI_Vector128_Create, simdBaseJitType, 16);
BlockRange().InsertAfter(node->Op(halfArgCnt), lo);
- LowerNode(lo);
GenTree* hi = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, node->GetOperandArray(halfArgCnt), halfArgCnt,
NI_Vector128_Create, simdBaseJitType, 16);
BlockRange().InsertAfter(node->Op(argCnt), hi);
- LowerNode(hi);
idx = comp->gtNewIconNode(0x01, TYP_INT);
BlockRange().InsertAfter(hi, idx);
assert(argCnt >= 3);
node->ResetHWIntrinsicId(NI_AVX_InsertVector128, comp, lo, hi, idx);
- return;
+
+ LowerNode(lo);
+ LowerNode(hi);
+
+ return LowerNode(node);
}
// We will be constructing the following parts:
unreached();
}
}
+
+ return LowerNode(node);
}
//----------------------------------------------------------------------------------------------
// Arguments:
// node - The hardware intrinsic node.
//
-void Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node)
+GenTree* Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node)
{
NamedIntrinsic intrinsicId = node->GetHWIntrinsicId();
var_types simdType = node->TypeGet();
assert(node != result);
LowerNode(node);
}
+
+ return node->gtNext;
}
//----------------------------------------------------------------------------------------------
// Arguments:
// node - The hardware intrinsic node.
//
-void Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node)
+GenTree* Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node)
{
NamedIntrinsic intrinsicId = node->GetHWIntrinsicId();
CorInfoType simdBaseJitType = node->GetSimdBaseJitType();
node->SetSimdSize(16);
node->ResetHWIntrinsicId(NI_Vector128_ToScalar, tmp3);
- LowerNode(node);
- return;
+
+ return LowerNode(node);
}
case TYP_DOUBLE:
LowerNode(tmp3);
node->ResetHWIntrinsicId(NI_Vector128_ToScalar, tmp3);
- LowerNode(node);
- return;
+
+ return LowerNode(node);
}
multiply = NI_SSE_Multiply;
LowerNode(tmp3);
node->ResetHWIntrinsicId(NI_Vector128_ToScalar, tmp3);
- LowerNode(node);
- return;
+
+ return LowerNode(node);
}
multiply = NI_SSE2_Multiply;
tmp1 = comp->gtNewSimdHWIntrinsicNode(simdType, cns0, cns1, cns2, cns3, NI_Vector128_Create,
CORINFO_TYPE_INT, 16);
BlockRange().InsertAfter(cns3, tmp1);
- LowerNode(tmp1);
op1 = comp->gtNewSimdHWIntrinsicNode(simdType, op1, tmp1, NI_SSE_And, simdBaseJitType, simdSize);
BlockRange().InsertAfter(tmp1, op1);
+
+ LowerNode(tmp1);
LowerNode(op1);
}
}
// return tmp1.ToScalar();
node->ResetHWIntrinsicId(NI_Vector128_ToScalar, tmp1);
- LowerNode(node);
+
+ return LowerNode(node);
}
//----------------------------------------------------------------------------------------------
if (varTypeIsSIMD(storeLoc))
{
assert(!op1->IsCnsIntOrI());
- if (storeLoc->TypeIs(TYP_SIMD12) && op1->IsSIMDZero() && varDsc->lvDoNotEnregister)
- {
- // For a SIMD12 store we can zero from integer registers more easily.
- MakeSrcContained(storeLoc, op1);
- GenTree* constNode = op1->gtGetOp1();
- assert(constNode->OperIsConst());
- constNode->ClearContained();
- constNode->gtType = TYP_INT;
- constNode->SetOper(GT_CNS_INT);
- }
return;
}
#endif // FEATURE_SIMD
}
else
#endif // !TARGET_64BIT
- if (op1->IsFPZero() || op1->IsIntegralConst(0) ||
+ if (op1->IsFloatPositiveZero() || op1->IsIntegralConst(0) ||
(varTypeIsIntegral(simdNode->GetSimdBaseType()) && op1->IsIntegralConst(-1)))
{
MakeSrcContained(simdNode, op1);
//
canBeContained = true;
}
+ else if (node->IsCnsVec())
+ {
+ GenTreeVecCon* vecCon = node->AsVecCon();
+ canBeContained = !vecCon->IsAllBitsSet() && !vecCon->IsZero();
+ }
}
return canBeContained;
}
break;
+ case GT_CNS_VEC:
+ {
+ GenTreeVecCon* vecCon = tree->AsVecCon();
+
+ if (vecCon->IsAllBitsSet() || vecCon->IsZero())
+ {
+ // The constant can be encoded directly into the instructions.
+ }
+ else
+ {
+ // Reserve an integer register for loading the constant from memory (IF_LARGELDC).
+ buildInternalIntRegisterDefForNode(tree);
+ buildInternalRegisterUses();
+ }
+
+ srcCount = 0;
+ assert(dstCount == 1);
+
+ RefPosition* def = BuildDef(tree);
+ def->getInterval()->isConstant = true;
+ break;
+ }
+
case GT_BOX:
case GT_COMMA:
case GT_QMARK:
// First, define internal registers.
#ifdef FEATURE_SIMD
- if (varTypeIsSIMD(storeLoc) && !op1->IsCnsIntOrI() && (storeLoc->TypeGet() == TYP_SIMD12))
+ if (varTypeIsSIMD(storeLoc) && !op1->IsVectorZero() && (storeLoc->TypeGet() == TYP_SIMD12))
{
// Need an additional register to extract upper 4 bytes of Vector3,
// it has to be float for x86.
#endif // !TARGET_64BIT
else if (op1->isContained())
{
-#ifdef TARGET_XARCH
- if (varTypeIsSIMD(storeLoc))
- {
- // This is the zero-init case, and we need a register to hold the zero.
- // (On Arm64 we can just store REG_ZR.)
- assert(op1->IsSIMDZero());
- singleUseRef = BuildUse(op1->gtGetOp1());
- srcCount = 1;
- }
- else
-#endif
- {
- srcCount = 0;
- }
+ srcCount = 0;
}
else
{
#if defined(FEATURE_SIMD) && defined(TARGET_ARM64)
else if (isMultiRegArg && varTypeIsSIMD(argx->TypeGet()))
{
+ GenTree* nodeToCheck = argx;
+
+ if (nodeToCheck->OperIs(GT_OBJ))
+ {
+ nodeToCheck = nodeToCheck->AsObj()->gtOp1;
+
+ if (nodeToCheck->OperIs(GT_ADDR))
+ {
+ nodeToCheck = nodeToCheck->AsOp()->gtOp1;
+ }
+ }
+
// SIMD types do not need the optimization below due to their sizes
- if (argx->OperIsSimdOrHWintrinsic() ||
- (argx->OperIs(GT_OBJ) && argx->AsObj()->gtOp1->OperIs(GT_ADDR) &&
- argx->AsObj()->gtOp1->AsOp()->gtOp1->OperIsSimdOrHWintrinsic()))
+ if (nodeToCheck->OperIsSimdOrHWintrinsic() || nodeToCheck->IsCnsVec())
{
SetNeedsTemp(&arg);
}
return nullptr;
}
- if (src->IsCall() || src->OperIsSIMD())
+ if (src->IsCall() || src->OperIsSimdOrHWintrinsic() || src->IsCnsVec())
{
// Can't take ADDR from these nodes, let fgMorphCopyBlock handle it, #11413.
return nullptr;
noway_assert(src->IsIntegralConst(0));
noway_assert(destVarDsc != nullptr);
- src = gtNewSIMDNode(asgType, src, SIMDIntrinsicInit, destVarDsc->GetSimdBaseJitType(), size);
+ src = gtNewZeroConNode(asgType, CORINFO_TYPE_FLOAT);
}
else
#endif
*simdBaseJitTypeOut = simdNode->GetSimdBaseJitType();
}
#endif // FEATURE_HW_INTRINSICS
+ else if (obj->IsCnsVec())
+ {
+ ret = obj;
+ GenTreeVecCon* vecCon = obj->AsVecCon();
+ *simdSizeOut = vecCon->GetSimdSize();
+ *simdBaseJitTypeOut = vecCon->GetSimdBaseJitType();
+ }
}
}
if (ret != nullptr)
return node;
}
- switch (node->GetHWIntrinsicId())
+ simd32_t simd32Val = {};
+
+ if (GenTreeVecCon::IsHWIntrinsicCreateConstant(node, simd32Val))
{
- case NI_Vector128_Create:
-#if defined(TARGET_XARCH)
- case NI_Vector256_Create:
-#elif defined(TARGET_ARM64)
- case NI_Vector64_Create:
-#endif
- {
- bool hwAllArgsAreConstZero = true;
- for (GenTree* arg : node->Operands())
- {
- if (!arg->IsIntegralConst(0) && !arg->IsFloatPositiveZero())
- {
- hwAllArgsAreConstZero = false;
- break;
- }
- }
+ GenTreeVecCon* vecCon = gtNewVconNode(node->TypeGet(), node->GetSimdBaseJitType());
- if (hwAllArgsAreConstZero)
- {
- switch (node->GetHWIntrinsicId())
- {
- case NI_Vector128_Create:
- {
- node->ResetHWIntrinsicId(NI_Vector128_get_Zero);
- break;
- }
-#if defined(TARGET_XARCH)
- case NI_Vector256_Create:
- {
- node->ResetHWIntrinsicId(NI_Vector256_get_Zero);
- break;
- }
-#elif defined(TARGET_ARM64)
- case NI_Vector64_Create:
- {
- node->ResetHWIntrinsicId(NI_Vector64_get_Zero);
- break;
- }
-#endif
- default:
- unreached();
- }
- }
- break;
+ for (GenTree* arg : node->Operands())
+ {
+ DEBUG_DESTROY_NODE(arg);
}
- default:
- break;
+ vecCon->gtSimd32Val = simd32Val;
+ INDEBUG(vecCon->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
+ return vecCon;
}
return node;
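// Shape of the fold above (illustrative dump, not actual JIT output):
//   before: t1 = HWINTRINSIC simd16 int Create 1, 2, 3, 4
//   after:  t1 = CNS_VEC     simd16 <1, 2, 3, 4>
// The operand nodes are destroyed and the single constant node carries the lanes.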
GenTree* op2 = hw->Op(2);
if (!gtIsActiveCSE_Candidate(hw))
{
- if (op2->IsIntegralConstVector(0) && !gtIsActiveCSE_Candidate(op2))
+ if (op2->IsVectorZero() && !gtIsActiveCSE_Candidate(op2))
{
DEBUG_DESTROY_NODE(hw);
DEBUG_DESTROY_NODE(op2);
m_result->gtFlags |= (m_dst->gtFlags & GTF_ALL_EFFECT);
#if FEATURE_SIMD
- if (varTypeIsSIMD(m_asg) && (m_dst == m_dstLclNode))
+ if (varTypeIsSIMD(m_asg) && (m_dst == m_dstLclNode) && m_src->IsIntegralConst(0))
{
- // For a SIMD local init we need to call SIMDIntrinsic init.
- // We need this block becuase morph does not create SIMD init for promoted lclVars.
- assert(m_src->IsIntegralConst(0) || m_src->IsFPZero());
assert(m_dstVarDsc != nullptr);
- const var_types asgType = m_asg->TypeGet();
- CorInfoType simdBaseJitType = m_dstVarDsc->GetSimdBaseJitType();
- m_src = m_comp->gtNewSIMDNode(asgType, m_src, SIMDIntrinsicInit, simdBaseJitType, m_blockSize);
+ m_src = m_comp->gtNewZeroConNode(m_asg->TypeGet(), CORINFO_TYPE_FLOAT);
m_result->AsOp()->gtOp2 = m_src;
}
#endif // FEATURE_SIMD
case GT_CNS_INT:
case GT_CNS_DBL:
case GT_CNS_STR:
+ case GT_CNS_VEC:
return true; // We reach here only when CSE_CONSTS is enabled
case GT_ARR_ELEM:
addr->gtType = simdType;
use.ReplaceWith(addr);
}
- else if (addr->OperIs(GT_ADDR) && addr->AsUnOp()->gtGetOp1()->OperIsSimdOrHWintrinsic())
+ else if (addr->OperIs(GT_ADDR))
{
- // If we have IND(ADDR(SIMD)) then we can keep only the SIMD node.
- // This is a special tree created by impNormStructVal to preserve the class layout
- // needed by call morphing on an OBJ node. This information is no longer needed at
- // this point (and the address of a SIMD node can't be obtained anyway).
+ GenTree* location = addr->AsUnOp()->gtGetOp1();
- BlockRange().Remove(indir);
- BlockRange().Remove(addr);
+ if (location->OperIsSimdOrHWintrinsic() || location->IsCnsVec())
+ {
+ // If we have IND(ADDR(SIMD)) then we can keep only the SIMD node.
+ // This is a special tree created by impNormStructVal to preserve the class layout
+ // needed by call morphing on an OBJ node. This information is no longer needed at
+ // this point (and the address of a SIMD node can't be obtained anyway).
+
+ BlockRange().Remove(indir);
+ BlockRange().Remove(addr);
- use.ReplaceWith(addr->AsUnOp()->gtGetOp1());
+ use.ReplaceWith(addr->AsUnOp()->gtGetOp1());
+ }
}
#endif // FEATURE_SIMD
}
{
if (location->OperIs(GT_LCL_VAR))
{
- var_types simdType = location->TypeGet();
- GenTree* initVal = assignment->AsOp()->gtOp2;
+ var_types simdType = location->TypeGet();
+ GenTree* initVal = assignment->AsOp()->gtOp2;
+
CorInfoType simdBaseJitType = comp->getBaseJitTypeOfSIMDLocal(location);
if (simdBaseJitType == CORINFO_TYPE_UNDEF)
{
// Lie about the type if we don't know/have it.
simdBaseJitType = CORINFO_TYPE_FLOAT;
}
- GenTreeSIMD* simdTree =
- comp->gtNewSIMDNode(simdType, initVal, SIMDIntrinsicInit, simdBaseJitType, genTypeSize(simdType));
- assignment->gtOp2 = simdTree;
- value = simdTree;
- BlockRange().InsertAfter(initVal, simdTree);
+ if (initVal->IsIntegralConst(0))
+ {
+ GenTree* zeroCon = comp->gtNewZeroConNode(simdType, simdBaseJitType);
+
+ assignment->gtOp2 = zeroCon;
+ value = zeroCon;
+
+ BlockRange().InsertAfter(initVal, zeroCon);
+ BlockRange().Remove(initVal);
+ }
+ else
+ {
+ GenTreeSIMD* simdTree = comp->gtNewSIMDNode(simdType, initVal, SIMDIntrinsicInit, simdBaseJitType,
+ genTypeSize(simdType));
+ assignment->gtOp2 = simdTree;
+ value = simdTree;
+
+ BlockRange().InsertAfter(initVal, simdTree);
+ }
}
}
#endif // FEATURE_SIMD
}
#endif // FEATURE_HW_INTRINSICS
+#if defined(FEATURE_SIMD)
+ case GT_CNS_VEC:
+ {
+ GenTreeVecCon* vecCon = node->AsVecCon();
+
+ // TODO-1stClassStructs: do not retype SIMD nodes
+
+ if ((vecCon->TypeIs(TYP_I_IMPL)) && (vecCon->GetSimdSize() == TARGET_POINTER_SIZE))
+ {
+ assert(genTypeSize(vecCon->GetSimdBaseType()) == 4);
+ vecCon->gtType = TYP_SIMD8;
+ }
+ break;
+ }
+#endif // FEATURE_SIMD
+
default:
// Check that we don't have nodes not allowed in HIR here.
assert((node->DebugOperKind() & DBK_NOTHIR) == 0);
#endif
};
+struct simd8_t
+{
+ union {
+ float f32[2];
+ double f64[1];
+ int8_t i8[8];
+ int16_t i16[4];
+ int32_t i32[2];
+ int64_t i64[1];
+ uint8_t u8[8];
+ uint16_t u16[4];
+ uint32_t u32[2];
+ uint64_t u64[1];
+ };
+
+ bool operator==(const simd8_t& other) const
+ {
+ return (u64[0] == other.u64[0]);
+ }
+
+ bool operator!=(const simd8_t& other) const
+ {
+ return (u64[0] != other.u64[0]);
+ }
+};
+
+struct simd12_t
+{
+ union {
+ float f32[3];
+ int8_t i8[12];
+ int16_t i16[6];
+ int32_t i32[3];
+ uint8_t u8[12];
+ uint16_t u16[6];
+ uint32_t u32[3];
+ };
+
+ bool operator==(const simd12_t& other) const
+ {
+ return (u32[0] == other.u32[0]) && (u32[1] == other.u32[1]) && (u32[2] == other.u32[2]);
+ }
+
+ bool operator!=(const simd12_t& other) const
+ {
+ return (u32[0] != other.u32[0]) || (u32[1] != other.u32[1]) || (u32[2] != other.u32[2]);
+ }
+};
+
+struct simd16_t
+{
+ union {
+ float f32[4];
+ double f64[2];
+ int8_t i8[16];
+ int16_t i16[8];
+ int32_t i32[4];
+ int64_t i64[2];
+ uint8_t u8[16];
+ uint16_t u16[8];
+ uint32_t u32[4];
+ uint64_t u64[2];
+ simd8_t v64[2];
+ };
+
+ bool operator==(const simd16_t& other) const
+ {
+ return (u64[0] == other.u64[0]) && (u64[1] == other.u64[1]);
+ }
+
+ bool operator!=(const simd16_t& other) const
+ {
+ return (u64[0] != other.u64[0]) || (u64[1] != other.u64[1]);
+ }
+};
+
+struct simd32_t
+{
+ union {
+ float f32[8];
+ double f64[4];
+ int8_t i8[32];
+ int16_t i16[16];
+ int32_t i32[8];
+ int64_t i64[4];
+ uint8_t u8[32];
+ uint16_t u16[16];
+ uint32_t u32[8];
+ uint64_t u64[4];
+ simd8_t v64[4];
+ simd16_t v128[2];
+ };
+
+ bool operator==(const simd32_t& other) const
+ {
+ return (u64[0] == other.u64[0]) && (u64[1] == other.u64[1]) && (u64[2] == other.u64[2]) &&
+ (u64[3] == other.u64[3]);
+ }
+
+ bool operator!=(const simd32_t& other) const
+ {
+ return (u64[0] != other.u64[0]) || (u64[1] != other.u64[1]) || (u64[2] != other.u64[2]) ||
+ (u64[3] != other.u64[3]);
+ }
+};
+
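// A minimal usage sketch for the structs above (hypothetical test code; assumes
// <cassert> and <cstdint>, plus the type-punning-through-union behavior that
// MSVC/GCC/Clang all support). The unions let the same constant be written through
// one lane width and compared through another, since operator== is a bitwise
// comparison over the widest lanes.
inline void Simd16LanesSketch()
{
    simd16_t a = {};
    for (uint32_t i = 0; i < 4; i++)
    {
        a.u32[i] = 0xFFFFFFFF; // write as four 32-bit lanes
    }

    simd16_t b = {};
    b.u64[0] = UINT64_MAX; // write as two 64-bit lanes
    b.u64[1] = UINT64_MAX;

    assert(a == b); // equal bit patterns compare equal regardless of lane view
}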
#ifdef FEATURE_SIMD
#ifdef DEBUG
switch (intrinsic)
{
#if defined(TARGET_XARCH)
+ case NI_VectorT128_get_AllBitsSet:
+ case NI_VectorT256_get_AllBitsSet:
+ {
+ return gtNewAllBitsSetConNode(retType, simdBaseJitType);
+ }
+
+ case NI_VectorT128_get_Count:
+ case NI_VectorT256_get_Count:
+ {
+ GenTreeIntCon* countNode = gtNewIconNode(getSIMDVectorLength(simdSize, simdBaseType), TYP_INT);
+ countNode->gtFlags |= GTF_ICON_SIMD_COUNT;
+ return countNode;
+ }
+
case NI_Vector2_get_One:
case NI_Vector3_get_One:
case NI_Vector4_get_One:
case NI_VectorT128_get_One:
case NI_VectorT256_get_One:
{
+ GenTreeVecCon* vecCon = gtNewVconNode(retType, simdBaseJitType);
+ uint32_t simdLength = getSIMDVectorLength(simdSize, simdBaseType);
+
switch (simdBaseType)
{
case TYP_BYTE:
case TYP_UBYTE:
+ {
+ for (uint32_t index = 0; index < simdLength; index++)
+ {
+ vecCon->gtSimd32Val.u8[index] = 1;
+ }
+ break;
+ }
+
case TYP_SHORT:
case TYP_USHORT:
+ {
+ for (uint32_t index = 0; index < simdLength; index++)
+ {
+ vecCon->gtSimd32Val.u16[index] = 1;
+ }
+ break;
+ }
+
case TYP_INT:
case TYP_UINT:
{
- op1 = gtNewIconNode(1, TYP_INT);
+ for (uint32_t index = 0; index < simdLength; index++)
+ {
+ vecCon->gtSimd32Val.u32[index] = 1;
+ }
break;
}
case TYP_LONG:
case TYP_ULONG:
{
- op1 = gtNewLconNode(1);
+ for (uint32_t index = 0; index < simdLength; index++)
+ {
+ vecCon->gtSimd32Val.u64[index] = 1;
+ }
break;
}
case TYP_FLOAT:
+ {
+ for (uint32_t index = 0; index < simdLength; index++)
+ {
+ vecCon->gtSimd32Val.f32[index] = 1.0f;
+ }
+ break;
+ }
+
case TYP_DOUBLE:
{
- op1 = gtNewDconNode(1.0, simdBaseType);
+ for (uint32_t index = 0; index < simdLength; index++)
+ {
+ vecCon->gtSimd32Val.f64[index] = 1.0;
+ }
break;
}
}
}
- return gtNewSimdCreateBroadcastNode(retType, op1, simdBaseJitType, simdSize,
- /* isSimdAsHWIntrinsic */ true);
+ return vecCon;
+ }
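// Concretely (illustrative): Vector<float>.One with AVX enabled now becomes a
// single CNS_VEC whose eight f32 lanes are 1.0f, where the old code built a
// CreateBroadcast(1.0f) node and left it to later phases to expand.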
+
+ case NI_Vector2_get_Zero:
+ case NI_Vector3_get_Zero:
+ case NI_Vector4_get_Zero:
+ case NI_VectorT128_get_Zero:
+ case NI_VectorT256_get_Zero:
+ {
+ return gtNewZeroConNode(retType, simdBaseJitType);
+ }
+#elif defined(TARGET_ARM64)
+ case NI_VectorT128_get_AllBitsSet:
+ {
+ return gtNewAllBitsSetConNode(retType, simdBaseJitType);
}
case NI_VectorT128_get_Count:
- case NI_VectorT256_get_Count:
{
GenTreeIntCon* countNode = gtNewIconNode(getSIMDVectorLength(simdSize, simdBaseType), TYP_INT);
countNode->gtFlags |= GTF_ICON_SIMD_COUNT;
return countNode;
}
-#elif defined(TARGET_ARM64)
+
case NI_Vector2_get_One:
case NI_Vector3_get_One:
case NI_Vector4_get_One:
case NI_VectorT128_get_One:
{
+ GenTreeVecCon* vecCon = gtNewVconNode(retType, simdBaseJitType);
+ uint32_t simdLength = getSIMDVectorLength(simdSize, simdBaseType);
+
switch (simdBaseType)
{
case TYP_BYTE:
case TYP_UBYTE:
+ {
+ for (uint32_t index = 0; index < simdLength; index++)
+ {
+ vecCon->gtSimd16Val.u8[index] = 1;
+ }
+ break;
+ }
+
case TYP_SHORT:
case TYP_USHORT:
+ {
+ for (uint32_t index = 0; index < simdLength; index++)
+ {
+ vecCon->gtSimd16Val.u16[index] = 1;
+ }
+ break;
+ }
+
case TYP_INT:
case TYP_UINT:
{
- op1 = gtNewIconNode(1, TYP_INT);
+ for (uint32_t index = 0; index < simdLength; index++)
+ {
+ vecCon->gtSimd16Val.u32[index] = 1;
+ }
break;
}
case TYP_LONG:
case TYP_ULONG:
{
- op1 = gtNewLconNode(1);
+ for (uint32_t index = 0; index < simdLength; index++)
+ {
+ vecCon->gtSimd16Val.u64[index] = 1;
+ }
break;
}
case TYP_FLOAT:
+ {
+ for (uint32_t index = 0; index < simdLength; index++)
+ {
+ vecCon->gtSimd16Val.f32[index] = 1.0f;
+ }
+ break;
+ }
+
case TYP_DOUBLE:
{
- op1 = gtNewDconNode(1.0, simdBaseType);
+ for (uint32_t index = 0; index < simdLength; index++)
+ {
+ vecCon->gtSimd16Val.f64[index] = 1.0;
+ }
break;
}
}
}
- return gtNewSimdCreateBroadcastNode(retType, op1, simdBaseJitType, simdSize,
- /* isSimdAsHWIntrinsic */ true);
+ return vecCon;
}
- case NI_VectorT128_get_Count:
+ case NI_Vector2_get_Zero:
+ case NI_Vector3_get_Zero:
+ case NI_Vector4_get_Zero:
+ case NI_VectorT128_get_Zero:
{
- GenTreeIntCon* countNode = gtNewIconNode(getSIMDVectorLength(simdSize, simdBaseType), TYP_INT);
- countNode->gtFlags |= GTF_ICON_SIMD_COUNT;
- return countNode;
+ return gtNewZeroConNode(retType, simdBaseJitType);
}
#else
#error Unsupported platform
SIMD_AS_HWINTRINSIC_NM(Vector2, CreateBroadcast, ".ctor", 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_CreateBroadcast, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod)
SIMD_AS_HWINTRINSIC_ID(Vector2, Dot, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector64_Dot, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(Vector2, get_One, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_get_One, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
-SIMD_AS_HWINTRINSIC_ID(Vector2, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector64_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC_ID(Vector2, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(Vector2, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(Vector2, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(Vector2, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_NM(Vector3, CreateBroadcast, ".ctor", 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_CreateBroadcast, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod)
SIMD_AS_HWINTRINSIC_ID(Vector3, Dot, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_Dot, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(Vector3, get_One, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_get_One, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
-SIMD_AS_HWINTRINSIC_ID(Vector3, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC_ID(Vector3, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(Vector3, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(Vector3, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(Vector3, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_NM(Vector4, CreateBroadcast, ".ctor", 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_CreateBroadcast, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod)
SIMD_AS_HWINTRINSIC_ID(Vector4, Dot, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_Dot, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(Vector4, get_One, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_get_One, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
-SIMD_AS_HWINTRINSIC_ID(Vector4, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC_ID(Vector4, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(Vector4, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(Vector4, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(Vector4, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(VectorT128, Dot, 2, {NI_Vector128_Dot, NI_Vector128_Dot, NI_Vector128_Dot, NI_Vector128_Dot, NI_Vector128_Dot, NI_Vector128_Dot, NI_Illegal, NI_Illegal, NI_Vector128_Dot, NI_Vector128_Dot}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(VectorT128, Equals, 2, {NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_Arm64_CompareEqual, NI_AdvSimd_Arm64_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_Arm64_CompareEqual}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(VectorT128, Floor, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Floor, NI_AdvSimd_Arm64_Floor}, SimdAsHWIntrinsicFlag::None)
-SIMD_AS_HWINTRINSIC_ID(VectorT128, get_AllBitsSet, 0, {NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet}, SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC_ID(VectorT128, get_AllBitsSet, 0, {NI_VectorT128_get_AllBitsSet, NI_VectorT128_get_AllBitsSet, NI_VectorT128_get_AllBitsSet, NI_VectorT128_get_AllBitsSet, NI_VectorT128_get_AllBitsSet, NI_VectorT128_get_AllBitsSet, NI_VectorT128_get_AllBitsSet, NI_VectorT128_get_AllBitsSet, NI_VectorT128_get_AllBitsSet, NI_VectorT128_get_AllBitsSet}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(VectorT128, get_Count, 0, {NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(VectorT128, get_Item, 2, {NI_VectorT128_get_Item, NI_VectorT128_get_Item, NI_VectorT128_get_Item, NI_VectorT128_get_Item, NI_VectorT128_get_Item, NI_VectorT128_get_Item, NI_VectorT128_get_Item, NI_VectorT128_get_Item, NI_VectorT128_get_Item, NI_VectorT128_get_Item}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::BaseTypeFromThisArg)
SIMD_AS_HWINTRINSIC_ID(VectorT128, get_One, 0, {NI_VectorT128_get_One, NI_VectorT128_get_One, NI_VectorT128_get_One, NI_VectorT128_get_One, NI_VectorT128_get_One, NI_VectorT128_get_One, NI_VectorT128_get_One, NI_VectorT128_get_One, NI_VectorT128_get_One, NI_VectorT128_get_One}, SimdAsHWIntrinsicFlag::None)
-SIMD_AS_HWINTRINSIC_ID(VectorT128, get_Zero, 0, {NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero}, SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC_ID(VectorT128, get_Zero, 0, {NI_VectorT128_get_Zero, NI_VectorT128_get_Zero, NI_VectorT128_get_Zero, NI_VectorT128_get_Zero, NI_VectorT128_get_Zero, NI_VectorT128_get_Zero, NI_VectorT128_get_Zero, NI_VectorT128_get_Zero, NI_VectorT128_get_Zero, NI_VectorT128_get_Zero}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(VectorT128, GreaterThan, 2, {NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_Arm64_CompareGreaterThan, NI_AdvSimd_Arm64_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_Arm64_CompareGreaterThan}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(VectorT128, GreaterThanOrEqual, 2, {NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_Arm64_CompareGreaterThanOrEqual, NI_AdvSimd_Arm64_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_Arm64_CompareGreaterThanOrEqual}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(VectorT128, LessThan, 2, {NI_AdvSimd_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_Arm64_CompareLessThan, NI_AdvSimd_Arm64_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_Arm64_CompareLessThan}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_NM(Vector2, CreateBroadcast, ".ctor", 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_CreateBroadcast, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod)
SIMD_AS_HWINTRINSIC_ID(Vector2, Dot, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_Dot, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(Vector2, get_One, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_get_One, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
-SIMD_AS_HWINTRINSIC_ID(Vector2, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC_ID(Vector2, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(Vector2, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(Vector2, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(Vector2, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_NM(Vector3, CreateBroadcast, ".ctor", 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_CreateBroadcast, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod)
SIMD_AS_HWINTRINSIC_ID(Vector3, Dot, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_Dot, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(Vector3, get_One, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_get_One, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
-SIMD_AS_HWINTRINSIC_ID(Vector3, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC_ID(Vector3, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(Vector3, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(Vector3, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(Vector3, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_NM(Vector4, CreateBroadcast, ".ctor", 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_CreateBroadcast, NI_Illegal}, SimdAsHWIntrinsicFlag::InstanceMethod)
SIMD_AS_HWINTRINSIC_ID(Vector4, Dot, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_Dot, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(Vector4, get_One, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_get_One, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
-SIMD_AS_HWINTRINSIC_ID(Vector4, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector128_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC_ID(Vector4, get_Zero, 0, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_get_Zero, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(Vector4, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(Vector4, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(Vector4, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(VectorT128, Dot, 2, {NI_Illegal, NI_Illegal, NI_Vector128_Dot, NI_Vector128_Dot, NI_VectorT128_Dot, NI_VectorT128_Dot, NI_Illegal, NI_Illegal, NI_Vector128_Dot, NI_Vector128_Dot}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(VectorT128, Equals, 2, {NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_VectorT128_Equals, NI_VectorT128_Equals, NI_SSE_CompareEqual, NI_SSE2_CompareEqual}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(VectorT128, Floor, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE41_Floor, NI_SSE41_Floor}, SimdAsHWIntrinsicFlag::None)
-SIMD_AS_HWINTRINSIC_ID(VectorT128, get_AllBitsSet, 0, {NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet, NI_Vector128_get_AllBitsSet}, SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC_ID(VectorT128, get_AllBitsSet, 0, {NI_VectorT128_get_AllBitsSet, NI_VectorT128_get_AllBitsSet, NI_VectorT128_get_AllBitsSet, NI_VectorT128_get_AllBitsSet, NI_VectorT128_get_AllBitsSet, NI_VectorT128_get_AllBitsSet, NI_VectorT128_get_AllBitsSet, NI_VectorT128_get_AllBitsSet, NI_VectorT128_get_AllBitsSet, NI_VectorT128_get_AllBitsSet}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(VectorT128, get_Count, 0, {NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count, NI_VectorT128_get_Count}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(VectorT128, get_Item, 2, {NI_VectorT128_get_Item, NI_VectorT128_get_Item, NI_VectorT128_get_Item, NI_VectorT128_get_Item, NI_VectorT128_get_Item, NI_VectorT128_get_Item, NI_VectorT128_get_Item, NI_VectorT128_get_Item, NI_VectorT128_get_Item, NI_VectorT128_get_Item}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::BaseTypeFromThisArg)
SIMD_AS_HWINTRINSIC_ID(VectorT128, get_One, 0, {NI_VectorT128_get_One, NI_VectorT128_get_One, NI_VectorT128_get_One, NI_VectorT128_get_One, NI_VectorT128_get_One, NI_VectorT128_get_One, NI_VectorT128_get_One, NI_VectorT128_get_One, NI_VectorT128_get_One, NI_VectorT128_get_One}, SimdAsHWIntrinsicFlag::None)
-SIMD_AS_HWINTRINSIC_ID(VectorT128, get_Zero, 0, {NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero, NI_Vector128_get_Zero}, SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC_ID(VectorT128, get_Zero, 0, {NI_VectorT128_get_Zero, NI_VectorT128_get_Zero, NI_VectorT128_get_Zero, NI_VectorT128_get_Zero, NI_VectorT128_get_Zero, NI_VectorT128_get_Zero, NI_VectorT128_get_Zero, NI_VectorT128_get_Zero, NI_VectorT128_get_Zero, NI_VectorT128_get_Zero}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(VectorT128, GreaterThan, 2, {NI_SSE2_CompareGreaterThan, NI_VectorT128_GreaterThan, NI_SSE2_CompareGreaterThan, NI_VectorT128_GreaterThan, NI_SSE2_CompareGreaterThan, NI_VectorT128_GreaterThan, NI_VectorT128_GreaterThan, NI_VectorT128_GreaterThan, NI_SSE_CompareGreaterThan, NI_SSE2_CompareGreaterThan}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(VectorT128, GreaterThanOrEqual, 2, {NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_SSE_CompareGreaterThanOrEqual, NI_SSE2_CompareGreaterThanOrEqual}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(VectorT128, LessThan, 2, {NI_SSE2_CompareLessThan, NI_VectorT128_LessThan, NI_SSE2_CompareLessThan, NI_VectorT128_LessThan, NI_SSE2_CompareLessThan, NI_VectorT128_LessThan, NI_VectorT128_LessThan, NI_VectorT128_LessThan, NI_SSE_CompareLessThan, NI_SSE2_CompareLessThan}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(VectorT256, Dot, 2, {NI_Illegal, NI_Illegal, NI_Vector256_Dot, NI_Vector256_Dot, NI_Vector256_Dot, NI_Vector256_Dot, NI_Illegal, NI_Illegal, NI_Vector256_Dot, NI_Vector256_Dot}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(VectorT256, Equals, 2, {NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX_CompareEqual, NI_AVX_CompareEqual}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(VectorT256, Floor, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AVX_Floor, NI_AVX_Floor}, SimdAsHWIntrinsicFlag::None)
-SIMD_AS_HWINTRINSIC_ID(VectorT256, get_AllBitsSet, 0, {NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet, NI_Vector256_get_AllBitsSet}, SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC_ID(VectorT256, get_AllBitsSet, 0, {NI_VectorT256_get_AllBitsSet, NI_VectorT256_get_AllBitsSet, NI_VectorT256_get_AllBitsSet, NI_VectorT256_get_AllBitsSet, NI_VectorT256_get_AllBitsSet, NI_VectorT256_get_AllBitsSet, NI_VectorT256_get_AllBitsSet, NI_VectorT256_get_AllBitsSet, NI_VectorT256_get_AllBitsSet, NI_VectorT256_get_AllBitsSet}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(VectorT256, get_Count, 0, {NI_VectorT256_get_Count, NI_VectorT256_get_Count, NI_VectorT256_get_Count, NI_VectorT256_get_Count, NI_VectorT256_get_Count, NI_VectorT256_get_Count, NI_VectorT256_get_Count, NI_VectorT256_get_Count, NI_VectorT256_get_Count, NI_VectorT256_get_Count}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(VectorT256, get_Item, 2, {NI_VectorT256_get_Item, NI_VectorT256_get_Item, NI_VectorT256_get_Item, NI_VectorT256_get_Item, NI_VectorT256_get_Item, NI_VectorT256_get_Item, NI_VectorT256_get_Item, NI_VectorT256_get_Item, NI_VectorT256_get_Item, NI_VectorT256_get_Item}, SimdAsHWIntrinsicFlag::InstanceMethod | SimdAsHWIntrinsicFlag::BaseTypeFromThisArg)
SIMD_AS_HWINTRINSIC_ID(VectorT256, get_One, 0, {NI_VectorT256_get_One, NI_VectorT256_get_One, NI_VectorT256_get_One, NI_VectorT256_get_One, NI_VectorT256_get_One, NI_VectorT256_get_One, NI_VectorT256_get_One, NI_VectorT256_get_One, NI_VectorT256_get_One, NI_VectorT256_get_One}, SimdAsHWIntrinsicFlag::None)
-SIMD_AS_HWINTRINSIC_ID(VectorT256, get_Zero, 0, {NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero, NI_Vector256_get_Zero}, SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC_ID(VectorT256, get_Zero, 0, {NI_VectorT256_get_Zero, NI_VectorT256_get_Zero, NI_VectorT256_get_Zero, NI_VectorT256_get_Zero, NI_VectorT256_get_Zero, NI_VectorT256_get_Zero, NI_VectorT256_get_Zero, NI_VectorT256_get_Zero, NI_VectorT256_get_Zero, NI_VectorT256_get_Zero}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(VectorT256, GreaterThan, 2, {NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX_CompareGreaterThan, NI_AVX_CompareGreaterThan}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(VectorT256, GreaterThanOrEqual, 2, {NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_AVX_CompareGreaterThanOrEqual, NI_AVX_CompareGreaterThanOrEqual}, SimdAsHWIntrinsicFlag::None)
SIMD_AS_HWINTRINSIC_ID(VectorT256, LessThan, 2, {NI_AVX2_CompareLessThan, NI_VectorT256_LessThan, NI_AVX2_CompareLessThan, NI_VectorT256_LessThan, NI_AVX2_CompareLessThan, NI_VectorT256_LessThan, NI_AVX2_CompareLessThan, NI_VectorT256_LessThan, NI_AVX_CompareLessThan, NI_AVX_CompareLessThan}, SimdAsHWIntrinsicFlag::None)
#endif // !defined(TARGET_64BIT)
if (op1->isContained())
{
- if (op1->IsIntegralConst(0) || op1->IsFPZero())
+ if (op1->IsIntegralConst(0) || op1->IsFloatPositiveZero())
{
genSIMDZero(targetType, baseType, targetReg);
}
unsigned varNum = lclVar->GetLclNum();
assert(varNum < compiler->lvaCount);
- regNumber tmpReg = treeNode->GetSingleTempReg();
- GenTree* op1 = lclVar->gtOp1;
- if (op1->isContained())
- {
- // This is only possible for a zero-init.
- assert(op1->IsIntegralConst(0) || op1->IsSIMDZero());
- genSIMDZero(TYP_SIMD16, op1->AsSIMD()->GetSimdBaseType(), tmpReg);
-
- // store lower 8 bytes
- GetEmitter()->emitIns_S_R(ins_Store(TYP_DOUBLE), EA_8BYTE, tmpReg, varNum, offs);
-
- // Store upper 4 bytes
- GetEmitter()->emitIns_S_R(ins_Store(TYP_FLOAT), EA_4BYTE, tmpReg, varNum, offs + 8);
-
- return;
- }
+ GenTree* op1 = lclVar->gtOp1;
assert(!op1->isContained());
regNumber operandReg = genConsumeReg(op1);
// store lower 8 bytes
GetEmitter()->emitIns_S_R(ins_Store(TYP_DOUBLE), EA_8BYTE, operandReg, varNum, offs);
- // Extract upper 4-bytes from operandReg
- GetEmitter()->emitIns_R_R_I(INS_pshufd, emitActualTypeSize(TYP_SIMD16), tmpReg, operandReg, 0x02);
+ if (!op1->IsVectorZero())
+ {
+ regNumber tmpReg = treeNode->GetSingleTempReg();
+
+ // Extract upper 4-bytes from operandReg
+ GetEmitter()->emitIns_R_R_I(INS_pshufd, emitActualTypeSize(TYP_SIMD16), tmpReg, operandReg, 0x02);
+
+ operandReg = tmpReg;
+ }
// Store upper 4 bytes
- GetEmitter()->emitIns_S_R(ins_Store(TYP_FLOAT), EA_4BYTE, tmpReg, varNum, offs + 8);
+ GetEmitter()->emitIns_S_R(ins_Store(TYP_FLOAT), EA_4BYTE, operandReg, varNum, offs + 8);
}
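// Note on the 0x02 shuffle control above: pshufd's low two immediate bits pick
// which source element becomes element 0 of the destination, so 0x02 moves the
// Vector3's Z component into the lane the 4-byte store reads. When op1 is a zero
// vector every lane is identical, so operandReg can be stored directly and the
// shuffle (plus its temp register) is skipped.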
//-----------------------------------------------------------------------------
, m_floatCnsMap(nullptr)
, m_doubleCnsMap(nullptr)
, m_byrefCnsMap(nullptr)
+#if defined(FEATURE_SIMD)
+ , m_simd8CnsMap(nullptr)
+ , m_simd12CnsMap(nullptr)
+ , m_simd16CnsMap(nullptr)
+ , m_simd32CnsMap(nullptr)
+#endif // FEATURE_SIMD
, m_VNFunc0Map(nullptr)
, m_VNFunc1Map(nullptr)
, m_VNFunc2Map(nullptr)
// Since this value is always the same, we represent it as a static.
m_defs = &s_specialRefConsts[0];
break; // Nothing to do.
+
+#if defined(FEATURE_SIMD)
+ case TYP_SIMD8:
+ {
+ m_defs = new (alloc) Alloc<TYP_SIMD8>::Type[ChunkSize];
+ break;
+ }
+
+ case TYP_SIMD12:
+ {
+ m_defs = new (alloc) Alloc<TYP_SIMD12>::Type[ChunkSize];
+ break;
+ }
+
+ case TYP_SIMD16:
+ {
+ m_defs = new (alloc) Alloc<TYP_SIMD16>::Type[ChunkSize];
+ break;
+ }
+
+ case TYP_SIMD32:
+ {
+ m_defs = new (alloc) Alloc<TYP_SIMD32>::Type[ChunkSize];
+ break;
+ }
+#endif // FEATURE_SIMD
+
default:
assert(false); // Should not reach here.
}
return VnForConst(cnsVal, GetByrefCnsMap(), TYP_BYREF);
}
+#if defined(FEATURE_SIMD)
+ValueNum ValueNumStore::VNForSimd8Con(simd8_t cnsVal)
+{
+ return VnForConst(cnsVal, GetSimd8CnsMap(), TYP_SIMD8);
+}
+
+ValueNum ValueNumStore::VNForSimd12Con(simd12_t cnsVal)
+{
+ return VnForConst(cnsVal, GetSimd12CnsMap(), TYP_SIMD12);
+}
+
+ValueNum ValueNumStore::VNForSimd16Con(simd16_t cnsVal)
+{
+ return VnForConst(cnsVal, GetSimd16CnsMap(), TYP_SIMD16);
+}
+
+ValueNum ValueNumStore::VNForSimd32Con(simd32_t cnsVal)
+{
+ return VnForConst(cnsVal, GetSimd32CnsMap(), TYP_SIMD32);
+}
+#endif // FEATURE_SIMD
+
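// A minimal sketch of the property these maps provide (hypothetical test code;
// 'vnStore' stands in for a live ValueNumStore). VnForConst hash-conses each
// constant, so bitwise-identical vector constants map to a single ValueNum and
// VN-based optimizations can recognize them as equal.
void VNForSimdConSketch(ValueNumStore* vnStore)
{
    simd8_t val = {};
    val.f32[0] = 1.0f;
    val.f32[1] = 2.0f;

    ValueNum vn1 = vnStore->VNForSimd8Con(val);
    ValueNum vn2 = vnStore->VNForSimd8Con(val);
    assert(vn1 == vn2); // same bits => same value number
}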
ValueNum ValueNumStore::VNForCastOper(var_types castToType, bool srcIsUnsigned)
{
assert(castToType != TYP_STRUCT);
#ifdef FEATURE_SIMD
case TYP_SIMD8:
+ {
+ return VNForSimd8Con({});
+ }
+
case TYP_SIMD12:
+ {
+ return VNForSimd12Con({});
+ }
+
case TYP_SIMD16:
+ {
+ return VNForSimd16Con({});
+ }
+
case TYP_SIMD32:
- // We do not have the base type - a "fake" one will have to do. Note that we cannot
- // use the HWIntrinsic "get_Zero" VNFunc here. This is because they only represent
- // "fully zeroed" vectors, and here we may be loading one from memory, leaving upper
- // bits undefined. So using "SIMD_Init" is "the next best thing", so to speak, and
- // TYP_FLOAT is one of the more popular base types, so that's why we use it here.
- return VNForFunc(typ, VNF_SIMD_Init, VNForFloatCon(0), VNForSimdType(genTypeSize(typ), CORINFO_TYPE_FLOAT));
+ {
+ return VNForSimd32Con({});
+ }
#endif // FEATURE_SIMD
// These should be unreached.
return ConstantValue<float>(argVN);
}
+#if defined(FEATURE_SIMD)
+// Given a simd8 constant value number, return its value as a simd8_t.
+//
+simd8_t ValueNumStore::GetConstantSimd8(ValueNum argVN)
+{
+ assert(IsVNConstant(argVN));
+ assert(TypeOfVN(argVN) == TYP_SIMD8);
+
+ return ConstantValue<simd8_t>(argVN);
+}
+
+// Given a simd12 constant value number, return its value as a simd12_t.
+//
+simd12_t ValueNumStore::GetConstantSimd12(ValueNum argVN)
+{
+ assert(IsVNConstant(argVN));
+ assert(TypeOfVN(argVN) == TYP_SIMD12);
+
+ return ConstantValue<simd12_t>(argVN);
+}
+
+// Given a simd16 constant value number, return its value as a simd16_t.
+//
+simd16_t ValueNumStore::GetConstantSimd16(ValueNum argVN)
+{
+ assert(IsVNConstant(argVN));
+ assert(TypeOfVN(argVN) == TYP_SIMD16);
+
+ return ConstantValue<simd16_t>(argVN);
+}
+
+// Given a simd32 constant value number, return its value as a simd32_t.
+//
+simd32_t ValueNumStore::GetConstantSimd32(ValueNum argVN)
+{
+ assert(IsVNConstant(argVN));
+ assert(TypeOfVN(argVN) == TYP_SIMD32);
+
+ return ConstantValue<simd32_t>(argVN);
+}
+#endif // FEATURE_SIMD
+
// Compute the proper value number when the VNFunc has all constant arguments
// This essentially performs constant folding at value numbering time
//
}
}
-//------------------------------------------------------------------------
-// IsVNVectorZero: Checks if the value number is a Vector*_get_Zero.
-//
-// Arguments:
-// vn - The value number.
-//
-// Return Value:
-// true - The value number is a Vector*_get_Zero.
-// false - The value number is not a Vector*_get_Zero.
-bool ValueNumStore::IsVNVectorZero(ValueNum vn)
-{
-#ifdef FEATURE_SIMD
- VNSimdTypeInfo vnInfo = GetVectorZeroSimdTypeOfVN(vn);
- // Check the size to see if we got a valid SIMD type.
- // '0' means it is not valid.
- if (vnInfo.m_simdSize != 0)
- {
- return true;
- }
-#endif
- return false;
-}
-
-#ifdef FEATURE_SIMD
-//------------------------------------------------------------------------
-// GetSimdTypeOfVN: Returns the SIMD type information based on the given value number.
-//
-// Arguments:
-// vn - The value number.
-//
-// Return Value:
-// Returns VNSimdTypeInfo(0, CORINFO_TYPE_UNDEF) if the given value number has not been given a SIMD type.
-VNSimdTypeInfo ValueNumStore::GetSimdTypeOfVN(ValueNum vn)
-{
- VNSimdTypeInfo vnInfo;
-
- // The SIMD type is encoded as a function,
- // even though it is not actually a function.
- VNFuncApp simdType;
- if (GetVNFunc(vn, &simdType) && simdType.m_func == VNF_SimdType)
- {
- assert(simdType.m_arity == 2);
- vnInfo.m_simdSize = GetConstantInt32(simdType.m_args[0]);
- vnInfo.m_simdBaseJitType = (CorInfoType)GetConstantInt32(simdType.m_args[1]);
- return vnInfo;
- }
-
- vnInfo.m_simdSize = 0;
- vnInfo.m_simdBaseJitType = CORINFO_TYPE_UNDEF;
- return vnInfo;
-}
-
-//------------------------------------------------------------------------
-// GetVectorZeroSimdTypeOfVN: Returns the SIMD type information based on the given value number
-// if it's Vector*_get_Zero.
-//
-// Arguments:
-// vn - The value number.
-//
-// Return Value:
-// Returns VNSimdTypeInfo(0, CORINFO_TYPE_UNDEF) if the given value number has not been given a SIMD type
-// for a Vector*_get_Zero value number.
-//
-// REVIEW: Vector*_get_Zero nodes in VN currently encode their SIMD type for
-// conservative reasons. In the future, it might be possible not do this
-// on most platforms since Vector*_get_Zero's base type does not matter.
-VNSimdTypeInfo ValueNumStore::GetVectorZeroSimdTypeOfVN(ValueNum vn)
-{
-#ifdef FEATURE_HW_INTRINSICS
- // REVIEW: This will only return true if Vector*_get_Zero encodes
- // its base type as an argument. On XARCH there may be
- // scenarios where Vector*_get_Zero will not encode its base type;
- // therefore, returning false here.
- // Vector*_get_Zero does not have any arguments,
- // but its SIMD type is encoded as an argument.
- VNFuncApp funcApp;
- if (GetVNFunc(vn, &funcApp) && funcApp.m_arity == 1)
- {
- switch (funcApp.m_func)
- {
- case VNF_HWI_Vector128_get_Zero:
-#if defined(TARGET_XARCH)
- case VNF_HWI_Vector256_get_Zero:
-#elif defined(TARGET_ARM64)
- case VNF_HWI_Vector64_get_Zero:
-#endif
- {
- return GetSimdTypeOfVN(funcApp.m_args[0]);
- }
-
- default:
- {
- VNSimdTypeInfo vnInfo;
- vnInfo.m_simdSize = 0;
- vnInfo.m_simdBaseJitType = CORINFO_TYPE_UNDEF;
- return vnInfo;
- }
- }
- }
-#endif
-
- VNSimdTypeInfo vnInfo;
- vnInfo.m_simdSize = 0;
- vnInfo.m_simdBaseJitType = CORINFO_TYPE_UNDEF;
- return vnInfo;
-}
-#endif // FEATURE_SIMD
-
bool ValueNumStore::IsVNInt32Constant(ValueNum vn)
{
if (!IsVNConstant(vn))
#ifdef FEATURE_SIMD
case TYP_SIMD8:
+ {
+ simd8_t cnsVal = GetConstantSimd8(vn);
+ printf("Simd8Cns[0x%08x, 0x%08x]", cnsVal.u32[0], cnsVal.u32[1]);
+ break;
+ }
+
case TYP_SIMD12:
+ {
+ simd12_t cnsVal = GetConstantSimd12(vn);
+ printf("Simd12Cns[0x%08x, 0x%08x, 0x%08x]", cnsVal.u32[0], cnsVal.u32[1], cnsVal.u32[2]);
+ break;
+ }
+
case TYP_SIMD16:
+ {
+ simd16_t cnsVal = GetConstantSimd16(vn);
+ printf("Simd16Cns[0x%08x, 0x%08x, 0x%08x, 0x%08x]", cnsVal.u32[0], cnsVal.u32[1], cnsVal.u32[2],
+ cnsVal.u32[3]);
+ break;
+ }
+
case TYP_SIMD32:
{
- // Only the zero constant is currently allowed for SIMD types
- //
- INT64 val = ConstantValue<INT64>(vn);
- assert(val == 0);
- printf(" 0");
+ simd32_t cnsVal = GetConstantSimd32(vn);
+ printf("Simd32Cns[0x%016llx, 0x%016llx, 0x%016llx, 0x%016llx]", cnsVal.u64[0], cnsVal.u64[1],
+ cnsVal.u64[2], cnsVal.u64[3]);
+ break;
}
- break;
#endif // FEATURE_SIMD
// These should be unreached.
tree->gtVNPair.SetBoth(
vnStore->VNForHandle(ssize_t(tree->AsIntConCommon()->IconValue()), tree->GetIconHandleFlag()));
}
+#ifdef FEATURE_SIMD
+ else if (tree->IsCnsVec())
+ {
+ // TODO-1stClassStructs: do not retype SIMD nodes
+ assert(varTypeIsLong(typ));
+
+ simd8_t simd8Val = tree->AsVecCon()->gtSimd8Val;
+ tree->gtVNPair.SetBoth(vnStore->VNForSimd8Con(simd8Val));
+ }
+#endif // FEATURE_SIMD
else if ((typ == TYP_LONG) || (typ == TYP_ULONG))
{
tree->gtVNPair.SetBoth(vnStore->VNForLongCon(INT64(tree->AsIntConCommon()->LngValue())));
#ifdef FEATURE_SIMD
case TYP_SIMD8:
+ {
+ simd8_t simd8Val;
+
+ // TODO-Cleanup: delete SIMD-typed CNS_INT nodes
+ if (tree->IsIntegralConst(0))
+ {
+ simd8Val = {};
+ }
+ else
+ {
+ simd8Val = tree->AsVecCon()->gtSimd8Val;
+ }
+
+ tree->gtVNPair.SetBoth(vnStore->VNForSimd8Con(simd8Val));
+ break;
+ }
+
case TYP_SIMD12:
+ {
+ simd12_t simd12Val;
+
+ // TODO-Cleanup: delete SIMD-typed CNS_INT nodes
+ if (tree->IsIntegralConst(0))
+ {
+ simd12Val = {};
+ }
+ else
+ {
+ simd12Val = tree->AsVecCon()->gtSimd12Val;
+ }
+
+ tree->gtVNPair.SetBoth(vnStore->VNForSimd12Con(simd12Val));
+ break;
+ }
+
case TYP_SIMD16:
+ {
+ simd16_t simd16Val;
+
+ // TODO-Cleanup: delete SIMD-typed CNS_INT nodes
+ if (tree->IsIntegralConst(0))
+ {
+ simd16Val = {};
+ }
+ else
+ {
+ simd16Val = tree->AsVecCon()->gtSimd16Val;
+ }
+
+ tree->gtVNPair.SetBoth(vnStore->VNForSimd16Con(simd16Val));
+ break;
+ }
+
case TYP_SIMD32:
+ {
+ simd32_t simd32Val;
-#ifdef TARGET_64BIT
- // Only the zero constant is currently allowed for SIMD types
- //
- assert(tree->AsIntConCommon()->LngValue() == 0);
- tree->gtVNPair.SetBoth(vnStore->VNForLongCon(tree->AsIntConCommon()->LngValue()));
-#else // 32BIT
- assert(tree->AsIntConCommon()->IconValue() == 0);
- tree->gtVNPair.SetBoth(vnStore->VNForIntCon(int(tree->AsIntConCommon()->IconValue())));
-#endif
+ // TODO-Cleanup: delete SIMD-typed CNS_INT nodes
+ if (tree->IsIntegralConst(0))
+ {
+ simd32Val = {};
+ }
+ else
+ {
+ simd32Val = tree->AsVecCon()->gtSimd32Val;
+ }
+
+ tree->gtVNPair.SetBoth(vnStore->VNForSimd32Con(simd32Val));
break;
+ }
#endif // FEATURE_SIMD
case TYP_FLOAT:
+ {
tree->gtVNPair.SetBoth(vnStore->VNForFloatCon((float)tree->AsDblCon()->gtDconVal));
break;
+ }
+
case TYP_DOUBLE:
- tree->gtVNPair.SetBoth(vnStore->VNForDoubleCon(tree->AsDblCon()->gtDconVal));
+ {
+#ifdef FEATURE_SIMD
+ if (tree->IsCnsVec())
+ {
+ // TODO-1stClassStructs: do not retype SIMD nodes
+ simd8_t simd8Val = tree->AsVecCon()->gtSimd8Val;
+ tree->gtVNPair.SetBoth(vnStore->VNForSimd8Con(simd8Val));
+ }
+ else
+#endif // FEATURE_SIMD
+ {
+ tree->gtVNPair.SetBoth(vnStore->VNForDoubleCon(tree->AsDblCon()->gtDconVal));
+ }
break;
+ }
+
case TYP_REF:
if (tree->AsIntConCommon()->IconValue() == 0)
{
double GetConstantDouble(ValueNum argVN);
float GetConstantSingle(ValueNum argVN);
+#if defined(FEATURE_SIMD)
+ simd8_t GetConstantSimd8(ValueNum argVN);
+ simd12_t GetConstantSimd12(ValueNum argVN);
+ simd16_t GetConstantSimd16(ValueNum argVN);
+ simd32_t GetConstantSimd32(ValueNum argVN);
+#endif // FEATURE_SIMD
+
// Assumes that all the ValueNum arguments of each of these functions have been shown to represent constants.
// Assumes that "vnf" is an operator of the appropriate arity (unary for the first, binary for the second).
// Assumes that "CanEvalForConstantArgs(vnf)" is true.
ValueNum VNForDoubleCon(double cnsVal);
ValueNum VNForByrefCon(target_size_t byrefVal);
+#if defined(FEATURE_SIMD)
+ ValueNum VNForSimd8Con(simd8_t cnsVal);
+ ValueNum VNForSimd12Con(simd12_t cnsVal);
+ ValueNum VNForSimd16Con(simd16_t cnsVal);
+ ValueNum VNForSimd32Con(simd32_t cnsVal);
+#endif // FEATURE_SIMD
+
#ifdef TARGET_64BIT
ValueNum VNForPtrSizeIntCon(INT64 cnsVal)
{
// Returns true iff the VN represents a (non-handle) constant.
bool IsVNConstant(ValueNum vn);
- bool IsVNVectorZero(ValueNum vn);
-
-#ifdef FEATURE_SIMD
- VNSimdTypeInfo GetSimdTypeOfVN(ValueNum vn);
-
- VNSimdTypeInfo GetVectorZeroSimdTypeOfVN(ValueNum vn);
-#endif
-
// Returns true iff the VN represents an integer constant.
bool IsVNInt32Constant(ValueNum vn);
return m_byrefCnsMap;
}
+#if defined(FEATURE_SIMD)
+ struct Simd8PrimitiveKeyFuncs : public JitKeyFuncsDefEquals<simd8_t>
+ {
+ static bool Equals(simd8_t x, simd8_t y)
+ {
+ return x == y;
+ }
+
+ static unsigned GetHashCode(const simd8_t val)
+ {
+ unsigned hash = 0;
+
+ hash = static_cast<unsigned>(hash ^ val.u32[0]);
+ hash = static_cast<unsigned>(hash ^ val.u32[1]);
+
+ return hash;
+ }
+ };
+
+ typedef VNMap<simd8_t, Simd8PrimitiveKeyFuncs> Simd8ToValueNumMap;
+ Simd8ToValueNumMap* m_simd8CnsMap;
+ Simd8ToValueNumMap* GetSimd8CnsMap()
+ {
+ if (m_simd8CnsMap == nullptr)
+ {
+ m_simd8CnsMap = new (m_alloc) Simd8ToValueNumMap(m_alloc);
+ }
+ return m_simd8CnsMap;
+ }
+
+ struct Simd12PrimitiveKeyFuncs : public JitKeyFuncsDefEquals<simd12_t>
+ {
+ static bool Equals(simd12_t x, simd12_t y)
+ {
+ return x == y;
+ }
+
+ static unsigned GetHashCode(const simd12_t val)
+ {
+ unsigned hash = 0;
+
+ hash = static_cast<unsigned>(hash ^ val.u32[0]);
+ hash = static_cast<unsigned>(hash ^ val.u32[1]);
+ hash = static_cast<unsigned>(hash ^ val.u32[2]);
+
+ return hash;
+ }
+ };
+
+ typedef VNMap<simd12_t, Simd12PrimitiveKeyFuncs> Simd12ToValueNumMap;
+ Simd12ToValueNumMap* m_simd12CnsMap;
+ Simd12ToValueNumMap* GetSimd12CnsMap()
+ {
+ if (m_simd12CnsMap == nullptr)
+ {
+ m_simd12CnsMap = new (m_alloc) Simd12ToValueNumMap(m_alloc);
+ }
+ return m_simd12CnsMap;
+ }
+
+ struct Simd16PrimitiveKeyFuncs : public JitKeyFuncsDefEquals<simd16_t>
+ {
+ static bool Equals(simd16_t x, simd16_t y)
+ {
+ return x == y;
+ }
+
+ static unsigned GetHashCode(const simd16_t val)
+ {
+ unsigned hash = 0;
+
+ hash = static_cast<unsigned>(hash ^ val.u32[0]);
+ hash = static_cast<unsigned>(hash ^ val.u32[1]);
+ hash = static_cast<unsigned>(hash ^ val.u32[2]);
+ hash = static_cast<unsigned>(hash ^ val.u32[3]);
+
+ return hash;
+ }
+ };
+
+ typedef VNMap<simd16_t, Simd16PrimitiveKeyFuncs> Simd16ToValueNumMap;
+ Simd16ToValueNumMap* m_simd16CnsMap;
+ Simd16ToValueNumMap* GetSimd16CnsMap()
+ {
+ if (m_simd16CnsMap == nullptr)
+ {
+ m_simd16CnsMap = new (m_alloc) Simd16ToValueNumMap(m_alloc);
+ }
+ return m_simd16CnsMap;
+ }
+
+ struct Simd32PrimitiveKeyFuncs : public JitKeyFuncsDefEquals<simd32_t>
+ {
+ static bool Equals(simd32_t x, simd32_t y)
+ {
+ return x == y;
+ }
+
+ static unsigned GetHashCode(const simd32_t val)
+ {
+ unsigned hash = 0;
+
+ hash = static_cast<unsigned>(hash ^ val.u32[0]);
+ hash = static_cast<unsigned>(hash ^ val.u32[1]);
+ hash = static_cast<unsigned>(hash ^ val.u32[2]);
+ hash = static_cast<unsigned>(hash ^ val.u32[3]);
+ hash = static_cast<unsigned>(hash ^ val.u32[4]);
+ hash = static_cast<unsigned>(hash ^ val.u32[5]);
+ hash = static_cast<unsigned>(hash ^ val.u32[6]);
+ hash = static_cast<unsigned>(hash ^ val.u32[7]);
+
+ return hash;
+ }
+ };
+
+ typedef VNMap<simd32_t, Simd32PrimitiveKeyFuncs> Simd32ToValueNumMap;
+ Simd32ToValueNumMap* m_simd32CnsMap;
+ Simd32ToValueNumMap* GetSimd32CnsMap()
+ {
+ if (m_simd32CnsMap == nullptr)
+ {
+ m_simd32CnsMap = new (m_alloc) Simd32ToValueNumMap(m_alloc);
+ }
+ return m_simd32CnsMap;
+ }
+#endif // FEATURE_SIMD
+
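// A minimal sketch of how the VNForSimd*Con functions declared earlier would
// presumably be implemented on top of these maps, assuming they reuse the
// same VnForConst helper that backs the scalar constant pools:
//
//     ValueNum ValueNumStore::VNForSimd8Con(simd8_t cnsVal)
//     {
//         // Interning: equal bit patterns always map to the same ValueNum.
//         return VnForConst(cnsVal, GetSimd8CnsMap(), TYP_SIMD8);
//     }
//
// Note that GetHashCode simply XOR-folds the 32-bit lanes, so constants whose
// lanes are permutations of one another hash identically; Equals still
// distinguishes them, so the collision only costs an extra probe.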
template <size_t NumArgs>
struct VNDefFuncAppKeyFuncs : public JitKeyFuncsDefEquals<VNDefFuncApp<NumArgs>>
{
typedef INT64 Type;
typedef double Lang;
};
+
+#if defined(FEATURE_SIMD)
+template <>
+struct ValueNumStore::VarTypConv<TYP_SIMD8>
+{
+ typedef simd8_t Type;
+ typedef simd8_t Lang;
+};
+template <>
+struct ValueNumStore::VarTypConv<TYP_SIMD12>
+{
+ typedef simd12_t Type;
+ typedef simd12_t Lang;
+};
+template <>
+struct ValueNumStore::VarTypConv<TYP_SIMD16>
+{
+ typedef simd16_t Type;
+ typedef simd16_t Lang;
+};
+template <>
+struct ValueNumStore::VarTypConv<TYP_SIMD32>
+{
+ typedef simd32_t Type;
+ typedef simd32_t Lang;
+};
+#endif // FEATURE_SIMD
+
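// With Type and Lang both mapped to the simd types, the generic accessor used
// by constant propagation earlier in this change reads a vector constant with
// no coercion step. A hedged usage sketch (ReadVectorConstant is a
// hypothetical helper name):
inline simd16_t ReadVectorConstant(ValueNumStore* vnStore, ValueNum vnCns)
{
    // vnCns must be a TYP_SIMD16 constant; the ConstantValueInternal
    // specialization below asserts exactly that.
    return vnStore->ConstantValue<simd16_t>(vnCns);
}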
template <>
struct ValueNumStore::VarTypConv<TYP_BYREF>
{
}
}
+#if defined(FEATURE_SIMD)
+template <>
+FORCEINLINE simd8_t ValueNumStore::SafeGetConstantValue<simd8_t>(Chunk* c, unsigned offset)
+{
+ assert(c->m_typ == TYP_SIMD8);
+ return reinterpret_cast<VarTypConv<TYP_SIMD8>::Lang*>(c->m_defs)[offset];
+}
+
+template <>
+FORCEINLINE simd12_t ValueNumStore::SafeGetConstantValue<simd12_t>(Chunk* c, unsigned offset)
+{
+ assert(c->m_typ == TYP_SIMD12);
+ return reinterpret_cast<VarTypConv<TYP_SIMD12>::Lang*>(c->m_defs)[offset];
+}
+
+template <>
+FORCEINLINE simd16_t ValueNumStore::SafeGetConstantValue<simd16_t>(Chunk* c, unsigned offset)
+{
+ assert(c->m_typ == TYP_SIMD16);
+ return reinterpret_cast<VarTypConv<TYP_SIMD16>::Lang*>(c->m_defs)[offset];
+}
+
+template <>
+FORCEINLINE simd32_t ValueNumStore::SafeGetConstantValue<simd32_t>(Chunk* c, unsigned offset)
+{
+ assert(c->m_typ == TYP_SIMD32);
+ return reinterpret_cast<VarTypConv<TYP_SIMD32>::Lang*>(c->m_defs)[offset];
+}
+
+template <>
+FORCEINLINE simd8_t ValueNumStore::ConstantValueInternal<simd8_t>(ValueNum vn DEBUGARG(bool coerce))
+{
+ Chunk* c = m_chunks.GetNoExpand(GetChunkNum(vn));
+ assert(c->m_attribs == CEA_Const);
+
+ unsigned offset = ChunkOffset(vn);
+
+ assert(c->m_typ == TYP_SIMD8);
+ assert(!coerce);
+
+ return SafeGetConstantValue<simd8_t>(c, offset);
+}
+
+template <>
+FORCEINLINE simd12_t ValueNumStore::ConstantValueInternal<simd12_t>(ValueNum vn DEBUGARG(bool coerce))
+{
+ Chunk* c = m_chunks.GetNoExpand(GetChunkNum(vn));
+ assert(c->m_attribs == CEA_Const);
+
+ unsigned offset = ChunkOffset(vn);
+
+ assert(c->m_typ == TYP_SIMD12);
+ assert(!coerce);
+
+ return SafeGetConstantValue<simd12_t>(c, offset);
+}
+
+template <>
+FORCEINLINE simd16_t ValueNumStore::ConstantValueInternal<simd16_t>(ValueNum vn DEBUGARG(bool coerce))
+{
+ Chunk* c = m_chunks.GetNoExpand(GetChunkNum(vn));
+ assert(c->m_attribs == CEA_Const);
+
+ unsigned offset = ChunkOffset(vn);
+
+ assert(c->m_typ == TYP_SIMD16);
+ assert(!coerce);
+
+ return SafeGetConstantValue<simd16_t>(c, offset);
+}
+
+template <>
+FORCEINLINE simd32_t ValueNumStore::ConstantValueInternal<simd32_t>(ValueNum vn DEBUGARG(bool coerce))
+{
+ Chunk* c = m_chunks.GetNoExpand(GetChunkNum(vn));
+ assert(c->m_attribs == CEA_Const);
+
+ unsigned offset = ChunkOffset(vn);
+
+ assert(c->m_typ == TYP_SIMD32);
+ assert(!coerce);
+
+ return SafeGetConstantValue<simd32_t>(c, offset);
+}
+#endif // FEATURE_SIMD
+
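// A self-contained usage sketch tying the pieces together (hypothetical
// helper, using only the declarations introduced above): round-trip a simd16
// constant through the store and read it back.
inline void ExampleSimd16RoundTrip(ValueNumStore* vnStore)
{
    simd16_t value = {};
    value.u32[0]   = 0x2A; // lane 0 = 42, remaining lanes zero

    ValueNum vn = vnStore->VNForSimd16Con(value);
    assert(vnStore->IsVNConstant(vn));
    assert(vnStore->TypeOfVN(vn) == TYP_SIMD16);

    simd16_t readBack = vnStore->GetConstantSimd16(vn);
    assert(readBack == value); // simd16_t's operator== is the same one the maps use
}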
// Inline functions.
// static