IF_DEF(RWR_RRD_ARD, IS_AM_RD|IS_R1_WR|IS_R2_RD, AMD ) // write reg , read reg2, read [adr]
IF_DEF(RWR_ARD_CNS, IS_AM_RD|IS_R1_WR, AMD_CNS) // write reg , read [adr], const
+IF_DEF(RWR_ARD_RRD, IS_AM_RD|IS_R1_WR|IS_R2_RD, AMD) // write reg , read [adr], read reg2
IF_DEF(RWR_RRD_ARD_CNS, IS_AM_RD|IS_R1_WR|IS_R2_RD, AMD_CNS) // write reg , read reg2, read [adr], const
IF_DEF(RWR_RRD_ARD_RRD, IS_AM_RD|IS_R1_WR|IS_R2_RD|IS_R3_RD, AMD_CNS) // write reg , read reg2, read [adr], read reg3
case INS_vfnmsub213sd:
case INS_vfnmsub231sd:
case INS_vpmaskmovq:
+ case INS_vpgatherdq:
+ case INS_vpgatherqq:
+ case INS_vgatherdpd:
+ case INS_vgatherqpd:
return true;
default:
break;
if (dst->isContained() || (dst->isLclField() && (dst->gtRegNum == REG_NA)) || dst->isUsedFromSpillTemp())
{
// dst can only be a modrm
- assert(dst->isUsedFromMemory() || (dst->gtRegNum == REG_NA) ||
- instrIs3opImul(ins)); // dst on 3opImul isn't really the dst
+ // dst on 3opImul isn't really the dst
+ assert(dst->isUsedFromMemory() || (dst->gtRegNum == REG_NA) || instrIs3opImul(ins));
assert(!src->isUsedFromMemory());
memOp = dst;
emitCurIGsize += sz;
}
+//------------------------------------------------------------------------
+// IsAVX2GatherInstruction: return true if the instruction is AVX2 Gather
+//
+// Arguments:
+// ins - the instruction to check
+// Return Value:
+// true if the instruction is AVX2 Gather
+//
+bool IsAVX2GatherInstruction(instruction ins)
+{
+ switch (ins)
+ {
+ // All eight AVX2 gather forms: {dword,qword} indices x {dword,qword,float,double} elements
+ case INS_vpgatherdd:
+ case INS_vpgatherdq:
+ case INS_vpgatherqd:
+ case INS_vpgatherqq:
+ case INS_vgatherdps:
+ case INS_vgatherdpd:
+ case INS_vgatherqps:
+ case INS_vgatherqpd:
+ return true;
+ default:
+ return false;
+ }
+}
+
+//------------------------------------------------------------------------
+// emitIns_R_AR_R: Emits an AVX2 Gather instruction
+//
+// Arguments:
+// ins - the instruction to emit
+// attr - the instruction operand size
+// reg1 - the destination and first source operand
+// reg2 - the mask operand (encoded in VEX.vvvv)
+// base - the base register of address to load
+// index - the index register of VSIB
+// scale - the scale number of VSIB
+// offs - the offset added to the memory address from base
+//
+void emitter::emitIns_R_AR_R(instruction ins,
+ emitAttr attr,
+ regNumber reg1,
+ regNumber reg2,
+ regNumber base,
+ regNumber index,
+ int scale,
+ int offs)
+{
+ // Only the AVX2 gather instructions use this VSIB-addressed, mask-in-vvvv shape.
+ assert(IsAVX2GatherInstruction(ins));
+
+ instrDesc* id = emitNewInstrAmd(attr, offs);
+
+ id->idIns(ins);
+ id->idReg1(reg1);
+ id->idReg2(reg2);
+
+ // IF_RWR_ARD_RRD: write reg1, read [addr], read reg2 (the mask register)
+ id->idInsFmt(IF_RWR_ARD_RRD);
+ id->idAddr()->iiaAddrMode.amBaseReg = base;
+ id->idAddr()->iiaAddrMode.amIndxReg = index;
+ // NOTE(review): 'scale' is the VSIB scale (1/2/4/8) cast to emitAttr so that
+ // emitEncodeSize maps it to the 2-bit SS field -- confirm callers only pass 1/2/4/8.
+ id->idAddr()->iiaAddrMode.amScale = emitEncodeSize((emitAttr)scale);
+
+ UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins));
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
void emitter::emitIns_R_R_C(
instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, CORINFO_FIELD_HANDLE fldHnd, int offs)
{
emitDispAddrMode(id);
break;
+ case IF_RWR_ARD_RRD:
+ if (ins == INS_vpgatherqd || ins == INS_vgatherqps)
+ {
+ attr = EA_16BYTE;
+ }
+ sstr = codeGen->genSizeStr(EA_ATTR(4));
+ printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
+ emitDispAddrMode(id);
+ printf(", %s", emitRegName(id->idReg2(), attr));
+ break;
+
case IF_RWR_RRD_ARD_CNS:
{
printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr);
switch (id->idInsFmt())
{
case IF_RWR_RRD_ARD:
+ case IF_RWR_ARD_RRD:
case IF_RWR_RRD_ARD_CNS:
case IF_RWR_RRD_ARD_RRD:
{
break;
}
+ case IF_RWR_ARD_RRD:
+ {
+ assert(IsAVX2GatherInstruction(ins));
+ code = insCodeRM(ins);
+ dst = emitOutputAM(dst, id, code);
+ sz = emitSizeOfInsDsc(id);
+ break;
+ }
+
case IF_RWR_RRD_ARD_CNS:
case IF_RWR_RRD_ARD_RRD:
{
void emitIns_R_R_AR(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber base, int offs);
+void emitIns_R_AR_R(instruction ins,
+ emitAttr attr,
+ regNumber reg1,
+ regNumber reg2,
+ regNumber base,
+ regNumber index,
+ int scale,
+ int offs);
+
void emitIns_R_R_C(
instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, CORINFO_FIELD_HANDLE fldHnd, int offs);
{
// Some AVX instructions here also have MemoryLoad semantics
- // Do we have 3 operands?
- if (HWIntrinsicInfo::lookupNumArgs(this) != 3)
+ // Do we have less than 3 operands?
+ if (HWIntrinsicInfo::lookupNumArgs(this) < 3)
{
return false;
}
- else // We have 3 operands/args
+ else // We have 3 or more operands/args
{
+ if (HWIntrinsicInfo::isAVX2GatherIntrinsic(gtHWIntrinsicId))
+ {
+ return true;
+ }
+
GenTreeArgList* argList = gtOp.gtOp1->AsArgList();
if ((gtHWIntrinsicId == NI_AVX_InsertVector128 || gtHWIntrinsicId == NI_AVX2_InsertVector128) &&
bool GenTreeHWIntrinsic::OperIsMemoryLoadOrStore()
{
#ifdef _TARGET_XARCH_
- // Some xarch instructions have MemoryLoad sematics
- HWIntrinsicCategory category = HWIntrinsicInfo::lookupCategory(gtHWIntrinsicId);
- if ((category == HW_Category_MemoryLoad) || (category == HW_Category_MemoryStore))
- {
- return true;
- }
- else if (category == HW_Category_IMM)
- {
- // Some AVX instructions here also have MemoryLoad or MemoryStore sematics
-
- // Do we have 3 operands?
- if (HWIntrinsicInfo::lookupNumArgs(this) != 3)
- {
- return false;
- }
- else // We have 3 operands/args
- {
- GenTreeArgList* argList = gtOp.gtOp1->AsArgList();
-
- if ((gtHWIntrinsicId == NI_AVX_InsertVector128 || gtHWIntrinsicId == NI_AVX2_InsertVector128) &&
- (argList->Rest()->Current()->TypeGet() == TYP_I_IMPL)) // Is the type of the second arg TYP_I_IMPL?
- {
- // This is Avx/Avx2.InsertVector128
- return true;
- }
- else if ((gtHWIntrinsicId == NI_AVX_ExtractVector128 || gtHWIntrinsicId == NI_AVX2_ExtractVector128))
- {
- // This is Avx/Avx2.ExtractVector128
- return true;
- }
- }
- }
+ return OperIsMemoryLoad() || OperIsMemoryStore();
#endif // _TARGET_XARCH_
return false;
}
// happening.
// Copies the cached cost fields (_gtCostEx/_gtCostSz) from 'tree' onto this node.
void CopyCosts(const GenTree* const tree)
{
-    INDEBUG(gtCostsInitialized =
-        tree->gtCostsInitialized;) // If the 'tree' costs aren't initialized, we'll hit an assert below.
+    // If the 'tree' costs aren't initialized, we'll hit an assert below.
+    INDEBUG(gtCostsInitialized = tree->gtCostsInitialized;)
    _gtCostEx = tree->gtCostEx;
    _gtCostSz = tree->gtCostSz;
}
struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic
{
NamedIntrinsic gtHWIntrinsicId;
+ var_types gtIndexBaseType; // for AVX2 Gather* intrinsics
GenTreeHWIntrinsic(var_types type, NamedIntrinsic hwIntrinsicID, var_types baseType, unsigned size)
: GenTreeJitIntrinsic(GT_HWIntrinsic, type, nullptr, nullptr, baseType, size), gtHWIntrinsicId(hwIntrinsicID)
HWIntrinsicSwitchCaseBody emitSwCase)
{
assert(nonConstImmReg != REG_NA);
+ // AVX2 Gather intrinsics use a managed non-const fallback since they have a discrete imm8 value range
+ // that does not work with the current compiler generated jump-table fallback
+ assert(!HWIntrinsicInfo::isAVX2GatherIntrinsic(intrinsic));
emitter* emit = getEmitter();
const unsigned maxByte = (unsigned)HWIntrinsicInfo::lookupImmUpperBound(intrinsic) + 1;
break;
}
+ case NI_AVX2_GatherVector128:
+ case NI_AVX2_GatherVector256:
+ case NI_AVX2_GatherMaskVector128:
+ case NI_AVX2_GatherMaskVector256:
+ {
+ GenTreeArgList* list = op1->AsArgList();
+ op1 = list->Current();
+ op1Reg = op1->gtRegNum;
+ genConsumeRegs(op1);
+
+ list = list->Rest();
+ op2 = list->Current();
+ op2Reg = op2->gtRegNum;
+ genConsumeRegs(op2);
+
+ list = list->Rest();
+ GenTree* op3 = list->Current();
+ genConsumeRegs(op3);
+
+ list = list->Rest();
+ GenTree* op4 = nullptr;
+ GenTree* lastOp = nullptr;
+ GenTree* indexOp = nullptr;
+
+ regNumber op3Reg = REG_NA;
+ regNumber op4Reg = REG_NA;
+ regNumber addrBaseReg = REG_NA;
+ regNumber addrIndexReg = REG_NA;
+ regNumber maskReg = node->ExtractTempReg(RBM_ALLFLOAT);
+
+ if (numArgs == 5)
+ {
+ assert(intrinsicId == NI_AVX2_GatherMaskVector128 || intrinsicId == NI_AVX2_GatherMaskVector256);
+ op4 = list->Current();
+ list = list->Rest();
+ lastOp = list->Current();
+ op3Reg = op3->gtRegNum;
+ op4Reg = op4->gtRegNum;
+ genConsumeRegs(op4);
+ addrBaseReg = op2Reg;
+ addrIndexReg = op3Reg;
+ indexOp = op3;
+
+ // copy op4Reg into the tmp mask register,
+ // the mask register will be cleared by gather instructions
+ emit->emitIns_R_R(INS_movaps, attr, maskReg, op4Reg);
+
+ if (targetReg != op1Reg)
+ {
+ // copy source vector to the target register for masking merge
+ emit->emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
+ }
+ }
+ else
+ {
+ assert(intrinsicId == NI_AVX2_GatherVector128 || intrinsicId == NI_AVX2_GatherVector256);
+ addrBaseReg = op1Reg;
+ addrIndexReg = op2Reg;
+ indexOp = op2;
+ lastOp = op3;
+
+ // generate all-one mask vector
+ emit->emitIns_SIMD_R_R_R(INS_pcmpeqd, attr, maskReg, maskReg, maskReg);
+ }
+
+ bool isVector128GatherWithVector256Index = (targetType == TYP_SIMD16) && (indexOp->TypeGet() == TYP_SIMD32);
+
+ // hwintrinsiclistxarch.h uses Dword index instructions in default
+ if (varTypeIsLong(node->gtIndexBaseType))
+ {
+ switch (ins)
+ {
+ case INS_vpgatherdd:
+ ins = INS_vpgatherqd;
+ if (isVector128GatherWithVector256Index)
+ {
+ // YMM index in address mode
+ attr = emitTypeSize(TYP_SIMD32);
+ }
+ break;
+ case INS_vpgatherdq:
+ ins = INS_vpgatherqq;
+ break;
+ case INS_vgatherdps:
+ ins = INS_vgatherqps;
+ if (isVector128GatherWithVector256Index)
+ {
+ // YMM index in address mode
+ attr = emitTypeSize(TYP_SIMD32);
+ }
+ break;
+ case INS_vgatherdpd:
+ ins = INS_vgatherqpd;
+ break;
+ default:
+ unreached();
+ }
+ }
+
+ assert(lastOp->IsCnsIntOrI());
+ ssize_t ival = lastOp->AsIntCon()->IconValue();
+ assert((ival >= 0) && (ival <= 255));
+
+ assert(targetReg != maskReg);
+ assert(targetReg != addrIndexReg);
+ assert(maskReg != addrIndexReg);
+ emit->emitIns_R_AR_R(ins, attr, targetReg, maskReg, addrBaseReg, addrIndexReg, (int8_t)ival, 0);
+
+ break;
+ }
+
case NI_AVX_GetLowerHalf:
{
assert(op2 == nullptr);
HARDWARE_INTRINSIC(AVX2_ConvertToVector256UInt32, "ConvertToVector256UInt32", AVX2, -1, 32, 1, {INS_invalid, INS_pmovzxbd, INS_invalid, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AVX2_ConvertToVector256Int64, "ConvertToVector256Int64", AVX2, -1, 32, 1, {INS_pmovsxbq, INS_invalid, INS_pmovsxwq, INS_invalid, INS_pmovsxdq, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AVX2_ConvertToVector256UInt64, "ConvertToVector256UInt64", AVX2, -1, 32, 1, {INS_invalid, INS_pmovzxbq, INS_invalid, INS_pmovzxwq, INS_invalid, INS_pmovzxdq, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX2_GatherVector128, "GatherVector128", AVX2, -1, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_SpecialCodeGen|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(AVX2_GatherVector256, "GatherVector256", AVX2, -1, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_SpecialCodeGen|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(AVX2_GatherMaskVector128, "GatherMaskVector128", AVX2, -1, 16, 5, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(AVX2_GatherMaskVector256, "GatherMaskVector256", AVX2, -1, 32, 5, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NoContainment)
HARDWARE_INTRINSIC(AVX2_HorizontalAdd, "HorizontalAdd", AVX2, -1, 32, 2, {INS_invalid, INS_invalid, INS_phaddw, INS_invalid, INS_phaddd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX2_HorizontalAddSaturate, "HorizontalAddSaturate", AVX2, -1, 32, 2, {INS_invalid, INS_invalid, INS_phaddsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX2_HorizontalSubtract, "HorizontalSubtract", AVX2, -1, 32, 2, {INS_invalid, INS_invalid, INS_phsubw, INS_invalid, INS_phsubd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
if (op1->OperIsList())
{
-#if DEBUG
GenTreeArgList* list = op1->AsArgList();
numArgs = 0;
list = list->Rest();
} while (list != nullptr);
- assert(numArgs == 3);
-#endif
-
- return 3;
+ return numArgs;
}
GenTree* op2 = node->gtGetOp2();
return node->gtGetOp1()->AsArgList()->Rest()->Rest()->Current();
}
+ case 5:
+ {
+ assert(node->gtGetOp1() != nullptr);
+ assert(node->gtGetOp1()->OperIsList());
+ assert(node->gtGetOp2() == nullptr);
+ assert(node->gtGetOp1()->AsArgList()->Rest()->Rest()->Rest()->Rest()->Current() != nullptr);
+ assert(node->gtGetOp1()->AsArgList()->Rest()->Rest()->Rest()->Rest()->Rest() == nullptr);
+
+ return node->gtGetOp1()->AsArgList()->Rest()->Rest()->Rest()->Rest()->Current();
+ }
+
default:
{
unreached();
return 31; // enum FloatComparisonMode has 32 values
}
+ case NI_AVX2_GatherVector128:
+ case NI_AVX2_GatherVector256:
+ case NI_AVX2_GatherMaskVector128:
+ case NI_AVX2_GatherMaskVector256:
+ return 8;
+
default:
{
assert(HWIntrinsicInfo::HasFullRangeImm(id));
}
//------------------------------------------------------------------------
+// isInImmRange: Check if ival is valid for the intrinsic
+//
+// Arguments:
+// id -- The NamedIntrinsic associated with the HWIntrinsic to lookup
+// ival -- the imm value to be checked
+//
+// Return Value:
+// true if ival is valid for the intrinsic
+//
+bool HWIntrinsicInfo::isInImmRange(NamedIntrinsic id, int ival)
+{
+ assert(HWIntrinsicInfo::lookupCategory(id) == HW_Category_IMM);
+
+ if (isAVX2GatherIntrinsic(id))
+ {
+ // For AVX2 gather intrinsics the imm operand is the VSIB scale, so only 1, 2, 4, or 8 is valid
+ return ival == 1 || ival == 2 || ival == 4 || ival == 8;
+ }
+ else
+ {
+ // All other IMM intrinsics accept the contiguous range [0, upper bound]
+ return ival <= lookupImmUpperBound(id) && ival >= 0;
+ }
+}
+
+//------------------------------------------------------------------------
+// isAVX2GatherIntrinsic: Check if the intrinsic is an AVX2 Gather* intrinsic
+//
+// Arguments:
+// id -- The NamedIntrinsic associated with the HWIntrinsic to lookup
+//
+// Return Value:
+// true if id is an AVX2 Gather* intrinsic
+//
+bool HWIntrinsicInfo::isAVX2GatherIntrinsic(NamedIntrinsic id)
+{
+ switch (id)
+ {
+ case NI_AVX2_GatherVector128:
+ case NI_AVX2_GatherVector256:
+ case NI_AVX2_GatherMaskVector128:
+ case NI_AVX2_GatherMaskVector256:
+ return true;
+ default:
+ return false;
+ }
+}
+
+//------------------------------------------------------------------------
// isFullyImplementedIsa: Gets a value that indicates whether the InstructionSet is fully implemented
//
// Arguments:
assert(lastOp != nullptr);
// Full-range imm-intrinsics do not need the range-check
// because the imm-parameter of the intrinsic method is a byte.
- if (mustExpand && !HWIntrinsicInfo::HasFullRangeImm(intrinsic) && HWIntrinsicInfo::isImmOp(intrinsic, lastOp))
+ // AVX2 Gather intrinsics do not need the range-check
+ // because their imm-parameter has discrete valid values that are handled by managed code
+ if (mustExpand && !HWIntrinsicInfo::HasFullRangeImm(intrinsic) && HWIntrinsicInfo::isImmOp(intrinsic, lastOp) &&
+ !HWIntrinsicInfo::isAVX2GatherIntrinsic(intrinsic))
{
assert(!lastOp->IsCnsIntOrI());
GenTree* upperBoundNode =
if (!HWIntrinsicInfo::HasFullRangeImm(intrinsic))
{
if (!mustExpand && lastOp->IsCnsIntOrI() &&
- lastOp->AsIntCon()->IconValue() > HWIntrinsicInfo::lookupImmUpperBound(intrinsic))
+ !HWIntrinsicInfo::isInImmRange(intrinsic, (int)lastOp->AsIntCon()->IconValue()))
{
return nullptr;
}
argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass)));
op2 = getArgForHWIntrinsic(argType, argClass);
+ var_types op2Type;
+ if (intrinsic == NI_AVX2_GatherVector128 || intrinsic == NI_AVX2_GatherVector256)
+ {
+ assert(varTypeIsSIMD(op2->TypeGet()));
+ op2Type = getBaseTypeOfSIMDType(argClass);
+ }
argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass)));
op1 = getArgForHWIntrinsic(argType, argClass);
retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, intrinsic, baseType, simdSize);
+
+ if (intrinsic == NI_AVX2_GatherVector128 || intrinsic == NI_AVX2_GatherVector256)
+ {
+ assert(varTypeIsSIMD(op2->TypeGet()));
+ retNode->AsHWIntrinsic()->gtIndexBaseType = op2Type;
+ }
break;
}
+
default:
unreached();
}
}
break;
}
+
+ case NI_AVX2_GatherMaskVector128:
+ case NI_AVX2_GatherMaskVector256:
+ {
+ CORINFO_ARG_LIST_HANDLE argList = sig->args;
+ CORINFO_CLASS_HANDLE argClass;
+ var_types argType = TYP_UNKNOWN;
+ unsigned int sizeBytes;
+ baseType = getBaseTypeAndSizeOfSIMDType(sig->retTypeSigClass, &sizeBytes);
+ var_types retType = getSIMDTypeForSize(sizeBytes);
+
+ assert(sig->numArgs == 5);
+ CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(argList);
+ CORINFO_ARG_LIST_HANDLE arg3 = info.compCompHnd->getArgNext(arg2);
+ CORINFO_ARG_LIST_HANDLE arg4 = info.compCompHnd->getArgNext(arg3);
+ CORINFO_ARG_LIST_HANDLE arg5 = info.compCompHnd->getArgNext(arg4);
+
+ argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg5, &argClass)));
+ GenTree* op5 = getArgForHWIntrinsic(argType, argClass);
+ SetOpLclRelatedToSIMDIntrinsic(op5);
+
+ argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg4, &argClass)));
+ GenTree* op4 = getArgForHWIntrinsic(argType, argClass);
+ SetOpLclRelatedToSIMDIntrinsic(op4);
+
+ argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg3, &argClass)));
+ var_types indexbaseType = getBaseTypeOfSIMDType(argClass);
+ GenTree* op3 = getArgForHWIntrinsic(argType, argClass);
+ SetOpLclRelatedToSIMDIntrinsic(op3);
+
+ argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass)));
+ op2 = getArgForHWIntrinsic(argType, argClass);
+ SetOpLclRelatedToSIMDIntrinsic(op2);
+
+ argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass)));
+ op1 = getArgForHWIntrinsic(argType, argClass);
+ SetOpLclRelatedToSIMDIntrinsic(op1);
+
+ GenTree* opList = new (this, GT_LIST) GenTreeArgList(op1, gtNewArgList(op2, op3, op4, op5));
+ retNode = new (this, GT_HWIntrinsic) GenTreeHWIntrinsic(retType, opList, intrinsic, baseType, simdSize);
+ retNode->AsHWIntrinsic()->gtIndexBaseType = indexbaseType;
+ break;
+ }
+
default:
JITDUMP("Not implemented hardware intrinsic");
break;
static bool isImmOp(NamedIntrinsic id, const GenTree* op);
static int lookupImmUpperBound(NamedIntrinsic id);
+ static bool isInImmRange(NamedIntrinsic id, int ival);
+ static bool isAVX2GatherIntrinsic(NamedIntrinsic id);
static bool isFullyImplementedIsa(InstructionSet isa);
static bool isScalarIsa(InstructionSet isa);
INST3(vmaskmovpd, "maskmovpd", IUM_WR, SSE38(0x2F), BAD_CODE, SSE38(0x2D), INS_Flags_IsDstDstSrcAVXInstruction) // Conditional SIMD Packed Double-Precision Floating-Point Loads and Stores
INST3(vpmaskmovd, "pmaskmovd", IUM_WR, SSE38(0x8E), BAD_CODE, SSE38(0x8C), INS_Flags_IsDstDstSrcAVXInstruction) // Conditional SIMD Integer Packed Dword Loads and Stores
INST3(vpmaskmovq, "pmaskmovq", IUM_WR, SSE38(0x8E), BAD_CODE, SSE38(0x8C), INS_Flags_IsDstDstSrcAVXInstruction) // Conditional SIMD Integer Packed Qword Loads and Stores
+INST3(vpgatherdd, "pgatherdd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x90), INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed Dword Values Using Signed Dword
+INST3(vpgatherqd, "pgatherqd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x91), INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed Dword Values Using Signed Qword
+INST3(vpgatherdq, "pgatherdq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x90), INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed Qword Values Using Signed Dword Indices
+INST3(vpgatherqq, "pgatherqq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x91), INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed Qword Values Using Signed Qword Indices
+INST3(vgatherdps, "gatherdps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x92), INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed SP FP values Using Signed Dword Indices
+INST3(vgatherqps, "gatherqps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x93), INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed SP FP values Using Signed Qword Indices
+INST3(vgatherdpd, "gatherdpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x92), INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed DP FP Values Using Signed Dword Indices
+INST3(vgatherqpd, "gatherqpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x93), INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed DP FP Values Using Signed Qword Indices
INST3(FIRST_FMA_INSTRUCTION, "FIRST_FMA_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
// id nm um mr mi rm flags
if (!HWIntrinsicInfo::SupportsContainment(intrinsicId))
{
+ // AVX2 gather intrinsics are not containable and always have a constant IMM argument
+ if (HWIntrinsicInfo::isAVX2GatherIntrinsic(intrinsicId))
+ {
+ GenTree* lastOp = HWIntrinsicInfo::lookupLastOp(node);
+ assert(lastOp != nullptr);
+ MakeSrcContained(node, lastOp);
+ }
// Exit early if containment isn't supported
return;
}
#ifdef FEATURE_HW_INTRINSICS
case GT_HW_INTRINSIC_CHK:
#endif // FEATURE_HW_INTRINSICS
+
// Consumes arrLen & index - has no result
srcCount = 2;
assert(dstCount == 0);
if (op1->OperIsList())
{
assert(op2 == nullptr);
- assert(numArgs == 3);
+ assert(numArgs >= 3);
GenTreeArgList* argList = op1->AsArgList();
op2 = argList->Current();
argList = argList->Rest();
- op3 = argList->Current();
+ op3 = argList->Current();
+
+ while (argList->Rest() != nullptr)
+ {
+ argList = argList->Rest();
+ }
+
+ lastOp = argList->Current();
argList = argList->Rest();
- lastOp = op3;
assert(argList == nullptr);
}
else if (op2 != nullptr)
{
assert(numArgs == 1);
srcCount += BuildDelayFreeUses(op1);
+
+ buildUses = false;
+ break;
+ }
+
+ case NI_AVX2_GatherVector128:
+ case NI_AVX2_GatherVector256:
+ {
+ assert(numArgs == 3);
+ // Any pair of the index, mask, or destination registers should be different
+ srcCount += BuildOperandUses(op1);
+ srcCount += BuildDelayFreeUses(op2);
+
+ // get a tmp register for mask that will be cleared by gather instructions
+ buildInternalFloatRegisterDefForNode(intrinsicTree, allSIMDRegs());
+ setInternalRegsDelayFree = true;
+
+ buildUses = false;
+ break;
+ }
+
+ case NI_AVX2_GatherMaskVector128:
+ case NI_AVX2_GatherMaskVector256:
+ {
+ assert(numArgs == 5);
+ // Any pair of the index, mask, or destination registers should be different
+ srcCount += BuildOperandUses(op1);
+ srcCount += BuildOperandUses(op2);
+ srcCount += BuildDelayFreeUses(op3);
+
+ assert(intrinsicTree->gtGetOp1()->OperIsList());
+ GenTreeArgList* argList = intrinsicTree->gtGetOp1()->AsArgList();
+ GenTree* op4 = argList->Rest()->Rest()->Rest()->Current();
+ srcCount += BuildDelayFreeUses(op4);
+
+ // get a tmp register for mask that will be cleared by gather instructions
+ buildInternalFloatRegisterDefForNode(intrinsicTree, allSIMDRegs());
+ setInternalRegsDelayFree = true;
+
buildUses = false;
break;
}
// Named jit intrinsics
-enum NamedIntrinsic : unsigned int
+enum NamedIntrinsic : unsigned short
{
NI_Illegal = 0,
NI_System_Enum_HasFlag = 1,