From: Jacek Blaszczynski Date: Wed, 28 Feb 2018 05:08:55 +0000 (+0100) Subject: Implement Shuffle* SSE2 hardware intrinsics X-Git-Tag: accepted/tizen/unified/20190422.045933~2784^2~1 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=978a5494fafede99d1ea9e8ca691b873e15885d8;p=platform%2Fupstream%2Fcoreclr.git Implement Shuffle* SSE2 hardware intrinsics --- diff --git a/src/jit/compiler.h b/src/jit/compiler.h index 7bee3bf..20f71b7 100644 --- a/src/jit/compiler.h +++ b/src/jit/compiler.h @@ -3107,7 +3107,7 @@ protected: bool isScalarISA(InstructionSet isa); static int ivalOfHWIntrinsic(NamedIntrinsic intrinsic); unsigned simdSizeOfHWIntrinsic(NamedIntrinsic intrinsic, CORINFO_SIG_INFO* sig); - static int numArgsOfHWIntrinsic(NamedIntrinsic intrinsic); + static int numArgsOfHWIntrinsic(NamedIntrinsic intrinsic, GenTreeHWIntrinsic* node = nullptr); static GenTree* lastOpOfHWIntrinsic(GenTreeHWIntrinsic* node, int numArgs); static instruction insOfHWIntrinsic(NamedIntrinsic intrinsic, var_types type); static HWIntrinsicCategory categoryOfHWIntrinsic(NamedIntrinsic intrinsic); diff --git a/src/jit/emitxarch.cpp b/src/jit/emitxarch.cpp index 4ffe342..0bd85f2 100644 --- a/src/jit/emitxarch.cpp +++ b/src/jit/emitxarch.cpp @@ -5535,15 +5535,28 @@ static bool isSseShift(instruction ins) } } -static bool isSSEExtract(instruction ins) +//------------------------------------------------------------------------ +// IsDstSrcImmAvxInstruction: check if instruction has RM R I format +// for all encodings: EVEX, VEX and legacy SSE +// +// Arguments: +// instruction -- processor instruction to check +// +// Return Value: +// true if instruction has RRI format +// +static bool IsDstSrcImmAvxInstruction(instruction ins) { switch (ins) { + case INS_extractps: case INS_pextrb: case INS_pextrw: case INS_pextrd: case INS_pextrq: - case INS_extractps: + case INS_pshufd: + case INS_pshufhw: + case INS_pshuflw: return true; default: return false; @@ -5554,7 +5567,7 @@ void emitter::emitIns_SIMD_R_R_I(instruction ins, emitAttr attr, regNumber reg, { // TODO-XARCH refactoring emitIns_R_R_I to handle SSE2/AVX2 shift as well as emitIns_R_I bool isShift = isSseShift(ins); - if (isSSEExtract(ins) || (UseVEXEncoding() && !isShift)) + if (IsDstSrcImmAvxInstruction(ins) || (UseVEXEncoding() && !isShift)) { emitIns_R_R_I(ins, attr, reg, reg1, ival); } diff --git a/src/jit/hwintrinsiccodegenxarch.cpp b/src/jit/hwintrinsiccodegenxarch.cpp index 2f0e5f8..c4d5546 100644 --- a/src/jit/hwintrinsiccodegenxarch.cpp +++ b/src/jit/hwintrinsiccodegenxarch.cpp @@ -56,7 +56,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) HWIntrinsicCategory category = Compiler::categoryOfHWIntrinsic(intrinsicID); HWIntrinsicFlag flags = Compiler::flagsOfHWIntrinsic(intrinsicID); int ival = Compiler::ivalOfHWIntrinsic(intrinsicID); - int numArgs = Compiler::numArgsOfHWIntrinsic(intrinsicID); + int numArgs = Compiler::numArgsOfHWIntrinsic(intrinsicID, node); assert((flags & HW_Flag_NoCodeGen) == 0); diff --git a/src/jit/hwintrinsiclistxarch.h b/src/jit/hwintrinsiclistxarch.h index 5365c6e..c426071 100644 --- a/src/jit/hwintrinsiclistxarch.h +++ b/src/jit/hwintrinsiclistxarch.h @@ -194,7 +194,7 @@ HARDWARE_INTRINSIC(SSE2_LoadAlignedVector128, "LoadAligne HARDWARE_INTRINSIC(SSE2_LoadFence, "LoadFence", SSE2, -1, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2_LoadScalarVector128, "LoadScalarVector128", SSE2, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd, INS_movd, INS_movq, INS_movq, INS_invalid, INS_movsdsse2}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2_LoadVector128, "LoadVector128", SSE2, -1, 16, 1, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_invalid, INS_movupd}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2_MaskMove, "MaskMove", SSE2, -1, 16, 3, {INS_maskmovdqu,INS_maskmovdqu,INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE2_MaskMove, "MaskMove", SSE2, -1, 16, 3, {INS_maskmovdqu,INS_maskmovdqu,INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2_Max, "Max", SSE2, -1, 16, 2, {INS_invalid, INS_pmaxub, INS_pmaxsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative) HARDWARE_INTRINSIC(SSE2_MemoryFence, "MemoryFence", SSE2, -1, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2_MaxScalar, "MaxScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) @@ -217,14 +217,17 @@ HARDWARE_INTRINSIC(SSE2_ShiftLeftLogical128BitLane, "ShiftLeftL HARDWARE_INTRINSIC(SSE2_ShiftRightArithmetic, "ShiftRightArithmetic", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_psraw, INS_invalid, INS_psrad, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) HARDWARE_INTRINSIC(SSE2_ShiftRightLogical, "ShiftRightLogical", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_psrlw, INS_psrlw, INS_psrld, INS_psrld, INS_psrlq, INS_psrlq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) HARDWARE_INTRINSIC(SSE2_ShiftRightLogical128BitLane, "ShiftRightLogical128BitLane", SSE2, -1, 16, 2, {INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(SSE2_Shuffle, "Shuffle", SSE2, -1, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pshufd, INS_pshufd, INS_invalid, INS_invalid, INS_invalid, INS_shufpd}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(SSE2_ShuffleHigh, "ShuffleHigh", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_pshufhw, INS_pshufhw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(SSE2_ShuffleLow, "ShuffleLow", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_pshuflw, INS_pshuflw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) HARDWARE_INTRINSIC(SSE2_Sqrt, "Sqrt", SSE2, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2_SqrtScalar, "SqrtScalar", SSE2, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2_Store, "Store", SSE2, -1, 16, 2, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_invalid, INS_movupd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2_StoreAligned, "StoreAligned", SSE2, -1, 16, 2, {INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_invalid, INS_movapd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2_StoreAlignedNonTemporal, "StoreAlignedNonTemporal", SSE2, -1, 16, 2, {INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_invalid, INS_movntpd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2_StoreScalar, "StoreScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movsdsse2}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2_StoreLow, "StoreLow", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movq, INS_movq, INS_invalid, INS_movlpd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2_StoreHigh, "StoreHigh", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhpd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE2_StoreLow, "StoreLow", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movq, INS_movq, INS_invalid, INS_movlpd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE2_StoreScalar, "StoreScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movsdsse2}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2_Subtract, "Subtract", SSE2, -1, 16, 2, {INS_psubb, INS_psubb, INS_psubw, INS_psubw, INS_psubd, INS_psubd, INS_psubq, INS_psubq, INS_invalid, INS_subpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(SSE2_SubtractSaturate, "SubtractSaturate", SSE2, -1, 16, 2, {INS_psubsb, INS_psubusb, INS_psubsw, INS_psubusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(SSE2_SubtractScalar, "SubtractScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_subsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) @@ -320,7 +323,7 @@ HARDWARE_INTRINSIC(SSE42_CompareGreaterThan, "CompareGre // AVX Intrinsics HARDWARE_INTRINSIC(AVX_IsSupported, "get_IsSupported", AVX, -1, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IsSupportedProperty, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX_Add, "Add", AVX, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addps, INS_addpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX_AddSubtract, "AddSubtract", AVX, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addsubps, INS_addsubpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX_AddSubtract, "AddSubtract", AVX, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addsubps, INS_addsubpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX_And, "And", AVX, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andps, INS_andpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative) HARDWARE_INTRINSIC(AVX_AndNot, "AndNot", AVX, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andnps, INS_andnpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX_Blend, "Blend", AVX, -1, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_blendps, INS_blendpd}, HW_Category_IMM, HW_Flag_FullRangeIMM) diff --git a/src/jit/hwintrinsicxarch.cpp b/src/jit/hwintrinsicxarch.cpp index 86a9489..ef64c4e 100644 --- a/src/jit/hwintrinsicxarch.cpp +++ b/src/jit/hwintrinsicxarch.cpp @@ -219,19 +219,64 @@ unsigned Compiler::simdSizeOfHWIntrinsic(NamedIntrinsic intrinsic, CORINFO_SIG_I } //------------------------------------------------------------------------ -// numArgsOfHWIntrinsic: get the number of arguments +// numArgsOfHWIntrinsic: get the number of arguments based on table and +// if numArgs is -1 check number of arguments using GenTreeHWIntrinsic +// node unless it is nullptr // // Arguments: -// intrinsic -- id of the intrinsic function. +// intrinsic -- id of the intrinsic function +// node -- GenTreeHWIntrinsic* node with nullptr default value // // Return Value: // number of arguments // -int Compiler::numArgsOfHWIntrinsic(NamedIntrinsic intrinsic) +int Compiler::numArgsOfHWIntrinsic(NamedIntrinsic intrinsic, GenTreeHWIntrinsic* node) { assert(intrinsic != NI_Illegal); assert(intrinsic > NI_HW_INTRINSIC_START && intrinsic < NI_HW_INTRINSIC_END); - return hwIntrinsicInfoArray[intrinsic - NI_HW_INTRINSIC_START - 1].numArgs; + + int numArgs = hwIntrinsicInfoArray[intrinsic - NI_HW_INTRINSIC_START - 1].numArgs; + if (numArgs >= 0) + { + return numArgs; + } + + noway_assert(node != nullptr); + assert(numArgs == -1); + + GenTree* op1 = node->gtGetOp1(); + GenTree* op2 = node->gtGetOp2(); + + if (op2 != nullptr) + { + return 2; + } + + if (op1 != nullptr) + { + if (op1->OperIsList()) + { + numArgs = 0; + GenTreeArgList* list = op1->AsArgList(); + + while (list != nullptr) + { + numArgs++; + list = list->Rest(); + } + + assert(numArgs > 0); + return numArgs; + } + else + { + return 1; + } + } + else + { + return 0; + } } //------------------------------------------------------------------------ diff --git a/src/jit/instrsxarch.h b/src/jit/instrsxarch.h index d448afc..71da647 100644 --- a/src/jit/instrsxarch.h +++ b/src/jit/instrsxarch.h @@ -369,8 +369,10 @@ INST3( pcmpeqb, "pcmpeqb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, INST3( pcmpgtb, "pcmpgtb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x64)) // Packed compare 8-bit signed integers for greater than INST3( pshufd, "pshufd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x70)) // Packed shuffle of 32-bit integers +INST3( pshufhw, "pshufhw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x70)) // Shuffle the high words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1. +INST3( pshuflw, "pshuflw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x70)) // Shuffle the low words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1. INST3( pextrw, "pextrw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xC5)) // Extract 16-bit value into a r32 with zero extended to 32-bits -INST3( pinsrw, "pinsrw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xC4)) // packed insert word +INST3( pinsrw, "pinsrw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xC4)) // Insert word at index INST3( punpckhbw, "punpckhbw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x68)) // Packed logical (unsigned) widen ubyte to ushort (hi) INST3( punpcklbw, "punpcklbw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x60)) // Packed logical (unsigned) widen ubyte to ushort (lo) @@ -448,7 +450,7 @@ INST3( phsubsw, "phsubsw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SS INST3( lddqu, "lddqu" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0xF0)) // Load Unaligned integer INST3( movntdqa, "movntdqa" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x2A)) // Load Double Quadword Non-Temporal Aligned Hint INST3( movddup, "movddup" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x12)) // Replicate Double FP Values -INST3( movsldup, "movsldup" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x12)) // Replicate even-indexed Single FP Values +INST3( movsldup, "movsldup" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x12)) // Replicate even-indexed Single FP Values INST3( movshdup, "movshdup" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x16)) // Replicate odd-indexed Single FP Values INST3( phminposuw, "phminposuw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x41)) // Packed Horizontal Word Minimum INST3( mpsadbw, "mpsadbw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x42)) // Compute Multiple Packed Sums of Absolute Difference diff --git a/src/jit/lowerxarch.cpp b/src/jit/lowerxarch.cpp index 3f1deb8..45799e3 100644 --- a/src/jit/lowerxarch.cpp +++ b/src/jit/lowerxarch.cpp @@ -2367,7 +2367,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) NamedIntrinsic intrinsicID = node->gtHWIntrinsicId; HWIntrinsicCategory category = Compiler::categoryOfHWIntrinsic(intrinsicID); HWIntrinsicFlag flags = Compiler::flagsOfHWIntrinsic(intrinsicID); - int numArgs = Compiler::numArgsOfHWIntrinsic(intrinsicID); + int numArgs = Compiler::numArgsOfHWIntrinsic(intrinsicID, node); GenTree* op1 = node->gtGetOp1(); GenTree* op2 = node->gtGetOp2(); diff --git a/src/jit/lsraxarch.cpp b/src/jit/lsraxarch.cpp index fdb875a..296d558 100644 --- a/src/jit/lsraxarch.cpp +++ b/src/jit/lsraxarch.cpp @@ -2259,7 +2259,7 @@ void LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) InstructionSet isa = Compiler::isaOfHWIntrinsic(intrinsicID); HWIntrinsicCategory category = Compiler::categoryOfHWIntrinsic(intrinsicID); HWIntrinsicFlag flags = Compiler::flagsOfHWIntrinsic(intrinsicID); - int numArgs = Compiler::numArgsOfHWIntrinsic(intrinsicID); + int numArgs = Compiler::numArgsOfHWIntrinsic(intrinsicID, intrinsicTree); if (isa == InstructionSet_AVX || isa == InstructionSet_AVX2) { @@ -2321,11 +2321,9 @@ void LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) assert((flags & HW_Flag_NoCodeGen) == 0); - assert(numArgs != 0); - assert(numArgs != 1); - if (info->srcCount >= 2) { + assert(numArgs >= 2); LocationInfoListNode* op2Info = useList.Begin()->Next(); op2Info->info.isDelayFree = true; info->hasDelayFreeSrc = true;