1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
7 #ifdef FEATURE_HW_INTRINSICS
// Id of the intrinsic; the table macro initializes it with NI_<id>.
11 NamedIntrinsic intrinsicID;
// Name string of the intrinsic; lookupHWIntrinsic compares it against the method name with strcmp.
12 const char* intrinsicName;
// Category of the intrinsic, returned by categoryOfHWIntrinsic.
18 HWIntrinsicCategory category;
// Flag bits of the intrinsic, returned by flagsOfHWIntrinsic and tested against HW_Flag_* values.
19 HWIntrinsicFlag flags;
// Table of per-intrinsic information. It holds one entry for every HARDWARE_INTRINSIC(...) line that is
// pulled in from hwintrinsiclistxarch.h. The accessors in this file index it with
// (intrinsic - NI_HW_INTRINSIC_START - 1).
22 static const HWIntrinsicInfo hwIntrinsicInfoArray[] = {
// Each list entry expands to one aggregate initializer: the id becomes NI_<id>, the isa becomes
// InstructionSet_<isa>, t1..t10 are stored in list order and 'flag' is cast to HWIntrinsicFlag.
24 #define HARDWARE_INTRINSIC(id, name, isa, ival, size, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \
25 {NI_##id, name, InstructionSet_##isa, ival, size, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, static_cast<HWIntrinsicFlag>(flag)},
27 #include "hwintrinsiclistxarch.h"
//------------------------------------------------------------------------
// getHWIntrinsicName: get the name string of the given intrinsic from the hwIntrinsicInfoArray table
//
// Arguments:
//    intrinsic -- id of the intrinsic function.
//
// Return Value:
//    the intrinsicName field of the table entry for this intrinsic
//
// Note - no range assert is visible here, so the caller is expected to pass an id that lies
//        strictly between NI_HW_INTRINSIC_START and NI_HW_INTRINSIC_END (TODO confirm).
30 extern const char* getHWIntrinsicName(NamedIntrinsic intrinsic)
32 return hwIntrinsicInfoArray[intrinsic - NI_HW_INTRINSIC_START - 1].intrinsicName;
35 //------------------------------------------------------------------------
36 // lookupHWIntrinsicISA: map class name to InstructionSet value
//
// Arguments:
39 //    className -- class name in System.Runtime.Intrinsics.X86
//
// Return Value:
42 //    Id for the ISA class.
//    InstructionSet_ILLEGAL is returned (after a JITDUMP message) when no name comparison matched.
44 InstructionSet Compiler::lookupHWIntrinsicISA(const char* className)
// The name comparisons are only attempted for a non-null class name.
46 if (className != nullptr)
// Names starting with 'A': Aes, Avx, Avx2. The strcmp calls are exact, case-sensitive matches.
48 if (className[0] == 'A')
50 if (strcmp(className, "Aes") == 0)
52 return InstructionSet_AES;
54 else if (strcmp(className, "Avx") == 0)
56 return InstructionSet_AVX;
58 else if (strcmp(className, "Avx2") == 0)
60 return InstructionSet_AVX2;
// Names starting with 'S': the SSE family.
63 if (className[0] == 'S')
65 if (strcmp(className, "Sse") == 0)
67 return InstructionSet_SSE;
69 else if (strcmp(className, "Sse2") == 0)
71 return InstructionSet_SSE2;
73 else if (strcmp(className, "Sse3") == 0)
75 return InstructionSet_SSE3;
77 else if (strcmp(className, "Ssse3") == 0)
79 return InstructionSet_SSSE3;
81 else if (strcmp(className, "Sse41") == 0)
83 return InstructionSet_SSE41;
85 else if (strcmp(className, "Sse42") == 0)
87 return InstructionSet_SSE42;
// Remaining class names are compared one by one without a first-character test.
91 if (strcmp(className, "Bmi1") == 0)
93 return InstructionSet_BMI1;
95 else if (strcmp(className, "Bmi2") == 0)
97 return InstructionSet_BMI2;
99 else if (strcmp(className, "Fma") == 0)
101 return InstructionSet_FMA;
103 else if (strcmp(className, "Lzcnt") == 0)
105 return InstructionSet_LZCNT;
107 else if (strcmp(className, "Pclmulqdq") == 0)
109 return InstructionSet_PCLMULQDQ;
111 else if (strcmp(className, "Popcnt") == 0)
113 return InstructionSet_POPCNT;
// Nothing matched: report it in the JIT dump and return the illegal ISA value.
117 JITDUMP("Unsupported ISA.\n");
118 return InstructionSet_ILLEGAL;
121 //------------------------------------------------------------------------
122 // lookupHWIntrinsic: map intrinsic name to named intrinsic value
//
// Arguments:
125 //    methodName -- name of the intrinsic function.
126 //    isa -- instruction set of the intrinsic.
//
// Return Value:
129 //    Id for the hardware intrinsic
//    The result starts out as NI_Illegal and is replaced by the intrinsicID of a matching table entry.
//
131 // TODO-Throughput: replace sequential search by binary search
132 NamedIntrinsic Compiler::lookupHWIntrinsic(const char* methodName, InstructionSet isa)
134 NamedIntrinsic result = NI_Illegal;
// The table is not searched at all for an illegal ISA.
135 if (isa != InstructionSet_ILLEGAL)
// Linear scan over all NI_HW_INTRINSIC_END - NI_HW_INTRINSIC_START - 1 table entries.
137 for (int i = 0; i < NI_HW_INTRINSIC_END - NI_HW_INTRINSIC_START - 1; i++)
// An entry matches only when both the ISA and the (case-sensitive) method name agree.
139 if (isa == hwIntrinsicInfoArray[i].isa && strcmp(methodName, hwIntrinsicInfoArray[i].intrinsicName) == 0)
141 result = hwIntrinsicInfoArray[i].intrinsicID;
149 //------------------------------------------------------------------------
150 // isaOfHWIntrinsic: map named intrinsic value to its instruction set
//
// Arguments:
153 //    intrinsic -- id of the intrinsic function.
//
// Return Value:
156 //    instruction set of the intrinsic.
158 InstructionSet Compiler::isaOfHWIntrinsic(NamedIntrinsic intrinsic)
160 assert(intrinsic != NI_Illegal);
// The id must lie strictly between the two sentinel values for the table index below to be valid.
161 assert(intrinsic > NI_HW_INTRINSIC_START && intrinsic < NI_HW_INTRINSIC_END);
// The first table entry belongs to the id right after NI_HW_INTRINSIC_START, hence the "- 1".
162 return hwIntrinsicInfoArray[intrinsic - NI_HW_INTRINSIC_START - 1].isa;
165 //------------------------------------------------------------------------
166 // ivalOfHWIntrinsic: get the imm8 value of this intrinsic from the hwIntrinsicInfoArray table
//
// Arguments:
169 //    intrinsic -- id of the intrinsic function.
//
// Return Value:
172 //    The imm8 value that is implicit for this intrinsic, or -1 for intrinsics that do not take an immediate, or for
173 //    which the immediate is an explicit argument.
175 int Compiler::ivalOfHWIntrinsic(NamedIntrinsic intrinsic)
177 assert(intrinsic != NI_Illegal);
// Only ids strictly between the START and END sentinels have a table entry.
178 assert(intrinsic > NI_HW_INTRINSIC_START && intrinsic < NI_HW_INTRINSIC_END);
// Plain table read of the 'ival' column.
179 return hwIntrinsicInfoArray[intrinsic - NI_HW_INTRINSIC_START - 1].ival;
182 //------------------------------------------------------------------------
183 // simdSizeOfHWIntrinsic: get the SIMD size of this intrinsic
//
// Arguments:
186 //    intrinsic -- id of the intrinsic function.
//    sig -- signature of the intrinsic call; only consulted for intrinsics with HW_Flag_UnfixedSIMDSize.
//
// Return Value:
189 //    the SIMD size of this intrinsic
190 //    - from the hwIntrinsicInfoArray table if intrinsic has NO HW_Flag_UnfixedSIMDSize
191 //    - from the signature if intrinsic has HW_Flag_UnfixedSIMDSize
//
193 // Note - this function is only used by the importer
194 //        after importation (i.e., codegen), we can get the SIMD size from GenTreeHWIntrinsic IR
195 unsigned Compiler::simdSizeOfHWIntrinsic(NamedIntrinsic intrinsic, CORINFO_SIG_INFO* sig)
197 assert(intrinsic > NI_HW_INTRINSIC_START && intrinsic < NI_HW_INTRINSIC_END);
199 HWIntrinsicFlag flags = flagsOfHWIntrinsic(intrinsic);
// Fixed-size intrinsics: the size is read straight from the table.
201 if ((flags & HW_Flag_UnfixedSIMDSize) == 0)
203 return hwIntrinsicInfoArray[intrinsic - NI_HW_INTRINSIC_START - 1].simdSize;
// Unfixed size: find the class handle of the vector type in the signature.
206 CORINFO_CLASS_HANDLE typeHnd = nullptr;
// A struct return type supplies the class handle directly.
208 if (JITtype2varType(sig->retType) == TYP_STRUCT)
210 typeHnd = sig->retTypeSigClass;
// When the class of the first argument is used instead, HW_Flag_BaseTypeFromFirstArg must be set.
214 assert((flags & HW_Flag_BaseTypeFromFirstArg) != 0);
215 typeHnd = info.compCompHnd->getArgClass(sig, sig->args);
// getBaseTypeAndSizeOfSIMDType reports the size through its second argument; both results are validated.
218 unsigned simdSize = 0;
219 var_types baseType = getBaseTypeAndSizeOfSIMDType(typeHnd, &simdSize);
220 assert(simdSize > 0 && baseType != TYP_UNKNOWN);
224 // TODO_XARCH-CQ - refactoring of numArgsOfHWIntrinsic fast path into inlinable
225 // function and slow local static function may increase performance significantly
227 //------------------------------------------------------------------------
228 // numArgsOfHWIntrinsic: gets the number of arguments for the hardware intrinsic.
229 // This attempts to do a table based lookup but will fallback to the number
230 // of operands in 'node' if the table entry is -1.
//
// Arguments:
233 //    node -- GenTreeHWIntrinsic* node; it is asserted to be non-null here
//
// Return Value:
236 //    number of arguments
238 int Compiler::numArgsOfHWIntrinsic(GenTreeHWIntrinsic* node)
240 assert(node != nullptr);
242 NamedIntrinsic intrinsic = node->gtHWIntrinsicId;
244 assert(intrinsic != NI_Illegal);
245 assert(intrinsic > NI_HW_INTRINSIC_START && intrinsic < NI_HW_INTRINSIC_END);
// Table based lookup of the argument count.
247 int numArgs = hwIntrinsicInfoArray[intrinsic - NI_HW_INTRINSIC_START - 1].numArgs;
// From here on the table entry must be -1, so the count is derived from the operands of 'node'.
253 assert(numArgs == -1);
255 GenTree* op1 = node->gtGetOp1();
256 GenTree* op2 = node->gtGetOp2();
// When op1 is an argument list, the list is walked until its end (nullptr).
265 if (op1->OperIsList())
268 GenTreeArgList* list = op1->AsArgList();
270 while (list != nullptr)
290 //------------------------------------------------------------------------
291 // lastOpOfHWIntrinsic: get the last operand of a HW intrinsic
//
// Arguments:
294 //    node -- the intrinsic node.
295 //    numArgs -- number of arguments
//
// Return Value:
298 //    the last operand of the intrinsic node
300 GenTree* Compiler::lastOpOfHWIntrinsic(GenTreeHWIntrinsic* node, int numArgs)
302 GenTree* op1 = node->gtGetOp1();
303 GenTree* op2 = node->gtGetOp2();
// The operand that is about to be returned is asserted to exist.
309 assert(op1 != nullptr);
312 assert(op2 != nullptr);
// List form: op1 holds the operands as an argument list. The asserts require that a third
// element exists and that nothing follows it; that third element is the one returned.
315 assert(op1->OperIsList());
316 assert(op1->AsArgList()->Rest()->Rest()->Current() != nullptr);
317 assert(op1->AsArgList()->Rest()->Rest()->Rest() == nullptr);
318 return op1->AsArgList()->Rest()->Rest()->Current();
325 //------------------------------------------------------------------------
326 // insOfHWIntrinsic: get the instruction of the given intrinsic
//
// Arguments:
329 //    intrinsic -- id of the intrinsic function.
330 //    type -- vector base type of this intrinsic
//
// Return Value:
333 //    the instruction of the given intrinsic on the base type
334 //    return INS_invalid for unsupported base types
336 instruction Compiler::insOfHWIntrinsic(NamedIntrinsic intrinsic, var_types type)
338 assert(intrinsic != NI_Illegal);
339 assert(intrinsic > NI_HW_INTRINSIC_START && intrinsic < NI_HW_INTRINSIC_END);
// Only base types in the range TYP_BYTE..TYP_DOUBLE have a slot in the per-intrinsic 'ins' array.
340 assert(type >= TYP_BYTE && type <= TYP_DOUBLE);
// The 'ins' array is indexed by the distance of the base type from TYP_BYTE.
341 return hwIntrinsicInfoArray[intrinsic - NI_HW_INTRINSIC_START - 1].ins[type - TYP_BYTE];
344 //------------------------------------------------------------------------
345 // categoryOfHWIntrinsic: get the category of the given intrinsic
//
// Arguments:
348 //    intrinsic -- id of the intrinsic function.
//
// Return Value:
351 //    the category of the given intrinsic
353 HWIntrinsicCategory Compiler::categoryOfHWIntrinsic(NamedIntrinsic intrinsic)
355 assert(intrinsic != NI_Illegal);
// Guard the table index: valid ids are strictly inside (NI_HW_INTRINSIC_START, NI_HW_INTRINSIC_END).
356 assert(intrinsic > NI_HW_INTRINSIC_START && intrinsic < NI_HW_INTRINSIC_END);
// Plain table read of the 'category' column.
357 return hwIntrinsicInfoArray[intrinsic - NI_HW_INTRINSIC_START - 1].category;
360 //------------------------------------------------------------------------
361 // flagsOfHWIntrinsic: get the flags of the given intrinsic
//
// Arguments:
364 //    intrinsic -- id of the intrinsic function.
//
// Return Value:
367 //    the flags of the given intrinsic
369 HWIntrinsicFlag Compiler::flagsOfHWIntrinsic(NamedIntrinsic intrinsic)
371 assert(intrinsic != NI_Illegal);
// Guard the table index: valid ids are strictly inside (NI_HW_INTRINSIC_START, NI_HW_INTRINSIC_END).
372 assert(intrinsic > NI_HW_INTRINSIC_START && intrinsic < NI_HW_INTRINSIC_END);
// Plain table read of the 'flags' column; callers test the result against HW_Flag_* bits.
373 return hwIntrinsicInfoArray[intrinsic - NI_HW_INTRINSIC_START - 1].flags;
376 //------------------------------------------------------------------------
377 // getArgForHWIntrinsic: get the argument from the stack and match the signature
//
// Arguments:
380 //    argType -- the required type of argument
381 //    argClass -- the class handle of argType
//
// Return Value:
384 //    the argument popped from the importer stack, checked against the signature type
386 GenTree* Compiler::getArgForHWIntrinsic(var_types argType, CORINFO_CLASS_HANDLE argClass)
388 GenTree* arg = nullptr;
// Struct arguments are vectors: the SIMD type is derived from the size of the class.
389 if (argType == TYP_STRUCT)
391 unsigned int argSizeBytes;
392 var_types base = getBaseTypeAndSizeOfSIMDType(argClass, &argSizeBytes);
393 argType = getSIMDTypeForSize(argSizeBytes);
// Only TYP_SIMD16 and TYP_SIMD32 are accepted, both for the expected type and for the popped value.
394 assert((argType == TYP_SIMD32) || (argType == TYP_SIMD16));
395 arg = impSIMDPopStack(argType);
396 assert((arg->TypeGet() == TYP_SIMD16) || (arg->TypeGet() == TYP_SIMD32));
// Non-struct arguments must be arithmetic; the popped value must agree with the
// signature type once both are normalized with genActualType.
400 assert(varTypeIsArithmetic(argType));
401 arg = impPopStack().val;
402 assert(varTypeIsArithmetic(arg->TypeGet()));
403 assert(genActualType(arg->gtType) == genActualType(argType));
408 //------------------------------------------------------------------------
409 // immUpperBoundOfHWIntrinsic: get the max imm-value of non-full-range IMM intrinsic
//
// Arguments:
412 //    intrinsic -- intrinsic ID
//
// Return Value:
415 //    the max imm-value of non-full-range IMM intrinsic
417 int Compiler::immUpperBoundOfHWIntrinsic(NamedIntrinsic intrinsic)
// Only intrinsics of the IMM category may be queried.
419 assert(categoryOfHWIntrinsic(intrinsic) == HW_Category_IMM);
// Avx.CompareScalar: the largest valid imm value is 31.
423 case NI_AVX_CompareScalar:
424 return 31; // enum FloatComparisonMode has 32 values
// Any intrinsic without a dedicated case is required to carry HW_Flag_FullRangeIMM.
427 assert((flagsOfHWIntrinsic(intrinsic) & HW_Flag_FullRangeIMM) != 0);
432 //------------------------------------------------------------------------
433 // impNonConstFallback: convert certain SSE2/AVX2 shift intrinsic to its semantic alternative when the imm-arg is
434 // not a compile-time constant
//
// Arguments:
437 //    intrinsic -- intrinsic ID
438 //    simdType -- Vector type
439 //    baseType -- base type of the Vector128/256<T>
//
// Return Value:
442 //    return the IR of semantic alternative on non-const imm-arg
444 GenTree* Compiler::impNonConstFallback(NamedIntrinsic intrinsic, var_types simdType, var_types baseType)
// Only intrinsics flagged HW_Flag_NoJmpTableIMM may take this path.
446 assert((flagsOfHWIntrinsic(intrinsic) & HW_Flag_NoJmpTableIMM) != 0);
// The SSE2 and AVX2 logical/arithmetic shifts share one implementation.
449 case NI_SSE2_ShiftLeftLogical:
450 case NI_SSE2_ShiftRightArithmetic:
451 case NI_SSE2_ShiftRightLogical:
452 case NI_AVX2_ShiftLeftLogical:
453 case NI_AVX2_ShiftRightArithmetic:
454 case NI_AVX2_ShiftRightLogical:
// Operands come off the stack in reverse order: the shift count (op2) first, then the vector (op1).
456 GenTree* op2 = impPopStack().val;
457 GenTree* op1 = impSIMDPopStack(simdType);
// The scalar count is converted to a TYP_SIMD16 value with NI_SSE2_ConvertScalarToVector128Int32,
// and the same intrinsic id is then built with that vector (tmpOp) in place of the immediate.
459 gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, NI_SSE2_ConvertScalarToVector128Int32, TYP_INT, 16);
460 return gtNewSimdHWIntrinsicNode(simdType, op1, tmpOp, intrinsic, baseType, genTypeSize(simdType));
469 //------------------------------------------------------------------------
470 // isImmHWIntrinsic: check the intrinsic is a imm-intrinsic overload or not
//
// Arguments:
473 //    intrinsic -- intrinsic ID
474 //    lastOp -- the last operand of the intrinsic that may point to the imm-arg
//
// Return Value:
477 //    Return true iff the intrinsics is an imm-intrinsic overload.
478 //    Note: that some intrinsics, with HW_Flag_MaybeIMM set, have both imm (integer immediate) and vector (i.e.
479 //    non-TYP_INT) overloads.
481 bool Compiler::isImmHWIntrinsic(NamedIntrinsic intrinsic, GenTree* lastOp)
// First test: the intrinsic has to be in the IMM category.
483 if (categoryOfHWIntrinsic(intrinsic) != HW_Category_IMM)
// Second test: for HW_Flag_MaybeIMM intrinsics the last operand must be TYP_INT after genActualType;
// any other type identifies the vector overload.
488 if ((flagsOfHWIntrinsic(intrinsic) & HW_Flag_MaybeIMM) != 0 && genActualType(lastOp->TypeGet()) != TYP_INT)
496 //------------------------------------------------------------------------
497 // addRangeCheckIfNeeded: add a GT_HW_INTRINSIC_CHK node for non-full-range imm-intrinsic
//
// Arguments:
500 //    intrinsic -- intrinsic ID
501 //    lastOp -- the last operand of the intrinsic that points to the imm-arg
502 //    mustExpand -- true if the compiler is compiling the fallback(GT_CALL) of this intrinsics
//
// Return Value:
505 //    add a GT_HW_INTRINSIC_CHK node for non-full-range imm-intrinsic, which would throw ArgumentOutOfRangeException
506 //    when the imm-argument is not in the valid range
//    NOTE(review): when no check is needed the operand is presumably returned unchanged -- confirm.
508 GenTree* Compiler::addRangeCheckIfNeeded(NamedIntrinsic intrinsic, GenTree* lastOp, bool mustExpand)
510 assert(lastOp != nullptr);
511 // Full-range imm-intrinsics do not need the range-check
512 // because the imm-parameter of the intrinsic method is a byte.
// The check is added only when all three hold: mustExpand, not full-range, and an imm overload.
513 if (mustExpand && ((flagsOfHWIntrinsic(intrinsic) & HW_Flag_FullRangeIMM) == 0) &&
514 isImmHWIntrinsic(intrinsic, lastOp))
// The imm-arg is required to be a non-constant on this path.
516 assert(!lastOp->IsCnsIntOrI());
// Upper bound of the check: an int constant holding the max imm value of the intrinsic.
517 GenTree* upperBoundNode = new (this, GT_CNS_INT) GenTreeIntCon(TYP_INT, immUpperBoundOfHWIntrinsic(intrinsic));
518 GenTree* index = nullptr;
// The checked index is a second use of the imm-arg: produced by fgInsertCommaFormTemp when lastOp
// has side effects, otherwise by cloning lastOp.
519 if ((lastOp->gtFlags & GTF_SIDE_EFFECT) != 0)
521 index = fgInsertCommaFormTemp(&lastOp);
525 index = gtCloneExpr(lastOp);
// Build the check node; its throw kind is then switched to SCK_ARG_RNG_EXCPN.
527 GenTreeBoundsChk* hwIntrinsicChk = new (this, GT_HW_INTRINSIC_CHK)
528 GenTreeBoundsChk(GT_HW_INTRINSIC_CHK, TYP_VOID, index, upperBoundNode, SCK_RNGCHK_FAIL);
529 hwIntrinsicChk->gtThrowKind = SCK_ARG_RNG_EXCPN;
// The result is COMMA(check, lastOp) and keeps the type of lastOp.
530 return gtNewOperNode(GT_COMMA, lastOp->TypeGet(), hwIntrinsicChk, lastOp);
538 //------------------------------------------------------------------------
539 // isFullyImplmentedISAClass: return true if all the hardware intrinsics
540 // of this ISA are implemented in RyuJIT.
//
// Arguments:
543 //    isa - Instruction set
//
// Return Value:
545 //    true - all the hardware intrinsics of "isa" exposed in CoreFX
546 //    System.Runtime.Intrinsics.Experimental assembly are implemented in RyuJIT.
//
// Note - the ISAs are listed in three groups (no implementation, partially implemented,
//        fully implemented); per the contract above only the last group yields true.
548 bool Compiler::isFullyImplmentedISAClass(InstructionSet isa)
552 // These ISAs have no implementation
553 case InstructionSet_AES:
554 case InstructionSet_BMI1:
555 case InstructionSet_BMI2:
556 case InstructionSet_FMA:
557 case InstructionSet_PCLMULQDQ:
560 // These ISAs are partially implemented
561 case InstructionSet_AVX:
562 case InstructionSet_AVX2:
563 case InstructionSet_SSE42:
566 // These ISAs are fully implemented
567 case InstructionSet_SSE:
568 case InstructionSet_SSE2:
569 case InstructionSet_SSE3:
570 case InstructionSet_SSSE3:
571 case InstructionSet_SSE41:
572 case InstructionSet_LZCNT:
573 case InstructionSet_POPCNT:
581 //------------------------------------------------------------------------
// isScalarISA: check whether the given ISA only contains scalar instructions
//
// Arguments:
585 //    isa - Instruction set
//
// Return Value:
587 //    true - if "isa" only contains scalar instructions
589 bool Compiler::isScalarISA(InstructionSet isa)
// The ISAs that are named explicitly here: BMI1, BMI2, LZCNT and POPCNT.
593 case InstructionSet_BMI1:
594 case InstructionSet_BMI2:
595 case InstructionSet_LZCNT:
596 case InstructionSet_POPCNT:
604 //------------------------------------------------------------------------
605 // compSupportsHWIntrinsic: compiler support of hardware intrinsics
//
// Arguments:
608 //    isa - Instruction set
//
// Return Value:
//    true if the compiler supports the intrinsics of "isa", i.e.
611 //    - isa is a scalar ISA
612 //    - isa is a SIMD ISA and featureSIMD=true
613 //    - isa is fully implemented or EnableIncompleteISAClass=true
614 bool Compiler::compSupportsHWIntrinsic(InstructionSet isa)
// Two conditions are and-ed: (featureSIMD or scalar ISA) and (EnableIncompleteISAClass config or
// a fully implemented ISA class).
616 return (featureSIMD || isScalarISA(isa)) && (
618 JitConfig.EnableIncompleteISAClass() ||
620 isFullyImplmentedISAClass(isa));
623 //------------------------------------------------------------------------
624 // hwIntrinsicSignatureTypeSupported: platform support of hardware intrinsics
//
// Arguments:
627 //    retType - return type
628 //    sig - intrinsic signature
629 //    flags - flags of the intrinsics
//
// Return Value:
632 //    Returns true iff the given type signature is supported
// Notes:
634 //    - This is only used on 32-bit systems to determine whether the signature uses no 64-bit registers.
635 //    - The `retType` is passed to avoid another call to the type system, as it has already been retrieved.
636 bool Compiler::hwIntrinsicSignatureTypeSupported(var_types retType, CORINFO_SIG_INFO* sig, HWIntrinsicFlag flags)
639 CORINFO_CLASS_HANDLE argClass;
// Intrinsics flagged HW_Flag_64BitOnly are handled first (presumably unsupported here -- TODO confirm).
641 if ((flags & HW_Flag_64BitOnly) != 0)
// HW_Flag_SecondArgMaybe64Bit: the answer depends on the type of the second argument,
// which must not be a long type.
645 else if ((flags & HW_Flag_SecondArgMaybe64Bit) != 0)
647 assert(sig->numArgs >= 2);
648 CorInfoType corType =
649 strip(info.compCompHnd->getArgType(sig, info.compCompHnd->getArgNext(sig->args), &argClass));
650 return !varTypeIsLong(JITtype2varType(corType));
// All other intrinsics: the signature is supported as long as the return type is not a long type.
653 return !varTypeIsLong(retType);
659 //------------------------------------------------------------------------
660 // impIsTableDrivenHWIntrinsic: check whether an intrinsic can be imported by the table-driven importer
//
// Arguments:
663 //    category - category of a HW intrinsic
//    flags - flags of a HW intrinsic
//
// Return Value:
666 //    returns true if this category can be table-driven in the importer
//    i.e. the category is neither HW_Category_Special nor HW_Category_Scalar, and neither
//    HW_Flag_NoCodeGen nor HW_Flag_SpecialImport is set.
668 static bool impIsTableDrivenHWIntrinsic(HWIntrinsicCategory category, HWIntrinsicFlag flags)
670 // HW_Flag_NoCodeGen implies this intrinsic should be manually morphed in the importer.
671 return category != HW_Category_Special && category != HW_Category_Scalar &&
672 ((flags & (HW_Flag_NoCodeGen | HW_Flag_SpecialImport)) == 0);
675 //------------------------------------------------------------------------
676 // impHWIntrinsic: dispatch hardware intrinsics to their own implementation
//
// Arguments:
679 //    intrinsic -- id of the intrinsic function.
680 //    method -- method handle of the intrinsic function.
681 //    sig -- signature of the intrinsic call
//    mustExpand -- true when the intrinsic is being forced to expand (see the handling of
//                  non-constant imm-arguments below)
//
// Return Value:
684 //    the expanded intrinsic.
686 GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
687 CORINFO_METHOD_HANDLE method,
688 CORINFO_SIG_INFO* sig,
// Collect the table information of the intrinsic and the basic facts of the signature.
691 InstructionSet isa = isaOfHWIntrinsic(intrinsic);
692 HWIntrinsicCategory category = categoryOfHWIntrinsic(intrinsic);
693 HWIntrinsicFlag flags = flagsOfHWIntrinsic(intrinsic);
694 int numArgs = sig->numArgs;
695 var_types retType = JITtype2varType(sig->retType);
696 var_types baseType = TYP_UNKNOWN;
// Struct (vector) return types: take the base type and the SIMD type from the return type class.
698 if ((retType == TYP_STRUCT) && featureSIMD)
700 unsigned int sizeBytes;
701 baseType = getBaseTypeAndSizeOfSIMDType(sig->retTypeSigClass, &sizeBytes);
702 retType = getSIMDTypeForSize(sizeBytes);
703 assert(sizeBytes != 0);
706 // This intrinsic is supported if
707 // - the ISA is available on the underlying hardware (compSupports returns true)
708 // - the compiler supports this hardware intrinsics (compSupportsHWIntrinsic returns true)
709 // - intrinsics do not require 64-bit registers (r64) on 32-bit platforms (hwIntrinsicSignatureTypeSupported
//   returns true)
712 compSupports(isa) && compSupportsHWIntrinsic(isa) && hwIntrinsicSignatureTypeSupported(retType, sig, flags);
// - an IsSupported property simply becomes a constant node holding the result computed above
714 if (category == HW_Category_IsSupportedProperty)
716 return gtNewIconNode(issupported);
718 // - calling to unsupported intrinsics must throw PlatformNotSupportedException
719 else if (!issupported)
721 return impUnsupportedHWIntrinsic(CORINFO_HELP_THROW_PLATFORM_NOT_SUPPORTED, method, sig, mustExpand);
723 // Avoid checking stacktop for 0-op intrinsics
724 if (sig->numArgs > 0 && isImmHWIntrinsic(intrinsic, impStackTop().val))
726 GenTree* lastOp = impStackTop().val;
727 // The imm-HWintrinsics that do not accept all imm8 values may throw
728 // ArgumentOutOfRangeException when the imm argument is not in the valid range
729 if ((flags & HW_Flag_FullRangeIMM) == 0)
// A constant imm-arg above the upper bound is detected at import time (only when not mustExpand).
731 if (!mustExpand && lastOp->IsCnsIntOrI() &&
732 lastOp->AsIntCon()->IconValue() > immUpperBoundOfHWIntrinsic(intrinsic))
738 if (!lastOp->IsCnsIntOrI())
740 if ((flags & HW_Flag_NoJmpTableIMM) == 0 && !mustExpand)
742 // When the imm-argument is not a constant and we are not being forced to expand, we need to
743 // return nullptr so a GT_CALL to the intrinsic method is emitted instead. The
744 // intrinsic method is recursive and will be forced to expand, at which point
745 // we emit some less efficient fallback code.
// Intrinsics flagged HW_Flag_NoJmpTableIMM are rewritten by impNonConstFallback instead.
748 else if ((flags & HW_Flag_NoJmpTableIMM) != 0)
750 return impNonConstFallback(intrinsic, retType, baseType);
755 bool isTableDriven = impIsTableDrivenHWIntrinsic(category, flags);
// Memory stores and intrinsics flagged BaseTypeFromFirstArg/BaseTypeFromSecondArg take
// their base type from an argument instead of the return type.
757 if (isTableDriven && ((category == HW_Category_MemoryStore) ||
758 ((flags & (HW_Flag_BaseTypeFromFirstArg | HW_Flag_BaseTypeFromSecondArg)) != 0)))
760 if ((flags & HW_Flag_BaseTypeFromFirstArg) != 0)
762 baseType = getBaseTypeOfSIMDType(info.compCompHnd->getArgClass(sig, sig->args));
766 assert((category == HW_Category_MemoryStore) || ((flags & HW_Flag_BaseTypeFromSecondArg) != 0));
767 CORINFO_ARG_LIST_HANDLE secondArg = info.compCompHnd->getArgNext(sig->args);
768 CORINFO_CLASS_HANDLE secondArgClass = info.compCompHnd->getArgClass(sig, secondArg);
769 baseType = getBaseTypeOfSIMDType(secondArgClass);
771 if (baseType == TYP_UNKNOWN) // the second argument is not a vector
773 baseType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, secondArg, &secondArgClass)));
774 assert(baseType != TYP_STRUCT);
778 assert(baseType != TYP_UNKNOWN);
// Generic intrinsics (unless specially imported) require arithmetic base types; anything
// else is imported as a TYPE_NOT_SUPPORTED throw.
781 if (((flags & (HW_Flag_OneTypeGeneric | HW_Flag_TwoTypeGeneric)) != 0) && ((flags & HW_Flag_SpecialImport) == 0))
783 if (!varTypeIsArithmetic(baseType))
785 return impUnsupportedHWIntrinsic(CORINFO_HELP_THROW_TYPE_NOT_SUPPORTED, method, sig, mustExpand);
788 if ((flags & HW_Flag_TwoTypeGeneric) != 0)
790 // StaticCast<T, U> has two type parameters.
791 assert(numArgs == 1);
792 var_types srcType = getBaseTypeOfSIMDType(info.compCompHnd->getArgClass(sig, sig->args));
793 if (!varTypeIsArithmetic(srcType))
795 return impUnsupportedHWIntrinsic(CORINFO_HELP_THROW_TYPE_NOT_SUPPORTED, method, sig, mustExpand);
800 if ((flags & HW_Flag_NoFloatingPointUsed) == 0)
802 // Set `compFloatingPointUsed` to cover the scenario where an intrinsic is being on SIMD fields, but
803 // where no SIMD local vars are in use. This is the same logic as is used for FEATURE_SIMD.
804 compFloatingPointUsed = true;
807 // table-driven importer of simple intrinsics
810 unsigned simdSize = simdSizeOfHWIntrinsic(intrinsic, sig);
811 CORINFO_ARG_LIST_HANDLE argList = sig->args;
812 CORINFO_CLASS_HANDLE argClass;
813 var_types argType = TYP_UNKNOWN;
815 assert(numArgs >= 0);
816 assert(insOfHWIntrinsic(intrinsic, baseType) != INS_invalid);
817 assert(simdSize == 32 || simdSize == 16);
819 GenTreeHWIntrinsic* retNode = nullptr;
820 GenTree* op1 = nullptr;
821 GenTree* op2 = nullptr;
// No operands: the node is built from the intrinsic id alone.
826 retNode = gtNewSimdHWIntrinsicNode(retType, intrinsic, baseType, simdSize);
// One operand.
829 argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass)));
830 op1 = getArgForHWIntrinsic(argType, argClass);
831 retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, baseType, simdSize);
// Two operands: they are popped in reverse order, so op2 is fetched before op1.
834 argType = JITtype2varType(
835 strip(info.compCompHnd->getArgType(sig, info.compCompHnd->getArgNext(argList), &argClass)));
836 op2 = getArgForHWIntrinsic(argType, argClass);
// The last operand may be an imm-arg that needs a range check.
838 op2 = addRangeCheckIfNeeded(intrinsic, op2, mustExpand);
840 argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass)));
841 op1 = getArgForHWIntrinsic(argType, argClass);
843 retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, baseType, simdSize);
// Three operands: popped as op3, op2, op1.
848 CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(argList);
849 CORINFO_ARG_LIST_HANDLE arg3 = info.compCompHnd->getArgNext(arg2);
851 argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg3, &argClass)));
852 GenTree* op3 = getArgForHWIntrinsic(argType, argClass);
854 op3 = addRangeCheckIfNeeded(intrinsic, op3, mustExpand);
856 argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass)));
857 op2 = getArgForHWIntrinsic(argType, argClass);
859 argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass)));
860 op1 = getArgForHWIntrinsic(argType, argClass);
862 retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, intrinsic, baseType, simdSize);
// Memory loads and stores get extra side-effect flags on the new node.
869 bool isMemoryStore = retNode->OperIsMemoryStore();
870 if (isMemoryStore || retNode->OperIsMemoryLoad())
874 // A MemoryStore operation is an assignment
875 retNode->gtFlags |= GTF_ASG;
878 // This operation contains an implicit indirection
879 // it could point into the global heap or
880 // it could throw a null reference exception.
882 retNode->gtFlags |= (GTF_GLOB_REF | GTF_EXCEPT);
887 // other intrinsics need special importation
// Dispatch on the ISA to the per-ISA importer; AVX and AVX2 share one importer.
890 case InstructionSet_SSE:
891 return impSSEIntrinsic(intrinsic, method, sig, mustExpand);
892 case InstructionSet_SSE2:
893 return impSSE2Intrinsic(intrinsic, method, sig, mustExpand);
894 case InstructionSet_SSE42:
895 return impSSE42Intrinsic(intrinsic, method, sig, mustExpand);
896 case InstructionSet_AVX:
897 case InstructionSet_AVX2:
898 return impAvxOrAvx2Intrinsic(intrinsic, method, sig, mustExpand);
900 case InstructionSet_AES:
901 return impAESIntrinsic(intrinsic, method, sig, mustExpand);
902 case InstructionSet_BMI1:
903 return impBMI1Intrinsic(intrinsic, method, sig, mustExpand);
904 case InstructionSet_BMI2:
905 return impBMI2Intrinsic(intrinsic, method, sig, mustExpand);
906 case InstructionSet_FMA:
907 return impFMAIntrinsic(intrinsic, method, sig, mustExpand);
908 case InstructionSet_LZCNT:
909 return impLZCNTIntrinsic(intrinsic, method, sig, mustExpand);
910 case InstructionSet_PCLMULQDQ:
911 return impPCLMULQDQIntrinsic(intrinsic, method, sig, mustExpand);
912 case InstructionSet_POPCNT:
913 return impPOPCNTIntrinsic(intrinsic, method, sig, mustExpand);
//------------------------------------------------------------------------
// impSSEIntrinsic: import the SSE intrinsics that need special importation
//
// Arguments:
//    intrinsic -- id of the intrinsic function.
//    method -- method handle of the intrinsic function.
//    sig -- signature of the intrinsic call
//
// Return Value:
//    the node built for MoveMask, Prefetch0/1/2/NonTemporal or StoreFence; for any other id
//    a "Not implemented hardware intrinsic" message is dumped
919 GenTree* Compiler::impSSEIntrinsic(NamedIntrinsic intrinsic,
920 CORINFO_METHOD_HANDLE method,
921 CORINFO_SIG_INFO* sig,
924 GenTree* retNode = nullptr;
925 GenTree* op1 = nullptr;
926 GenTree* op2 = nullptr;
927 GenTree* op3 = nullptr;
928 GenTree* op4 = nullptr;
929 int simdSize = simdSizeOfHWIntrinsic(intrinsic, sig);
931 // The Prefetch and StoreFence intrinsics don't take any SIMD operands
932 // and have a simdSize of 0
933 assert((simdSize == 16) || (simdSize == 0));
// MoveMask: one TYP_SIMD16 operand with base type float, int result.
937 case NI_SSE_MoveMask:
938 assert(sig->numArgs == 1);
939 assert(JITtype2varType(sig->retType) == TYP_INT);
940 assert(getBaseTypeOfSIMDType(info.compCompHnd->getArgClass(sig, sig->args)) == TYP_FLOAT);
941 op1 = impSIMDPopStack(TYP_SIMD16);
942 retNode = gtNewSimdHWIntrinsicNode(TYP_INT, op1, intrinsic, TYP_FLOAT, simdSize);
// Prefetch variants: one non-SIMD operand popped from the stack, void result; the node is
// created with base type TYP_UBYTE and a SIMD size of 0.
945 case NI_SSE_Prefetch0:
946 case NI_SSE_Prefetch1:
947 case NI_SSE_Prefetch2:
948 case NI_SSE_PrefetchNonTemporal:
950 assert(sig->numArgs == 1);
951 assert(JITtype2varType(sig->retType) == TYP_VOID);
952 op1 = impPopStack().val;
953 retNode = gtNewSimdHWIntrinsicNode(TYP_VOID, op1, intrinsic, TYP_UBYTE, 0);
// StoreFence: no operands and no result.
957 case NI_SSE_StoreFence:
958 assert(sig->numArgs == 0);
959 assert(JITtype2varType(sig->retType) == TYP_VOID);
960 retNode = gtNewSimdHWIntrinsicNode(TYP_VOID, intrinsic, TYP_VOID, 0);
// Ids without a case above are only reported in the JIT dump.
964 JITDUMP("Not implemented hardware intrinsic");
//------------------------------------------------------------------------
// impSSE2Intrinsic: import the SSE2 intrinsics that need special importation
//
// Arguments:
//    intrinsic -- id of the intrinsic function.
//    method -- method handle of the intrinsic function.
//    sig -- signature of the intrinsic call
//
// Return Value:
//    the node built for CompareLessThan, LoadFence/MemoryFence, MoveMask or StoreNonTemporal;
//    for any other id a "Not implemented hardware intrinsic" message is dumped
970 GenTree* Compiler::impSSE2Intrinsic(NamedIntrinsic intrinsic,
971 CORINFO_METHOD_HANDLE method,
972 CORINFO_SIG_INFO* sig,
975 GenTree* retNode = nullptr;
976 GenTree* op1 = nullptr;
977 GenTree* op2 = nullptr;
979 int simdSize = simdSizeOfHWIntrinsic(intrinsic, sig);
980 var_types baseType = TYP_UNKNOWN;
981 var_types retType = TYP_UNKNOWN;
983 // The fencing intrinsics don't take any operands and simdSize is 0
984 assert((simdSize == 16) || (simdSize == 0));
986 CORINFO_ARG_LIST_HANDLE argList = sig->args;
987 CORINFO_CLASS_HANDLE argClass;
988 var_types argType = TYP_UNKNOWN;
// CompareLessThan: two TYP_SIMD16 operands; the base type comes from the return type class.
992 case NI_SSE2_CompareLessThan:
994 assert(sig->numArgs == 2);
995 op2 = impSIMDPopStack(TYP_SIMD16);
996 op1 = impSIMDPopStack(TYP_SIMD16);
997 baseType = getBaseTypeOfSIMDType(sig->retTypeSigClass);
// For double the intrinsic is emitted as is.
998 if (baseType == TYP_DOUBLE)
1000 retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, intrinsic, baseType, simdSize);
// For the other base types it is built as CompareGreaterThan with the operands swapped (op2, op1).
1005 gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, op1, NI_SSE2_CompareGreaterThan, baseType, simdSize);
// LoadFence / MemoryFence: no operands, no result, simdSize of 0.
1010 case NI_SSE2_LoadFence:
1011 case NI_SSE2_MemoryFence:
1013 assert(sig->numArgs == 0);
1014 assert(JITtype2varType(sig->retType) == TYP_VOID);
1015 assert(simdSize == 0);
1017 retNode = gtNewSimdHWIntrinsicNode(TYP_VOID, intrinsic, TYP_VOID, simdSize);
// MoveMask: one TYP_SIMD16 operand, int result; the base type comes from the first argument class.
1021 case NI_SSE2_MoveMask:
1023 assert(sig->numArgs == 1);
1024 retType = JITtype2varType(sig->retType);
1025 assert(retType == TYP_INT);
1026 op1 = impSIMDPopStack(TYP_SIMD16);
1027 baseType = getBaseTypeOfSIMDType(info.compCompHnd->getArgClass(sig, sig->args));
1028 retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, baseType, simdSize);
// StoreNonTemporal: two non-SIMD operands; the type of the second operand is passed as the
// base type and the SIMD size is 0.
1032 case NI_SSE2_StoreNonTemporal:
1034 assert(sig->numArgs == 2);
1035 assert(JITtype2varType(sig->retType) == TYP_VOID);
1036 op2 = impPopStack().val;
1037 op1 = impPopStack().val;
1038 retNode = gtNewSimdHWIntrinsicNode(TYP_VOID, op1, op2, NI_SSE2_StoreNonTemporal, op2->TypeGet(), 0);
// Ids without a case above are only reported in the JIT dump.
1043 JITDUMP("Not implemented hardware intrinsic");
//------------------------------------------------------------------------
// impSSE42Intrinsic: import the SSE4.2 intrinsics that need special importation
//
// Arguments:
//    intrinsic -- id of the intrinsic function.
//    method -- method handle of the intrinsic function.
//    sig -- signature of the intrinsic call
//
// Return Value:
//    the scalar node built for Crc32; for any other id a "Not implemented hardware intrinsic"
//    message is dumped
1049 GenTree* Compiler::impSSE42Intrinsic(NamedIntrinsic intrinsic,
1050 CORINFO_METHOD_HANDLE method,
1051 CORINFO_SIG_INFO* sig,
1054 GenTree* retNode = nullptr;
1055 GenTree* op1 = nullptr;
1056 GenTree* op2 = nullptr;
1057 var_types callType = JITtype2varType(sig->retType);
1059 CORINFO_ARG_LIST_HANDLE argList = sig->args;
1060 CORINFO_CLASS_HANDLE argClass;
1061 CorInfoType corType;
// Crc32: two scalar operands, popped in reverse order.
1064 case NI_SSE42_Crc32:
1065 assert(sig->numArgs == 2);
1066 op2 = impPopStack().val;
1067 op1 = impPopStack().val;
1068 argList = info.compCompHnd->getArgNext(argList); // the second argument
1069 corType = strip(info.compCompHnd->getArgType(sig, argList, &argClass)); // type of the second argument
1071 retNode = gtNewScalarHWIntrinsicNode(callType, op1, op2, NI_SSE42_Crc32);
1073 // TODO - currently we use the BaseType to bring the type of the second argument
1074 // to the code generator. May encode the overload info in other way.
1075 retNode->gtHWIntrinsic.gtSIMDBaseType = JITtype2varType(corType);
// Ids without a case above are only reported in the JIT dump.
1079 JITDUMP("Not implemented hardware intrinsic");
1085 //------------------------------------------------------------------------
1086 // normalizeAndGetHalfIndex: compute the half index of a Vector256<baseType>
1087 // and normalize the index to the specific range
//
// Arguments:
1090 //    indexPtr -- IN/OUT parameter, the pointer to the original index value; overwritten with the normalized index
1091 //    baseType -- the base type of the Vector256<T>
//
// Return Value:
1094 //    return the middle index of a Vector256<baseType>
1095 //    return the normalized index via indexPtr
1097 static int normalizeAndGetHalfIndex(int* indexPtr, var_types baseType)
1099 assert(varTypeIsArithmetic(baseType));
1100 // clear the unused bits to normalize the index into the range of [0, length of Vector256<baseType>)
// 32 / genTypeSize(baseType) is the element count of a 32-byte vector; masking with (count - 1)
// keeps only the low bits of the index.
1101 *indexPtr = (*indexPtr) & (32 / genTypeSize(baseType) - 1);
// 16 / genTypeSize(baseType) is the element count of one 16-byte half, i.e. the first index of the upper half.
1102 return (16 / genTypeSize(baseType));
//------------------------------------------------------------------------
// impAvxOrAvx2Intrinsic: build the GenTree for the AVX and AVX2 intrinsics
//                        listed in the cases below
//
// Arguments:
//    intrinsic -- id of the intrinsic being imported
//    method    -- method handle, forwarded to impUnsupportedHWIntrinsic
//    sig       -- signature of the intrinsic call (argument count, argument
//                 classes and return type are read from it)
//
// Notes:
//    Operands are popped from the importer stack in reverse order: the last
//    argument of the call is popped first.
//    NOTE(review): mustExpand is referenced below; presumably it is the
//    trailing parameter of this method -- confirm against the full signature.
//
1105 GenTree* Compiler::impAvxOrAvx2Intrinsic(NamedIntrinsic intrinsic,
1106 CORINFO_METHOD_HANDLE method,
1107 CORINFO_SIG_INFO* sig,
1110 GenTree* retNode = nullptr;
1111 GenTree* op1 = nullptr;
1112 GenTree* op2 = nullptr;
1113 var_types baseType = TYP_UNKNOWN;
1114 int simdSize = simdSizeOfHWIntrinsic(intrinsic, sig);
// Extract: read one element of a 256-bit vector by first narrowing to a 128-bit half
// and then using the 128-bit SSE2/SSE4.1 extract on that half.
1118 case NI_AVX_Extract:
1120 // Avx.Extract executes software implementation when the imm8 argument is not compile-time constant
1121 assert(!mustExpand);
// The index is the last argument, so it is popped before the vector.
1123 GenTree* lastOp = impPopStack().val;
1124 GenTree* vectorOp = impSIMDPopStack(TYP_SIMD32);
1125 assert(lastOp->IsCnsIntOrI());
1126 int ival = (int)lastOp->AsIntCon()->IconValue();
// The element type is taken from the class of the first (vector) argument.
1127 baseType = getBaseTypeOfSIMDType(info.compCompHnd->getArgClass(sig, sig->args));
1128 var_types retType = JITtype2varType(sig->retType);
1129 assert(varTypeIsArithmetic(baseType));
// ival is masked into the valid element range; midIndex is the element count of one 128-bit half.
1131 int midIndex = normalizeAndGetHalfIndex(&ival, baseType);
// Short element types use the SSE2 extract, all other types the SSE4.1 extract.
1132 NamedIntrinsic extractIntrinsic = varTypeIsShort(baseType) ? NI_SSE2_Extract : NI_SSE41_Extract;
1133 GenTree* half = nullptr;
// Index at or past midIndex: the 128-bit half is obtained with ExtractVector128 and immediate 1.
1135 if (ival >= midIndex)
1137 half = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, gtNewIconNode(1), NI_AVX_ExtractVector128,
// Lower-half path: NI_AVX_GetLowerHalf yields the 128-bit half.
1143 half = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, NI_AVX_GetLowerHalf, baseType, 32);
// The final node is a 128-bit extract (simd size 16) from the selected half.
1146 retNode = gtNewSimdHWIntrinsicNode(retType, half, gtNewIconNode(ival), extractIntrinsic, baseType, 16);
// Insert: replace one element of a 256-bit vector by modifying one 128-bit half
// and merging that half back into a copy of the original vector.
1152 // Avx.Insert executes software implementation when the imm8 argument is not compile-time constant
1153 assert(!mustExpand);
// Popped in reverse argument order: index, then the data to insert, then the vector.
1155 GenTree* lastOp = impPopStack().val;
1156 GenTree* dataOp = impPopStack().val;
1157 GenTree* vectorOp = impSIMDPopStack(TYP_SIMD32);
1158 assert(lastOp->IsCnsIntOrI());
1159 int ival = (int)lastOp->AsIntCon()->IconValue();
// The element type is taken from the class of the returned vector.
1160 baseType = getBaseTypeOfSIMDType(sig->retTypeSigClass);
1161 assert(varTypeIsArithmetic(baseType));
1163 int midIndex = normalizeAndGetHalfIndex(&ival, baseType);
// Short element types use the SSE2 insert, all other types the SSE4.1 insert.
1164 NamedIntrinsic insertIntrinsic = varTypeIsShort(baseType) ? NI_SSE2_Insert : NI_SSE41_Insert;
// The vector is consumed twice (once to obtain the half, once as the base of the final
// merge), so a second use is created with impCloneExpr.
1166 GenTree* clonedVectorOp;
1168 impCloneExpr(vectorOp, &clonedVectorOp, info.compCompHnd->getArgClass(sig, sig->args),
1169 (unsigned)CHECK_SPILL_ALL, nullptr DEBUGARG("AVX Insert clones the vector operand"));
// Index at or past midIndex: take the half with ExtractVector128(…, 1), insert at the
// half-relative index (ival - midIndex), then write the half back with InsertVector128(…, 1).
1171 if (ival >= midIndex)
1173 GenTree* halfVector = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, gtNewIconNode(1),
1174 NI_AVX_ExtractVector128, baseType, 32);
1175 GenTree* ModifiedHalfVector =
1176 gtNewSimdHWIntrinsicNode(TYP_SIMD16, halfVector, dataOp, gtNewIconNode(ival - midIndex),
1177 insertIntrinsic, baseType, 16);
1178 retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD32, clonedVectorOp, ModifiedHalfVector, gtNewIconNode(1),
1179 NI_AVX_InsertVector128, baseType, 32);
// Lower-half path: insert into the lower half, then merge with NI_AVX_Blend on TYP_FLOAT.
// NOTE(review): the blend mask 15 (0b1111) presumably selects the four low float lanes
// (the lower 128 bits) from the modified half -- confirm against the blend semantics.
// NOTE(review): the insert node here is typed TYP_SIMD32 although its simd size is 16 --
// confirm this is intentional.
1183 GenTree* halfVector = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, NI_AVX_GetLowerHalf, baseType, 32);
1184 GenTree* ModifiedHalfVector =
1185 gtNewSimdHWIntrinsicNode(TYP_SIMD32, halfVector, dataOp, gtNewIconNode(ival), insertIntrinsic,
1187 retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD32, clonedVectorOp, ModifiedHalfVector, gtNewIconNode(15),
1188 NI_AVX_Blend, TYP_FLOAT, 32);
// SetVector256: build two 128-bit halves from the scalar arguments and join them
// with InsertVector128.
1193 case NI_AVX_SetVector256:
1195 // TODO-XARCH: support long/ulong on 32-bit platforms (remove HW_Flag_SecondArgMaybe64Bit)
1196 int numArgs = sig->numArgs;
1197 assert(numArgs >= 4);
1198 assert(numArgs <= 32);
1199 baseType = getBaseTypeOfSIMDType(sig->retTypeSigClass);
// Both halves start out as zeroed 128-bit vectors.
1200 GenTree* higherHalfVector = gtNewSimdHWIntrinsicNode(TYP_SIMD16, NI_SSE_SetZeroVector128, TYP_FLOAT, 16);
1201 GenTree* lowerHalfVector = gtNewSimdHWIntrinsicNode(TYP_SIMD16, NI_SSE_SetZeroVector128, TYP_FLOAT, 16);
1202 NamedIntrinsic insertIntrinsic = varTypeIsShort(baseType) ? NI_SSE2_Insert : NI_SSE41_Insert;
// Integral and float element types: insert the arguments one at a time.
1205 if (baseType != TYP_DOUBLE)
1207 assert(varTypeIsIntegral(baseType) || baseType == TYP_FLOAT);
// The first numArgs / 2 popped operands (the trailing arguments of the call) fill the lower half.
1209 for (int i = 0; i < numArgs / 2; i++)
1211 GenTree* arg = impPopStack().val;
1212 // SSE4.1 insertps has different semantics from integral insert
1213 ival = baseType == TYP_FLOAT ? i * 16 : i;
1214 lowerHalfVector = gtNewSimdHWIntrinsicNode(TYP_SIMD16, lowerHalfVector, arg, gtNewIconNode(ival),
1215 insertIntrinsic, baseType, 16);
// The remaining numArgs / 2 operands fill the higher half in the same way.
1218 for (int i = 0; i < numArgs / 2; i++)
1220 GenTree* arg = impPopStack().val;
1221 // SSE4.1 insertps has different semantics from integral insert
1222 ival = baseType == TYP_FLOAT ? i * 16 : i;
1223 higherHalfVector = gtNewSimdHWIntrinsicNode(TYP_SIMD16, higherHalfVector, arg, gtNewIconNode(ival),
1224 insertIntrinsic, baseType, 16);
// TYP_DOUBLE: four operands are popped (op4 first) and paired with NI_SSE2_UnpackLow.
// These op1/op2 locals are declared here and hide the function-level op1/op2.
1229 GenTree* op4 = impPopStack().val;
1230 GenTree* op3 = impPopStack().val;
1231 GenTree* op2 = impPopStack().val;
1232 GenTree* op1 = impPopStack().val;
1233 lowerHalfVector = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op4, op3, NI_SSE2_UnpackLow, TYP_DOUBLE, 16);
1234 higherHalfVector = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, op1, NI_SSE2_UnpackLow, TYP_DOUBLE, 16);
// Join the halves: InsertVector128 with immediate 1 combines lowerHalfVector and higherHalfVector.
1237 retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD32, lowerHalfVector, higherHalfVector, gtNewIconNode(1),
1238 NI_AVX_InsertVector128, baseType, 32);
// SetAllVector256: a single popped operand becomes the only operand of the 256-bit node.
1242 case NI_AVX_SetAllVector256:
1244 baseType = getBaseTypeOfSIMDType(sig->retTypeSigClass);
1246 // TODO-XARCH: support long/ulong on 32-bit platforms
1247 if (varTypeIsLong(baseType))
1249 return impUnsupportedHWIntrinsic(CORINFO_HELP_THROW_PLATFORM_NOT_SUPPORTED, method, sig, mustExpand);
1252 GenTree* arg = impPopStack().val;
1253 retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD32, arg, NI_AVX_SetAllVector256, baseType, 32);
// SetHighLow: the lower vector is popped first, then the higher one; they are joined
// with InsertVector128 and immediate 1.
1257 case NI_AVX_SetHighLow:
1259 baseType = getBaseTypeOfSIMDType(sig->retTypeSigClass);
1260 GenTree* lowerVector = impSIMDPopStack(TYP_SIMD16);
1261 GenTree* higherVector = impSIMDPopStack(TYP_SIMD16);
1262 retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD32, lowerVector, higherVector, gtNewIconNode(1),
1263 NI_AVX_InsertVector128, baseType, 32);
// ExtractVector128 (AVX and AVX2): handles a two-argument and a three-argument overload.
1267 case NI_AVX_ExtractVector128:
1268 case NI_AVX2_ExtractVector128:
1270 GenTree* lastOp = impPopStack().val;
// A non-constant last operand is only accepted when mustExpand is set.
1271 assert(lastOp->IsCnsIntOrI() || mustExpand);
1272 GenTree* vectorOp = impSIMDPopStack(TYP_SIMD32);
// Two arguments: the node yields a TYP_SIMD16 value whose element type comes from the return class.
1273 if (sig->numArgs == 2)
1275 baseType = getBaseTypeOfSIMDType(sig->retTypeSigClass);
// Non-arithmetic element types are routed to the type-not-supported helper.
1276 if (!varTypeIsArithmetic(baseType))
1278 retNode = impUnsupportedHWIntrinsic(CORINFO_HELP_THROW_TYPE_NOT_SUPPORTED, method, sig, mustExpand);
1282 retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, lastOp, intrinsic, baseType, 32);
// Three arguments: one more operand (op1) is popped, the element type comes from the class
// of the second argument, and the node is typed TYP_VOID.
1287 assert(sig->numArgs == 3);
1288 op1 = impPopStack().val;
1289 CORINFO_ARG_LIST_HANDLE secondArg = info.compCompHnd->getArgNext(sig->args);
1290 CORINFO_CLASS_HANDLE secondArgClass = info.compCompHnd->getArgClass(sig, secondArg);
1291 baseType = getBaseTypeOfSIMDType(secondArgClass);
1292 retNode = gtNewSimdHWIntrinsicNode(TYP_VOID, op1, vectorOp, lastOp, intrinsic, baseType, 32);
1297 JITDUMP("Not implemented hardware intrinsic");
//------------------------------------------------------------------------
// impAESIntrinsic: NOTE(review): presumably the import routine for the Aes
//                  intrinsic class (InstructionSet_AES) -- confirm.
//
// Arguments:
//    intrinsic -- NamedIntrinsic id
//    method    -- method handle (CORINFO_METHOD_HANDLE)
//    sig       -- signature info (CORINFO_SIG_INFO*)
//
1303 GenTree* Compiler::impAESIntrinsic(NamedIntrinsic intrinsic,
1304 CORINFO_METHOD_HANDLE method,
1305 CORINFO_SIG_INFO* sig,
//------------------------------------------------------------------------
// impBMI1Intrinsic: NOTE(review): presumably the import routine for the Bmi1
//                   intrinsic class (InstructionSet_BMI1) -- confirm.
//
// Arguments:
//    intrinsic -- NamedIntrinsic id
//    method    -- method handle (CORINFO_METHOD_HANDLE)
//    sig       -- signature info (CORINFO_SIG_INFO*)
//
1311 GenTree* Compiler::impBMI1Intrinsic(NamedIntrinsic intrinsic,
1312 CORINFO_METHOD_HANDLE method,
1313 CORINFO_SIG_INFO* sig,
//------------------------------------------------------------------------
// impBMI2Intrinsic: NOTE(review): presumably the import routine for the Bmi2
//                   intrinsic class -- confirm.
//
// Arguments:
//    intrinsic -- NamedIntrinsic id
//    method    -- method handle (CORINFO_METHOD_HANDLE)
//    sig       -- signature info (CORINFO_SIG_INFO*)
//
1319 GenTree* Compiler::impBMI2Intrinsic(NamedIntrinsic intrinsic,
1320 CORINFO_METHOD_HANDLE method,
1321 CORINFO_SIG_INFO* sig,
//------------------------------------------------------------------------
// impFMAIntrinsic: NOTE(review): presumably the import routine for the Fma
//                  intrinsic class -- confirm.
//
// Arguments:
//    intrinsic -- NamedIntrinsic id
//    method    -- method handle (CORINFO_METHOD_HANDLE)
//    sig       -- signature info (CORINFO_SIG_INFO*)
//
1327 GenTree* Compiler::impFMAIntrinsic(NamedIntrinsic intrinsic,
1328 CORINFO_METHOD_HANDLE method,
1329 CORINFO_SIG_INFO* sig,
//------------------------------------------------------------------------
// impLZCNTIntrinsic: wrap the operand popped via impPopStack() in a scalar
//                    NI_LZCNT_LeadingZeroCount node
//
// Arguments:
//    intrinsic -- NamedIntrinsic id
//    method    -- method handle (CORINFO_METHOD_HANDLE)
//    sig       -- signature info; must declare exactly one argument and
//                 supplies the return type used for the node
//
// Return Value:
//    The node created by gtNewScalarHWIntrinsicNode.
//
1335 GenTree* Compiler::impLZCNTIntrinsic(NamedIntrinsic intrinsic,
1336 CORINFO_METHOD_HANDLE method,
1337 CORINFO_SIG_INFO* sig,
1340 assert(sig->numArgs == 1);
// The node's type is the signature's return type mapped to a JIT var_types.
1341 var_types callType = JITtype2varType(sig->retType);
1342 return gtNewScalarHWIntrinsicNode(callType, impPopStack().val, NI_LZCNT_LeadingZeroCount);
//------------------------------------------------------------------------
// impPCLMULQDQIntrinsic: NOTE(review): presumably the import routine for the
//                        Pclmulqdq intrinsic class -- confirm.
//
// Arguments:
//    intrinsic -- NamedIntrinsic id
//    method    -- method handle (CORINFO_METHOD_HANDLE)
//    sig       -- signature info (CORINFO_SIG_INFO*)
//
1345 GenTree* Compiler::impPCLMULQDQIntrinsic(NamedIntrinsic intrinsic,
1346 CORINFO_METHOD_HANDLE method,
1347 CORINFO_SIG_INFO* sig,
//------------------------------------------------------------------------
// impPOPCNTIntrinsic: wrap the operand popped via impPopStack() in a scalar
//                     NI_POPCNT_PopCount node
//
// Arguments:
//    intrinsic -- NamedIntrinsic id
//    method    -- method handle (CORINFO_METHOD_HANDLE)
//    sig       -- signature info; must declare exactly one argument and
//                 supplies the return type used for the node
//
// Return Value:
//    The node created by gtNewScalarHWIntrinsicNode.
//
1353 GenTree* Compiler::impPOPCNTIntrinsic(NamedIntrinsic intrinsic,
1354 CORINFO_METHOD_HANDLE method,
1355 CORINFO_SIG_INFO* sig,
1358 assert(sig->numArgs == 1);
// The node's type is the signature's return type mapped to a JIT var_types.
1359 var_types callType = JITtype2varType(sig->retType);
1360 return gtNewScalarHWIntrinsicNode(callType, impPopStack().val, NI_POPCNT_PopCount);
1363 #endif // FEATURE_HW_INTRINSICS