// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

#include "jitpch.h"
#include "hwintrinsic.h"

#ifdef FEATURE_HW_INTRINSICS

static const HWIntrinsicInfo hwIntrinsicInfoArray[] = {
// clang-format off
#define HARDWARE_INTRINSIC(id, name, isa, ival, size, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \
    {NI_##id, name, InstructionSet_##isa, ival, size, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, static_cast<HWIntrinsicFlag>(flag)},
// clang-format on
#include "hwintrinsiclistxarch.h"
};

//------------------------------------------------------------------------
// lookup: Gets the HWIntrinsicInfo associated with a given NamedIntrinsic
//
// Arguments:
//    id -- The NamedIntrinsic associated with the HWIntrinsic to lookup
//
// Return Value:
//    The HWIntrinsicInfo associated with id
const HWIntrinsicInfo& HWIntrinsicInfo::lookup(NamedIntrinsic id)
{
    assert(id != NI_Illegal);

    assert(id > NI_HW_INTRINSIC_START);
    assert(id < NI_HW_INTRINSIC_END);

    return hwIntrinsicInfoArray[id - NI_HW_INTRINSIC_START - 1];
}
//------------------------------------------------------------------------
// lookupId: Gets the NamedIntrinsic for a given method name and InstructionSet
//
// Arguments:
//    className -- The name of the class associated with the HWIntrinsic to lookup
//    methodName -- The name of the method associated with the HWIntrinsic to lookup
//    enclosingClassName -- The name of the enclosing class of X64 classes
//
// Return Value:
//    The NamedIntrinsic associated with methodName and isa
NamedIntrinsic HWIntrinsicInfo::lookupId(const char* className, const char* methodName, const char* enclosingClassName)
{
    // TODO-Throughput: replace sequential search by binary search

    InstructionSet isa = lookupIsa(className, enclosingClassName);
    assert(isa != InstructionSet_ILLEGAL);

    assert(methodName != nullptr);

    for (int i = 0; i < (NI_HW_INTRINSIC_END - NI_HW_INTRINSIC_START - 1); i++)
    {
        if (isa != hwIntrinsicInfoArray[i].isa)
        {
            continue;
        }

        if (strcmp(methodName, hwIntrinsicInfoArray[i].name) == 0)
        {
            return hwIntrinsicInfoArray[i].id;
        }
    }

    // There are several helper intrinsics that are implemented in managed code
    // Those intrinsics will hit this code path and need to return NI_Illegal
    return NI_Illegal;
}
//------------------------------------------------------------------------
// X64VersionOfIsa: Gets the corresponding 64-bit only InstructionSet for a given InstructionSet
//
// Arguments:
//    isa -- The InstructionSet ID
//
// Return Value:
//    The 64-bit only InstructionSet associated with isa
static InstructionSet X64VersionOfIsa(InstructionSet isa)
{
    switch (isa)
    {
        case InstructionSet_SSE:
            return InstructionSet_SSE_X64;
        case InstructionSet_SSE2:
            return InstructionSet_SSE2_X64;
        case InstructionSet_SSE41:
            return InstructionSet_SSE41_X64;
        case InstructionSet_SSE42:
            return InstructionSet_SSE42_X64;
        case InstructionSet_BMI1:
            return InstructionSet_BMI1_X64;
        case InstructionSet_BMI2:
            return InstructionSet_BMI2_X64;
        case InstructionSet_LZCNT:
            return InstructionSet_LZCNT_X64;
        case InstructionSet_POPCNT:
            return InstructionSet_POPCNT_X64;
        default:
            unreached();
            return InstructionSet_ILLEGAL;
    }
}
//------------------------------------------------------------------------
// lookupInstructionSet: Gets the InstructionSet for a given class name
//
// Arguments:
//    className -- The name of the class associated with the InstructionSet to lookup
//
// Return Value:
//    The InstructionSet associated with className
static InstructionSet lookupInstructionSet(const char* className)
{
    assert(className != nullptr);
    if (className[0] == 'A')
    {
        if (strcmp(className, "Aes") == 0)
        {
            return InstructionSet_AES;
        }
        if (strcmp(className, "Avx") == 0)
        {
            return InstructionSet_AVX;
        }
        if (strcmp(className, "Avx2") == 0)
        {
            return InstructionSet_AVX2;
        }
    }
    else if (className[0] == 'S')
    {
        if (strcmp(className, "Sse") == 0)
        {
            return InstructionSet_SSE;
        }
        if (strcmp(className, "Sse2") == 0)
        {
            return InstructionSet_SSE2;
        }
        if (strcmp(className, "Sse3") == 0)
        {
            return InstructionSet_SSE3;
        }
        if (strcmp(className, "Ssse3") == 0)
        {
            return InstructionSet_SSSE3;
        }
        if (strcmp(className, "Sse41") == 0)
        {
            return InstructionSet_SSE41;
        }
        if (strcmp(className, "Sse42") == 0)
        {
            return InstructionSet_SSE42;
        }
    }
    else if (className[0] == 'B')
    {
        if (strcmp(className, "Bmi1") == 0)
        {
            return InstructionSet_BMI1;
        }
        if (strcmp(className, "Bmi2") == 0)
        {
            return InstructionSet_BMI2;
        }
    }
    else if (className[0] == 'P')
    {
        if (strcmp(className, "Pclmulqdq") == 0)
        {
            return InstructionSet_PCLMULQDQ;
        }
        if (strcmp(className, "Popcnt") == 0)
        {
            return InstructionSet_POPCNT;
        }
    }
    else if (strcmp(className, "Fma") == 0)
    {
        return InstructionSet_FMA;
    }
    else if (strcmp(className, "Lzcnt") == 0)
    {
        return InstructionSet_LZCNT;
    }

    return InstructionSet_ILLEGAL;
}
//------------------------------------------------------------------------
// lookupIsa: Gets the InstructionSet for a given class name and enclosing class name
//
// Arguments:
//    className -- The name of the class associated with the InstructionSet to lookup
//    enclosingClassName -- The name of the enclosing class of X64 classes
//
// Return Value:
//    The InstructionSet associated with className and enclosingClassName
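//
// Remarks:
//    For example, className "Sse41" maps to InstructionSet_SSE41, while className "X64"
//    with enclosingClassName "Sse41" maps to InstructionSet_SSE41_X64.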
InstructionSet HWIntrinsicInfo::lookupIsa(const char* className, const char* enclosingClassName)
{
    assert(className != nullptr);

    if (strcmp(className, "X64") == 0)
    {
        assert(enclosingClassName != nullptr);
        return X64VersionOfIsa(lookupInstructionSet(enclosingClassName));
    }
    else
    {
        return lookupInstructionSet(className);
    }
}
//------------------------------------------------------------------------
// lookupSimdSize: Gets the SimdSize for a given HWIntrinsic and signature
//
// Arguments:
//    id -- The ID associated with the HWIntrinsic to lookup
//    sig -- The signature of the HWIntrinsic to lookup
//
// Return Value:
//    The SIMD size for the HWIntrinsic associated with id and sig
//
// Remarks:
//    This function is only used by the importer. After importation, we can
//    get the SIMD size from the GenTreeHWIntrinsic node.
unsigned HWIntrinsicInfo::lookupSimdSize(Compiler* comp, NamedIntrinsic id, CORINFO_SIG_INFO* sig)
{
    if (HWIntrinsicInfo::HasFixedSimdSize(id))
    {
        return lookupSimdSize(id);
    }

    CORINFO_CLASS_HANDLE typeHnd = nullptr;

    if (JITtype2varType(sig->retType) == TYP_STRUCT)
    {
        typeHnd = sig->retTypeSigClass;
    }
    else if (HWIntrinsicInfo::BaseTypeFromFirstArg(id))
    {
        typeHnd = comp->info.compCompHnd->getArgClass(sig, sig->args);
    }
    else
    {
        assert(HWIntrinsicInfo::BaseTypeFromSecondArg(id));
        CORINFO_ARG_LIST_HANDLE secondArg = comp->info.compCompHnd->getArgNext(sig->args);
        typeHnd                           = comp->info.compCompHnd->getArgClass(sig, secondArg);
    }

    unsigned  simdSize = 0;
    var_types baseType = comp->getBaseTypeAndSizeOfSIMDType(typeHnd, &simdSize);
    assert((simdSize > 0) && (baseType != TYP_UNKNOWN));
    return simdSize;
}
//------------------------------------------------------------------------
// lookupNumArgs: Gets the number of args for a given HWIntrinsic node
//
// Arguments:
//    node -- The HWIntrinsic node to get the number of args for
//
// Return Value:
//    The number of args for the HWIntrinsic associated with node
int HWIntrinsicInfo::lookupNumArgs(const GenTreeHWIntrinsic* node)
{
    assert(node != nullptr);

    NamedIntrinsic id      = node->gtHWIntrinsicId;
    int            numArgs = lookupNumArgs(id);

    if (numArgs >= 0)
    {
        return numArgs;
    }

    assert(numArgs == -1);

    GenTree* op1 = node->gtGetOp1();

    if (op1 == nullptr)
    {
        return 0;
    }

    if (op1->OperIsList())
    {
        GenTreeArgList* list = op1->AsArgList();
        numArgs              = 0;

        do
        {
            numArgs++;
            list = list->Rest();
        } while (list != nullptr);

        return numArgs;
    }

    GenTree* op2 = node->gtGetOp2();

    return (op2 == nullptr) ? 1 : 2;
}
//------------------------------------------------------------------------
// lookupLastOp: Gets the last operand for a given HWIntrinsic node
//
// Arguments:
//    node -- The HWIntrinsic node to get the last operand for
//
// Return Value:
//    The last operand for node
GenTree* HWIntrinsicInfo::lookupLastOp(const GenTreeHWIntrinsic* node)
{
    int numArgs = lookupNumArgs(node);

    switch (numArgs)
    {
        case 0:
        {
            assert(node->gtGetOp1() == nullptr);
            assert(node->gtGetOp2() == nullptr);

            return nullptr;
        }

        case 1:
        {
            assert(node->gtGetOp1() != nullptr);
            assert(!node->gtGetOp1()->OperIsList());
            assert(node->gtGetOp2() == nullptr);

            return node->gtGetOp1();
        }

        case 2:
        {
            assert(node->gtGetOp1() != nullptr);
            assert(!node->gtGetOp1()->OperIsList());
            assert(node->gtGetOp2() != nullptr);

            return node->gtGetOp2();
        }

        case 3:
        {
            assert(node->gtGetOp1() != nullptr);
            assert(node->gtGetOp1()->OperIsList());
            assert(node->gtGetOp2() == nullptr);
            assert(node->gtGetOp1()->AsArgList()->Rest()->Rest()->Current() != nullptr);
            assert(node->gtGetOp1()->AsArgList()->Rest()->Rest()->Rest() == nullptr);

            return node->gtGetOp1()->AsArgList()->Rest()->Rest()->Current();
        }

        case 5:
        {
            assert(node->gtGetOp1() != nullptr);
            assert(node->gtGetOp1()->OperIsList());
            assert(node->gtGetOp2() == nullptr);
            assert(node->gtGetOp1()->AsArgList()->Rest()->Rest()->Rest()->Rest()->Current() != nullptr);
            assert(node->gtGetOp1()->AsArgList()->Rest()->Rest()->Rest()->Rest()->Rest() == nullptr);

            return node->gtGetOp1()->AsArgList()->Rest()->Rest()->Rest()->Rest()->Current();
        }

        default:
        {
            unreached();
            return nullptr;
        }
    }
}
//------------------------------------------------------------------------
// isImmOp: Gets a value that indicates whether the HWIntrinsic node has an imm operand
//
// Arguments:
//    id -- The NamedIntrinsic associated with the HWIntrinsic to lookup
//    op -- The operand to check
//
// Return Value:
//    true if the node has an imm operand; otherwise, false
bool HWIntrinsicInfo::isImmOp(NamedIntrinsic id, const GenTree* op)
{
    if (HWIntrinsicInfo::lookupCategory(id) != HW_Category_IMM)
    {
        return false;
    }

    if (!HWIntrinsicInfo::MaybeImm(id))
    {
        return true;
    }

    if (genActualType(op->TypeGet()) != TYP_INT)
    {
        return false;
    }

    return true;
}
//------------------------------------------------------------------------
// lookupImmUpperBound: Gets the upper bound for the imm-value of a given NamedIntrinsic
//
// Arguments:
//    id -- The NamedIntrinsic associated with the HWIntrinsic to lookup
//
// Return Value:
//     The upper bound for the imm-value of the intrinsic associated with id
//
int HWIntrinsicInfo::lookupImmUpperBound(NamedIntrinsic id)
{
    assert(HWIntrinsicInfo::lookupCategory(id) == HW_Category_IMM);

    switch (id)
    {
        case NI_AVX_Compare:
        case NI_AVX_CompareScalar:
        {
            assert(!HWIntrinsicInfo::HasFullRangeImm(id));
            return 31; // enum FloatComparisonMode has 32 values
        }

        case NI_AVX2_GatherVector128:
        case NI_AVX2_GatherVector256:
        case NI_AVX2_GatherMaskVector128:
        case NI_AVX2_GatherMaskVector256:
            return 8;

        default:
        {
            assert(HWIntrinsicInfo::HasFullRangeImm(id));
            return 255;
        }
    }
}
//------------------------------------------------------------------------
// isInImmRange: Check if ival is valid for the intrinsic
//
// Arguments:
//    id -- The NamedIntrinsic associated with the HWIntrinsic to lookup
//    ival -- the imm value to be checked
//
// Return Value:
//     true if ival is valid for the intrinsic
//
bool HWIntrinsicInfo::isInImmRange(NamedIntrinsic id, int ival)
{
    assert(HWIntrinsicInfo::lookupCategory(id) == HW_Category_IMM);

    if (isAVX2GatherIntrinsic(id))
    {
        return ival == 1 || ival == 2 || ival == 4 || ival == 8;
    }
    else
    {
        return ival <= lookupImmUpperBound(id) && ival >= 0;
    }
}
//------------------------------------------------------------------------
// isAVX2GatherIntrinsic: Check if the intrinsic is AVX Gather*
//
// Arguments:
//    id -- The NamedIntrinsic associated with the HWIntrinsic to lookup
//
// Return Value:
//     true if id is AVX Gather* intrinsic
//
bool HWIntrinsicInfo::isAVX2GatherIntrinsic(NamedIntrinsic id)
{
    switch (id)
    {
        case NI_AVX2_GatherVector128:
        case NI_AVX2_GatherVector256:
        case NI_AVX2_GatherMaskVector128:
        case NI_AVX2_GatherMaskVector256:
            return true;
        default:
            return false;
    }
}
//------------------------------------------------------------------------
// isFullyImplementedIsa: Gets a value that indicates whether the InstructionSet is fully implemented
//
// Arguments:
//    isa - The InstructionSet to check
//
// Return Value:
//    true if isa is supported; otherwise, false
bool HWIntrinsicInfo::isFullyImplementedIsa(InstructionSet isa)
{
    switch (isa)
    {
        // These ISAs are fully implemented
        case InstructionSet_AES:
        case InstructionSet_AVX:
        case InstructionSet_AVX2:
        case InstructionSet_Base:
        case InstructionSet_BMI1:
        case InstructionSet_BMI2:
        case InstructionSet_BMI1_X64:
        case InstructionSet_BMI2_X64:
        case InstructionSet_FMA:
        case InstructionSet_LZCNT:
        case InstructionSet_LZCNT_X64:
        case InstructionSet_PCLMULQDQ:
        case InstructionSet_POPCNT:
        case InstructionSet_POPCNT_X64:
        case InstructionSet_SSE:
        case InstructionSet_SSE_X64:
        case InstructionSet_SSE2:
        case InstructionSet_SSE2_X64:
        case InstructionSet_SSE3:
        case InstructionSet_SSSE3:
        case InstructionSet_SSE41:
        case InstructionSet_SSE41_X64:
        case InstructionSet_SSE42:
        case InstructionSet_SSE42_X64:
        {
            return true;
        }

        default:
        {
            return false;
        }
    }
}
//------------------------------------------------------------------------
// isScalarIsa: Gets a value that indicates whether the InstructionSet is scalar
//
// Arguments:
//    isa - The InstructionSet to check
//
// Return Value:
//    true if isa is scalar; otherwise, false
bool HWIntrinsicInfo::isScalarIsa(InstructionSet isa)
{
    switch (isa)
    {
        case InstructionSet_BMI1:
        case InstructionSet_BMI2:
        case InstructionSet_BMI1_X64:
        case InstructionSet_BMI2_X64:
        case InstructionSet_LZCNT:
        case InstructionSet_LZCNT_X64:
        case InstructionSet_POPCNT:
        case InstructionSet_POPCNT_X64:
        {
            return true;
        }

        default:
        {
            return false;
        }
    }
}
//------------------------------------------------------------------------
// getArgForHWIntrinsic: get the argument from the stack and match the signature
//
// Arguments:
//    argType  -- the required type of argument
//    argClass -- the class handle of argType
//
// Return Value:
//     get the argument at the given index from the stack and match the signature
//
GenTree* Compiler::getArgForHWIntrinsic(var_types argType, CORINFO_CLASS_HANDLE argClass)
{
    GenTree* arg = nullptr;
    if (argType == TYP_STRUCT)
    {
        unsigned int argSizeBytes;
        var_types    base = getBaseTypeAndSizeOfSIMDType(argClass, &argSizeBytes);
        argType           = getSIMDTypeForSize(argSizeBytes);
        assert((argType == TYP_SIMD32) || (argType == TYP_SIMD16));
        arg = impSIMDPopStack(argType);
        assert((arg->TypeGet() == TYP_SIMD16) || (arg->TypeGet() == TYP_SIMD32));
    }
    else
    {
        assert(varTypeIsArithmetic(argType));
        arg = impPopStack().val;
        assert(varTypeIsArithmetic(arg->TypeGet()));
        assert(genActualType(arg->gtType) == genActualType(argType));
    }
    return arg;
}
//------------------------------------------------------------------------
// impNonConstFallback: convert certain SSE2/AVX2 shift intrinsics to their semantic alternative when the imm-arg is
// not a compile-time constant
//
// Arguments:
//    intrinsic -- intrinsic ID
//    simdType  -- Vector type
//    baseType  -- base type of the Vector128/256<T>
//
// Return Value:
//     return the IR of the semantic alternative on a non-const imm-arg
//
GenTree* Compiler::impNonConstFallback(NamedIntrinsic intrinsic, var_types simdType, var_types baseType)
{
    assert(HWIntrinsicInfo::NoJmpTableImm(intrinsic));
    switch (intrinsic)
    {
        case NI_SSE2_ShiftLeftLogical:
        case NI_SSE2_ShiftRightArithmetic:
        case NI_SSE2_ShiftRightLogical:
        case NI_AVX2_ShiftLeftLogical:
        case NI_AVX2_ShiftRightArithmetic:
        case NI_AVX2_ShiftRightLogical:
        {
            GenTree* op2 = impPopStack().val;
            GenTree* op1 = impSIMDPopStack(simdType);
            GenTree* tmpOp =
                gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, NI_SSE2_ConvertScalarToVector128Int32, TYP_INT, 16);
            return gtNewSimdHWIntrinsicNode(simdType, op1, tmpOp, intrinsic, baseType, genTypeSize(simdType));
        }

        default:
            unreached();
            return nullptr;
    }
}
//------------------------------------------------------------------------
// addRangeCheckIfNeeded: add a GT_HW_INTRINSIC_CHK node for a non-full-range imm-intrinsic
//
// Arguments:
//    intrinsic  -- intrinsic ID
//    lastOp     -- the last operand of the intrinsic that points to the imm-arg
//    mustExpand -- true if the compiler is compiling the fallback(GT_CALL) of this intrinsic
//
// Return Value:
//     add a GT_HW_INTRINSIC_CHK node for a non-full-range imm-intrinsic, which would throw
//     ArgumentOutOfRangeException when the imm-argument is not in the valid range
//
GenTree* Compiler::addRangeCheckIfNeeded(NamedIntrinsic intrinsic, GenTree* lastOp, bool mustExpand)
{
    assert(lastOp != nullptr);
    // Full-range imm-intrinsics do not need the range-check
    // because the imm-parameter of the intrinsic method is a byte.
    // AVX2 Gather intrinsics do not need the range-check
    // because their imm-parameter has discrete valid values that are handled by managed code
    if (mustExpand && !HWIntrinsicInfo::HasFullRangeImm(intrinsic) && HWIntrinsicInfo::isImmOp(intrinsic, lastOp) &&
        !HWIntrinsicInfo::isAVX2GatherIntrinsic(intrinsic))
    {
        assert(!lastOp->IsCnsIntOrI());
        GenTree* upperBoundNode =
            new (this, GT_CNS_INT) GenTreeIntCon(TYP_INT, HWIntrinsicInfo::lookupImmUpperBound(intrinsic));
        GenTree* index = nullptr;
        if ((lastOp->gtFlags & GTF_SIDE_EFFECT) != 0)
        {
            index = fgInsertCommaFormTemp(&lastOp);
        }
        else
        {
            index = gtCloneExpr(lastOp);
        }
        GenTreeBoundsChk* hwIntrinsicChk = new (this, GT_HW_INTRINSIC_CHK)
            GenTreeBoundsChk(GT_HW_INTRINSIC_CHK, TYP_VOID, index, upperBoundNode, SCK_RNGCHK_FAIL);
        hwIntrinsicChk->gtThrowKind = SCK_ARG_RNG_EXCPN;
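        // The resulting tree evaluates the range check first and then produces the imm
        // operand itself: GT_COMMA(GT_HW_INTRINSIC_CHK(index, upperBound), lastOp).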
        return gtNewOperNode(GT_COMMA, lastOp->TypeGet(), hwIntrinsicChk, lastOp);
    }
    else
    {
        return lastOp;
    }
}
//------------------------------------------------------------------------
// compSupportsHWIntrinsic: compiler support of hardware intrinsics
//
// Arguments:
//    isa - Instruction set
// Return Value:
//    true if
//    - isa is a scalar ISA
//    - isa is a SIMD ISA and featureSIMD=true
//    - isa is fully implemented or EnableIncompleteISAClass=true
bool Compiler::compSupportsHWIntrinsic(InstructionSet isa)
{
    return (featureSIMD || HWIntrinsicInfo::isScalarIsa(isa)) && (
#ifdef DEBUG
                                                                     JitConfig.EnableIncompleteISAClass() ||
#endif
                                                                     HWIntrinsicInfo::isFullyImplementedIsa(isa));
}
//------------------------------------------------------------------------
// impIsTableDrivenHWIntrinsic:
//
// Arguments:
//    category - category of a HW intrinsic
//
// Return Value:
//    returns true if this category can be table-driven in the importer
//
static bool impIsTableDrivenHWIntrinsic(NamedIntrinsic intrinsicId, HWIntrinsicCategory category)
{
    // HW_Flag_NoCodeGen implies this intrinsic should be manually morphed in the importer.
    return (category != HW_Category_Special) && (category != HW_Category_Scalar) &&
           HWIntrinsicInfo::RequiresCodegen(intrinsicId) && !HWIntrinsicInfo::HasSpecialImport(intrinsicId);
}
//------------------------------------------------------------------------
// impHWIntrinsic: dispatch hardware intrinsics to their own implementation
//
// Arguments:
//    intrinsic -- id of the intrinsic function.
//    method    -- method handle of the intrinsic function.
//    sig       -- signature of the intrinsic call
//
// Return Value:
//    the expanded intrinsic.
//
GenTree* Compiler::impHWIntrinsic(NamedIntrinsic        intrinsic,
                                  CORINFO_METHOD_HANDLE method,
                                  CORINFO_SIG_INFO*     sig,
                                  bool                  mustExpand)
{
    InstructionSet      isa      = HWIntrinsicInfo::lookupIsa(intrinsic);
    HWIntrinsicCategory category = HWIntrinsicInfo::lookupCategory(intrinsic);
    int                 numArgs  = sig->numArgs;
    var_types           retType  = JITtype2varType(sig->retType);
    var_types           baseType = TYP_UNKNOWN;

    if ((retType == TYP_STRUCT) && featureSIMD)
    {
        unsigned int sizeBytes;
        baseType = getBaseTypeAndSizeOfSIMDType(sig->retTypeSigClass, &sizeBytes);
        retType  = getSIMDTypeForSize(sizeBytes);
        assert(sizeBytes != 0);
    }

    // This intrinsic is supported if
    // - the ISA is available on the underlying hardware (compSupports returns true)
    // - the compiler supports this hardware intrinsic (compSupportsHWIntrinsic returns true)
    bool issupported = compSupports(isa) && compSupportsHWIntrinsic(isa);

    if (category == HW_Category_IsSupportedProperty)
    {
        return gtNewIconNode(issupported);
    }
    // - calls to unsupported intrinsics must throw PlatformNotSupportedException
    else if (!issupported)
    {
        return impUnsupportedHWIntrinsic(CORINFO_HELP_THROW_PLATFORM_NOT_SUPPORTED, method, sig, mustExpand);
    }

    // Avoid checking stacktop for 0-op intrinsics
    if (sig->numArgs > 0 && HWIntrinsicInfo::isImmOp(intrinsic, impStackTop().val))
    {
        GenTree* lastOp = impStackTop().val;
        // The imm-HWintrinsics that do not accept all imm8 values may throw
        // ArgumentOutOfRangeException when the imm argument is not in the valid range
        if (!HWIntrinsicInfo::HasFullRangeImm(intrinsic))
        {
            if (!mustExpand && lastOp->IsCnsIntOrI() &&
                !HWIntrinsicInfo::isInImmRange(intrinsic, (int)lastOp->AsIntCon()->IconValue()))
            {
                return nullptr;
            }
        }

        if (!lastOp->IsCnsIntOrI())
        {
            if (HWIntrinsicInfo::NoJmpTableImm(intrinsic))
            {
                return impNonConstFallback(intrinsic, retType, baseType);
            }

            if (!mustExpand)
            {
                // When the imm-argument is not a constant and we are not being forced to expand, we need to
                // return nullptr so a GT_CALL to the intrinsic method is emitted instead. The
                // intrinsic method is recursive and will be forced to expand, at which point
                // we emit some less efficient fallback code.
                return nullptr;
            }
        }
    }

    bool isTableDriven = impIsTableDrivenHWIntrinsic(intrinsic, category);

    if (isTableDriven && ((category == HW_Category_MemoryStore) || HWIntrinsicInfo::BaseTypeFromFirstArg(intrinsic) ||
                          HWIntrinsicInfo::BaseTypeFromSecondArg(intrinsic)))
    {
        if (HWIntrinsicInfo::BaseTypeFromFirstArg(intrinsic))
        {
            baseType = getBaseTypeOfSIMDType(info.compCompHnd->getArgClass(sig, sig->args));
        }
        else
        {
            assert((category == HW_Category_MemoryStore) || HWIntrinsicInfo::BaseTypeFromSecondArg(intrinsic));
            CORINFO_ARG_LIST_HANDLE secondArg      = info.compCompHnd->getArgNext(sig->args);
            CORINFO_CLASS_HANDLE    secondArgClass = info.compCompHnd->getArgClass(sig, secondArg);
            baseType                               = getBaseTypeOfSIMDType(secondArgClass);

            if (baseType == TYP_UNKNOWN) // the second argument is not a vector
            {
                baseType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, secondArg, &secondArgClass)));
            }
        }
    }

    if (HWIntrinsicInfo::IsFloatingPointUsed(intrinsic))
    {
        // Set `compFloatingPointUsed` to cover the scenario where an intrinsic is being used on SIMD fields, but
        // where no SIMD local vars are in use. This is the same logic as is used for FEATURE_SIMD.
        compFloatingPointUsed = true;
    }

    // table-driven importer of simple intrinsics
    if (isTableDriven)
    {
        unsigned                simdSize = HWIntrinsicInfo::lookupSimdSize(this, intrinsic, sig);
        CORINFO_ARG_LIST_HANDLE argList  = sig->args;
        CORINFO_CLASS_HANDLE    argClass;
        var_types               argType = TYP_UNKNOWN;

        assert(numArgs >= 0);
        assert(HWIntrinsicInfo::lookupIns(intrinsic, baseType) != INS_invalid);
        assert(simdSize == 32 || simdSize == 16);

        GenTreeHWIntrinsic* retNode = nullptr;
        GenTree*            op1     = nullptr;
        GenTree*            op2     = nullptr;

        switch (numArgs)
        {
            case 0:
                retNode = gtNewSimdHWIntrinsicNode(retType, intrinsic, baseType, simdSize);
                break;
            case 1:
                argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass)));
                op1     = getArgForHWIntrinsic(argType, argClass);
                retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, baseType, simdSize);
                break;
            case 2:
                argType = JITtype2varType(
                    strip(info.compCompHnd->getArgType(sig, info.compCompHnd->getArgNext(argList), &argClass)));
                op2 = getArgForHWIntrinsic(argType, argClass);

                op2 = addRangeCheckIfNeeded(intrinsic, op2, mustExpand);

                argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass)));
                op1     = getArgForHWIntrinsic(argType, argClass);

                retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, baseType, simdSize);
                break;

            case 3:
            {
                CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(argList);
                CORINFO_ARG_LIST_HANDLE arg3 = info.compCompHnd->getArgNext(arg2);

                argType      = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg3, &argClass)));
                GenTree* op3 = getArgForHWIntrinsic(argType, argClass);

                op3 = addRangeCheckIfNeeded(intrinsic, op3, mustExpand);

                argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass)));
                op2     = getArgForHWIntrinsic(argType, argClass);

                var_types op2Type = TYP_UNKNOWN;
                if (intrinsic == NI_AVX2_GatherVector128 || intrinsic == NI_AVX2_GatherVector256)
                {
                    assert(varTypeIsSIMD(op2->TypeGet()));
                    op2Type = getBaseTypeOfSIMDType(argClass);
                }

                argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass)));
                op1     = getArgForHWIntrinsic(argType, argClass);

                retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, intrinsic, baseType, simdSize);

                if (intrinsic == NI_AVX2_GatherVector128 || intrinsic == NI_AVX2_GatherVector256)
                {
                    assert(varTypeIsSIMD(op2->TypeGet()));
                    retNode->AsHWIntrinsic()->gtIndexBaseType = op2Type;
                }
                break;
            }

            default:
                unreached();
        }

        bool isMemoryStore = retNode->OperIsMemoryStore();
        if (isMemoryStore || retNode->OperIsMemoryLoad())
        {
            if (isMemoryStore)
            {
                // A MemoryStore operation is an assignment
                retNode->gtFlags |= GTF_ASG;
            }

            // This operation contains an implicit indirection
            //   it could point into the global heap or
            //   it could throw a null reference exception.
            //
            retNode->gtFlags |= (GTF_GLOB_REF | GTF_EXCEPT);
        }
        return retNode;
    }

    // other intrinsics need special importation
    switch (isa)
    {
        case InstructionSet_Base:
            return impBaseIntrinsic(intrinsic, method, sig, mustExpand);
        case InstructionSet_SSE:
            return impSSEIntrinsic(intrinsic, method, sig, mustExpand);
        case InstructionSet_SSE2:
            return impSSE2Intrinsic(intrinsic, method, sig, mustExpand);
        case InstructionSet_SSE42:
        case InstructionSet_SSE42_X64:
            return impSSE42Intrinsic(intrinsic, method, sig, mustExpand);
        case InstructionSet_AVX:
        case InstructionSet_AVX2:
            return impAvxOrAvx2Intrinsic(intrinsic, method, sig, mustExpand);

        case InstructionSet_AES:
            return impAESIntrinsic(intrinsic, method, sig, mustExpand);
        case InstructionSet_BMI1:
        case InstructionSet_BMI1_X64:
        case InstructionSet_BMI2:
        case InstructionSet_BMI2_X64:
            return impBMI1OrBMI2Intrinsic(intrinsic, method, sig, mustExpand);

        case InstructionSet_FMA:
            return impFMAIntrinsic(intrinsic, method, sig, mustExpand);
        case InstructionSet_LZCNT:
        case InstructionSet_LZCNT_X64:
            return impLZCNTIntrinsic(intrinsic, method, sig, mustExpand);
        case InstructionSet_PCLMULQDQ:
            return impPCLMULQDQIntrinsic(intrinsic, method, sig, mustExpand);
        case InstructionSet_POPCNT:
        case InstructionSet_POPCNT_X64:
            return impPOPCNTIntrinsic(intrinsic, method, sig, mustExpand);
        default:
            return nullptr;
    }
}
//------------------------------------------------------------------------
// impBaseIntrinsic: dispatch intrinsics to their own implementation
//
// Arguments:
//    intrinsic  -- id of the intrinsic function.
//    method     -- method handle of the intrinsic function.
//    sig        -- signature of the intrinsic call
//    mustExpand -- true if the compiler is compiling the fallback(GT_CALL) of this intrinsic
//
// Return Value:
//    the expanded intrinsic.
//
GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic        intrinsic,
                                    CORINFO_METHOD_HANDLE method,
                                    CORINFO_SIG_INFO*     sig,
                                    bool                  mustExpand)
{
    GenTree* retNode = nullptr;
    GenTree* op1     = nullptr;

    if (!featureSIMD)
    {
        return nullptr;
    }

    unsigned  simdSize = 0;
    var_types baseType = TYP_UNKNOWN;
    var_types retType  = JITtype2varType(sig->retType);

    assert(!sig->hasThis());

    if (HWIntrinsicInfo::BaseTypeFromFirstArg(intrinsic))
    {
        baseType = getBaseTypeAndSizeOfSIMDType(info.compCompHnd->getArgClass(sig, sig->args), &simdSize);

        if (retType == TYP_STRUCT)
        {
            unsigned  retSimdSize = 0;
            var_types retBasetype = getBaseTypeAndSizeOfSIMDType(sig->retTypeClass, &retSimdSize);
            if (!varTypeIsArithmetic(retBasetype))
            {
                return nullptr;
            }
            retType = getSIMDTypeForSize(retSimdSize);
        }
    }
    else
    {
        assert(retType == TYP_STRUCT);
        baseType = getBaseTypeAndSizeOfSIMDType(sig->retTypeClass, &simdSize);
        retType  = getSIMDTypeForSize(simdSize);
    }

    if (!varTypeIsArithmetic(baseType))
    {
        return nullptr;
    }

    switch (intrinsic)
    {
        case NI_Base_Vector256_As:
        case NI_Base_Vector256_AsByte:
        case NI_Base_Vector256_AsDouble:
        case NI_Base_Vector256_AsInt16:
        case NI_Base_Vector256_AsInt32:
        case NI_Base_Vector256_AsInt64:
        case NI_Base_Vector256_AsSByte:
        case NI_Base_Vector256_AsSingle:
        case NI_Base_Vector256_AsUInt16:
        case NI_Base_Vector256_AsUInt32:
        case NI_Base_Vector256_AsUInt64:
        {
            if (!compSupports(InstructionSet_AVX))
            {
                // We don't want to deal with TYP_SIMD32 if the compiler doesn't otherwise support the type.
                break;
            }

            __fallthrough;
        }

        case NI_Base_Vector128_As:
        case NI_Base_Vector128_AsByte:
        case NI_Base_Vector128_AsDouble:
        case NI_Base_Vector128_AsInt16:
        case NI_Base_Vector128_AsInt32:
        case NI_Base_Vector128_AsInt64:
        case NI_Base_Vector128_AsSByte:
        case NI_Base_Vector128_AsSingle:
        case NI_Base_Vector128_AsUInt16:
        case NI_Base_Vector128_AsUInt32:
        case NI_Base_Vector128_AsUInt64:
        {
            // We fold away the cast here, as it only exists to satisfy
            // the type system. It is safe to do this here since the retNode type
            // and the signature return type are both the same TYP_SIMD.

            assert(sig->numArgs == 1);

            retNode = impSIMDPopStack(retType, /* expectAddr: */ false, sig->retTypeClass);
            SetOpLclRelatedToSIMDIntrinsic(retNode);
            assert(retNode->gtType == getSIMDTypeForSize(getSIMDTypeSizeInBytes(sig->retTypeSigClass)));
            break;
        }
        case NI_Base_Vector128_CreateScalarUnsafe:
        {
            assert(sig->numArgs == 1);

#ifdef _TARGET_X86_
            if (varTypeIsLong(baseType))
            {
                // TODO-XARCH-CQ: It may be beneficial to emit the movq
                // instruction, which takes a 64-bit memory address and
                // works on 32-bit x86 systems.
                break;
            }
#endif // _TARGET_X86_

            if (compSupports(InstructionSet_SSE2) || (compSupports(InstructionSet_SSE) && (baseType == TYP_FLOAT)))
            {
                op1     = impPopStack().val;
                retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, baseType, simdSize);
            }
            break;
        }

        case NI_Base_Vector128_ToScalar:
        {
            assert(sig->numArgs == 1);

            if (compSupports(InstructionSet_SSE) && varTypeIsFloating(baseType))
            {
                op1     = impSIMDPopStack(getSIMDTypeForSize(simdSize));
                retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, baseType, 16);
            }
            break;
        }

        case NI_Base_Vector128_ToVector256:
        case NI_Base_Vector128_ToVector256Unsafe:
        case NI_Base_Vector256_GetLower:
        {
            assert(sig->numArgs == 1);

            if (compSupports(InstructionSet_AVX))
            {
                op1     = impSIMDPopStack(getSIMDTypeForSize(simdSize));
                retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, baseType, simdSize);
            }
            break;
        }

        case NI_Base_Vector128_Zero:
        {
            assert(sig->numArgs == 0);

            if (compSupports(InstructionSet_SSE))
            {
                retNode = gtNewSimdHWIntrinsicNode(retType, intrinsic, baseType, simdSize);
            }
            break;
        }

        case NI_Base_Vector256_CreateScalarUnsafe:
        {
            assert(sig->numArgs == 1);

#ifdef _TARGET_X86_
            if (varTypeIsLong(baseType))
            {
                // TODO-XARCH-CQ: It may be beneficial to emit the movq
                // instruction, which takes a 64-bit memory address and
                // works on 32-bit x86 systems.
                break;
            }
#endif // _TARGET_X86_

            if (compSupports(InstructionSet_AVX))
            {
                op1     = impPopStack().val;
                retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, baseType, simdSize);
            }
            break;
        }

        case NI_Base_Vector256_ToScalar:
        {
            assert(sig->numArgs == 1);

            if (compSupports(InstructionSet_AVX) && varTypeIsFloating(baseType))
            {
                op1     = impSIMDPopStack(getSIMDTypeForSize(simdSize));
                retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, baseType, 32);
            }
            break;
        }

        case NI_Base_Vector256_Zero:
        {
            assert(sig->numArgs == 0);

            if (compSupports(InstructionSet_AVX))
            {
                retNode = gtNewSimdHWIntrinsicNode(retType, intrinsic, baseType, simdSize);
            }
            break;
        }
        case NI_Base_Vector256_WithElement:
        {
            if (!compSupports(InstructionSet_AVX))
            {
                // Using software fallback if JIT/hardware don't support AVX instructions and YMM registers
                return nullptr;
            }

            __fallthrough;
        }

        case NI_Base_Vector128_WithElement:
        {
            assert(sig->numArgs == 3);
            GenTree* indexOp = impStackTop(1).val;
            if (!compSupports(InstructionSet_SSE2) || !varTypeIsArithmetic(baseType) || !indexOp->OperIsConst())
            {
                // Using software fallback if
                // 1. JIT/hardware don't support SSE2 instructions
                // 2. baseType is not a numeric type (throws exceptions)
                // 3. index is not a constant
                return nullptr;
            }

            switch (baseType)
            {
                // Using software fallback if baseType is not supported by hardware
                case TYP_BYTE:
                case TYP_UBYTE:
                case TYP_INT:
                case TYP_UINT:
                    if (!compSupports(InstructionSet_SSE41))
                    {
                        return nullptr;
                    }
                    break;

                case TYP_LONG:
                case TYP_ULONG:
                    if (!compSupports(InstructionSet_SSE41_X64))
                    {
                        return nullptr;
                    }
                    break;

                case TYP_DOUBLE:
                case TYP_FLOAT:
                case TYP_SHORT:
                case TYP_USHORT:
                    // short/ushort/float/double is supported by SSE2
                    break;

                default:
                    unreached();
                    break;
            }

            ssize_t imm8       = indexOp->AsIntCon()->IconValue();
            ssize_t cachedImm8 = imm8;
            ssize_t count      = simdSize / genTypeSize(baseType);

            if (imm8 >= count || imm8 < 0)
            {
                // Using software fallback if index is out of range (throws exception)
                return nullptr;
            }

            GenTree* valueOp = impPopStack().val;
            impPopStack(); // pop the indexOp that we already have
            GenTree* vectorOp = impSIMDPopStack(getSIMDTypeForSize(simdSize));

            GenTree* clonedVectorOp = nullptr;

            if (intrinsic == NI_Base_Vector256_WithElement)
            {
                // Extract the half vector that will be modified
                assert(compSupports(InstructionSet_AVX));

                // copy `vectorOp` to accept the modified half vector
                vectorOp = impCloneExpr(vectorOp, &clonedVectorOp, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL,
                                        nullptr DEBUGARG("Clone Vector for Vector256<T>.WithElement"));

                if (imm8 >= count / 2)
                {
                    imm8 -= count / 2;
                    vectorOp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, gtNewIconNode(1), NI_AVX_ExtractVector128,
                                                        baseType, simdSize);
                }
                else
                {
                    vectorOp =
                        gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, NI_Base_Vector256_GetLower, baseType, simdSize);
                }
            }

            GenTree* immNode = gtNewIconNode(imm8);

            switch (baseType)
            {
                case TYP_LONG:
                case TYP_ULONG:
                    retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, valueOp, immNode, NI_SSE41_X64_Insert,
                                                       baseType, 16);
                    break;

                case TYP_FLOAT:
                {
                    if (!compSupports(InstructionSet_SSE41))
                    {
                        // Emulate Vector128<float>.WithElement by SSE instructions
                        if (imm8 == 0)
                        {
                            // vector.WithElement(0, value)
                            // =>
                            // movss xmm0, xmm1 (xmm0 = vector, xmm1 = value)
                            valueOp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, valueOp,
                                                               NI_Base_Vector128_CreateScalarUnsafe, TYP_FLOAT, 16);
                            retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, valueOp, NI_SSE_MoveScalar,
                                                               TYP_FLOAT, 16);
                        }
                        else if (imm8 == 1)
                        {
                            // vector.WithElement(1, value)
                            // =>
                            // shufps xmm1, xmm0, 0   (xmm0 = vector, xmm1 = value)
                            // shufps xmm1, xmm0, 226
                            GenTree* tmpOp =
                                gtNewSimdHWIntrinsicNode(TYP_SIMD16, valueOp, NI_Base_Vector128_CreateScalarUnsafe,
                                                         TYP_FLOAT, 16);
                            GenTree* dupVectorOp = nullptr;
                            vectorOp = impCloneExpr(vectorOp, &dupVectorOp, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL,
                                                    nullptr DEBUGARG("Clone Vector for Vector128<float>.WithElement"));
                            tmpOp   = gtNewSimdHWIntrinsicNode(TYP_SIMD16, tmpOp, vectorOp, gtNewIconNode(0),
                                                             NI_SSE_Shuffle, TYP_FLOAT, 16);
                            retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, tmpOp, dupVectorOp, gtNewIconNode(226),
                                                               NI_SSE_Shuffle, TYP_FLOAT, 16);
                        }
                        else
                        {
                            ssize_t controlBits1 = 0;
                            ssize_t controlBits2 = 0;
                            if (imm8 == 2)
                            {
                                controlBits1 = 48;
                                controlBits2 = 132;
                            }
                            else
                            {
                                controlBits1 = 32;
                                controlBits2 = 36;
                            }

                            // vector.WithElement(2, value)
                            // =>
                            // shufps xmm1, xmm0, 48  (xmm0 = vector, xmm1 = value)
                            // shufps xmm0, xmm1, 132
                            //
                            // vector.WithElement(3, value)
                            // =>
                            // shufps xmm1, xmm0, 32  (xmm0 = vector, xmm1 = value)
                            // shufps xmm0, xmm1, 36
                            GenTree* tmpOp =
                                gtNewSimdHWIntrinsicNode(TYP_SIMD16, valueOp, NI_Base_Vector128_CreateScalarUnsafe,
                                                         TYP_FLOAT, 16);
                            GenTree* dupVectorOp = nullptr;
                            vectorOp = impCloneExpr(vectorOp, &dupVectorOp, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL,
                                                    nullptr DEBUGARG("Clone Vector for Vector128<float>.WithElement"));
                            valueOp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, tmpOp, gtNewIconNode(controlBits1),
                                                               NI_SSE_Shuffle, TYP_FLOAT, 16);
                            retNode =
                                gtNewSimdHWIntrinsicNode(TYP_SIMD16, valueOp, dupVectorOp, gtNewIconNode(controlBits2),
                                                         NI_SSE_Shuffle, TYP_FLOAT, 16);
                        }
                        break;
                    }

                    valueOp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, valueOp, NI_Base_Vector128_CreateScalarUnsafe,
                                                       TYP_FLOAT, 16);
                    immNode->AsIntCon()->SetIconValue(imm8 * 16);
                    __fallthrough;
                }

                case TYP_BYTE:
                case TYP_UBYTE:
                case TYP_INT:
                case TYP_UINT:
                    retNode =
                        gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, valueOp, immNode, NI_SSE41_Insert, baseType, 16);
                    break;

                case TYP_SHORT:
                case TYP_USHORT:
                    retNode =
                        gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, valueOp, immNode, NI_SSE2_Insert, baseType, 16);
                    break;

                case TYP_DOUBLE:
                {
                    // vector.WithElement(0, value)
                    // =>
                    // movsd xmm0, xmm1 (xmm0 = vector, xmm1 = value)
                    //
                    // vector.WithElement(1, value)
                    // =>
                    // unpcklpd xmm0, xmm1 (xmm0 = vector, xmm1 = value)
                    valueOp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, valueOp, NI_Base_Vector128_CreateScalarUnsafe,
                                                       TYP_DOUBLE, 16);
                    NamedIntrinsic in = (imm8 == 0) ? NI_SSE2_MoveScalar : NI_SSE2_UnpackLow;
                    retNode           = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, valueOp, in, TYP_DOUBLE, 16);
                    break;
                }

                default:
                    unreached();
                    break;
            }

            if (intrinsic == NI_Base_Vector256_WithElement)
            {
                assert(clonedVectorOp);
                int upperOrLower = (cachedImm8 >= count / 2) ? 1 : 0;
                retNode = gtNewSimdHWIntrinsicNode(retType, clonedVectorOp, retNode, gtNewIconNode(upperOrLower),
                                                   NI_AVX_InsertVector128, baseType, simdSize);
            }

            break;
        }
        case NI_Base_Vector256_GetElement:
        {
            if (!compSupports(InstructionSet_AVX))
            {
                // Using software fallback if JIT/hardware don't support AVX instructions and YMM registers
                return nullptr;
            }

            __fallthrough;
        }

        case NI_Base_Vector128_GetElement:
        {
            assert(sig->numArgs == 2);
            GenTree* indexOp = impStackTop().val;
            if (!compSupports(InstructionSet_SSE2) || !varTypeIsArithmetic(baseType) || !indexOp->OperIsConst())
            {
                // Using software fallback if
                // 1. JIT/hardware don't support SSE2 instructions
                // 2. baseType is not a numeric type (throws exceptions)
                // 3. index is not a constant
                return nullptr;
            }

            switch (baseType)
            {
                // Using software fallback if baseType is not supported by hardware
                case TYP_BYTE:
                case TYP_UBYTE:
                case TYP_INT:
                case TYP_UINT:
                    if (!compSupports(InstructionSet_SSE41))
                    {
                        return nullptr;
                    }
                    break;

                case TYP_LONG:
                case TYP_ULONG:
                    if (!compSupports(InstructionSet_SSE41_X64))
                    {
                        return nullptr;
                    }
                    break;

                case TYP_DOUBLE:
                case TYP_FLOAT:
                case TYP_SHORT:
                case TYP_USHORT:
                    // short/ushort/float/double is supported by SSE2
                    break;

                default:
                    unreached();
                    break;
            }

            ssize_t imm8  = indexOp->AsIntCon()->IconValue();
            ssize_t count = simdSize / genTypeSize(baseType);

            if (imm8 >= count || imm8 < 0)
            {
                // Using software fallback if index is out of range (throws exception)
                return nullptr;
            }

            impPopStack(); // pop the indexOp that we already have
            GenTree* vectorOp = impSIMDPopStack(getSIMDTypeForSize(simdSize));

            NamedIntrinsic resIntrinsic = NI_Illegal;

            if (intrinsic == NI_Base_Vector256_GetElement)
            {
                assert(compSupports(InstructionSet_AVX));

                if (imm8 >= count / 2)
                {
                    imm8 -= count / 2;
                    vectorOp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, gtNewIconNode(1), NI_AVX_ExtractVector128,
                                                        baseType, simdSize);
                }
                else
                {
                    vectorOp =
                        gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, NI_Base_Vector256_GetLower, baseType, simdSize);
                }
            }

            if (imm8 == 0 && (genTypeSize(baseType) >= 4))
            {
                switch (baseType)
                {
                    case TYP_LONG:
                        resIntrinsic = NI_SSE2_X64_ConvertToInt64;
                        break;

                    case TYP_ULONG:
                        resIntrinsic = NI_SSE2_X64_ConvertToUInt64;
                        break;

                    case TYP_INT:
                        resIntrinsic = NI_SSE2_ConvertToInt32;
                        break;

                    case TYP_UINT:
                        resIntrinsic = NI_SSE2_ConvertToUInt32;
                        break;

                    case TYP_FLOAT:
                    case TYP_DOUBLE:
                        resIntrinsic = NI_Base_Vector128_ToScalar;
                        break;

                    default:
                        unreached();
                        break;
                }

                return gtNewSimdHWIntrinsicNode(retType, vectorOp, resIntrinsic, baseType, 16);
            }

            GenTree* immNode = gtNewIconNode(imm8);

            switch (baseType)
            {
                case TYP_LONG:
                case TYP_ULONG:
                    retNode = gtNewSimdHWIntrinsicNode(retType, vectorOp, immNode, NI_SSE41_X64_Extract, baseType, 16);
                    break;

                case TYP_FLOAT:
                {
                    if (!compSupports(InstructionSet_SSE41))
                    {
                        assert(imm8 >= 1);
                        assert(imm8 <= 3);
                        // Emulate Vector128<float>.GetElement(i) by SSE instructions
                        // vector.GetElement(i)
                        // =>
                        // shufps xmm0, xmm0, control
                        // (xmm0 = vector, control = i + 228)
                        immNode->AsIntCon()->SetIconValue(228 + imm8);
                        GenTree* clonedVectorOp = nullptr;
                        vectorOp = impCloneExpr(vectorOp, &clonedVectorOp, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL,
                                                nullptr DEBUGARG("Clone Vector for Vector128<float>.GetElement"));
                        vectorOp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, clonedVectorOp, immNode,
                                                            NI_SSE_Shuffle, TYP_FLOAT, 16);
                        return gtNewSimdHWIntrinsicNode(retType, vectorOp, NI_Base_Vector128_ToScalar, TYP_FLOAT, 16);
                    }

                    __fallthrough;
                }

                case TYP_UBYTE:
                case TYP_INT:
                case TYP_UINT:
                    retNode = gtNewSimdHWIntrinsicNode(retType, vectorOp, immNode, NI_SSE41_Extract, baseType, 16);
                    break;

                case TYP_BYTE:
                    // We do not have SSE41/SSE2 Extract APIs on signed small int, so need a CAST on the result
                    retNode = gtNewSimdHWIntrinsicNode(TYP_UBYTE, vectorOp, immNode, NI_SSE41_Extract, TYP_UBYTE, 16);
                    retNode = gtNewCastNode(TYP_INT, retNode, true, TYP_BYTE);
                    break;

                case TYP_SHORT:
                case TYP_USHORT:
                    // We do not have SSE41/SSE2 Extract APIs on signed small int, so need a CAST on the result
                    retNode = gtNewSimdHWIntrinsicNode(TYP_USHORT, vectorOp, immNode, NI_SSE2_Extract, TYP_USHORT, 16);
                    if (baseType == TYP_SHORT)
                    {
                        retNode = gtNewCastNode(TYP_INT, retNode, true, TYP_SHORT);
                    }
                    break;

                case TYP_DOUBLE:
                    assert(imm8 == 1);
                    // vector.GetElement(1)
                    // =>
                    // pshufd xmm1, xmm0, 0xEE (xmm0 = vector)
                    vectorOp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, gtNewIconNode(0xEE), NI_SSE2_Shuffle,
                                                        TYP_INT, 16);
                    retNode =
                        gtNewSimdHWIntrinsicNode(TYP_DOUBLE, vectorOp, NI_Base_Vector128_ToScalar, TYP_DOUBLE, 16);
                    break;

                default:
                    unreached();
                    break;
            }

            break;
        }

        default:
            unreached();
            break;
    }

    return retNode;
}
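//------------------------------------------------------------------------
// impSSEIntrinsic: special importer for SSE intrinsics that are not table-driven
//
// Arguments:
//    intrinsic  -- id of the intrinsic function.
//    method     -- method handle of the intrinsic function.
//    sig        -- signature of the intrinsic call
//    mustExpand -- true if the compiler is compiling the fallback(GT_CALL) of this intrinsic
//
// Return Value:
//    the expanded intrinsic, or nullptr if the intrinsic is not handled here.
//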
GenTree* Compiler::impSSEIntrinsic(NamedIntrinsic        intrinsic,
                                   CORINFO_METHOD_HANDLE method,
                                   CORINFO_SIG_INFO*     sig,
                                   bool                  mustExpand)
{
    GenTree* retNode = nullptr;
    GenTree* op1     = nullptr;
    GenTree* op2     = nullptr;
    GenTree* op3     = nullptr;
    GenTree* op4     = nullptr;
    int      simdSize = HWIntrinsicInfo::lookupSimdSize(this, intrinsic, sig);

    // The Prefetch and StoreFence intrinsics don't take any SIMD operands
    // and have a simdSize of 0
    assert((simdSize == 16) || (simdSize == 0));

    switch (intrinsic)
    {
        case NI_SSE_Prefetch0:
        case NI_SSE_Prefetch1:
        case NI_SSE_Prefetch2:
        case NI_SSE_PrefetchNonTemporal:
        {
            assert(sig->numArgs == 1);
            assert(JITtype2varType(sig->retType) == TYP_VOID);
            op1     = impPopStack().val;
            retNode = gtNewSimdHWIntrinsicNode(TYP_VOID, op1, intrinsic, TYP_UBYTE, 0);
            break;
        }

        case NI_SSE_StoreFence:
            assert(sig->numArgs == 0);
            assert(JITtype2varType(sig->retType) == TYP_VOID);
            retNode = gtNewSimdHWIntrinsicNode(TYP_VOID, intrinsic, TYP_VOID, 0);
            break;

        default:
            JITDUMP("Not implemented hardware intrinsic");
            break;
    }
    return retNode;
}
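//------------------------------------------------------------------------
// impSSE2Intrinsic: special importer for SSE2 intrinsics that are not table-driven
//
// Arguments:
//    intrinsic  -- id of the intrinsic function.
//    method     -- method handle of the intrinsic function.
//    sig        -- signature of the intrinsic call
//    mustExpand -- true if the compiler is compiling the fallback(GT_CALL) of this intrinsic
//
// Return Value:
//    the expanded intrinsic, or nullptr if the intrinsic is not handled here.
//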
GenTree* Compiler::impSSE2Intrinsic(NamedIntrinsic        intrinsic,
                                    CORINFO_METHOD_HANDLE method,
                                    CORINFO_SIG_INFO*     sig,
                                    bool                  mustExpand)
{
    GenTree*  retNode  = nullptr;
    GenTree*  op1      = nullptr;
    GenTree*  op2      = nullptr;
    int       simdSize = HWIntrinsicInfo::lookupSimdSize(this, intrinsic, sig);
    var_types baseType = TYP_UNKNOWN;
    var_types retType  = TYP_UNKNOWN;

    // The fencing intrinsics don't take any operands and simdSize is 0
    assert((simdSize == 16) || (simdSize == 0));

    CORINFO_ARG_LIST_HANDLE argList = sig->args;
    var_types               argType = TYP_UNKNOWN;

    switch (intrinsic)
    {
        case NI_SSE2_CompareLessThan:
        {
            assert(sig->numArgs == 2);
            op2      = impSIMDPopStack(TYP_SIMD16);
            op1      = impSIMDPopStack(TYP_SIMD16);
            baseType = getBaseTypeOfSIMDType(sig->retTypeSigClass);
            if (baseType == TYP_DOUBLE)
            {
                retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, intrinsic, baseType, simdSize);
            }
            else
            {
                retNode =
                    gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, op1, NI_SSE2_CompareGreaterThan, baseType, simdSize);
            }
            break;
        }

        case NI_SSE2_LoadFence:
        case NI_SSE2_MemoryFence:
        {
            assert(sig->numArgs == 0);
            assert(JITtype2varType(sig->retType) == TYP_VOID);
            assert(simdSize == 0);

            retNode = gtNewSimdHWIntrinsicNode(TYP_VOID, intrinsic, TYP_VOID, simdSize);
            break;
        }

        case NI_SSE2_StoreNonTemporal:
        {
            assert(sig->numArgs == 2);
            assert(JITtype2varType(sig->retType) == TYP_VOID);
            op2     = impPopStack().val;
            op1     = impPopStack().val;
            retNode = gtNewSimdHWIntrinsicNode(TYP_VOID, op1, op2, NI_SSE2_StoreNonTemporal, op2->TypeGet(), 0);
            break;
        }

        default:
            JITDUMP("Not implemented hardware intrinsic");
            break;
    }
    return retNode;
}
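//------------------------------------------------------------------------
// impSSE42Intrinsic: special importer for SSE4.2 intrinsics (Crc32) that are not table-driven
//
// Arguments:
//    intrinsic  -- id of the intrinsic function.
//    method     -- method handle of the intrinsic function.
//    sig        -- signature of the intrinsic call
//    mustExpand -- true if the compiler is compiling the fallback(GT_CALL) of this intrinsic
//
// Return Value:
//    the expanded intrinsic, or nullptr if the intrinsic is not handled here.
//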
GenTree* Compiler::impSSE42Intrinsic(NamedIntrinsic        intrinsic,
                                     CORINFO_METHOD_HANDLE method,
                                     CORINFO_SIG_INFO*     sig,
                                     bool                  mustExpand)
{
    GenTree*  retNode  = nullptr;
    GenTree*  op1      = nullptr;
    GenTree*  op2      = nullptr;
    var_types callType = JITtype2varType(sig->retType);

    CORINFO_ARG_LIST_HANDLE argList = sig->args;
    CORINFO_CLASS_HANDLE    argClass;
    CorInfoType             corType;
    switch (intrinsic)
    {
        case NI_SSE42_Crc32:
        case NI_SSE42_X64_Crc32:
            assert(sig->numArgs == 2);
            op2     = impPopStack().val;
            op1     = impPopStack().val;
            argList = info.compCompHnd->getArgNext(argList);                        // the second argument
            corType = strip(info.compCompHnd->getArgType(sig, argList, &argClass)); // type of the second argument

            retNode = gtNewScalarHWIntrinsicNode(callType, op1, op2, intrinsic);

            // TODO - currently we use the BaseType to convey the type of the second argument
            // to the code generator. We may want to encode the overload info another way.
            retNode->gtHWIntrinsic.gtSIMDBaseType = JITtype2varType(corType);
            break;

        default:
            JITDUMP("Not implemented hardware intrinsic");
            break;
    }
    return retNode;
}
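//------------------------------------------------------------------------
// impAvxOrAvx2Intrinsic: special importer for AVX/AVX2 intrinsics that are not table-driven
//
// Arguments:
//    intrinsic  -- id of the intrinsic function.
//    method     -- method handle of the intrinsic function.
//    sig        -- signature of the intrinsic call
//    mustExpand -- true if the compiler is compiling the fallback(GT_CALL) of this intrinsic
//
// Return Value:
//    the expanded intrinsic, or nullptr if the intrinsic is not handled here.
//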
GenTree* Compiler::impAvxOrAvx2Intrinsic(NamedIntrinsic        intrinsic,
                                         CORINFO_METHOD_HANDLE method,
                                         CORINFO_SIG_INFO*     sig,
                                         bool                  mustExpand)
{
    GenTree*  retNode  = nullptr;
    GenTree*  op1      = nullptr;
    GenTree*  op2      = nullptr;
    var_types baseType = TYP_UNKNOWN;
    int       simdSize = HWIntrinsicInfo::lookupSimdSize(this, intrinsic, sig);

    switch (intrinsic)
    {
        case NI_AVX2_PermuteVar8x32:
        {
            baseType = getBaseTypeOfSIMDType(sig->retTypeSigClass);
            // swap the two operands
            GenTree* indexVector  = impSIMDPopStack(TYP_SIMD32);
            GenTree* sourceVector = impSIMDPopStack(TYP_SIMD32);
            retNode =
                gtNewSimdHWIntrinsicNode(TYP_SIMD32, indexVector, sourceVector, NI_AVX2_PermuteVar8x32, baseType, 32);
            break;
        }

        case NI_AVX2_GatherMaskVector128:
        case NI_AVX2_GatherMaskVector256:
        {
            CORINFO_ARG_LIST_HANDLE argList = sig->args;
            CORINFO_CLASS_HANDLE    argClass;
            var_types               argType = TYP_UNKNOWN;
            unsigned int            sizeBytes;
            baseType          = getBaseTypeAndSizeOfSIMDType(sig->retTypeSigClass, &sizeBytes);
            var_types retType = getSIMDTypeForSize(sizeBytes);

            assert(sig->numArgs == 5);
            CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(argList);
            CORINFO_ARG_LIST_HANDLE arg3 = info.compCompHnd->getArgNext(arg2);
            CORINFO_ARG_LIST_HANDLE arg4 = info.compCompHnd->getArgNext(arg3);
            CORINFO_ARG_LIST_HANDLE arg5 = info.compCompHnd->getArgNext(arg4);

            argType      = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg5, &argClass)));
            GenTree* op5 = getArgForHWIntrinsic(argType, argClass);
            SetOpLclRelatedToSIMDIntrinsic(op5);

            argType      = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg4, &argClass)));
            GenTree* op4 = getArgForHWIntrinsic(argType, argClass);
            SetOpLclRelatedToSIMDIntrinsic(op4);

            argType                 = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg3, &argClass)));
            var_types indexbaseType = getBaseTypeOfSIMDType(argClass);
            GenTree*  op3           = getArgForHWIntrinsic(argType, argClass);
            SetOpLclRelatedToSIMDIntrinsic(op3);

            argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass)));
            op2     = getArgForHWIntrinsic(argType, argClass);
            SetOpLclRelatedToSIMDIntrinsic(op2);

            argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass)));
            op1     = getArgForHWIntrinsic(argType, argClass);
            SetOpLclRelatedToSIMDIntrinsic(op1);

            GenTree* opList = new (this, GT_LIST) GenTreeArgList(op1, gtNewArgList(op2, op3, op4, op5));
            retNode = new (this, GT_HWIntrinsic) GenTreeHWIntrinsic(retType, opList, intrinsic, baseType, simdSize);
            retNode->AsHWIntrinsic()->gtIndexBaseType = indexbaseType;
            break;
        }

        default:
            JITDUMP("Not implemented hardware intrinsic");
            break;
    }
    return retNode;
}
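//------------------------------------------------------------------------
// impAESIntrinsic: special importer for AES intrinsics
//
// Arguments:
//    intrinsic  -- id of the intrinsic function.
//    method     -- method handle of the intrinsic function.
//    sig        -- signature of the intrinsic call
//    mustExpand -- true if the compiler is compiling the fallback(GT_CALL) of this intrinsic
//
// Return Value:
//    nullptr -- all AES intrinsics are currently imported via the table-driven path.
//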
GenTree* Compiler::impAESIntrinsic(NamedIntrinsic        intrinsic,
                                   CORINFO_METHOD_HANDLE method,
                                   CORINFO_SIG_INFO*     sig,
                                   bool                  mustExpand)
{
    return nullptr;
}
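//------------------------------------------------------------------------
// impBMI1OrBMI2Intrinsic: special importer for scalar BMI1/BMI2 intrinsics
//
// Arguments:
//    intrinsic  -- id of the intrinsic function.
//    method     -- method handle of the intrinsic function.
//    sig        -- signature of the intrinsic call
//    mustExpand -- true if the compiler is compiling the fallback(GT_CALL) of this intrinsic
//
// Return Value:
//    the expanded intrinsic, or nullptr if the intrinsic is not handled here.
//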
GenTree* Compiler::impBMI1OrBMI2Intrinsic(NamedIntrinsic        intrinsic,
                                          CORINFO_METHOD_HANDLE method,
                                          CORINFO_SIG_INFO*     sig,
                                          bool                  mustExpand)
{
    var_types callType = JITtype2varType(sig->retType);

    switch (intrinsic)
    {
        case NI_BMI1_AndNot:
        case NI_BMI1_X64_AndNot:
        case NI_BMI2_ParallelBitDeposit:
        case NI_BMI2_ParallelBitExtract:
        case NI_BMI2_X64_ParallelBitDeposit:
        case NI_BMI2_X64_ParallelBitExtract:
        {
            assert(sig->numArgs == 2);

            GenTree* op2 = impPopStack().val;
            GenTree* op1 = impPopStack().val;

            return gtNewScalarHWIntrinsicNode(callType, op1, op2, intrinsic);
        }

        case NI_BMI2_ZeroHighBits:
        case NI_BMI2_X64_ZeroHighBits:
        {
            assert(sig->numArgs == 2);

            GenTree* op2 = impPopStack().val;
            GenTree* op1 = impPopStack().val;
            // Instruction BZHI requires op2 (the third operand) to be encoded in VEX.vvvv, and op1 may be
            // a memory operand, so swap op1 and op2 to unify the backend code.
            return gtNewScalarHWIntrinsicNode(callType, op2, op1, intrinsic);
        }

        case NI_BMI1_ExtractLowestSetBit:
        case NI_BMI1_GetMaskUpToLowestSetBit:
        case NI_BMI1_ResetLowestSetBit:
        case NI_BMI1_TrailingZeroCount:
        case NI_BMI1_X64_ExtractLowestSetBit:
        case NI_BMI1_X64_GetMaskUpToLowestSetBit:
        case NI_BMI1_X64_ResetLowestSetBit:
        case NI_BMI1_X64_TrailingZeroCount:
        {
            assert(sig->numArgs == 1);
            GenTree* op1 = impPopStack().val;
            return gtNewScalarHWIntrinsicNode(callType, op1, intrinsic);
        }

        case NI_BMI1_BitFieldExtract:
        case NI_BMI1_X64_BitFieldExtract:
        {
            // The 3-arg version is implemented in managed code
            if (sig->numArgs == 3)
            {
                return nullptr;
            }
            assert(sig->numArgs == 2);

            GenTree* op2 = impPopStack().val;
            GenTree* op1 = impPopStack().val;
            // Instruction BEXTR requires op2 (the third operand) to be encoded in VEX.vvvv, and op1 may be
            // a memory operand, so swap op1 and op2 to unify the backend code.
            return gtNewScalarHWIntrinsicNode(callType, op2, op1, intrinsic);
        }

        case NI_BMI2_MultiplyNoFlags:
        case NI_BMI2_X64_MultiplyNoFlags:
        {
            assert(sig->numArgs == 2 || sig->numArgs == 3);
            GenTree* op3 = nullptr;
            if (sig->numArgs == 3)
            {
                op3 = impPopStack().val;
            }

            GenTree* op2 = impPopStack().val;
            GenTree* op1 = impPopStack().val;

            if (sig->numArgs == 3)
            {
                return gtNewScalarHWIntrinsicNode(callType, op1, op2, op3, intrinsic);
            }
            else
            {
                return gtNewScalarHWIntrinsicNode(callType, op1, op2, intrinsic);
            }
        }

        default:
        {
            unreached();
            return nullptr;
        }
    }
}
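//------------------------------------------------------------------------
// impFMAIntrinsic: special importer for FMA intrinsics
//
// Arguments:
//    intrinsic  -- id of the intrinsic function.
//    method     -- method handle of the intrinsic function.
//    sig        -- signature of the intrinsic call
//    mustExpand -- true if the compiler is compiling the fallback(GT_CALL) of this intrinsic
//
// Return Value:
//    nullptr -- all FMA intrinsics are currently imported via the table-driven path.
//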
GenTree* Compiler::impFMAIntrinsic(NamedIntrinsic        intrinsic,
                                   CORINFO_METHOD_HANDLE method,
                                   CORINFO_SIG_INFO*     sig,
                                   bool                  mustExpand)
{
    return nullptr;
}
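//------------------------------------------------------------------------
// impLZCNTIntrinsic: special importer for LZCNT intrinsics
//
// Arguments:
//    intrinsic  -- id of the intrinsic function.
//    method     -- method handle of the intrinsic function.
//    sig        -- signature of the intrinsic call
//    mustExpand -- true if the compiler is compiling the fallback(GT_CALL) of this intrinsic
//
// Return Value:
//    the expanded intrinsic.
//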
GenTree* Compiler::impLZCNTIntrinsic(NamedIntrinsic        intrinsic,
                                     CORINFO_METHOD_HANDLE method,
                                     CORINFO_SIG_INFO*     sig,
                                     bool                  mustExpand)
{
    assert(sig->numArgs == 1);
    var_types callType = JITtype2varType(sig->retType);
    return gtNewScalarHWIntrinsicNode(callType, impPopStack().val, intrinsic);
}
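//------------------------------------------------------------------------
// impPCLMULQDQIntrinsic: special importer for PCLMULQDQ intrinsics
//
// Arguments:
//    intrinsic  -- id of the intrinsic function.
//    method     -- method handle of the intrinsic function.
//    sig        -- signature of the intrinsic call
//    mustExpand -- true if the compiler is compiling the fallback(GT_CALL) of this intrinsic
//
// Return Value:
//    nullptr -- all PCLMULQDQ intrinsics are currently imported via the table-driven path.
//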
GenTree* Compiler::impPCLMULQDQIntrinsic(NamedIntrinsic        intrinsic,
                                         CORINFO_METHOD_HANDLE method,
                                         CORINFO_SIG_INFO*     sig,
                                         bool                  mustExpand)
{
    return nullptr;
}
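//------------------------------------------------------------------------
// impPOPCNTIntrinsic: special importer for POPCNT intrinsics
//
// Arguments:
//    intrinsic  -- id of the intrinsic function.
//    method     -- method handle of the intrinsic function.
//    sig        -- signature of the intrinsic call
//    mustExpand -- true if the compiler is compiling the fallback(GT_CALL) of this intrinsic
//
// Return Value:
//    the expanded intrinsic.
//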
GenTree* Compiler::impPOPCNTIntrinsic(NamedIntrinsic        intrinsic,
                                      CORINFO_METHOD_HANDLE method,
                                      CORINFO_SIG_INFO*     sig,
                                      bool                  mustExpand)
{
    assert(sig->numArgs == 1);
    var_types callType = JITtype2varType(sig->retType);
    return gtNewScalarHWIntrinsicNode(callType, impPopStack().val, intrinsic);
}
#endif // FEATURE_HW_INTRINSICS