fcc20e04c642888a31efbaf029b02ada3019559c
[platform/upstream/coreclr.git] / src / jit / hwintrinsicxarch.cpp
1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
4
5 #include "jitpch.h"
6 #include "hwintrinsic.h"
7
8 #ifdef FEATURE_HW_INTRINSICS
9
// Table of HWIntrinsicInfo entries, one per named x86/x64 hardware intrinsic.
// Rows are generated from hwintrinsiclistxarch.h via the HARDWARE_INTRINSIC
// macro; lookup() indexes it with (id - NI_HW_INTRINSIC_START - 1).
static const HWIntrinsicInfo hwIntrinsicInfoArray[] = {
// clang-format off
#define HARDWARE_INTRINSIC(id, name, isa, ival, size, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \
    {NI_##id, name, InstructionSet_##isa, ival, size, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, static_cast<HWIntrinsicFlag>(flag)},
// clang-format on
#include "hwintrinsiclistxarch.h"
};
17
18 //------------------------------------------------------------------------
19 // lookup: Gets the HWIntrinsicInfo associated with a given NamedIntrinsic
20 //
21 // Arguments:
22 //    id -- The NamedIntrinsic associated with the HWIntrinsic to lookup
23 //
24 // Return Value:
25 //    The HWIntrinsicInfo associated with id
26 const HWIntrinsicInfo& HWIntrinsicInfo::lookup(NamedIntrinsic id)
27 {
28     assert(id != NI_Illegal);
29
30     assert(id > NI_HW_INTRINSIC_START);
31     assert(id < NI_HW_INTRINSIC_END);
32
33     return hwIntrinsicInfoArray[id - NI_HW_INTRINSIC_START - 1];
34 }
35
36 //------------------------------------------------------------------------
37 // lookupId: Gets the NamedIntrinsic for a given method name and InstructionSet
38 //
39 // Arguments:
40 //    className  -- The name of the class associated with the HWIntrinsic to lookup
41 //    methodName -- The name of the method associated with the HWIntrinsic to lookup
42 //    enclosingClassName -- The name of the enclosing class of X64 classes
43 //
44 // Return Value:
45 //    The NamedIntrinsic associated with methodName and isa
46 NamedIntrinsic HWIntrinsicInfo::lookupId(const char* className, const char* methodName, const char* enclosingClassName)
47 {
48     // TODO-Throughput: replace sequential search by binary search
49
50     InstructionSet isa = lookupIsa(className, enclosingClassName);
51     assert(isa != InstructionSet_ILLEGAL);
52
53     assert(methodName != nullptr);
54
55     for (int i = 0; i < (NI_HW_INTRINSIC_END - NI_HW_INTRINSIC_START - 1); i++)
56     {
57         if (isa != hwIntrinsicInfoArray[i].isa)
58         {
59             continue;
60         }
61
62         if (strcmp(methodName, hwIntrinsicInfoArray[i].name) == 0)
63         {
64             return hwIntrinsicInfoArray[i].id;
65         }
66     }
67
68     // There are several helper intrinsics that are implemented in managed code
69     // Those intrinsics will hit this code path and need to return NI_Illegal
70     return NI_Illegal;
71 }
72
73 //------------------------------------------------------------------------
74 // X64VersionOfIsa: Gets the corresponding 64-bit only InstructionSet for a given InstructionSet
75 //
76 // Arguments:
77 //    isa -- The InstructionSet ID
78 //
79 // Return Value:
80 //    The 64-bit only InstructionSet associated with isa
81 static InstructionSet X64VersionOfIsa(InstructionSet isa)
82 {
83     switch (isa)
84     {
85         case InstructionSet_SSE:
86             return InstructionSet_SSE_X64;
87         case InstructionSet_SSE2:
88             return InstructionSet_SSE2_X64;
89         case InstructionSet_SSE41:
90             return InstructionSet_SSE41_X64;
91         case InstructionSet_SSE42:
92             return InstructionSet_SSE42_X64;
93         case InstructionSet_BMI1:
94             return InstructionSet_BMI1_X64;
95         case InstructionSet_BMI2:
96             return InstructionSet_BMI2_X64;
97         case InstructionSet_LZCNT:
98             return InstructionSet_LZCNT_X64;
99         case InstructionSet_POPCNT:
100             return InstructionSet_POPCNT_X64;
101         default:
102             unreached();
103             return InstructionSet_ILLEGAL;
104     }
105 }
106
107 //------------------------------------------------------------------------
108 // lookupInstructionSet: Gets the InstructionSet for a given class name
109 //
110 // Arguments:
111 //    className -- The name of the class associated with the InstructionSet to lookup
112 //
113 // Return Value:
114 //    The InstructionSet associated with className
115 static InstructionSet lookupInstructionSet(const char* className)
116 {
117     assert(className != nullptr);
118     if (className[0] == 'A')
119     {
120         if (strcmp(className, "Aes") == 0)
121         {
122             return InstructionSet_AES;
123         }
124         if (strcmp(className, "Avx") == 0)
125         {
126             return InstructionSet_AVX;
127         }
128         if (strcmp(className, "Avx2") == 0)
129         {
130             return InstructionSet_AVX2;
131         }
132     }
133     else if (className[0] == 'S')
134     {
135         if (strcmp(className, "Sse") == 0)
136         {
137             return InstructionSet_SSE;
138         }
139         if (strcmp(className, "Sse2") == 0)
140         {
141             return InstructionSet_SSE2;
142         }
143         if (strcmp(className, "Sse3") == 0)
144         {
145             return InstructionSet_SSE3;
146         }
147         if (strcmp(className, "Ssse3") == 0)
148         {
149             return InstructionSet_SSSE3;
150         }
151         if (strcmp(className, "Sse41") == 0)
152         {
153             return InstructionSet_SSE41;
154         }
155         if (strcmp(className, "Sse42") == 0)
156         {
157             return InstructionSet_SSE42;
158         }
159     }
160     else if (className[0] == 'B')
161     {
162         if (strcmp(className, "Bmi1") == 0)
163         {
164             return InstructionSet_BMI1;
165         }
166         if (strcmp(className, "Bmi2") == 0)
167         {
168             return InstructionSet_BMI2;
169         }
170     }
171     else if (className[0] == 'P')
172     {
173         if (strcmp(className, "Pclmulqdq") == 0)
174         {
175             return InstructionSet_PCLMULQDQ;
176         }
177         if (strcmp(className, "Popcnt") == 0)
178         {
179             return InstructionSet_POPCNT;
180         }
181     }
182     else if (strcmp(className, "Fma") == 0)
183     {
184         return InstructionSet_FMA;
185     }
186     else if (strcmp(className, "Lzcnt") == 0)
187     {
188         return InstructionSet_LZCNT;
189     }
190
191     unreached();
192     return InstructionSet_ILLEGAL;
193 }
194
195 //------------------------------------------------------------------------
196 // lookupIsa: Gets the InstructionSet for a given class name and enclsoing class name
197 //
198 // Arguments:
199 //    className -- The name of the class associated with the InstructionSet to lookup
200 //    enclosingClassName -- The name of the enclosing class of X64 classes
201 //
202 // Return Value:
203 //    The InstructionSet associated with className and enclosingClassName
204 InstructionSet HWIntrinsicInfo::lookupIsa(const char* className, const char* enclosingClassName)
205 {
206     assert(className != nullptr);
207
208     if (strcmp(className, "X64") == 0)
209     {
210         assert(enclosingClassName != nullptr);
211         return X64VersionOfIsa(lookupInstructionSet(enclosingClassName));
212     }
213     else
214     {
215         return lookupInstructionSet(className);
216     }
217 }
218
//------------------------------------------------------------------------
// lookupSimdSize: Gets the SimdSize for a given HWIntrinsic and signature
//
// Arguments:
//    comp -- The compiler instance (used to query the EE for type info)
//    id   -- The ID associated with the HWIntrinsic to lookup
//    sig  -- The signature of the HWIntrinsic to lookup
//
// Return Value:
//    The SIMD size for the HWIntrinsic associated with id and sig
//
// Remarks:
//    This function is only used by the importer. After importation, we can
//    get the SIMD size from the GenTreeHWIntrinsic node.
unsigned HWIntrinsicInfo::lookupSimdSize(Compiler* comp, NamedIntrinsic id, CORINFO_SIG_INFO* sig)
{
    // Intrinsics with a fixed SIMD size don't need to inspect the signature.
    if (HWIntrinsicInfo::HasFixedSimdSize(id))
    {
        return lookupSimdSize(id);
    }

    CORINFO_CLASS_HANDLE typeHnd = nullptr;

    // Pick the signature element that carries the vector type: prefer a
    // SIMD-struct return type, otherwise use the first or second argument
    // as directed by the intrinsic's BaseTypeFrom*Arg flags.
    if (JITtype2varType(sig->retType) == TYP_STRUCT)
    {
        typeHnd = sig->retTypeSigClass;
    }
    else if (HWIntrinsicInfo::BaseTypeFromFirstArg(id))
    {
        typeHnd = comp->info.compCompHnd->getArgClass(sig, sig->args);
    }
    else
    {
        assert(HWIntrinsicInfo::BaseTypeFromSecondArg(id));
        CORINFO_ARG_LIST_HANDLE secondArg = comp->info.compCompHnd->getArgNext(sig->args);
        typeHnd                           = comp->info.compCompHnd->getArgClass(sig, secondArg);
    }

    // The chosen handle must describe a valid SIMD type.
    unsigned  simdSize = 0;
    var_types baseType = comp->getBaseTypeAndSizeOfSIMDType(typeHnd, &simdSize);
    assert((simdSize > 0) && (baseType != TYP_UNKNOWN));
    return simdSize;
}
261
262 //------------------------------------------------------------------------
263 // lookupNumArgs: Gets the number of args for a given HWIntrinsic node
264 //
265 // Arguments:
266 //    node -- The HWIntrinsic node to get the number of args for
267 //
268 // Return Value:
269 //    The number of args for the HWIntrinsic associated with node
270 int HWIntrinsicInfo::lookupNumArgs(const GenTreeHWIntrinsic* node)
271 {
272     assert(node != nullptr);
273
274     NamedIntrinsic id      = node->gtHWIntrinsicId;
275     int            numArgs = lookupNumArgs(id);
276
277     if (numArgs >= 0)
278     {
279         return numArgs;
280     }
281
282     assert(numArgs == -1);
283
284     GenTree* op1 = node->gtGetOp1();
285
286     if (op1 == nullptr)
287     {
288         return 0;
289     }
290
291     if (op1->OperIsList())
292     {
293         GenTreeArgList* list = op1->AsArgList();
294         numArgs              = 0;
295
296         do
297         {
298             numArgs++;
299             list = list->Rest();
300         } while (list != nullptr);
301
302         return numArgs;
303     }
304
305     GenTree* op2 = node->gtGetOp2();
306
307     return (op2 == nullptr) ? 1 : 2;
308 }
309
//------------------------------------------------------------------------
// lookupLastOp: Gets the last operand for a given HWIntrinsic node
//
// Arguments:
//    node   -- The HWIntrinsic node to get the last operand for
//
// Return Value:
//     The last operand for node
GenTree* HWIntrinsicInfo::lookupLastOp(const GenTreeHWIntrinsic* node)
{
    int numArgs = lookupNumArgs(node);

    switch (numArgs)
    {
        case 0:
        {
            // No operands at all.
            assert(node->gtGetOp1() == nullptr);
            assert(node->gtGetOp2() == nullptr);
            return nullptr;
        }

        case 1:
        {
            // Single operand lives in op1.
            assert(node->gtGetOp1() != nullptr);
            assert(!node->gtGetOp1()->OperIsList());
            assert(node->gtGetOp2() == nullptr);

            return node->gtGetOp1();
        }

        case 2:
        {
            // Two operands: op1 and op2 are used directly, no list.
            assert(node->gtGetOp1() != nullptr);
            assert(!node->gtGetOp1()->OperIsList());
            assert(node->gtGetOp2() != nullptr);

            return node->gtGetOp2();
        }

        case 3:
        {
            // Three operands are carried as a GT_LIST chain in op1;
            // the last operand is the third list element.
            assert(node->gtGetOp1() != nullptr);
            assert(node->gtGetOp1()->OperIsList());
            assert(node->gtGetOp2() == nullptr);
            assert(node->gtGetOp1()->AsArgList()->Rest()->Rest()->Current() != nullptr);
            assert(node->gtGetOp1()->AsArgList()->Rest()->Rest()->Rest() == nullptr);

            return node->gtGetOp1()->AsArgList()->Rest()->Rest()->Current();
        }

        case 5:
        {
            // Five operands: the last operand is the fifth list element.
            // NOTE(review): there is no case 4 here — presumably no current
            // intrinsic has exactly four operands; unreached() below catches
            // any such node. Confirm against hwintrinsiclistxarch.h.
            assert(node->gtGetOp1() != nullptr);
            assert(node->gtGetOp1()->OperIsList());
            assert(node->gtGetOp2() == nullptr);
            assert(node->gtGetOp1()->AsArgList()->Rest()->Rest()->Rest()->Rest()->Current() != nullptr);
            assert(node->gtGetOp1()->AsArgList()->Rest()->Rest()->Rest()->Rest()->Rest() == nullptr);

            return node->gtGetOp1()->AsArgList()->Rest()->Rest()->Rest()->Rest()->Current();
        }

        default:
        {
            unreached();
            return nullptr;
        }
    }
}
378
379 //------------------------------------------------------------------------
380 // isImmOp: Gets a value that indicates whether the HWIntrinsic node has an imm operand
381 //
382 // Arguments:
383 //    id -- The NamedIntrinsic associated with the HWIntrinsic to lookup
384 //    op -- The operand to check
385 //
386 // Return Value:
387 //     true if the node has an imm operand; otherwise, false
388 bool HWIntrinsicInfo::isImmOp(NamedIntrinsic id, const GenTree* op)
389 {
390     if (HWIntrinsicInfo::lookupCategory(id) != HW_Category_IMM)
391     {
392         return false;
393     }
394
395     if (!HWIntrinsicInfo::MaybeImm(id))
396     {
397         return true;
398     }
399
400     if (genActualType(op->TypeGet()) != TYP_INT)
401     {
402         return false;
403     }
404
405     return true;
406 }
407
408 //------------------------------------------------------------------------
409 // lookupImmUpperBound: Gets the upper bound for the imm-value of a given NamedIntrinsic
410 //
411 // Arguments:
412 //    id -- The NamedIntrinsic associated with the HWIntrinsic to lookup
413 //
414 // Return Value:
415 //     The upper bound for the imm-value of the intrinsic associated with id
416 //
417 int HWIntrinsicInfo::lookupImmUpperBound(NamedIntrinsic id)
418 {
419     assert(HWIntrinsicInfo::lookupCategory(id) == HW_Category_IMM);
420
421     switch (id)
422     {
423         case NI_AVX_Compare:
424         case NI_AVX_CompareScalar:
425         {
426             assert(!HWIntrinsicInfo::HasFullRangeImm(id));
427             return 31; // enum FloatComparisonMode has 32 values
428         }
429
430         case NI_AVX2_GatherVector128:
431         case NI_AVX2_GatherVector256:
432         case NI_AVX2_GatherMaskVector128:
433         case NI_AVX2_GatherMaskVector256:
434             return 8;
435
436         default:
437         {
438             assert(HWIntrinsicInfo::HasFullRangeImm(id));
439             return 255;
440         }
441     }
442 }
443
444 //------------------------------------------------------------------------
445 // isInImmRange: Check if ival is valid for the intrinsic
446 //
447 // Arguments:
448 //    id   -- The NamedIntrinsic associated with the HWIntrinsic to lookup
449 //    ival -- the imm value to be checked
450 //
451 // Return Value:
452 //     true if ival is valid for the intrinsic
453 //
454 bool HWIntrinsicInfo::isInImmRange(NamedIntrinsic id, int ival)
455 {
456     assert(HWIntrinsicInfo::lookupCategory(id) == HW_Category_IMM);
457
458     if (isAVX2GatherIntrinsic(id))
459     {
460         return ival == 1 || ival == 2 || ival == 4 || ival == 8;
461     }
462     else
463     {
464         return ival <= lookupImmUpperBound(id) && ival >= 0;
465     }
466 }
467
468 //------------------------------------------------------------------------
469 // isAVX2GatherIntrinsic: Check if the intrinsic is AVX Gather*
470 //
471 // Arguments:
472 //    id   -- The NamedIntrinsic associated with the HWIntrinsic to lookup
473 //
474 // Return Value:
475 //     true if id is AVX Gather* intrinsic
476 //
477 bool HWIntrinsicInfo::isAVX2GatherIntrinsic(NamedIntrinsic id)
478 {
479     switch (id)
480     {
481         case NI_AVX2_GatherVector128:
482         case NI_AVX2_GatherVector256:
483         case NI_AVX2_GatherMaskVector128:
484         case NI_AVX2_GatherMaskVector256:
485             return true;
486         default:
487             return false;
488     }
489 }
490
//------------------------------------------------------------------------
// isFullyImplementedIsa: Gets a value that indicates whether the InstructionSet is fully implemented
//
// Arguments:
//    isa - The InstructionSet to check
//
// Return Value:
//    true if isa is supported; otherwise, false
bool HWIntrinsicInfo::isFullyImplementedIsa(InstructionSet isa)
{
    switch (isa)
    {
        // These ISAs are fully implemented
        case InstructionSet_AES:
        case InstructionSet_AVX:
        case InstructionSet_AVX2:
        case InstructionSet_Base:
        case InstructionSet_BMI1:
        case InstructionSet_BMI2:
        case InstructionSet_BMI1_X64:
        case InstructionSet_BMI2_X64:
        case InstructionSet_FMA:
        case InstructionSet_LZCNT:
        case InstructionSet_LZCNT_X64:
        case InstructionSet_PCLMULQDQ:
        case InstructionSet_POPCNT:
        case InstructionSet_POPCNT_X64:
        case InstructionSet_SSE:
        case InstructionSet_SSE_X64:
        case InstructionSet_SSE2:
        case InstructionSet_SSE2_X64:
        case InstructionSet_SSE3:
        case InstructionSet_SSSE3:
        case InstructionSet_SSE41:
        case InstructionSet_SSE41_X64:
        case InstructionSet_SSE42:
        case InstructionSet_SSE42_X64:
        {
            return true;
        }

        default:
        {
            // Every known ISA is listed above, so reaching here means an
            // unrecognized InstructionSet value was passed in.
            unreached();
        }
    }
}
538
539 //------------------------------------------------------------------------
540 // isScalarIsa: Gets a value that indicates whether the InstructionSet is scalar
541 //
542 // Arguments:
543 //    isa - The InstructionSet to check
544 //
545 // Return Value:
546 //    true if isa is scalar; otherwise, false
547 bool HWIntrinsicInfo::isScalarIsa(InstructionSet isa)
548 {
549     switch (isa)
550     {
551         case InstructionSet_BMI1:
552         case InstructionSet_BMI2:
553         case InstructionSet_BMI1_X64:
554         case InstructionSet_BMI2_X64:
555         case InstructionSet_LZCNT:
556         case InstructionSet_LZCNT_X64:
557         case InstructionSet_POPCNT:
558         case InstructionSet_POPCNT_X64:
559         {
560             return true;
561         }
562
563         default:
564         {
565             return false;
566         }
567     }
568 }
569
//------------------------------------------------------------------------
// getArgForHWIntrinsic: get the argument from the stack and match the signature
//
// Arguments:
//    argType   -- the required type of argument
//    argClass  -- the class handle of argType
//
// Return Value:
//     the argument popped from the importer stack, matching the signature
//
GenTree* Compiler::getArgForHWIntrinsic(var_types argType, CORINFO_CLASS_HANDLE argClass)
{
    GenTree* arg = nullptr;
    if (argType == TYP_STRUCT)
    {
        // A struct argument must be a SIMD vector: resolve its concrete
        // TYP_SIMD16/TYP_SIMD32 type from the class handle before popping.
        unsigned int argSizeBytes;
        var_types    base = getBaseTypeAndSizeOfSIMDType(argClass, &argSizeBytes);
        argType           = getSIMDTypeForSize(argSizeBytes);
        assert((argType == TYP_SIMD32) || (argType == TYP_SIMD16));
        arg = impSIMDPopStack(argType);
        assert((arg->TypeGet() == TYP_SIMD16) || (arg->TypeGet() == TYP_SIMD32));
    }
    else
    {
        // Non-struct arguments are plain arithmetic values; the popped node
        // must agree with the signature's (stack-normalized) type.
        assert(varTypeIsArithmetic(argType));
        arg = impPopStack().val;
        assert(varTypeIsArithmetic(arg->TypeGet()));
        assert(genActualType(arg->gtType) == genActualType(argType));
    }
    return arg;
}
601
//------------------------------------------------------------------------
// impNonConstFallback: convert certain SSE2/AVX2 shift intrinsic to its semantic alternative when the imm-arg is
// not a compile-time constant
//
// Arguments:
//    intrinsic  -- intrinsic ID
//    simdType   -- Vector type
//    baseType   -- base type of the Vector128/256<T>
//
// Return Value:
//     return the IR of semantic alternative on non-const imm-arg
//
GenTree* Compiler::impNonConstFallback(NamedIntrinsic intrinsic, var_types simdType, var_types baseType)
{
    assert(HWIntrinsicInfo::NoJmpTableImm(intrinsic));
    switch (intrinsic)
    {
        case NI_SSE2_ShiftLeftLogical:
        case NI_SSE2_ShiftRightArithmetic:
        case NI_SSE2_ShiftRightLogical:
        case NI_AVX2_ShiftLeftLogical:
        case NI_AVX2_ShiftRightArithmetic:
        case NI_AVX2_ShiftRightLogical:
        {
            // Re-emit the shift with the count wrapped in a vector: move the
            // non-constant shift amount into the low element of an XMM
            // register and use the vector-count form of the shift instead of
            // the imm8 form.
            GenTree* op2 = impPopStack().val;
            GenTree* op1 = impSIMDPopStack(simdType);
            GenTree* tmpOp =
                gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, NI_SSE2_ConvertScalarToVector128Int32, TYP_INT, 16);
            return gtNewSimdHWIntrinsicNode(simdType, op1, tmpOp, intrinsic, baseType, genTypeSize(simdType));
        }

        default:
            // Only the shift intrinsics above have a non-const fallback.
            unreached();
            return nullptr;
    }
}
638
//------------------------------------------------------------------------
// addRangeCheckIfNeeded: add a GT_HW_INTRINSIC_CHK node for non-full-range imm-intrinsic
//
// Arguments:
//    intrinsic  -- intrinsic ID
//    lastOp     -- the last operand of the intrinsic that points to the imm-arg
//    mustExpand -- true if the compiler is compiling the fallback(GT_CALL) of this intrinsics
//
// Return Value:
//     add a GT_HW_INTRINSIC_CHK node for non-full-range imm-intrinsic, which would throw ArgumentOutOfRangeException
//     when the imm-argument is not in the valid range
//
GenTree* Compiler::addRangeCheckIfNeeded(NamedIntrinsic intrinsic, GenTree* lastOp, bool mustExpand)
{
    assert(lastOp != nullptr);
    // Full-range imm-intrinsics do not need the range-check
    // because the imm-parameter of the intrinsic method is a byte.
    // AVX2 Gather intrinsics do not need the range-check
    // because their imm-parameter has discrete valid values that are handled by managed code
    if (mustExpand && !HWIntrinsicInfo::HasFullRangeImm(intrinsic) && HWIntrinsicInfo::isImmOp(intrinsic, lastOp) &&
        !HWIntrinsicInfo::isAVX2GatherIntrinsic(intrinsic))
    {
        // A constant imm would have been range-checked at import time.
        assert(!lastOp->IsCnsIntOrI());
        GenTree* upperBoundNode =
            new (this, GT_CNS_INT) GenTreeIntCon(TYP_INT, HWIntrinsicInfo::lookupImmUpperBound(intrinsic));
        GenTree* index = nullptr;
        // The imm operand is evaluated twice (once by the check, once by the
        // intrinsic), so spill it to a temp if it has side effects; otherwise
        // a simple clone suffices.
        if ((lastOp->gtFlags & GTF_SIDE_EFFECT) != 0)
        {
            index = fgInsertCommaFormTemp(&lastOp);
        }
        else
        {
            index = gtCloneExpr(lastOp);
        }
        // COMMA(bounds-check, lastOp): perform the range check first, then
        // yield the original operand to the intrinsic.
        GenTreeBoundsChk* hwIntrinsicChk = new (this, GT_HW_INTRINSIC_CHK)
            GenTreeBoundsChk(GT_HW_INTRINSIC_CHK, TYP_VOID, index, upperBoundNode, SCK_RNGCHK_FAIL);
        hwIntrinsicChk->gtThrowKind = SCK_ARG_RNG_EXCPN;
        return gtNewOperNode(GT_COMMA, lastOp->TypeGet(), hwIntrinsicChk, lastOp);
    }
    else
    {
        return lastOp;
    }
}
683
//------------------------------------------------------------------------
// compSupportsHWIntrinsic: compiler support of hardware intrinsics
//
// Arguments:
//    isa - Instruction set
// Return Value:
//    true if
//    - isa is a scalar ISA
//    - isa is a SIMD ISA and featureSIMD=true
//    - isa is fully implemented or EnableIncompleteISAClass=true
bool Compiler::compSupportsHWIntrinsic(InstructionSet isa)
{
    // Scalar ISAs do not require SIMD support. In DEBUG builds, the
    // EnableIncompleteISAClass config knob permits partially-implemented
    // ISA classes for testing purposes.
    return (featureSIMD || HWIntrinsicInfo::isScalarIsa(isa)) && (
#ifdef DEBUG
                                                                     JitConfig.EnableIncompleteISAClass() ||
#endif
                                                                     HWIntrinsicInfo::isFullyImplementedIsa(isa));
}
702
703 //------------------------------------------------------------------------
704 // impIsTableDrivenHWIntrinsic:
705 //
706 // Arguments:
707 //    category - category of a HW intrinsic
708 //
709 // Return Value:
710 //    returns true if this category can be table-driven in the importer
711 //
712 static bool impIsTableDrivenHWIntrinsic(NamedIntrinsic intrinsicId, HWIntrinsicCategory category)
713 {
714     // HW_Flag_NoCodeGen implies this intrinsic should be manually morphed in the importer.
715     return (category != HW_Category_Special) && (category != HW_Category_Scalar) &&
716            HWIntrinsicInfo::RequiresCodegen(intrinsicId) && !HWIntrinsicInfo::HasSpecialImport(intrinsicId);
717 }
718
719 //------------------------------------------------------------------------
720 // impHWIntrinsic: dispatch hardware intrinsics to their own implementation
721 //
722 // Arguments:
723 //    intrinsic -- id of the intrinsic function.
724 //    method    -- method handle of the intrinsic function.
725 //    sig       -- signature of the intrinsic call
726 //
727 // Return Value:
728 //    the expanded intrinsic.
729 //
730 GenTree* Compiler::impHWIntrinsic(NamedIntrinsic        intrinsic,
731                                   CORINFO_METHOD_HANDLE method,
732                                   CORINFO_SIG_INFO*     sig,
733                                   bool                  mustExpand)
734 {
735     InstructionSet      isa      = HWIntrinsicInfo::lookupIsa(intrinsic);
736     HWIntrinsicCategory category = HWIntrinsicInfo::lookupCategory(intrinsic);
737     int                 numArgs  = sig->numArgs;
738     var_types           retType  = JITtype2varType(sig->retType);
739     var_types           baseType = TYP_UNKNOWN;
740
741     if ((retType == TYP_STRUCT) && featureSIMD)
742     {
743         unsigned int sizeBytes;
744         baseType = getBaseTypeAndSizeOfSIMDType(sig->retTypeSigClass, &sizeBytes);
745         retType  = getSIMDTypeForSize(sizeBytes);
746         assert(sizeBytes != 0);
747     }
748
749     // This intrinsic is supported if
750     // - the ISA is available on the underlying hardware (compSupports returns true)
751     // - the compiler supports this hardware intrinsics (compSupportsHWIntrinsic returns true)
752     bool issupported = compSupports(isa) && compSupportsHWIntrinsic(isa);
753
754     if (category == HW_Category_IsSupportedProperty)
755     {
756         return gtNewIconNode(issupported);
757     }
758     // - calling to unsupported intrinsics must throw PlatforNotSupportedException
759     else if (!issupported)
760     {
761         return impUnsupportedHWIntrinsic(CORINFO_HELP_THROW_PLATFORM_NOT_SUPPORTED, method, sig, mustExpand);
762     }
763     // Avoid checking stacktop for 0-op intrinsics
764     if (sig->numArgs > 0 && HWIntrinsicInfo::isImmOp(intrinsic, impStackTop().val))
765     {
766         GenTree* lastOp = impStackTop().val;
767         // The imm-HWintrinsics that do not accept all imm8 values may throw
768         // ArgumentOutOfRangeException when the imm argument is not in the valid range
769         if (!HWIntrinsicInfo::HasFullRangeImm(intrinsic))
770         {
771             if (!mustExpand && lastOp->IsCnsIntOrI() &&
772                 !HWIntrinsicInfo::isInImmRange(intrinsic, (int)lastOp->AsIntCon()->IconValue()))
773             {
774                 return nullptr;
775             }
776         }
777
778         if (!lastOp->IsCnsIntOrI())
779         {
780             if (HWIntrinsicInfo::NoJmpTableImm(intrinsic))
781             {
782                 return impNonConstFallback(intrinsic, retType, baseType);
783             }
784
785             if (!mustExpand)
786             {
787                 // When the imm-argument is not a constant and we are not being forced to expand, we need to
788                 // return nullptr so a GT_CALL to the intrinsic method is emitted instead. The
789                 // intrinsic method is recursive and will be forced to expand, at which point
790                 // we emit some less efficient fallback code.
791                 return nullptr;
792             }
793         }
794     }
795
796     bool isTableDriven = impIsTableDrivenHWIntrinsic(intrinsic, category);
797
798     if (isTableDriven && ((category == HW_Category_MemoryStore) || HWIntrinsicInfo::BaseTypeFromFirstArg(intrinsic) ||
799                           HWIntrinsicInfo::BaseTypeFromSecondArg(intrinsic)))
800     {
801         if (HWIntrinsicInfo::BaseTypeFromFirstArg(intrinsic))
802         {
803             baseType = getBaseTypeOfSIMDType(info.compCompHnd->getArgClass(sig, sig->args));
804         }
805         else
806         {
807             assert((category == HW_Category_MemoryStore) || HWIntrinsicInfo::BaseTypeFromSecondArg(intrinsic));
808             CORINFO_ARG_LIST_HANDLE secondArg      = info.compCompHnd->getArgNext(sig->args);
809             CORINFO_CLASS_HANDLE    secondArgClass = info.compCompHnd->getArgClass(sig, secondArg);
810             baseType                               = getBaseTypeOfSIMDType(secondArgClass);
811
812             if (baseType == TYP_UNKNOWN) // the second argument is not a vector
813             {
814                 baseType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, secondArg, &secondArgClass)));
815             }
816         }
817     }
818
819     if (HWIntrinsicInfo::IsFloatingPointUsed(intrinsic))
820     {
821         // Set `compFloatingPointUsed` to cover the scenario where an intrinsic is being on SIMD fields, but
822         // where no SIMD local vars are in use. This is the same logic as is used for FEATURE_SIMD.
823         compFloatingPointUsed = true;
824     }
825
826     // table-driven importer of simple intrinsics
827     if (isTableDriven)
828     {
829         unsigned                simdSize = HWIntrinsicInfo::lookupSimdSize(this, intrinsic, sig);
830         CORINFO_ARG_LIST_HANDLE argList  = sig->args;
831         CORINFO_CLASS_HANDLE    argClass;
832         var_types               argType = TYP_UNKNOWN;
833
834         assert(numArgs >= 0);
835         assert(HWIntrinsicInfo::lookupIns(intrinsic, baseType) != INS_invalid);
836         assert(simdSize == 32 || simdSize == 16);
837
838         GenTreeHWIntrinsic* retNode = nullptr;
839         GenTree*            op1     = nullptr;
840         GenTree*            op2     = nullptr;
841
842         switch (numArgs)
843         {
844             case 0:
845                 retNode = gtNewSimdHWIntrinsicNode(retType, intrinsic, baseType, simdSize);
846                 break;
847             case 1:
848                 argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass)));
849                 op1     = getArgForHWIntrinsic(argType, argClass);
850                 retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, baseType, simdSize);
851                 break;
852             case 2:
853                 argType = JITtype2varType(
854                     strip(info.compCompHnd->getArgType(sig, info.compCompHnd->getArgNext(argList), &argClass)));
855                 op2 = getArgForHWIntrinsic(argType, argClass);
856
857                 op2 = addRangeCheckIfNeeded(intrinsic, op2, mustExpand);
858
859                 argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass)));
860                 op1     = getArgForHWIntrinsic(argType, argClass);
861
862                 retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, baseType, simdSize);
863                 break;
864
865             case 3:
866             {
867                 CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(argList);
868                 CORINFO_ARG_LIST_HANDLE arg3 = info.compCompHnd->getArgNext(arg2);
869
870                 argType      = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg3, &argClass)));
871                 GenTree* op3 = getArgForHWIntrinsic(argType, argClass);
872
873                 op3 = addRangeCheckIfNeeded(intrinsic, op3, mustExpand);
874
875                 argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass)));
876                 op2     = getArgForHWIntrinsic(argType, argClass);
877                 var_types op2Type;
878                 if (intrinsic == NI_AVX2_GatherVector128 || intrinsic == NI_AVX2_GatherVector256)
879                 {
880                     assert(varTypeIsSIMD(op2->TypeGet()));
881                     op2Type = getBaseTypeOfSIMDType(argClass);
882                 }
883
884                 argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass)));
885                 op1     = getArgForHWIntrinsic(argType, argClass);
886
887                 retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, intrinsic, baseType, simdSize);
888
889                 if (intrinsic == NI_AVX2_GatherVector128 || intrinsic == NI_AVX2_GatherVector256)
890                 {
891                     assert(varTypeIsSIMD(op2->TypeGet()));
892                     retNode->AsHWIntrinsic()->gtIndexBaseType = op2Type;
893                 }
894                 break;
895             }
896
897             default:
898                 unreached();
899         }
900
901         bool isMemoryStore = retNode->OperIsMemoryStore();
902         if (isMemoryStore || retNode->OperIsMemoryLoad())
903         {
904             if (isMemoryStore)
905             {
906                 // A MemoryStore operation is an assignment
907                 retNode->gtFlags |= GTF_ASG;
908             }
909
910             // This operation contains an implicit indirection
            //   it could point into the global heap or
912             //   it could throw a null reference exception.
913             //
914             retNode->gtFlags |= (GTF_GLOB_REF | GTF_EXCEPT);
915         }
916         return retNode;
917     }
918
919     // other intrinsics need special importation
920     switch (isa)
921     {
922         case InstructionSet_Base:
923             return impBaseIntrinsic(intrinsic, method, sig, mustExpand);
924         case InstructionSet_SSE:
925             return impSSEIntrinsic(intrinsic, method, sig, mustExpand);
926         case InstructionSet_SSE2:
927             return impSSE2Intrinsic(intrinsic, method, sig, mustExpand);
928         case InstructionSet_SSE42:
929         case InstructionSet_SSE42_X64:
930             return impSSE42Intrinsic(intrinsic, method, sig, mustExpand);
931         case InstructionSet_AVX:
932         case InstructionSet_AVX2:
933             return impAvxOrAvx2Intrinsic(intrinsic, method, sig, mustExpand);
934
935         case InstructionSet_AES:
936             return impAESIntrinsic(intrinsic, method, sig, mustExpand);
937         case InstructionSet_BMI1:
938         case InstructionSet_BMI1_X64:
939         case InstructionSet_BMI2:
940         case InstructionSet_BMI2_X64:
941             return impBMI1OrBMI2Intrinsic(intrinsic, method, sig, mustExpand);
942
943         case InstructionSet_FMA:
944             return impFMAIntrinsic(intrinsic, method, sig, mustExpand);
945         case InstructionSet_LZCNT:
946         case InstructionSet_LZCNT_X64:
947             return impLZCNTIntrinsic(intrinsic, method, sig, mustExpand);
948         case InstructionSet_PCLMULQDQ:
949             return impPCLMULQDQIntrinsic(intrinsic, method, sig, mustExpand);
950         case InstructionSet_POPCNT:
951         case InstructionSet_POPCNT_X64:
952             return impPOPCNTIntrinsic(intrinsic, method, sig, mustExpand);
953         default:
954             return nullptr;
955     }
956 }
957
958 //------------------------------------------------------------------------
959 // impBaseIntrinsic: dispatch intrinsics to their own implementation
960 //
961 // Arguments:
962 //    intrinsic  -- id of the intrinsic function.
963 //    method     -- method handle of the intrinsic function.
964 //    sig        -- signature of the intrinsic call
//    mustExpand -- true if the compiler is compiling the fallback (GT_CALL) of this intrinsic
966 //
967 // Return Value:
968 //    the expanded intrinsic.
969 //
GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic        intrinsic,
                                    CORINFO_METHOD_HANDLE method,
                                    CORINFO_SIG_INFO*     sig,
                                    bool                  mustExpand)
{
    GenTree* retNode = nullptr;
    GenTree* op1     = nullptr;

    // The Vector128/Vector256 helper intrinsics can only be imported when SIMD support is enabled.
    if (!featureSIMD)
    {
        return nullptr;
    }

    unsigned  simdSize = 0;
    var_types baseType = TYP_UNKNOWN;
    var_types retType  = JITtype2varType(sig->retType);

    assert(!sig->hasThis());

    if (HWIntrinsicInfo::BaseTypeFromFirstArg(intrinsic))
    {
        // The SIMD base (element) type and size come from the first argument's class.
        baseType = getBaseTypeAndSizeOfSIMDType(info.compCompHnd->getArgClass(sig, sig->args), &simdSize);

        if (retType == TYP_STRUCT)
        {
            // The return is itself a SIMD struct; map it onto the matching TYP_SIMD type,
            // bailing to a software fallback if its element type is not numeric.
            unsigned  retSimdSize = 0;
            var_types retBasetype = getBaseTypeAndSizeOfSIMDType(sig->retTypeClass, &retSimdSize);
            if (!varTypeIsArithmetic(retBasetype))
            {
                return nullptr;
            }
            retType = getSIMDTypeForSize(retSimdSize);
        }
    }
    else
    {
        // Otherwise the base type is taken from the return type, which must be a SIMD struct.
        assert(retType == TYP_STRUCT);
        baseType = getBaseTypeAndSizeOfSIMDType(sig->retTypeClass, &simdSize);
        retType  = getSIMDTypeForSize(simdSize);
    }

    // Non-numeric element types (e.g. Vector128<SomeStruct>) use the managed software fallback.
    if (!varTypeIsArithmetic(baseType))
    {
        return nullptr;
    }

    switch (intrinsic)
    {
        case NI_Base_Vector256_As:
        case NI_Base_Vector256_AsByte:
        case NI_Base_Vector256_AsDouble:
        case NI_Base_Vector256_AsInt16:
        case NI_Base_Vector256_AsInt32:
        case NI_Base_Vector256_AsInt64:
        case NI_Base_Vector256_AsSByte:
        case NI_Base_Vector256_AsSingle:
        case NI_Base_Vector256_AsUInt16:
        case NI_Base_Vector256_AsUInt32:
        case NI_Base_Vector256_AsUInt64:
        {
            if (!compSupports(InstructionSet_AVX))
            {
                // We don't want to deal with TYP_SIMD32 if the compiler doesn't otherwise support the type.
                break;
            }

            __fallthrough;
        }

        case NI_Base_Vector128_As:
        case NI_Base_Vector128_AsByte:
        case NI_Base_Vector128_AsDouble:
        case NI_Base_Vector128_AsInt16:
        case NI_Base_Vector128_AsInt32:
        case NI_Base_Vector128_AsInt64:
        case NI_Base_Vector128_AsSByte:
        case NI_Base_Vector128_AsSingle:
        case NI_Base_Vector128_AsUInt16:
        case NI_Base_Vector128_AsUInt32:
        case NI_Base_Vector128_AsUInt64:
        {
            // We fold away the cast here, as it only exists to satisfy
            // the type system. It is safe to do this here since the retNode type
            // and the signature return type are both the same TYP_SIMD.

            assert(sig->numArgs == 1);

            retNode = impSIMDPopStack(retType, /* expectAddr: */ false, sig->retTypeClass);
            SetOpLclRelatedToSIMDIntrinsic(retNode);
            assert(retNode->gtType == getSIMDTypeForSize(getSIMDTypeSizeInBytes(sig->retTypeSigClass)));
            break;
        }

        case NI_Base_Vector128_CreateScalarUnsafe:
        {
            assert(sig->numArgs == 1);

#ifdef _TARGET_X86_
            if (varTypeIsLong(baseType))
            {
                // TODO-XARCH-CQ: It may be beneficial to emit the movq
                // instruction, which takes a 64-bit memory address and
                // works on 32-bit x86 systems.
                break;
            }
#endif // _TARGET_X86_

            // float only needs SSE; all other element types need SSE2.
            if (compSupports(InstructionSet_SSE2) || (compSupports(InstructionSet_SSE) && (baseType == TYP_FLOAT)))
            {
                op1     = impPopStack().val;
                retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, baseType, simdSize);
            }
            break;
        }

        case NI_Base_Vector128_ToScalar:
        {
            assert(sig->numArgs == 1);

            if (compSupports(InstructionSet_SSE) && varTypeIsFloating(baseType))
            {
                op1     = impSIMDPopStack(getSIMDTypeForSize(simdSize));
                retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, baseType, 16);
            }
            break;
        }

        case NI_Base_Vector128_ToVector256:
        case NI_Base_Vector128_ToVector256Unsafe:
        case NI_Base_Vector256_GetLower:
        {
            assert(sig->numArgs == 1);

            if (compSupports(InstructionSet_AVX))
            {
                op1     = impSIMDPopStack(getSIMDTypeForSize(simdSize));
                retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, baseType, simdSize);
            }
            break;
        }

        case NI_Base_Vector128_Zero:
        {
            assert(sig->numArgs == 0);

            if (compSupports(InstructionSet_SSE))
            {
                retNode = gtNewSimdHWIntrinsicNode(retType, intrinsic, baseType, simdSize);
            }
            break;
        }

        case NI_Base_Vector256_CreateScalarUnsafe:
        {
            assert(sig->numArgs == 1);

#ifdef _TARGET_X86_
            if (varTypeIsLong(baseType))
            {
                // TODO-XARCH-CQ: It may be beneficial to emit the movq
                // instruction, which takes a 64-bit memory address and
                // works on 32-bit x86 systems.
                break;
            }
#endif // _TARGET_X86_

            if (compSupports(InstructionSet_AVX))
            {
                op1     = impPopStack().val;
                retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, baseType, simdSize);
            }
            break;
        }

        case NI_Base_Vector256_ToScalar:
        {
            assert(sig->numArgs == 1);

            if (compSupports(InstructionSet_AVX) && varTypeIsFloating(baseType))
            {
                op1     = impSIMDPopStack(getSIMDTypeForSize(simdSize));
                retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, baseType, 32);
            }
            break;
        }

        case NI_Base_Vector256_Zero:
        {
            assert(sig->numArgs == 0);

            if (compSupports(InstructionSet_AVX))
            {
                retNode = gtNewSimdHWIntrinsicNode(retType, intrinsic, baseType, simdSize);
            }
            break;
        }

        case NI_Base_Vector256_WithElement:
        {
            if (!compSupports(InstructionSet_AVX))
            {
                // Using software fallback if JIT/hardware don't support AVX instructions and YMM registers
                return nullptr;
            }
            __fallthrough;
        }

        case NI_Base_Vector128_WithElement:
        {
            assert(sig->numArgs == 3);
            GenTree* indexOp = impStackTop(1).val;
            if (!compSupports(InstructionSet_SSE2) || !varTypeIsArithmetic(baseType) || !indexOp->OperIsConst())
            {
                // Using software fallback if
                // 1. JIT/hardware don't support SSE2 instructions
                // 2. baseType is not a numeric type (throws exceptions)
                // 3. index is not a constant
                return nullptr;
            }

            switch (baseType)
            {
                // Using software fallback if baseType is not supported by hardware
                case TYP_BYTE:
                case TYP_UBYTE:
                case TYP_INT:
                case TYP_UINT:
                    if (!compSupports(InstructionSet_SSE41))
                    {
                        return nullptr;
                    }
                    break;

                case TYP_LONG:
                case TYP_ULONG:
                    if (!compSupports(InstructionSet_SSE41_X64))
                    {
                        return nullptr;
                    }
                    break;

                case TYP_DOUBLE:
                case TYP_FLOAT:
                case TYP_SHORT:
                case TYP_USHORT:
                    // short/ushort/float/double is supported by SSE2
                    break;

                default:
                    unreached();
                    break;
            }

            // cachedImm8 keeps the original index; imm8 may be rebased into the selected
            // 128-bit half for Vector256.
            ssize_t imm8       = indexOp->AsIntCon()->IconValue();
            ssize_t cachedImm8 = imm8;
            ssize_t count      = simdSize / genTypeSize(baseType);

            if (imm8 >= count || imm8 < 0)
            {
                // Using software fallback if index is out of range (throws exception)
                return nullptr;
            }

            GenTree* valueOp = impPopStack().val;
            // Pop the index operand; its constant value was already captured in imm8.
            impPopStack();
            GenTree* vectorOp = impSIMDPopStack(getSIMDTypeForSize(simdSize));

            GenTree* clonedVectorOp = nullptr;

            if (simdSize == 32)
            {
                // Extract the half vector that will be modified
                assert(compSupports(InstructionSet_AVX));

                // copy `vectorOp` to accept the modified half vector
                vectorOp = impCloneExpr(vectorOp, &clonedVectorOp, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL,
                                        nullptr DEBUGARG("Clone Vector for Vector256<T>.WithElement"));

                if (imm8 >= count / 2)
                {
                    imm8 -= count / 2;
                    vectorOp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, gtNewIconNode(1), NI_AVX_ExtractVector128,
                                                        baseType, simdSize);
                }
                else
                {
                    vectorOp =
                        gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, NI_Base_Vector256_GetLower, baseType, simdSize);
                }
            }

            GenTree* immNode = gtNewIconNode(imm8);

            // Emit the 128-bit insert for the (possibly extracted) half vector.
            switch (baseType)
            {
                case TYP_LONG:
                case TYP_ULONG:
                    retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, valueOp, immNode, NI_SSE41_X64_Insert,
                                                       baseType, 16);
                    break;

                case TYP_FLOAT:
                {
                    if (!compSupports(InstructionSet_SSE41))
                    {
                        // Emulate Vector128<float>.WithElement by SSE instructions
                        if (imm8 == 0)
                        {
                            // vector.WithElement(0, value)
                            // =>
                            // movss   xmm0, xmm1 (xmm0 = vector, xmm1 = value)
                            valueOp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, valueOp,
                                                               NI_Base_Vector128_CreateScalarUnsafe, TYP_FLOAT, 16);
                            retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, valueOp, NI_SSE_MoveScalar,
                                                               TYP_FLOAT, 16);
                        }
                        else if (imm8 == 1)
                        {
                            // vector.WithElement(1, value)
                            // =>
                            // shufps  xmm1, xmm0, 0   (xmm0 = vector, xmm1 = value)
                            // shufps  xmm1, xmm0, 226
                            GenTree* tmpOp =
                                gtNewSimdHWIntrinsicNode(TYP_SIMD16, valueOp, NI_Base_Vector128_CreateScalarUnsafe,
                                                         TYP_FLOAT, 16);
                            GenTree* dupVectorOp = nullptr;
                            vectorOp = impCloneExpr(vectorOp, &dupVectorOp, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL,
                                                    nullptr DEBUGARG("Clone Vector for Vector128<float>.WithElement"));
                            tmpOp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, tmpOp, vectorOp, gtNewIconNode(0),
                                                             NI_SSE_Shuffle, TYP_FLOAT, 16);
                            retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, tmpOp, dupVectorOp, gtNewIconNode(226),
                                                               NI_SSE_Shuffle, TYP_FLOAT, 16);
                        }
                        else
                        {
                            // imm8 is 2 or 3 here (0 and 1 were handled above, and range was
                            // checked earlier); pick the shufps control bytes accordingly.
                            ssize_t controlBits1 = 0;
                            ssize_t controlBits2 = 0;
                            if (imm8 == 2)
                            {
                                controlBits1 = 48;
                                controlBits2 = 132;
                            }
                            else
                            {
                                controlBits1 = 32;
                                controlBits2 = 36;
                            }
                            // vector.WithElement(2, value)
                            // =>
                            // shufps  xmm1, xmm0, 48   (xmm0 = vector, xmm1 = value)
                            // shufps  xmm0, xmm1, 132
                            //
                            // vector.WithElement(3, value)
                            // =>
                            // shufps  xmm1, xmm0, 32   (xmm0 = vector, xmm1 = value)
                            // shufps  xmm0, xmm1, 36
                            GenTree* tmpOp =
                                gtNewSimdHWIntrinsicNode(TYP_SIMD16, valueOp, NI_Base_Vector128_CreateScalarUnsafe,
                                                         TYP_FLOAT, 16);
                            GenTree* dupVectorOp = nullptr;
                            vectorOp = impCloneExpr(vectorOp, &dupVectorOp, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL,
                                                    nullptr DEBUGARG("Clone Vector for Vector128<float>.WithElement"));
                            valueOp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, tmpOp, gtNewIconNode(controlBits1),
                                                               NI_SSE_Shuffle, TYP_FLOAT, 16);
                            retNode =
                                gtNewSimdHWIntrinsicNode(TYP_SIMD16, valueOp, dupVectorOp, gtNewIconNode(controlBits2),
                                                         NI_SSE_Shuffle, TYP_FLOAT, 16);
                        }
                        break;
                    }
                    else
                    {
                        // SSE4.1 insertps: the count-destination field lives in bits 4-5 of the
                        // immediate, hence the * 16 rebasing; then share the SSE41_Insert path.
                        valueOp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, valueOp, NI_Base_Vector128_CreateScalarUnsafe,
                                                           TYP_FLOAT, 16);
                        immNode->AsIntCon()->SetIconValue(imm8 * 16);
                        __fallthrough;
                    }
                }

                case TYP_BYTE:
                case TYP_UBYTE:
                case TYP_INT:
                case TYP_UINT:
                    retNode =
                        gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, valueOp, immNode, NI_SSE41_Insert, baseType, 16);
                    break;

                case TYP_SHORT:
                case TYP_USHORT:
                    retNode =
                        gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, valueOp, immNode, NI_SSE2_Insert, baseType, 16);
                    break;

                case TYP_DOUBLE:
                {
                    // vector.WithElement(0, value)
                    // =>
                    // movsd   xmm0, xmm1  (xmm0 = vector, xmm1 = value)
                    //
                    // vector.WithElement(1, value)
                    // =>
                    // unpcklpd  xmm0, xmm1  (xmm0 = vector, xmm1 = value)
                    valueOp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, valueOp, NI_Base_Vector128_CreateScalarUnsafe,
                                                       TYP_DOUBLE, 16);
                    NamedIntrinsic in = (imm8 == 0) ? NI_SSE2_MoveScalar : NI_SSE2_UnpackLow;
                    retNode           = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, valueOp, in, TYP_DOUBLE, 16);
                    break;
                }

                default:
                    unreached();
                    break;
            }

            if (simdSize == 32)
            {
                // Re-insert the modified 128-bit half back into the cloned 256-bit vector.
                assert(clonedVectorOp);
                int upperOrLower = (cachedImm8 >= count / 2) ? 1 : 0;
                retNode = gtNewSimdHWIntrinsicNode(retType, clonedVectorOp, retNode, gtNewIconNode(upperOrLower),
                                                   NI_AVX_InsertVector128, baseType, simdSize);
            }

            break;
        }

        case NI_Base_Vector256_GetElement:
        {
            if (!compSupports(InstructionSet_AVX))
            {
                // Using software fallback if JIT/hardware don't support AVX instructions and YMM registers
                return nullptr;
            }
            __fallthrough;
        }

        case NI_Base_Vector128_GetElement:
        {
            assert(sig->numArgs == 2);
            GenTree* indexOp = impStackTop().val;
            if (!compSupports(InstructionSet_SSE2) || !varTypeIsArithmetic(baseType) || !indexOp->OperIsConst())
            {
                // Using software fallback if
                // 1. JIT/hardware don't support SSE2 instructions
                // 2. baseType is not a numeric type (throws exceptions)
                // 3. index is not a constant
                return nullptr;
            }

            switch (baseType)
            {
                // Using software fallback if baseType is not supported by hardware
                case TYP_BYTE:
                case TYP_UBYTE:
                case TYP_INT:
                case TYP_UINT:
                    if (!compSupports(InstructionSet_SSE41))
                    {
                        return nullptr;
                    }
                    break;

                case TYP_LONG:
                case TYP_ULONG:
                    if (!compSupports(InstructionSet_SSE41_X64))
                    {
                        return nullptr;
                    }
                    break;

                case TYP_DOUBLE:
                case TYP_FLOAT:
                case TYP_SHORT:
                case TYP_USHORT:
                    // short/ushort/float/double is supported by SSE2
                    break;

                default:
                    // NOTE(review): baseType was already checked with varTypeIsArithmetic and
                    // every arithmetic type is listed above, so this looks unreachable — the
                    // sibling WithElement switch uses unreached() here; confirm intent.
                    break;
            }

            ssize_t imm8  = indexOp->AsIntCon()->IconValue();
            ssize_t count = simdSize / genTypeSize(baseType);

            if (imm8 >= count || imm8 < 0)
            {
                // Using software fallback if index is out of range (throws exception)
                return nullptr;
            }

            // Pop the index operand; its constant value was already captured in imm8.
            impPopStack();
            GenTree*       vectorOp     = impSIMDPopStack(getSIMDTypeForSize(simdSize));
            NamedIntrinsic resIntrinsic = NI_Illegal;

            if (simdSize == 32)
            {
                // Reduce the Vector256 case to a 128-bit extract from the relevant half.
                assert(compSupports(InstructionSet_AVX));

                if (imm8 >= count / 2)
                {
                    imm8 -= count / 2;
                    vectorOp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, gtNewIconNode(1), NI_AVX_ExtractVector128,
                                                        baseType, simdSize);
                }
                else
                {
                    vectorOp =
                        gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, NI_Base_Vector256_GetLower, baseType, simdSize);
                }
            }

            // Element 0 of a 4/8-byte element type can be read with a simple scalar move.
            if (imm8 == 0 && (genTypeSize(baseType) >= 4))
            {
                switch (baseType)
                {
                    case TYP_LONG:
                        resIntrinsic = NI_SSE2_X64_ConvertToInt64;
                        break;

                    case TYP_ULONG:
                        resIntrinsic = NI_SSE2_X64_ConvertToUInt64;
                        break;

                    case TYP_INT:
                        resIntrinsic = NI_SSE2_ConvertToInt32;
                        break;

                    case TYP_UINT:
                        resIntrinsic = NI_SSE2_ConvertToUInt32;
                        break;

                    case TYP_FLOAT:
                    case TYP_DOUBLE:
                        resIntrinsic = NI_Base_Vector128_ToScalar;
                        break;

                    default:
                        unreached();
                }

                return gtNewSimdHWIntrinsicNode(retType, vectorOp, resIntrinsic, baseType, 16);
            }

            GenTree* immNode = gtNewIconNode(imm8);

            switch (baseType)
            {
                case TYP_LONG:
                case TYP_ULONG:
                    retNode = gtNewSimdHWIntrinsicNode(retType, vectorOp, immNode, NI_SSE41_X64_Extract, baseType, 16);
                    break;

                case TYP_FLOAT:
                {
                    if (!compSupports(InstructionSet_SSE41))
                    {
                        assert(imm8 >= 1);
                        assert(imm8 <= 3);
                        // Emulate Vector128<float>.GetElement(i) by SSE instructions
                        // vector.GetElement(i)
                        // =>
                        // shufps  xmm0, xmm0, control
                        // (xmm0 = vector, control = i + 228)
                        immNode->AsIntCon()->SetIconValue(228 + imm8);
                        GenTree* clonedVectorOp = nullptr;
                        vectorOp = impCloneExpr(vectorOp, &clonedVectorOp, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL,
                                                nullptr DEBUGARG("Clone Vector for Vector128<float>.GetElement"));
                        vectorOp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, clonedVectorOp, immNode,
                                                            NI_SSE_Shuffle, TYP_FLOAT, 16);
                        return gtNewSimdHWIntrinsicNode(retType, vectorOp, NI_Base_Vector128_ToScalar, TYP_FLOAT, 16);
                    }
                    __fallthrough;
                }

                case TYP_UBYTE:
                case TYP_INT:
                case TYP_UINT:
                    retNode = gtNewSimdHWIntrinsicNode(retType, vectorOp, immNode, NI_SSE41_Extract, baseType, 16);
                    break;

                case TYP_BYTE:
                    // We do not have SSE41/SSE2 Extract APIs on signed small int, so need a CAST on the result
                    retNode = gtNewSimdHWIntrinsicNode(TYP_UBYTE, vectorOp, immNode, NI_SSE41_Extract, TYP_UBYTE, 16);
                    retNode = gtNewCastNode(TYP_INT, retNode, true, TYP_BYTE);
                    break;

                case TYP_SHORT:
                case TYP_USHORT:
                    // We do not have SSE41/SSE2 Extract APIs on signed small int, so need a CAST on the result
                    retNode = gtNewSimdHWIntrinsicNode(TYP_USHORT, vectorOp, immNode, NI_SSE2_Extract, TYP_USHORT, 16);
                    if (baseType == TYP_SHORT)
                    {
                        retNode = gtNewCastNode(TYP_INT, retNode, true, TYP_SHORT);
                    }
                    break;

                case TYP_DOUBLE:
                    assert(imm8 == 1);
                    // vector.GetElement(1)
                    // =>
                    // pshufd xmm1, xmm0, 0xEE (xmm0 = vector)
                    vectorOp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, gtNewIconNode(0xEE), NI_SSE2_Shuffle,
                                                        TYP_INT, 16);
                    retNode =
                        gtNewSimdHWIntrinsicNode(TYP_DOUBLE, vectorOp, NI_Base_Vector128_ToScalar, TYP_DOUBLE, 16);
                    break;

                default:
                    unreached();
            }

            break;
        }

        default:
        {
            unreached();
            break;
        }
    }

    return retNode;
}
1592
1593 GenTree* Compiler::impSSEIntrinsic(NamedIntrinsic        intrinsic,
1594                                    CORINFO_METHOD_HANDLE method,
1595                                    CORINFO_SIG_INFO*     sig,
1596                                    bool                  mustExpand)
1597 {
1598     GenTree* retNode  = nullptr;
1599     GenTree* op1      = nullptr;
1600     GenTree* op2      = nullptr;
1601     GenTree* op3      = nullptr;
1602     GenTree* op4      = nullptr;
1603     int      simdSize = HWIntrinsicInfo::lookupSimdSize(this, intrinsic, sig);
1604
1605     // The Prefetch and StoreFence intrinsics don't take any SIMD operands
1606     // and have a simdSize of 0
1607     assert((simdSize == 16) || (simdSize == 0));
1608
1609     switch (intrinsic)
1610     {
1611         case NI_SSE_Prefetch0:
1612         case NI_SSE_Prefetch1:
1613         case NI_SSE_Prefetch2:
1614         case NI_SSE_PrefetchNonTemporal:
1615         {
1616             assert(sig->numArgs == 1);
1617             assert(JITtype2varType(sig->retType) == TYP_VOID);
1618             op1     = impPopStack().val;
1619             retNode = gtNewSimdHWIntrinsicNode(TYP_VOID, op1, intrinsic, TYP_UBYTE, 0);
1620             break;
1621         }
1622
1623         case NI_SSE_StoreFence:
1624             assert(sig->numArgs == 0);
1625             assert(JITtype2varType(sig->retType) == TYP_VOID);
1626             retNode = gtNewSimdHWIntrinsicNode(TYP_VOID, intrinsic, TYP_VOID, 0);
1627             break;
1628
1629         default:
1630             JITDUMP("Not implemented hardware intrinsic");
1631             break;
1632     }
1633     return retNode;
1634 }
1635
1636 GenTree* Compiler::impSSE2Intrinsic(NamedIntrinsic        intrinsic,
1637                                     CORINFO_METHOD_HANDLE method,
1638                                     CORINFO_SIG_INFO*     sig,
1639                                     bool                  mustExpand)
1640 {
1641     GenTree*  retNode  = nullptr;
1642     GenTree*  op1      = nullptr;
1643     GenTree*  op2      = nullptr;
1644     int       ival     = -1;
1645     int       simdSize = HWIntrinsicInfo::lookupSimdSize(this, intrinsic, sig);
1646     var_types baseType = TYP_UNKNOWN;
1647     var_types retType  = TYP_UNKNOWN;
1648
1649     // The  fencing intrinsics don't take any operands and simdSize is 0
1650     assert((simdSize == 16) || (simdSize == 0));
1651
1652     CORINFO_ARG_LIST_HANDLE argList = sig->args;
1653     var_types               argType = TYP_UNKNOWN;
1654
1655     switch (intrinsic)
1656     {
1657         case NI_SSE2_CompareLessThan:
1658         {
1659             assert(sig->numArgs == 2);
1660             op2      = impSIMDPopStack(TYP_SIMD16);
1661             op1      = impSIMDPopStack(TYP_SIMD16);
1662             baseType = getBaseTypeOfSIMDType(sig->retTypeSigClass);
1663             if (baseType == TYP_DOUBLE)
1664             {
1665                 retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, intrinsic, baseType, simdSize);
1666             }
1667             else
1668             {
1669                 retNode =
1670                     gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, op1, NI_SSE2_CompareGreaterThan, baseType, simdSize);
1671             }
1672             break;
1673         }
1674
1675         case NI_SSE2_LoadFence:
1676         case NI_SSE2_MemoryFence:
1677         {
1678             assert(sig->numArgs == 0);
1679             assert(JITtype2varType(sig->retType) == TYP_VOID);
1680             assert(simdSize == 0);
1681
1682             retNode = gtNewSimdHWIntrinsicNode(TYP_VOID, intrinsic, TYP_VOID, simdSize);
1683             break;
1684         }
1685
1686         case NI_SSE2_StoreNonTemporal:
1687         {
1688             assert(sig->numArgs == 2);
1689             assert(JITtype2varType(sig->retType) == TYP_VOID);
1690             op2     = impPopStack().val;
1691             op1     = impPopStack().val;
1692             retNode = gtNewSimdHWIntrinsicNode(TYP_VOID, op1, op2, NI_SSE2_StoreNonTemporal, op2->TypeGet(), 0);
1693             break;
1694         }
1695
1696         default:
1697             JITDUMP("Not implemented hardware intrinsic");
1698             break;
1699     }
1700     return retNode;
1701 }
1702
1703 GenTree* Compiler::impSSE42Intrinsic(NamedIntrinsic        intrinsic,
1704                                      CORINFO_METHOD_HANDLE method,
1705                                      CORINFO_SIG_INFO*     sig,
1706                                      bool                  mustExpand)
1707 {
1708     GenTree*  retNode  = nullptr;
1709     GenTree*  op1      = nullptr;
1710     GenTree*  op2      = nullptr;
1711     var_types callType = JITtype2varType(sig->retType);
1712
1713     CORINFO_ARG_LIST_HANDLE argList = sig->args;
1714     CORINFO_CLASS_HANDLE    argClass;
1715     CorInfoType             corType;
1716     switch (intrinsic)
1717     {
1718         case NI_SSE42_Crc32:
1719         case NI_SSE42_X64_Crc32:
1720             assert(sig->numArgs == 2);
1721             op2     = impPopStack().val;
1722             op1     = impPopStack().val;
1723             argList = info.compCompHnd->getArgNext(argList);                        // the second argument
1724             corType = strip(info.compCompHnd->getArgType(sig, argList, &argClass)); // type of the second argument
1725
1726             retNode = gtNewScalarHWIntrinsicNode(callType, op1, op2, intrinsic);
1727
1728             // TODO - currently we use the BaseType to bring the type of the second argument
1729             // to the code generator. May encode the overload info in other way.
1730             retNode->gtHWIntrinsic.gtSIMDBaseType = JITtype2varType(corType);
1731             break;
1732
1733         default:
1734             JITDUMP("Not implemented hardware intrinsic");
1735             break;
1736     }
1737     return retNode;
1738 }
1739
//------------------------------------------------------------------------
// impAvxOrAvx2Intrinsic: Import an AVX/AVX2 hardware intrinsic as a GT_HWIntrinsic node
//
// Arguments:
//    intrinsic  -- The NamedIntrinsic being imported
//    method     -- Handle of the method being imported
//    sig        -- Signature of the method being imported
//    mustExpand -- true if the intrinsic must produce a node; otherwise, false
//
// Return Value:
//    The expanded intrinsic tree, or nullptr if this importer does not handle
//    the given intrinsic
GenTree* Compiler::impAvxOrAvx2Intrinsic(NamedIntrinsic        intrinsic,
                                         CORINFO_METHOD_HANDLE method,
                                         CORINFO_SIG_INFO*     sig,
                                         bool                  mustExpand)
{
    GenTree*  retNode  = nullptr;
    GenTree*  op1      = nullptr;
    GenTree*  op2      = nullptr;
    var_types baseType = TYP_UNKNOWN;
    int       simdSize = HWIntrinsicInfo::lookupSimdSize(this, intrinsic, sig);

    switch (intrinsic)
    {
        case NI_AVX2_PermuteVar8x32:
        {
            baseType = getBaseTypeOfSIMDType(sig->retTypeSigClass);
            // swap the two operands: the managed signature is (source, index)
            // but the node is built as (index, source).
            GenTree* indexVector  = impSIMDPopStack(TYP_SIMD32);
            GenTree* sourceVector = impSIMDPopStack(TYP_SIMD32);
            retNode =
                gtNewSimdHWIntrinsicNode(TYP_SIMD32, indexVector, sourceVector, NI_AVX2_PermuteVar8x32, baseType, 32);
            break;
        }

        case NI_AVX2_GatherMaskVector128:
        case NI_AVX2_GatherMaskVector256:
        {
            // GatherMaskVector(source, baseAddress, index, mask, scale) -- 5 args.
            CORINFO_ARG_LIST_HANDLE argList = sig->args;
            CORINFO_CLASS_HANDLE    argClass;
            var_types               argType = TYP_UNKNOWN;
            unsigned int            sizeBytes;
            baseType          = getBaseTypeAndSizeOfSIMDType(sig->retTypeSigClass, &sizeBytes);
            var_types retType = getSIMDTypeForSize(sizeBytes);

            assert(sig->numArgs == 5);
            CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(argList);
            CORINFO_ARG_LIST_HANDLE arg3 = info.compCompHnd->getArgNext(arg2);
            CORINFO_ARG_LIST_HANDLE arg4 = info.compCompHnd->getArgNext(arg3);
            CORINFO_ARG_LIST_HANDLE arg5 = info.compCompHnd->getArgNext(arg4);

            // NOTE: the arguments are retrieved last-to-first (op5 down to op1);
            // presumably getArgForHWIntrinsic pops the importer's evaluation
            // stack, which makes this ordering load-bearing -- do not reorder.
            argType      = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg5, &argClass)));
            GenTree* op5 = getArgForHWIntrinsic(argType, argClass);
            SetOpLclRelatedToSIMDIntrinsic(op5);

            argType      = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg4, &argClass)));
            GenTree* op4 = getArgForHWIntrinsic(argType, argClass);
            SetOpLclRelatedToSIMDIntrinsic(op4);

            // The third argument's SIMD base type is recorded separately on the
            // node (gtIndexBaseType) below.
            argType                 = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg3, &argClass)));
            var_types indexbaseType = getBaseTypeOfSIMDType(argClass);
            GenTree*  op3           = getArgForHWIntrinsic(argType, argClass);
            SetOpLclRelatedToSIMDIntrinsic(op3);

            argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass)));
            op2     = getArgForHWIntrinsic(argType, argClass);
            SetOpLclRelatedToSIMDIntrinsic(op2);

            argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass)));
            op1     = getArgForHWIntrinsic(argType, argClass);
            SetOpLclRelatedToSIMDIntrinsic(op1);

            // Five operands don't fit the fixed op1/op2 node shape, so build a
            // GT_LIST and construct the HWIntrinsic node over it directly.
            GenTree* opList = new (this, GT_LIST) GenTreeArgList(op1, gtNewArgList(op2, op3, op4, op5));
            retNode = new (this, GT_HWIntrinsic) GenTreeHWIntrinsic(retType, opList, intrinsic, baseType, simdSize);
            retNode->AsHWIntrinsic()->gtIndexBaseType = indexbaseType;
            break;
        }

        default:
            JITDUMP("Not implemented hardware intrinsic");
            break;
    }
    return retNode;
}
1813
//------------------------------------------------------------------------
// impAESIntrinsic: Import an AES hardware intrinsic
//
// This special-case importer intentionally expands nothing and returns
// nullptr; presumably AES intrinsics are handled by the table-driven
// importer path instead -- TODO(review): confirm.
GenTree* Compiler::impAESIntrinsic(NamedIntrinsic        intrinsic,
                                   CORINFO_METHOD_HANDLE method,
                                   CORINFO_SIG_INFO*     sig,
                                   bool                  mustExpand)
{
    return nullptr;
}
1821
//------------------------------------------------------------------------
// impBMI1OrBMI2Intrinsic: Import a BMI1/BMI2 hardware intrinsic as a GT_HWIntrinsic node
//
// Arguments:
//    intrinsic  -- The NamedIntrinsic being imported
//    method     -- Handle of the method being imported
//    sig        -- Signature of the method being imported
//    mustExpand -- true if the intrinsic must produce a node; otherwise, false
//
// Return Value:
//    The expanded intrinsic tree, or nullptr for overloads that are
//    implemented in managed code
GenTree* Compiler::impBMI1OrBMI2Intrinsic(NamedIntrinsic        intrinsic,
                                          CORINFO_METHOD_HANDLE method,
                                          CORINFO_SIG_INFO*     sig,
                                          bool                  mustExpand)
{
    var_types callType = JITtype2varType(sig->retType);

    switch (intrinsic)
    {
        // Simple two-operand scalar intrinsics: operands are kept in
        // managed-signature order.
        case NI_BMI1_AndNot:
        case NI_BMI1_X64_AndNot:
        case NI_BMI2_ParallelBitDeposit:
        case NI_BMI2_ParallelBitExtract:
        case NI_BMI2_X64_ParallelBitDeposit:
        case NI_BMI2_X64_ParallelBitExtract:
        {
            assert(sig->numArgs == 2);

            GenTree* op2 = impPopStack().val;
            GenTree* op1 = impPopStack().val;

            return gtNewScalarHWIntrinsicNode(callType, op1, op2, intrinsic);
        }

        case NI_BMI2_ZeroHighBits:
        case NI_BMI2_X64_ZeroHighBits:
        {
            assert(sig->numArgs == 2);

            GenTree* op2 = impPopStack().val;
            GenTree* op1 = impPopStack().val;
            // Instruction BZHI requires to encode op2 (3rd register) in VEX.vvvv and op1 maybe memory operand,
            // so swap op1 and op2 to unify the backend code.
            return gtNewScalarHWIntrinsicNode(callType, op2, op1, intrinsic);
        }

        // Simple one-operand scalar intrinsics.
        case NI_BMI1_ExtractLowestSetBit:
        case NI_BMI1_GetMaskUpToLowestSetBit:
        case NI_BMI1_ResetLowestSetBit:
        case NI_BMI1_TrailingZeroCount:
        case NI_BMI1_X64_ExtractLowestSetBit:
        case NI_BMI1_X64_GetMaskUpToLowestSetBit:
        case NI_BMI1_X64_ResetLowestSetBit:
        case NI_BMI1_X64_TrailingZeroCount:
        {
            assert(sig->numArgs == 1);
            GenTree* op1 = impPopStack().val;
            return gtNewScalarHWIntrinsicNode(callType, op1, intrinsic);
        }

        case NI_BMI1_BitFieldExtract:
        case NI_BMI1_X64_BitFieldExtract:
        {
            // The 3-arg version is implemented in managed code
            if (sig->numArgs == 3)
            {
                return nullptr;
            }
            assert(sig->numArgs == 2);

            GenTree* op2 = impPopStack().val;
            GenTree* op1 = impPopStack().val;
            // Instruction BEXTR requires to encode op2 (3rd register) in VEX.vvvv and op1 maybe memory operand,
            // so swap op1 and op2 to unify the backend code.
            return gtNewScalarHWIntrinsicNode(callType, op2, op1, intrinsic);
        }

        case NI_BMI2_MultiplyNoFlags:
        case NI_BMI2_X64_MultiplyNoFlags:
        {
            // MultiplyNoFlags has 2- and 3-operand overloads; the optional
            // third operand is the address receiving the low half.
            assert(sig->numArgs == 2 || sig->numArgs == 3);
            GenTree* op3 = nullptr;
            if (sig->numArgs == 3)
            {
                op3 = impPopStack().val;
            }

            GenTree* op2 = impPopStack().val;
            GenTree* op1 = impPopStack().val;

            if (sig->numArgs == 3)
            {
                return gtNewScalarHWIntrinsicNode(callType, op1, op2, op3, intrinsic);
            }
            else
            {
                return gtNewScalarHWIntrinsicNode(callType, op1, op2, intrinsic);
            }
        }

        default:
        {
            // All BMI1/BMI2 intrinsics routed here are expected to be handled
            // by one of the cases above.
            unreached();
            return nullptr;
        }
    }
}
1919
//------------------------------------------------------------------------
// impFMAIntrinsic: Import an FMA hardware intrinsic
//
// This special-case importer intentionally expands nothing and returns
// nullptr; presumably FMA intrinsics are handled by the table-driven
// importer path instead -- TODO(review): confirm.
GenTree* Compiler::impFMAIntrinsic(NamedIntrinsic        intrinsic,
                                   CORINFO_METHOD_HANDLE method,
                                   CORINFO_SIG_INFO*     sig,
                                   bool                  mustExpand)
{
    return nullptr;
}
1927
1928 GenTree* Compiler::impLZCNTIntrinsic(NamedIntrinsic        intrinsic,
1929                                      CORINFO_METHOD_HANDLE method,
1930                                      CORINFO_SIG_INFO*     sig,
1931                                      bool                  mustExpand)
1932 {
1933     assert(sig->numArgs == 1);
1934     var_types callType = JITtype2varType(sig->retType);
1935     return gtNewScalarHWIntrinsicNode(callType, impPopStack().val, intrinsic);
1936 }
1937
//------------------------------------------------------------------------
// impPCLMULQDQIntrinsic: Import a PCLMULQDQ hardware intrinsic
//
// This special-case importer intentionally expands nothing and returns
// nullptr; presumably PCLMULQDQ intrinsics are handled by the table-driven
// importer path instead -- TODO(review): confirm.
GenTree* Compiler::impPCLMULQDQIntrinsic(NamedIntrinsic        intrinsic,
                                         CORINFO_METHOD_HANDLE method,
                                         CORINFO_SIG_INFO*     sig,
                                         bool                  mustExpand)
{
    return nullptr;
}
1945
1946 GenTree* Compiler::impPOPCNTIntrinsic(NamedIntrinsic        intrinsic,
1947                                       CORINFO_METHOD_HANDLE method,
1948                                       CORINFO_SIG_INFO*     sig,
1949                                       bool                  mustExpand)
1950 {
1951     assert(sig->numArgs == 1);
1952     var_types callType = JITtype2varType(sig->retType);
1953     return gtNewScalarHWIntrinsicNode(callType, impPopStack().val, intrinsic);
1954 }
1955
1956 #endif // FEATURE_HW_INTRINSICS