Merge pull request #15897 from tannergooding/hwintrin-debuggerdisplay
[platform/upstream/coreclr.git] / src / jit / hwintrinsicxarch.cpp
1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
4
5 #include "jitpch.h"
6
7 #if FEATURE_HW_INTRINSICS
8
9 struct HWIntrinsicInfo
10 {
11     NamedIntrinsic intrinsicID;
12     const char*    intrinsicName;
13     InstructionSet isa;
14 }
15
16 static const hwIntrinsicInfoArray[] = {
17 #define HARDWARE_INTRINSIC(id, name, isa) {NI_##id, name, InstructionSet_##isa},
18 #include "hwintrinsiclistxarch.h"
19 };
20
21 extern const char* getHWIntrinsicName(NamedIntrinsic intrinsic)
22 {
23     return hwIntrinsicInfoArray[intrinsic - NI_HW_INTRINSIC_START - 1].intrinsicName;
24 }
25
26 //------------------------------------------------------------------------
27 // lookupHWIntrinsicISA: map class name to InstructionSet value
28 //
29 // Arguments:
30 //    className -- class name in System.Runtime.Intrinsics.X86
31 //
32 // Return Value:
33 //    Id for the ISA class.
34 //
35 InstructionSet Compiler::lookupHWIntrinsicISA(const char* className)
36 {
37     if (className != nullptr)
38     {
39         if (className[0] == 'A')
40         {
41             if (strcmp(className, "Aes") == 0)
42             {
43                 return InstructionSet_AES;
44             }
45             else if (strcmp(className, "Avx") == 0)
46             {
47                 return InstructionSet_AVX;
48             }
49             else if (strcmp(className, "Avx2") == 0)
50             {
51                 return InstructionSet_AVX2;
52             }
53         }
54         if (className[0] == 'S')
55         {
56             if (strcmp(className, "Sse") == 0)
57             {
58                 return InstructionSet_SSE;
59             }
60             else if (strcmp(className, "Sse2") == 0)
61             {
62                 return InstructionSet_SSE2;
63             }
64             else if (strcmp(className, "Sse3") == 0)
65             {
66                 return InstructionSet_SSE3;
67             }
68             else if (strcmp(className, "Ssse3") == 0)
69             {
70                 return InstructionSet_SSSE3;
71             }
72             else if (strcmp(className, "Sse41") == 0)
73             {
74                 return InstructionSet_SSE41;
75             }
76             else if (strcmp(className, "Sse42") == 0)
77             {
78                 return InstructionSet_SSE42;
79             }
80         }
81
82         if (strcmp(className, "Bmi1") == 0)
83         {
84             return InstructionSet_BMI1;
85         }
86         else if (strcmp(className, "Bmi2") == 0)
87         {
88             return InstructionSet_BMI2;
89         }
90         else if (strcmp(className, "Fma") == 0)
91         {
92             return InstructionSet_FMA;
93         }
94         else if (strcmp(className, "Lzcnt") == 0)
95         {
96             return InstructionSet_LZCNT;
97         }
98         else if (strcmp(className, "Pclmulqdq") == 0)
99         {
100             return InstructionSet_PCLMULQDQ;
101         }
102         else if (strcmp(className, "Popcnt") == 0)
103         {
104             return InstructionSet_POPCNT;
105         }
106     }
107
108     JITDUMP("Unsupported ISA.\n");
109     return InstructionSet_ILLEGAL;
110 }
111
112 //------------------------------------------------------------------------
113 // lookupHWIntrinsic: map intrinsic name to named intrinsic value
114 //
115 // Arguments:
116 //    methodName -- name of the intrinsic function.
117 //    isa        -- instruction set of the intrinsic.
118 //
119 // Return Value:
120 //    Id for the hardware intrinsic.
121 //
122 // TODO-Throughput: replace sequential search by binary search
123 NamedIntrinsic Compiler::lookupHWIntrinsic(const char* methodName, InstructionSet isa)
124 {
125     NamedIntrinsic result = NI_Illegal;
126     if (isa != InstructionSet_ILLEGAL)
127     {
128         for (int i = 0; i < NI_HW_INTRINSIC_END - NI_HW_INTRINSIC_START; i++)
129         {
130             if (isa == hwIntrinsicInfoArray[i].isa && strcmp(methodName, hwIntrinsicInfoArray[i].intrinsicName) == 0)
131             {
132                 result = hwIntrinsicInfoArray[i].intrinsicID;
133             }
134         }
135     }
136     return result;
137 }
138
139 //------------------------------------------------------------------------
140 // isaOfHWIntrinsic: map named intrinsic value to its instruction set
141 //
142 // Arguments:
143 //    intrinsic -- id of the intrinsic function.
144 //
145 // Return Value:
146 //    instruction set of the intrinsic.
147 //
148 InstructionSet Compiler::isaOfHWIntrinsic(NamedIntrinsic intrinsic)
149 {
150     assert(intrinsic != NI_Illegal);
151     assert(intrinsic > NI_HW_INTRINSIC_START && intrinsic < NI_HW_INTRINSIC_END);
152     return hwIntrinsicInfoArray[intrinsic - NI_HW_INTRINSIC_START - 1].isa;
153 }
154
155 //------------------------------------------------------------------------
156 // ivalOfHWIntrinsic: get the imm8 value of the given intrinsic
157 //
158 // Arguments:
159 //    intrinsic -- id of the intrinsic function.
160 //
161 // Return Value:
162 //     the imm8 value of the intrinsic, -1 for non-IMM intrinsics
163 //
164 int Compiler::ivalOfHWIntrinsic(NamedIntrinsic intrinsic)
165 {
166     assert(intrinsic != NI_Illegal);
167     assert(intrinsic > NI_HW_INTRINSIC_START && intrinsic < NI_HW_INTRINSIC_END);
168
169     switch (intrinsic)
170     {
171         case NI_SSE_CompareEqual:
172         case NI_SSE_CompareEqualScalar:
173             return 0;
174
175         case NI_SSE_CompareLessThan:
176         case NI_SSE_CompareLessThanScalar:
177         case NI_SSE_CompareNotGreaterThanOrEqual:
178         case NI_SSE_CompareNotGreaterThanOrEqualScalar:
179             return 1;
180
181         case NI_SSE_CompareLessThanOrEqual:
182         case NI_SSE_CompareLessThanOrEqualScalar:
183         case NI_SSE_CompareNotGreaterThan:
184         case NI_SSE_CompareNotGreaterThanScalar:
185             return 2;
186
187         case NI_SSE_CompareUnordered:
188         case NI_SSE_CompareUnorderedScalar:
189             return 3;
190
191         case NI_SSE_CompareNotEqual:
192         case NI_SSE_CompareNotEqualScalar:
193             return 4;
194
195         case NI_SSE_CompareGreaterThanOrEqual:
196         case NI_SSE_CompareGreaterThanOrEqualScalar:
197         case NI_SSE_CompareNotLessThan:
198         case NI_SSE_CompareNotLessThanScalar:
199             return 5;
200
201         case NI_SSE_CompareGreaterThan:
202         case NI_SSE_CompareGreaterThanScalar:
203         case NI_SSE_CompareNotLessThanOrEqual:
204         case NI_SSE_CompareNotLessThanOrEqualScalar:
205             return 6;
206
207         case NI_SSE_CompareOrdered:
208         case NI_SSE_CompareOrderedScalar:
209             return 7;
210
211         default:
212             return -1;
213     }
214 }
215
216 //------------------------------------------------------------------------
217 // insOfHWIntrinsic: get the instruction of the given intrinsic
218 //
219 // Arguments:
220 //    intrinsic -- id of the intrinsic function.
221 //    type      -- vector base type of this intrinsic
222 //
223 // Return Value:
224 //     the instruction of the given intrinsic on the base type
225 //     return INS_invalid for unsupported base types
226 //
227 instruction Compiler::insOfHWIntrinsic(NamedIntrinsic intrinsic, var_types type)
228 {
229     assert(intrinsic != NI_Illegal);
230     assert(intrinsic > NI_HW_INTRINSIC_START && intrinsic < NI_HW_INTRINSIC_END);
231
232     switch (intrinsic)
233     {
234         case NI_SSE_Add:
235             return INS_addps;
236
237         case NI_SSE_AddScalar:
238             return INS_addss;
239
240         case NI_SSE_And:
241             return INS_andps;
242
243         case NI_SSE_AndNot:
244             return INS_andnps;
245
246         case NI_SSE_CompareEqual:
247         case NI_SSE_CompareGreaterThan:
248         case NI_SSE_CompareGreaterThanOrEqual:
249         case NI_SSE_CompareLessThan:
250         case NI_SSE_CompareLessThanOrEqual:
251         case NI_SSE_CompareNotEqual:
252         case NI_SSE_CompareNotGreaterThan:
253         case NI_SSE_CompareNotGreaterThanOrEqual:
254         case NI_SSE_CompareNotLessThan:
255         case NI_SSE_CompareNotLessThanOrEqual:
256         case NI_SSE_CompareOrdered:
257         case NI_SSE_CompareUnordered:
258             return INS_cmpps;
259
260         case NI_SSE_CompareEqualScalar:
261         case NI_SSE_CompareGreaterThanScalar:
262         case NI_SSE_CompareGreaterThanOrEqualScalar:
263         case NI_SSE_CompareLessThanScalar:
264         case NI_SSE_CompareLessThanOrEqualScalar:
265         case NI_SSE_CompareNotEqualScalar:
266         case NI_SSE_CompareNotGreaterThanScalar:
267         case NI_SSE_CompareNotGreaterThanOrEqualScalar:
268         case NI_SSE_CompareNotLessThanScalar:
269         case NI_SSE_CompareNotLessThanOrEqualScalar:
270         case NI_SSE_CompareOrderedScalar:
271         case NI_SSE_CompareUnorderedScalar:
272             return INS_cmpss;
273
274         case NI_SSE_CompareEqualOrderedScalar:
275         case NI_SSE_CompareGreaterThanOrderedScalar:
276         case NI_SSE_CompareGreaterThanOrEqualOrderedScalar:
277         case NI_SSE_CompareLessThanOrderedScalar:
278         case NI_SSE_CompareLessThanOrEqualOrderedScalar:
279         case NI_SSE_CompareNotEqualOrderedScalar:
280             return INS_comiss;
281
282         case NI_SSE_CompareEqualUnorderedScalar:
283         case NI_SSE_CompareGreaterThanUnorderedScalar:
284         case NI_SSE_CompareGreaterThanOrEqualUnorderedScalar:
285         case NI_SSE_CompareLessThanUnorderedScalar:
286         case NI_SSE_CompareLessThanOrEqualUnorderedScalar:
287         case NI_SSE_CompareNotEqualUnorderedScalar:
288             return INS_ucomiss;
289
290         case NI_SSE_ConvertToInt32:
291         case NI_SSE_ConvertToInt64:
292             return INS_cvtss2si;
293
294         case NI_SSE_ConvertToInt32WithTruncation:
295         case NI_SSE_ConvertToInt64WithTruncation:
296             return INS_cvttss2si;
297
298         case NI_SSE_ConvertToSingle:
299         case NI_SSE_LoadScalar:
300         case NI_SSE_MoveScalar:
301             return INS_movss;
302
303         case NI_SSE_ConvertToVector128SingleScalar:
304             return INS_cvtsi2ss;
305
306         case NI_SSE_Divide:
307             return INS_divps;
308
309         case NI_SSE_DivideScalar:
310             return INS_divss;
311
312         case NI_SSE_LoadAlignedVector128:
313         case NI_SSE_StaticCast:
314             return INS_movaps;
315
316         case NI_SSE_LoadHigh:
317             return INS_movhps;
318
319         case NI_SSE_LoadLow:
320             return INS_movlps;
321
322         case NI_SSE_LoadVector128:
323             return INS_movups;
324
325         case NI_SSE_Max:
326             return INS_maxps;
327
328         case NI_SSE_MaxScalar:
329             return INS_maxss;
330
331         case NI_SSE_Min:
332             return INS_minps;
333
334         case NI_SSE_MinScalar:
335             return INS_minss;
336
337         case NI_SSE_MoveHighToLow:
338             return INS_movhlps;
339
340         case NI_SSE_MoveLowToHigh:
341             return INS_movlhps;
342
343         case NI_SSE_MoveMask:
344             return INS_movmskps;
345
346         case NI_SSE_Multiply:
347             return INS_mulps;
348
349         case NI_SSE_MultiplyScalar:
350             return INS_mulss;
351
352         case NI_SSE_Or:
353             return INS_orps;
354
355         case NI_SSE_Reciprocal:
356             return INS_rcpps;
357
358         case NI_SSE_ReciprocalScalar:
359             return INS_rcpss;
360
361         case NI_SSE_ReciprocalSqrt:
362             return INS_rsqrtps;
363
364         case NI_SSE_ReciprocalSqrtScalar:
365             return INS_rsqrtss;
366
367         case NI_SSE_Sqrt:
368             return INS_sqrtps;
369
370         case NI_SSE_SqrtScalar:
371             return INS_sqrtss;
372
373         case NI_SSE_Subtract:
374             return INS_subps;
375
376         case NI_SSE_SubtractScalar:
377             return INS_subss;
378
379         case NI_SSE_UnpackHigh:
380             return INS_unpckhps;
381
382         case NI_SSE_UnpackLow:
383             return INS_unpcklps;
384
385         case NI_SSE_Xor:
386             return INS_xorps;
387
388         default:
389             return INS_invalid;
390     }
391 }
392
393 //------------------------------------------------------------------------
394 // isIntrinsicAnIsSupportedPropertyGetter: return true if the intrinsic is "get_IsSupported"
395 //
396 // Arguments:
397 //    intrinsic -- id of the intrinsic function.
398 //
399 // Return Value:
400 //    true if the intrinsic is "get_IsSupported"
401 //    Sometimes we need to specially treat "get_IsSupported"
402 bool Compiler::isIntrinsicAnIsSupportedPropertyGetter(NamedIntrinsic intrinsic)
403 {
404     switch (intrinsic)
405     {
406         case NI_SSE_IsSupported:
407         case NI_SSE2_IsSupported:
408         case NI_SSE3_IsSupported:
409         case NI_SSSE3_IsSupported:
410         case NI_SSE41_IsSupported:
411         case NI_SSE42_IsSupported:
412         case NI_AVX_IsSupported:
413         case NI_AVX2_IsSupported:
414         case NI_AES_IsSupported:
415         case NI_BMI1_IsSupported:
416         case NI_BMI2_IsSupported:
417         case NI_FMA_IsSupported:
418         case NI_LZCNT_IsSupported:
419         case NI_PCLMULQDQ_IsSupported:
420         case NI_POPCNT_IsSupported:
421             return true;
422         default:
423             return false;
424     }
425 }
426
427 //------------------------------------------------------------------------
428 // isFullyImplmentedISAClass: return true if all the hardware intrinsics
429 //    of this ISA are implemented in RyuJIT.
430 //
431 // Arguments:
432 //    isa - Instruction set
433 // Return Value:
434 //    true - all the hardware intrinsics of "isa" are implemented in RyuJIT.
435 //
436 bool Compiler::isFullyImplmentedISAClass(InstructionSet isa)
437 {
438     switch (isa)
439     {
440         case InstructionSet_SSE:
441         case InstructionSet_SSE2:
442         case InstructionSet_SSE3:
443         case InstructionSet_SSSE3:
444         case InstructionSet_SSE41:
445         case InstructionSet_SSE42:
446         case InstructionSet_AVX:
447         case InstructionSet_AVX2:
448         case InstructionSet_AES:
449         case InstructionSet_BMI1:
450         case InstructionSet_BMI2:
451         case InstructionSet_FMA:
452         case InstructionSet_PCLMULQDQ:
453             return false;
454
455         case InstructionSet_LZCNT:
456         case InstructionSet_POPCNT:
457             return true;
458
459         default:
460             unreached();
461     }
462 }
463
464 //------------------------------------------------------------------------
465 // isScalarISA:
466 //
467 // Arguments:
468 //    isa - Instruction set
469 // Return Value:
470 //    true - if "isa" only contains scalar instructions
471 //
472 bool Compiler::isScalarISA(InstructionSet isa)
473 {
474     switch (isa)
475     {
476         case InstructionSet_BMI1:
477         case InstructionSet_BMI2:
478         case InstructionSet_LZCNT:
479         case InstructionSet_POPCNT:
480             return true;
481
482         default:
483             return false;
484     }
485 }
486
487 //------------------------------------------------------------------------
488 // compSupportsHWIntrinsic: compiler support of hardware intrinsics
489 //
490 // Arguments:
491 //    isa - Instruction set
492 // Return Value:
493 //    true if
494 //    - isa is a scalar ISA
495 //    - isa is a SIMD ISA and featureSIMD=true
496 //    - isa is fully implemented or EnableIncompleteISAClass=true
497 bool Compiler::compSupportsHWIntrinsic(InstructionSet isa)
498 {
499     return (featureSIMD || isScalarISA(isa)) && (
500 #ifdef DEBUG
501                                                     JitConfig.EnableIncompleteISAClass() ||
502 #endif
503                                                     isFullyImplmentedISAClass(isa));
504 }
505
506 //------------------------------------------------------------------------
507 // impUnsupportedHWIntrinsic: returns a node for an unsupported HWIntrinsic
508 //
509 // Arguments:
510 //    helper     - JIT helper ID for the exception to be thrown
511 //    method     - method handle of the intrinsic function.
512 //    sig        - signature of the intrinsic call
513 //    mustExpand - true if the intrinsic must return a GenTree*; otherwise, false
514 //
515 // Return Value:
516 //    a gtNewMustThrowException if mustExpand is true; otherwise, nullptr
517 //
518 GenTree* Compiler::impUnsupportedHWIntrinsic(unsigned              helper,
519                                              CORINFO_METHOD_HANDLE method,
520                                              CORINFO_SIG_INFO*     sig,
521                                              bool                  mustExpand)
522 {
523     // We've hit some error case and may need to return a node for the given error.
524     //
525     // When `mustExpand=false`, we are attempting to inline the intrinsic directly into another method. In this
526     // scenario, we need to return `nullptr` so that a GT_CALL to the intrinsic is emitted instead. This is to
527     // ensure that everything continues to behave correctly when optimizations are enabled (e.g. things like the
528     // inliner may expect the node we return to have a certain signature, and the `MustThrowException` node won't
529     // match that).
530     //
531     // When `mustExpand=true`, we are in a GT_CALL to the intrinsic and are attempting to JIT it. This will generally
532     // be in response to an indirect call (e.g. done via reflection) or in response to an earlier attempt returning
533     // `nullptr` (under `mustExpand=false`). In that scenario, we are safe to return the `MustThrowException` node.
534
535     if (mustExpand)
536     {
537         for (unsigned i = 0; i < sig->numArgs; i++)
538         {
539             impPopStack();
540         }
541
542         return gtNewMustThrowException(helper, JITtype2varType(sig->retType), sig->retTypeClass);
543     }
544     else
545     {
546         return nullptr;
547     }
548 }
549
550 //------------------------------------------------------------------------
551 // impX86HWIntrinsic: dispatch hardware intrinsics to their own implementation
552 // function
553 //
554 // Arguments:
555 //    intrinsic -- id of the intrinsic function.
556 //    method    -- method handle of the intrinsic function.
557 //    sig       -- signature of the intrinsic call
558 //
559 // Return Value:
560 //    the expanded intrinsic.
561 //
562 GenTree* Compiler::impX86HWIntrinsic(NamedIntrinsic        intrinsic,
563                                      CORINFO_METHOD_HANDLE method,
564                                      CORINFO_SIG_INFO*     sig,
565                                      bool                  mustExpand)
566 {
567     InstructionSet isa = isaOfHWIntrinsic(intrinsic);
568
569     // This intrinsic is supported if
570     // - the ISA is available on the underlying hardware (compSupports returns true)
571     // - the compiler supports this hardware intrinsics (compSupportsHWIntrinsic returns true)
572     bool issupported = compSupports(isa) && compSupportsHWIntrinsic(isa);
573
574     if (isIntrinsicAnIsSupportedPropertyGetter(intrinsic))
575     {
576         return gtNewIconNode(issupported);
577     }
578     else if (!issupported)
579     {
580         return impUnsupportedHWIntrinsic(CORINFO_HELP_THROW_PLATFORM_NOT_SUPPORTED, method, sig, mustExpand);
581     }
582
583     switch (isa)
584     {
585         case InstructionSet_SSE:
586             return impSSEIntrinsic(intrinsic, method, sig, mustExpand);
587         case InstructionSet_SSE2:
588             return impSSE2Intrinsic(intrinsic, method, sig, mustExpand);
589         case InstructionSet_SSE3:
590             return impSSE3Intrinsic(intrinsic, method, sig, mustExpand);
591         case InstructionSet_SSSE3:
592             return impSSSE3Intrinsic(intrinsic, method, sig, mustExpand);
593         case InstructionSet_SSE41:
594             return impSSE41Intrinsic(intrinsic, method, sig, mustExpand);
595         case InstructionSet_SSE42:
596             return impSSE42Intrinsic(intrinsic, method, sig, mustExpand);
597         case InstructionSet_AVX:
598             return impAVXIntrinsic(intrinsic, method, sig, mustExpand);
599         case InstructionSet_AVX2:
600             return impAVX2Intrinsic(intrinsic, method, sig, mustExpand);
601
602         case InstructionSet_AES:
603             return impAESIntrinsic(intrinsic, method, sig, mustExpand);
604         case InstructionSet_BMI1:
605             return impBMI1Intrinsic(intrinsic, method, sig, mustExpand);
606         case InstructionSet_BMI2:
607             return impBMI2Intrinsic(intrinsic, method, sig, mustExpand);
608         case InstructionSet_FMA:
609             return impFMAIntrinsic(intrinsic, method, sig, mustExpand);
610         case InstructionSet_LZCNT:
611             return impLZCNTIntrinsic(intrinsic, method, sig, mustExpand);
612         case InstructionSet_PCLMULQDQ:
613             return impPCLMULQDQIntrinsic(intrinsic, method, sig, mustExpand);
614         case InstructionSet_POPCNT:
615             return impPOPCNTIntrinsic(intrinsic, method, sig, mustExpand);
616         default:
617             return nullptr;
618     }
619 }
620
621 CORINFO_CLASS_HANDLE Compiler::gtGetStructHandleForHWSIMD(var_types simdType, var_types simdBaseType)
622 {
623     if (simdType == TYP_SIMD16)
624     {
625         switch (simdBaseType)
626         {
627             case TYP_FLOAT:
628                 return Vector128FloatHandle;
629             case TYP_DOUBLE:
630                 return Vector128DoubleHandle;
631             case TYP_INT:
632                 return Vector128IntHandle;
633             case TYP_USHORT:
634                 return Vector128UShortHandle;
635             case TYP_UBYTE:
636                 return Vector128UByteHandle;
637             case TYP_SHORT:
638                 return Vector128ShortHandle;
639             case TYP_BYTE:
640                 return Vector128ByteHandle;
641             case TYP_LONG:
642                 return Vector128LongHandle;
643             case TYP_UINT:
644                 return Vector128UIntHandle;
645             case TYP_ULONG:
646                 return Vector128ULongHandle;
647             default:
648                 assert(!"Didn't find a class handle for simdType");
649         }
650     }
651     else if (simdType == TYP_SIMD32)
652     {
653         switch (simdBaseType)
654         {
655             case TYP_FLOAT:
656                 return Vector256FloatHandle;
657             case TYP_DOUBLE:
658                 return Vector256DoubleHandle;
659             case TYP_INT:
660                 return Vector256IntHandle;
661             case TYP_USHORT:
662                 return Vector256UShortHandle;
663             case TYP_UBYTE:
664                 return Vector256UByteHandle;
665             case TYP_SHORT:
666                 return Vector256ShortHandle;
667             case TYP_BYTE:
668                 return Vector256ByteHandle;
669             case TYP_LONG:
670                 return Vector256LongHandle;
671             case TYP_UINT:
672                 return Vector256UIntHandle;
673             case TYP_ULONG:
674                 return Vector256ULongHandle;
675             default:
676                 assert(!"Didn't find a class handle for simdType");
677         }
678     }
679
680     return NO_CLASS_HANDLE;
681 }
682
683 GenTree* Compiler::impSSEIntrinsic(NamedIntrinsic        intrinsic,
684                                    CORINFO_METHOD_HANDLE method,
685                                    CORINFO_SIG_INFO*     sig,
686                                    bool                  mustExpand)
687 {
688     GenTree* retNode = nullptr;
689     GenTree* op1     = nullptr;
690     GenTree* op2     = nullptr;
691     GenTree* op3     = nullptr;
692     GenTree* op4     = nullptr;
693
694     switch (intrinsic)
695     {
696         case NI_SSE_SetVector128:
697         {
698             assert(sig->numArgs == 4);
699             assert(getBaseTypeOfSIMDType(sig->retTypeSigClass) == TYP_FLOAT);
700
701             op4 = impPopStack().val;
702             op3 = impPopStack().val;
703             op2 = impPopStack().val;
704             op1 = impPopStack().val;
705
706             GenTree* left    = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op4, op3, NI_SSE_UnpackLow, TYP_FLOAT, 16);
707             GenTree* right   = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, op1, NI_SSE_UnpackLow, TYP_FLOAT, 16);
708             GenTree* control = gtNewIconNode(68, TYP_UBYTE);
709
710             retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, left, right, control, NI_SSE_Shuffle, TYP_FLOAT, 16);
711             break;
712         }
713
714         case NI_SSE_Shuffle:
715         {
716             assert(sig->numArgs == 3);
717             assert(getBaseTypeOfSIMDType(sig->retTypeSigClass) == TYP_FLOAT);
718
719             op3 = impStackTop().val;
720
721             if (op3->IsCnsIntOrI() || mustExpand)
722             {
723                 impPopStack(); // Pop the value we peeked at
724                 op2     = impSIMDPopStack(TYP_SIMD16);
725                 op1     = impSIMDPopStack(TYP_SIMD16);
726                 retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, op3, intrinsic, TYP_FLOAT, 16);
727             }
728             else
729             {
730                 // When op3 is not a constant and we are not being forced to expand, we need to
731                 // return nullptr so a GT_CALL to the intrinsic method is emitted instead. The
732                 // intrinsic method is recursive and will be forced to expand, at which point
733                 // we emit some less efficient fallback code.
734
735                 return nullptr;
736             }
737             break;
738         }
739
740         case NI_SSE_Add:
741         case NI_SSE_AddScalar:
742         case NI_SSE_And:
743         case NI_SSE_AndNot:
744         case NI_SSE_CompareEqual:
745         case NI_SSE_CompareEqualScalar:
746         case NI_SSE_CompareGreaterThan:
747         case NI_SSE_CompareGreaterThanScalar:
748         case NI_SSE_CompareGreaterThanOrEqual:
749         case NI_SSE_CompareGreaterThanOrEqualScalar:
750         case NI_SSE_CompareLessThan:
751         case NI_SSE_CompareLessThanScalar:
752         case NI_SSE_CompareLessThanOrEqual:
753         case NI_SSE_CompareLessThanOrEqualScalar:
754         case NI_SSE_CompareNotEqual:
755         case NI_SSE_CompareNotEqualScalar:
756         case NI_SSE_CompareNotGreaterThan:
757         case NI_SSE_CompareNotGreaterThanScalar:
758         case NI_SSE_CompareNotGreaterThanOrEqual:
759         case NI_SSE_CompareNotGreaterThanOrEqualScalar:
760         case NI_SSE_CompareNotLessThan:
761         case NI_SSE_CompareNotLessThanScalar:
762         case NI_SSE_CompareNotLessThanOrEqual:
763         case NI_SSE_CompareNotLessThanOrEqualScalar:
764         case NI_SSE_CompareOrdered:
765         case NI_SSE_CompareOrderedScalar:
766         case NI_SSE_CompareUnordered:
767         case NI_SSE_CompareUnorderedScalar:
768         case NI_SSE_Divide:
769         case NI_SSE_DivideScalar:
770         case NI_SSE_Max:
771         case NI_SSE_MaxScalar:
772         case NI_SSE_Min:
773         case NI_SSE_MinScalar:
774         case NI_SSE_MoveHighToLow:
775         case NI_SSE_MoveLowToHigh:
776         case NI_SSE_MoveScalar:
777         case NI_SSE_Multiply:
778         case NI_SSE_MultiplyScalar:
779         case NI_SSE_Or:
780         case NI_SSE_Subtract:
781         case NI_SSE_SubtractScalar:
782         case NI_SSE_UnpackHigh:
783         case NI_SSE_UnpackLow:
784         case NI_SSE_Xor:
785             assert(sig->numArgs == 2);
786             assert(getBaseTypeOfSIMDType(sig->retTypeSigClass) == TYP_FLOAT);
787             op2     = impSIMDPopStack(TYP_SIMD16);
788             op1     = impSIMDPopStack(TYP_SIMD16);
789             retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, intrinsic, TYP_FLOAT, 16);
790             break;
791
792         case NI_SSE_CompareEqualOrderedScalar:
793         case NI_SSE_CompareEqualUnorderedScalar:
794         case NI_SSE_CompareGreaterThanOrderedScalar:
795         case NI_SSE_CompareGreaterThanUnorderedScalar:
796         case NI_SSE_CompareGreaterThanOrEqualOrderedScalar:
797         case NI_SSE_CompareGreaterThanOrEqualUnorderedScalar:
798         case NI_SSE_CompareLessThanOrderedScalar:
799         case NI_SSE_CompareLessThanUnorderedScalar:
800         case NI_SSE_CompareLessThanOrEqualOrderedScalar:
801         case NI_SSE_CompareLessThanOrEqualUnorderedScalar:
802         case NI_SSE_CompareNotEqualOrderedScalar:
803         case NI_SSE_CompareNotEqualUnorderedScalar:
804             assert(sig->numArgs == 2);
805             assert(JITtype2varType(sig->retType) == TYP_BOOL);
806             assert(getBaseTypeOfSIMDType(info.compCompHnd->getArgClass(sig, sig->args)) == TYP_FLOAT);
807             op2     = impSIMDPopStack(TYP_SIMD16);
808             op1     = impSIMDPopStack(TYP_SIMD16);
809             retNode = gtNewSimdHWIntrinsicNode(TYP_BOOL, op1, op2, intrinsic, TYP_FLOAT, 16);
810             break;
811
812         case NI_SSE_ConvertToVector128SingleScalar:
813         {
814             assert(sig->numArgs == 2);
815             assert(getBaseTypeOfSIMDType(sig->retTypeSigClass) == TYP_FLOAT);
816
817 #ifdef _TARGET_X86_
818             CORINFO_CLASS_HANDLE argClass;
819
820             CORINFO_ARG_LIST_HANDLE argLst = info.compCompHnd->getArgNext(sig->args);
821             CorInfoType             corType =
822                 strip(info.compCompHnd->getArgType(sig, argLst, &argClass)); // type of the second argument
823
824             if (varTypeIsLong(JITtype2varType(corType)))
825             {
826                 return impUnsupportedHWIntrinsic(CORINFO_HELP_THROW_PLATFORM_NOT_SUPPORTED, method, sig, mustExpand);
827             }
828 #endif // _TARGET_X86_
829
830             op2     = impPopStack().val;
831             op1     = impSIMDPopStack(TYP_SIMD16);
832             retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, intrinsic, TYP_FLOAT, 16);
833             break;
834         }
835
836         case NI_SSE_LoadHigh:
837         case NI_SSE_LoadLow:
838         {
839             assert(sig->numArgs == 2);
840             assert(getBaseTypeOfSIMDType(sig->retTypeSigClass) == TYP_FLOAT);
841             op2     = impPopStack().val;
842             op1     = impSIMDPopStack(TYP_SIMD16);
843             retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, intrinsic, TYP_FLOAT, 16);
844             break;
845         }
846
847         case NI_SSE_MoveMask:
848             assert(sig->numArgs == 1);
849             assert(JITtype2varType(sig->retType) == TYP_INT);
850             assert(getBaseTypeOfSIMDType(info.compCompHnd->getArgClass(sig, sig->args)) == TYP_FLOAT);
851             op1     = impSIMDPopStack(TYP_SIMD16);
852             retNode = gtNewSimdHWIntrinsicNode(TYP_INT, op1, intrinsic, TYP_FLOAT, 16);
853             break;
854
855         case NI_SSE_StaticCast:
856         {
857             assert(sig->numArgs == 1);
858             var_types tgtType = getBaseTypeOfSIMDType(sig->retTypeSigClass);
859             var_types srcType = getBaseTypeOfSIMDType(info.compCompHnd->getArgClass(sig, sig->args));
860
861             if (varTypeIsArithmetic(tgtType) && varTypeIsArithmetic(srcType))
862             {
863                 op1     = impSIMDPopStack(TYP_SIMD16);
864                 retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, intrinsic, tgtType, 16);
865             }
866             else
867             {
868                 return impUnsupportedHWIntrinsic(CORINFO_HELP_THROW_PLATFORM_NOT_SUPPORTED, method, sig, mustExpand);
869             }
870             break;
871         }
872
873         case NI_SSE_LoadAlignedVector128:
874         case NI_SSE_LoadScalar:
875         case NI_SSE_LoadVector128:
876         case NI_SSE_SetAllVector128:
877         case NI_SSE_SetScalar:
878             assert(sig->numArgs == 1);
879             assert(getBaseTypeOfSIMDType(sig->retTypeSigClass) == TYP_FLOAT);
880             op1     = impPopStack().val;
881             retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, intrinsic, TYP_FLOAT, 16);
882             break;
883
884         case NI_SSE_Reciprocal:
885         case NI_SSE_ReciprocalScalar:
886         case NI_SSE_ReciprocalSqrt:
887         case NI_SSE_ReciprocalSqrtScalar:
888         case NI_SSE_Sqrt:
889         case NI_SSE_SqrtScalar:
890             assert(sig->numArgs == 1);
891             assert(getBaseTypeOfSIMDType(sig->retTypeSigClass) == TYP_FLOAT);
892             op1     = impSIMDPopStack(TYP_SIMD16);
893             retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, intrinsic, TYP_FLOAT, 16);
894             break;
895
896         case NI_SSE_ConvertToInt32:
897         case NI_SSE_ConvertToInt32WithTruncation:
898         case NI_SSE_ConvertToInt64:
899         case NI_SSE_ConvertToInt64WithTruncation:
900         case NI_SSE_ConvertToSingle:
901         {
902             assert(sig->numArgs == 1);
903             assert(getBaseTypeOfSIMDType(info.compCompHnd->getArgClass(sig, sig->args)) == TYP_FLOAT);
904             var_types callType = JITtype2varType(sig->retType);
905
906 #ifdef _TARGET_X86_
907             if (varTypeIsLong(callType))
908             {
909                 assert(intrinsic == NI_SSE_ConvertToInt64 || intrinsic == NI_SSE_ConvertToInt64WithTruncation);
910                 return impUnsupportedHWIntrinsic(CORINFO_HELP_THROW_PLATFORM_NOT_SUPPORTED, method, sig, mustExpand);
911             }
912 #endif // _TARGET_X86_
913
914             op1     = impSIMDPopStack(TYP_SIMD16);
915             retNode = gtNewSimdHWIntrinsicNode(callType, op1, intrinsic, TYP_FLOAT, 16);
916             break;
917         }
918
919         case NI_SSE_SetZeroVector128:
920             assert(sig->numArgs == 0);
921             assert(getBaseTypeOfSIMDType(sig->retTypeSigClass) == TYP_FLOAT);
922             retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, intrinsic, TYP_FLOAT, 16);
923             break;
924
925         default:
926             JITDUMP("Not implemented hardware intrinsic");
927             break;
928     }
929     return retNode;
930 }
931
932 GenTree* Compiler::impSSE2Intrinsic(NamedIntrinsic        intrinsic,
933                                     CORINFO_METHOD_HANDLE method,
934                                     CORINFO_SIG_INFO*     sig,
935                                     bool                  mustExpand)
936 {
937     GenTree*  retNode  = nullptr;
938     GenTree*  op1      = nullptr;
939     GenTree*  op2      = nullptr;
940     var_types baseType = TYP_UNKNOWN;
941     switch (intrinsic)
942     {
943         case NI_SSE2_Add:
944             assert(sig->numArgs == 2);
945             op2      = impSIMDPopStack(TYP_SIMD16);
946             op1      = impSIMDPopStack(TYP_SIMD16);
947             baseType = getBaseTypeOfSIMDType(sig->retTypeSigClass);
948             retNode  = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, NI_SSE2_Add, baseType, 16);
949             break;
950
951         default:
952             JITDUMP("Not implemented hardware intrinsic");
953             break;
954     }
955     return retNode;
956 }
957
958 GenTree* Compiler::impSSE3Intrinsic(NamedIntrinsic        intrinsic,
959                                     CORINFO_METHOD_HANDLE method,
960                                     CORINFO_SIG_INFO*     sig,
961                                     bool                  mustExpand)
962 {
963     return nullptr;
964 }
965
966 GenTree* Compiler::impSSSE3Intrinsic(NamedIntrinsic        intrinsic,
967                                      CORINFO_METHOD_HANDLE method,
968                                      CORINFO_SIG_INFO*     sig,
969                                      bool                  mustExpand)
970 {
971     return nullptr;
972 }
973
974 GenTree* Compiler::impSSE41Intrinsic(NamedIntrinsic        intrinsic,
975                                      CORINFO_METHOD_HANDLE method,
976                                      CORINFO_SIG_INFO*     sig,
977                                      bool                  mustExpand)
978 {
979     return nullptr;
980 }
981
982 GenTree* Compiler::impSSE42Intrinsic(NamedIntrinsic        intrinsic,
983                                      CORINFO_METHOD_HANDLE method,
984                                      CORINFO_SIG_INFO*     sig,
985                                      bool                  mustExpand)
986 {
987     GenTree*  retNode  = nullptr;
988     GenTree*  op1      = nullptr;
989     GenTree*  op2      = nullptr;
990     var_types callType = JITtype2varType(sig->retType);
991
992     CORINFO_ARG_LIST_HANDLE argLst = sig->args;
993     CORINFO_CLASS_HANDLE    argClass;
994     CorInfoType             corType;
995     switch (intrinsic)
996     {
997         case NI_SSE42_Crc32:
998             assert(sig->numArgs == 2);
999
1000 #ifdef _TARGET_X86_
1001             if (varTypeIsLong(callType))
1002             {
1003                 return impUnsupportedHWIntrinsic(CORINFO_HELP_THROW_PLATFORM_NOT_SUPPORTED, method, sig, mustExpand);
1004             }
1005 #endif
1006
1007             op2 = impPopStack().val;
1008             op1 = impPopStack().val;
1009
1010             argLst  = info.compCompHnd->getArgNext(argLst);                        // the second argument
1011             corType = strip(info.compCompHnd->getArgType(sig, argLst, &argClass)); // type of the second argument
1012
1013             retNode = gtNewScalarHWIntrinsicNode(callType, op1, op2, NI_SSE42_Crc32);
1014
1015             // TODO - currently we use the BaseType to bring the type of the second argument
1016             // to the code generator. May encode the overload info in other way.
1017             retNode->gtHWIntrinsic.gtSIMDBaseType = JITtype2varType(corType);
1018             break;
1019
1020         default:
1021             JITDUMP("Not implemented hardware intrinsic");
1022             break;
1023     }
1024     return retNode;
1025 }
1026
1027 GenTree* Compiler::impAVXIntrinsic(NamedIntrinsic        intrinsic,
1028                                    CORINFO_METHOD_HANDLE method,
1029                                    CORINFO_SIG_INFO*     sig,
1030                                    bool                  mustExpand)
1031 {
1032     GenTree*  retNode  = nullptr;
1033     GenTree*  op1      = nullptr;
1034     GenTree*  op2      = nullptr;
1035     var_types baseType = TYP_UNKNOWN;
1036     switch (intrinsic)
1037     {
1038         case NI_AVX_Add:
1039             assert(sig->numArgs == 2);
1040             op2      = impSIMDPopStack(TYP_SIMD32);
1041             op1      = impSIMDPopStack(TYP_SIMD32);
1042             baseType = getBaseTypeOfSIMDType(sig->retTypeSigClass);
1043             retNode  = gtNewSimdHWIntrinsicNode(TYP_SIMD32, op1, op2, NI_AVX_Add, baseType, 32);
1044             break;
1045
1046         default:
1047             JITDUMP("Not implemented hardware intrinsic");
1048             break;
1049     }
1050     return retNode;
1051 }
1052
1053 GenTree* Compiler::impAVX2Intrinsic(NamedIntrinsic        intrinsic,
1054                                     CORINFO_METHOD_HANDLE method,
1055                                     CORINFO_SIG_INFO*     sig,
1056                                     bool                  mustExpand)
1057 {
1058     GenTree*  retNode  = nullptr;
1059     GenTree*  op1      = nullptr;
1060     GenTree*  op2      = nullptr;
1061     var_types baseType = TYP_UNKNOWN;
1062     switch (intrinsic)
1063     {
1064         case NI_AVX2_Add:
1065             assert(sig->numArgs == 2);
1066             op2      = impSIMDPopStack(TYP_SIMD32);
1067             op1      = impSIMDPopStack(TYP_SIMD32);
1068             baseType = getBaseTypeOfSIMDType(sig->retTypeSigClass);
1069             retNode  = gtNewSimdHWIntrinsicNode(TYP_SIMD32, op1, op2, NI_AVX2_Add, baseType, 32);
1070             break;
1071
1072         default:
1073             JITDUMP("Not implemented hardware intrinsic");
1074             break;
1075     }
1076     return retNode;
1077 }
1078
1079 GenTree* Compiler::impAESIntrinsic(NamedIntrinsic        intrinsic,
1080                                    CORINFO_METHOD_HANDLE method,
1081                                    CORINFO_SIG_INFO*     sig,
1082                                    bool                  mustExpand)
1083 {
1084     return nullptr;
1085 }
1086
1087 GenTree* Compiler::impBMI1Intrinsic(NamedIntrinsic        intrinsic,
1088                                     CORINFO_METHOD_HANDLE method,
1089                                     CORINFO_SIG_INFO*     sig,
1090                                     bool                  mustExpand)
1091 {
1092     return nullptr;
1093 }
1094
1095 GenTree* Compiler::impBMI2Intrinsic(NamedIntrinsic        intrinsic,
1096                                     CORINFO_METHOD_HANDLE method,
1097                                     CORINFO_SIG_INFO*     sig,
1098                                     bool                  mustExpand)
1099 {
1100     return nullptr;
1101 }
1102
1103 GenTree* Compiler::impFMAIntrinsic(NamedIntrinsic        intrinsic,
1104                                    CORINFO_METHOD_HANDLE method,
1105                                    CORINFO_SIG_INFO*     sig,
1106                                    bool                  mustExpand)
1107 {
1108     return nullptr;
1109 }
1110
1111 GenTree* Compiler::impLZCNTIntrinsic(NamedIntrinsic        intrinsic,
1112                                      CORINFO_METHOD_HANDLE method,
1113                                      CORINFO_SIG_INFO*     sig,
1114                                      bool                  mustExpand)
1115 {
1116     assert(sig->numArgs == 1);
1117     var_types callType = JITtype2varType(sig->retType);
1118
1119 #ifdef _TARGET_X86_
1120     if (varTypeIsLong(callType))
1121     {
1122         return impUnsupportedHWIntrinsic(CORINFO_HELP_THROW_PLATFORM_NOT_SUPPORTED, method, sig, mustExpand);
1123     }
1124 #endif
1125
1126     return gtNewScalarHWIntrinsicNode(callType, impPopStack().val, NI_LZCNT_LeadingZeroCount);
1127 }
1128
1129 GenTree* Compiler::impPCLMULQDQIntrinsic(NamedIntrinsic        intrinsic,
1130                                          CORINFO_METHOD_HANDLE method,
1131                                          CORINFO_SIG_INFO*     sig,
1132                                          bool                  mustExpand)
1133 {
1134     return nullptr;
1135 }
1136
1137 GenTree* Compiler::impPOPCNTIntrinsic(NamedIntrinsic        intrinsic,
1138                                       CORINFO_METHOD_HANDLE method,
1139                                       CORINFO_SIG_INFO*     sig,
1140                                       bool                  mustExpand)
1141 {
1142     assert(sig->numArgs == 1);
1143     var_types callType = JITtype2varType(sig->retType);
1144
1145 #ifdef _TARGET_X86_
1146     if (varTypeIsLong(callType))
1147     {
1148         return impUnsupportedHWIntrinsic(CORINFO_HELP_THROW_PLATFORM_NOT_SUPPORTED, method, sig, mustExpand);
1149     }
1150 #endif
1151
1152     return gtNewScalarHWIntrinsicNode(callType, impPopStack().val, NI_POPCNT_PopCount);
1153 }
1154
1155 #endif // FEATURE_HW_INTRINSICS