Fix reading Time zone rules using Julian days (#17672)
[platform/upstream/coreclr.git] / src / jit / hwintrinsicxarch.cpp
1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
4
5 #include "jitpch.h"
6
7 #ifdef FEATURE_HW_INTRINSICS
8
9 struct HWIntrinsicInfo
10 {
11     NamedIntrinsic      intrinsicID;
12     const char*         intrinsicName;
13     InstructionSet      isa;
14     int                 ival;
15     unsigned            simdSize;
16     int                 numArgs;
17     instruction         ins[10];
18     HWIntrinsicCategory category;
19     HWIntrinsicFlag     flags;
20 };
21
22 static const HWIntrinsicInfo hwIntrinsicInfoArray[] = {
23 // clang-format off
24 #define HARDWARE_INTRINSIC(id, name, isa, ival, size, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \
25     {NI_##id, name, InstructionSet_##isa, ival, size, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, static_cast<HWIntrinsicFlag>(flag)},
26 // clang-format on
27 #include "hwintrinsiclistxarch.h"
28 };
29
30 extern const char* getHWIntrinsicName(NamedIntrinsic intrinsic)
31 {
32     return hwIntrinsicInfoArray[intrinsic - NI_HW_INTRINSIC_START - 1].intrinsicName;
33 }
34
35 //------------------------------------------------------------------------
36 // lookupHWIntrinsicISA: map class name to InstructionSet value
37 //
38 // Arguments:
39 //    className -- class name in System.Runtime.Intrinsics.X86
40 //
41 // Return Value:
42 //    Id for the ISA class.
43 //
44 InstructionSet Compiler::lookupHWIntrinsicISA(const char* className)
45 {
46     if (className != nullptr)
47     {
48         if (className[0] == 'A')
49         {
50             if (strcmp(className, "Aes") == 0)
51             {
52                 return InstructionSet_AES;
53             }
54             else if (strcmp(className, "Avx") == 0)
55             {
56                 return InstructionSet_AVX;
57             }
58             else if (strcmp(className, "Avx2") == 0)
59             {
60                 return InstructionSet_AVX2;
61             }
62         }
63         if (className[0] == 'S')
64         {
65             if (strcmp(className, "Sse") == 0)
66             {
67                 return InstructionSet_SSE;
68             }
69             else if (strcmp(className, "Sse2") == 0)
70             {
71                 return InstructionSet_SSE2;
72             }
73             else if (strcmp(className, "Sse3") == 0)
74             {
75                 return InstructionSet_SSE3;
76             }
77             else if (strcmp(className, "Ssse3") == 0)
78             {
79                 return InstructionSet_SSSE3;
80             }
81             else if (strcmp(className, "Sse41") == 0)
82             {
83                 return InstructionSet_SSE41;
84             }
85             else if (strcmp(className, "Sse42") == 0)
86             {
87                 return InstructionSet_SSE42;
88             }
89         }
90
91         if (strcmp(className, "Bmi1") == 0)
92         {
93             return InstructionSet_BMI1;
94         }
95         else if (strcmp(className, "Bmi2") == 0)
96         {
97             return InstructionSet_BMI2;
98         }
99         else if (strcmp(className, "Fma") == 0)
100         {
101             return InstructionSet_FMA;
102         }
103         else if (strcmp(className, "Lzcnt") == 0)
104         {
105             return InstructionSet_LZCNT;
106         }
107         else if (strcmp(className, "Pclmulqdq") == 0)
108         {
109             return InstructionSet_PCLMULQDQ;
110         }
111         else if (strcmp(className, "Popcnt") == 0)
112         {
113             return InstructionSet_POPCNT;
114         }
115     }
116
117     JITDUMP("Unsupported ISA.\n");
118     return InstructionSet_ILLEGAL;
119 }
120
121 //------------------------------------------------------------------------
122 // lookupHWIntrinsic: map intrinsic name to named intrinsic value
123 //
124 // Arguments:
125 //    methodName -- name of the intrinsic function.
126 //    isa        -- instruction set of the intrinsic.
127 //
128 // Return Value:
129 //    Id for the hardware intrinsic
130 //
131 // TODO-Throughput: replace sequential search by binary search
132 NamedIntrinsic Compiler::lookupHWIntrinsic(const char* methodName, InstructionSet isa)
133 {
134     NamedIntrinsic result = NI_Illegal;
135     if (isa != InstructionSet_ILLEGAL)
136     {
137         for (int i = 0; i < NI_HW_INTRINSIC_END - NI_HW_INTRINSIC_START - 1; i++)
138         {
139             if (isa == hwIntrinsicInfoArray[i].isa && strcmp(methodName, hwIntrinsicInfoArray[i].intrinsicName) == 0)
140             {
141                 result = hwIntrinsicInfoArray[i].intrinsicID;
142                 break;
143             }
144         }
145     }
146     return result;
147 }
148
149 //------------------------------------------------------------------------
150 // isaOfHWIntrinsic: map named intrinsic value to its instruction set
151 //
152 // Arguments:
153 //    intrinsic -- id of the intrinsic function.
154 //
155 // Return Value:
156 //    instruction set of the intrinsic.
157 //
158 InstructionSet Compiler::isaOfHWIntrinsic(NamedIntrinsic intrinsic)
159 {
160     assert(intrinsic != NI_Illegal);
161     assert(intrinsic > NI_HW_INTRINSIC_START && intrinsic < NI_HW_INTRINSIC_END);
162     return hwIntrinsicInfoArray[intrinsic - NI_HW_INTRINSIC_START - 1].isa;
163 }
164
165 //------------------------------------------------------------------------
166 // ivalOfHWIntrinsic: get the imm8 value of this intrinsic from the hwIntrinsicInfoArray table
167 //
168 // Arguments:
169 //    intrinsic -- id of the intrinsic function.
170 //
171 // Return Value:
172 //     The imm8 value that is implicit for this intrinsic, or -1 for intrinsics that do not take an immediate, or for
173 //     which the immediate is an explicit argument.
174 //
175 int Compiler::ivalOfHWIntrinsic(NamedIntrinsic intrinsic)
176 {
177     assert(intrinsic != NI_Illegal);
178     assert(intrinsic > NI_HW_INTRINSIC_START && intrinsic < NI_HW_INTRINSIC_END);
179     return hwIntrinsicInfoArray[intrinsic - NI_HW_INTRINSIC_START - 1].ival;
180 }
181
182 //------------------------------------------------------------------------
183 // simdSizeOfHWIntrinsic: get the SIMD size of this intrinsic
184 //
185 // Arguments:
186 //    intrinsic -- id of the intrinsic function.
187 //
188 // Return Value:
189 //     the SIMD size of this intrinsic
190 //         - from the hwIntrinsicInfoArray table if intrinsic has NO HW_Flag_UnfixedSIMDSize
191 //         - from the signature if intrinsic has HW_Flag_UnfixedSIMDSize
192 //
193 // Note - this function is only used by the importer
194 //        after importation (i.e., codegen), we can get the SIMD size from GenTreeHWIntrinsic IR
195 unsigned Compiler::simdSizeOfHWIntrinsic(NamedIntrinsic intrinsic, CORINFO_SIG_INFO* sig)
196 {
197     assert(intrinsic > NI_HW_INTRINSIC_START && intrinsic < NI_HW_INTRINSIC_END);
198
199     HWIntrinsicFlag flags = flagsOfHWIntrinsic(intrinsic);
200
201     if ((flags & HW_Flag_UnfixedSIMDSize) == 0)
202     {
203         return hwIntrinsicInfoArray[intrinsic - NI_HW_INTRINSIC_START - 1].simdSize;
204     }
205
206     CORINFO_CLASS_HANDLE typeHnd = nullptr;
207
208     if (JITtype2varType(sig->retType) == TYP_STRUCT)
209     {
210         typeHnd = sig->retTypeSigClass;
211     }
212     else
213     {
214         assert((flags & HW_Flag_BaseTypeFromFirstArg) != 0);
215         typeHnd = info.compCompHnd->getArgClass(sig, sig->args);
216     }
217
218     unsigned  simdSize = 0;
219     var_types baseType = getBaseTypeAndSizeOfSIMDType(typeHnd, &simdSize);
220     assert(simdSize > 0 && baseType != TYP_UNKNOWN);
221     return simdSize;
222 }
223
224 // TODO_XARCH-CQ - refactoring of numArgsOfHWIntrinsic fast path into inlinable
225 // function and slow local static function may increase performance significantly
226
227 //------------------------------------------------------------------------
228 // numArgsOfHWIntrinsic: gets the number of arguments for the hardware intrinsic.
229 // This attempts to do a table based lookup but will fallback to the number
230 // of operands in 'node' if the table entry is -1.
231 //
232 // Arguments:
233 //    node      -- GenTreeHWIntrinsic* node with nullptr default value
234 //
235 // Return Value:
236 //     number of arguments
237 //
238 int Compiler::numArgsOfHWIntrinsic(GenTreeHWIntrinsic* node)
239 {
240     assert(node != nullptr);
241
242     NamedIntrinsic intrinsic = node->gtHWIntrinsicId;
243
244     assert(intrinsic != NI_Illegal);
245     assert(intrinsic > NI_HW_INTRINSIC_START && intrinsic < NI_HW_INTRINSIC_END);
246
247     int numArgs = hwIntrinsicInfoArray[intrinsic - NI_HW_INTRINSIC_START - 1].numArgs;
248     if (numArgs >= 0)
249     {
250         return numArgs;
251     }
252
253     assert(numArgs == -1);
254
255     GenTree* op1 = node->gtGetOp1();
256     GenTree* op2 = node->gtGetOp2();
257
258     if (op2 != nullptr)
259     {
260         return 2;
261     }
262
263     if (op1 != nullptr)
264     {
265         if (op1->OperIsList())
266         {
267             numArgs              = 0;
268             GenTreeArgList* list = op1->AsArgList();
269
270             while (list != nullptr)
271             {
272                 numArgs++;
273                 list = list->Rest();
274             }
275
276             assert(numArgs > 0);
277             return numArgs;
278         }
279         else
280         {
281             return 1;
282         }
283     }
284     else
285     {
286         return 0;
287     }
288 }
289
290 //------------------------------------------------------------------------
291 // lastOpOfHWIntrinsic: get the last operand of a HW intrinsic
292 //
293 // Arguments:
294 //    node   -- the intrinsic node.
295 //    numArgs-- number of argument
296 //
297 // Return Value:
298 //     number of arguments
299 //
300 GenTree* Compiler::lastOpOfHWIntrinsic(GenTreeHWIntrinsic* node, int numArgs)
301 {
302     GenTree* op1 = node->gtGetOp1();
303     GenTree* op2 = node->gtGetOp2();
304     switch (numArgs)
305     {
306         case 0:
307             return nullptr;
308         case 1:
309             assert(op1 != nullptr);
310             return op1;
311         case 2:
312             assert(op2 != nullptr);
313             return op2;
314         case 3:
315             assert(op1->OperIsList());
316             assert(op1->AsArgList()->Rest()->Rest()->Current() != nullptr);
317             assert(op1->AsArgList()->Rest()->Rest()->Rest() == nullptr);
318             return op1->AsArgList()->Rest()->Rest()->Current();
319         default:
320             unreached();
321             return nullptr;
322     }
323 }
324
325 //------------------------------------------------------------------------
326 // insOfHWIntrinsic: get the instruction of the given intrinsic
327 //
328 // Arguments:
329 //    intrinsic -- id of the intrinsic function.
330 //    type      -- vector base type of this intrinsic
331 //
332 // Return Value:
333 //     the instruction of the given intrinsic on the base type
334 //     return INS_invalid for unsupported base types
335 //
336 instruction Compiler::insOfHWIntrinsic(NamedIntrinsic intrinsic, var_types type)
337 {
338     assert(intrinsic != NI_Illegal);
339     assert(intrinsic > NI_HW_INTRINSIC_START && intrinsic < NI_HW_INTRINSIC_END);
340     assert(type >= TYP_BYTE && type <= TYP_DOUBLE);
341     return hwIntrinsicInfoArray[intrinsic - NI_HW_INTRINSIC_START - 1].ins[type - TYP_BYTE];
342 }
343
344 //------------------------------------------------------------------------
345 // categoryOfHWIntrinsic: get the category of the given intrinsic
346 //
347 // Arguments:
348 //    intrinsic -- id of the intrinsic function.
349 //
350 // Return Value:
351 //     the category of the given intrinsic
352 //
353 HWIntrinsicCategory Compiler::categoryOfHWIntrinsic(NamedIntrinsic intrinsic)
354 {
355     assert(intrinsic != NI_Illegal);
356     assert(intrinsic > NI_HW_INTRINSIC_START && intrinsic < NI_HW_INTRINSIC_END);
357     return hwIntrinsicInfoArray[intrinsic - NI_HW_INTRINSIC_START - 1].category;
358 }
359
360 //------------------------------------------------------------------------
361 // HWIntrinsicFlag: get the flags of the given intrinsic
362 //
363 // Arguments:
364 //    intrinsic -- id of the intrinsic function.
365 //
366 // Return Value:
367 //     the flags of the given intrinsic
368 //
369 HWIntrinsicFlag Compiler::flagsOfHWIntrinsic(NamedIntrinsic intrinsic)
370 {
371     assert(intrinsic != NI_Illegal);
372     assert(intrinsic > NI_HW_INTRINSIC_START && intrinsic < NI_HW_INTRINSIC_END);
373     return hwIntrinsicInfoArray[intrinsic - NI_HW_INTRINSIC_START - 1].flags;
374 }
375
376 //------------------------------------------------------------------------
377 // getArgForHWIntrinsic: get the argument from the stack and match  the signature
378 //
379 // Arguments:
380 //    argType   -- the required type of argument
381 //    argClass  -- the class handle of argType
382 //
383 // Return Value:
384 //     get the argument at the given index from the stack and match  the signature
385 //
386 GenTree* Compiler::getArgForHWIntrinsic(var_types argType, CORINFO_CLASS_HANDLE argClass)
387 {
388     GenTree* arg = nullptr;
389     if (argType == TYP_STRUCT)
390     {
391         unsigned int argSizeBytes;
392         var_types    base = getBaseTypeAndSizeOfSIMDType(argClass, &argSizeBytes);
393         argType           = getSIMDTypeForSize(argSizeBytes);
394         assert((argType == TYP_SIMD32) || (argType == TYP_SIMD16));
395         arg = impSIMDPopStack(argType);
396         assert((arg->TypeGet() == TYP_SIMD16) || (arg->TypeGet() == TYP_SIMD32));
397     }
398     else
399     {
400         assert(varTypeIsArithmetic(argType));
401         arg = impPopStack().val;
402         assert(varTypeIsArithmetic(arg->TypeGet()));
403         assert(genActualType(arg->gtType) == genActualType(argType));
404     }
405     return arg;
406 }
407
408 //------------------------------------------------------------------------
409 // immUpperBoundOfHWIntrinsic: get the max imm-value of non-full-range IMM intrinsic
410 //
411 // Arguments:
412 //    intrinsic  -- intrinsic ID
413 //
414 // Return Value:
415 //     the max imm-value of non-full-range IMM intrinsic
416 //
417 int Compiler::immUpperBoundOfHWIntrinsic(NamedIntrinsic intrinsic)
418 {
419     assert(categoryOfHWIntrinsic(intrinsic) == HW_Category_IMM);
420     switch (intrinsic)
421     {
422         case NI_AVX_Compare:
423         case NI_AVX_CompareScalar:
424             return 31; // enum FloatComparisonMode has 32 values
425
426         default:
427             assert((flagsOfHWIntrinsic(intrinsic) & HW_Flag_FullRangeIMM) != 0);
428             return 255;
429     }
430 }
431
432 //------------------------------------------------------------------------
433 // impNonConstFallback: convert certain SSE2/AVX2 shift intrinsic to its semantic alternative when the imm-arg is
434 // not a compile-time constant
435 //
436 // Arguments:
437 //    intrinsic  -- intrinsic ID
438 //    simdType   -- Vector type
439 //    baseType   -- base type of the Vector128/256<T>
440 //
441 // Return Value:
442 //     return the IR of semantic alternative on non-const imm-arg
443 //
444 GenTree* Compiler::impNonConstFallback(NamedIntrinsic intrinsic, var_types simdType, var_types baseType)
445 {
446     assert((flagsOfHWIntrinsic(intrinsic) & HW_Flag_NoJmpTableIMM) != 0);
447     switch (intrinsic)
448     {
449         case NI_SSE2_ShiftLeftLogical:
450         case NI_SSE2_ShiftRightArithmetic:
451         case NI_SSE2_ShiftRightLogical:
452         case NI_AVX2_ShiftLeftLogical:
453         case NI_AVX2_ShiftRightArithmetic:
454         case NI_AVX2_ShiftRightLogical:
455         {
456             GenTree* op2 = impPopStack().val;
457             GenTree* op1 = impSIMDPopStack(simdType);
458             GenTree* tmpOp =
459                 gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, NI_SSE2_ConvertScalarToVector128Int32, TYP_INT, 16);
460             return gtNewSimdHWIntrinsicNode(simdType, op1, tmpOp, intrinsic, baseType, genTypeSize(simdType));
461         }
462
463         default:
464             unreached();
465             return nullptr;
466     }
467 }
468
469 //------------------------------------------------------------------------
470 // isImmHWIntrinsic: check the intrinsic is a imm-intrinsic overload or not
471 //
472 // Arguments:
473 //    intrinsic  -- intrinsic ID
474 //    lastOp     -- the last operand of the intrinsic that may point to the imm-arg
475 //
476 // Return Value:
477 //        Return true iff the intrinsics is an imm-intrinsic overload.
478 //        Note: that some intrinsics, with HW_Flag_MaybeIMM set, have both imm (integer immediate) and vector (i.e.
479 //        non-TYP_INT) overloads.
480 //
481 bool Compiler::isImmHWIntrinsic(NamedIntrinsic intrinsic, GenTree* lastOp)
482 {
483     if (categoryOfHWIntrinsic(intrinsic) != HW_Category_IMM)
484     {
485         return false;
486     }
487
488     if ((flagsOfHWIntrinsic(intrinsic) & HW_Flag_MaybeIMM) != 0 && genActualType(lastOp->TypeGet()) != TYP_INT)
489     {
490         return false;
491     }
492
493     return true;
494 }
495
496 //------------------------------------------------------------------------
497 // addRangeCheckIfNeeded: add a GT_HW_INTRINSIC_CHK node for non-full-range imm-intrinsic
498 //
499 // Arguments:
500 //    intrinsic  -- intrinsic ID
501 //    lastOp     -- the last operand of the intrinsic that points to the imm-arg
502 //    mustExpand -- true if the compiler is compiling the fallback(GT_CALL) of this intrinsics
503 //
504 // Return Value:
505 //     add a GT_HW_INTRINSIC_CHK node for non-full-range imm-intrinsic, which would throw ArgumentOutOfRangeException
506 //     when the imm-argument is not in the valid range
507 //
508 GenTree* Compiler::addRangeCheckIfNeeded(NamedIntrinsic intrinsic, GenTree* lastOp, bool mustExpand)
509 {
510     assert(lastOp != nullptr);
511     // Full-range imm-intrinsics do not need the range-check
512     // because the imm-parameter of the intrinsic method is a byte.
513     if (mustExpand && ((flagsOfHWIntrinsic(intrinsic) & HW_Flag_FullRangeIMM) == 0) &&
514         isImmHWIntrinsic(intrinsic, lastOp))
515     {
516         assert(!lastOp->IsCnsIntOrI());
517         GenTree* upperBoundNode = new (this, GT_CNS_INT) GenTreeIntCon(TYP_INT, immUpperBoundOfHWIntrinsic(intrinsic));
518         GenTree* index          = nullptr;
519         if ((lastOp->gtFlags & GTF_SIDE_EFFECT) != 0)
520         {
521             index = fgInsertCommaFormTemp(&lastOp);
522         }
523         else
524         {
525             index = gtCloneExpr(lastOp);
526         }
527         GenTreeBoundsChk* hwIntrinsicChk = new (this, GT_HW_INTRINSIC_CHK)
528             GenTreeBoundsChk(GT_HW_INTRINSIC_CHK, TYP_VOID, index, upperBoundNode, SCK_RNGCHK_FAIL);
529         hwIntrinsicChk->gtThrowKind = SCK_ARG_RNG_EXCPN;
530         return gtNewOperNode(GT_COMMA, lastOp->TypeGet(), hwIntrinsicChk, lastOp);
531     }
532     else
533     {
534         return lastOp;
535     }
536 }
537
538 //------------------------------------------------------------------------
539 // isFullyImplmentedISAClass: return true if all the hardware intrinsics
540 //    of this ISA are implemented in RyuJIT.
541 //
542 // Arguments:
543 //    isa - Instruction set
544 // Return Value:
545 //    true - all the hardware intrinsics of "isa" exposed in CoreFX
546 //    System.Runtime.Intrinsics.Experimental assembly are implemented in RyuJIT.
547 //
548 bool Compiler::isFullyImplmentedISAClass(InstructionSet isa)
549 {
550     switch (isa)
551     {
552         // These ISAs have no implementation
553         case InstructionSet_AES:
554         case InstructionSet_BMI1:
555         case InstructionSet_BMI2:
556         case InstructionSet_FMA:
557         case InstructionSet_PCLMULQDQ:
558             return false;
559
560         // These ISAs are partially implemented
561         case InstructionSet_AVX:
562         case InstructionSet_AVX2:
563         case InstructionSet_SSE42:
564             return true;
565
566         // These ISAs are fully implemented
567         case InstructionSet_SSE:
568         case InstructionSet_SSE2:
569         case InstructionSet_SSE3:
570         case InstructionSet_SSSE3:
571         case InstructionSet_SSE41:
572         case InstructionSet_LZCNT:
573         case InstructionSet_POPCNT:
574             return true;
575
576         default:
577             unreached();
578     }
579 }
580
581 //------------------------------------------------------------------------
582 // isScalarISA:
583 //
584 // Arguments:
585 //    isa - Instruction set
586 // Return Value:
587 //    true - if "isa" only contains scalar instructions
588 //
589 bool Compiler::isScalarISA(InstructionSet isa)
590 {
591     switch (isa)
592     {
593         case InstructionSet_BMI1:
594         case InstructionSet_BMI2:
595         case InstructionSet_LZCNT:
596         case InstructionSet_POPCNT:
597             return true;
598
599         default:
600             return false;
601     }
602 }
603
604 //------------------------------------------------------------------------
605 // compSupportsHWIntrinsic: compiler support of hardware intrinsics
606 //
607 // Arguments:
608 //    isa - Instruction set
609 // Return Value:
610 //    true if
611 //    - isa is a scalar ISA
612 //    - isa is a SIMD ISA and featureSIMD=true
613 //    - isa is fully implemented or EnableIncompleteISAClass=true
614 bool Compiler::compSupportsHWIntrinsic(InstructionSet isa)
615 {
616     return (featureSIMD || isScalarISA(isa)) && (
617 #ifdef DEBUG
618                                                     JitConfig.EnableIncompleteISAClass() ||
619 #endif
620                                                     isFullyImplmentedISAClass(isa));
621 }
622
623 //------------------------------------------------------------------------
624 // hwIntrinsicSignatureTypeSupported: platform support of hardware intrinsics
625 //
626 // Arguments:
627 //    retType - return type
628 //    sig     - intrinsic signature
629 //    flags   - flags of the intrinsics
630 //
631 // Return Value:
632 //    Returns true iff the given type signature is supported
633 // Notes:
634 //    - This is only used on 32-bit systems to determine whether the signature uses no 64-bit registers.
635 //    - The `retType` is passed to avoid another call to the type system, as it has already been retrieved.
636 bool Compiler::hwIntrinsicSignatureTypeSupported(var_types retType, CORINFO_SIG_INFO* sig, HWIntrinsicFlag flags)
637 {
638 #ifdef _TARGET_X86_
639     CORINFO_CLASS_HANDLE argClass;
640
641     if ((flags & HW_Flag_64BitOnly) != 0)
642     {
643         return false;
644     }
645     else if ((flags & HW_Flag_SecondArgMaybe64Bit) != 0)
646     {
647         assert(sig->numArgs >= 2);
648         CorInfoType corType =
649             strip(info.compCompHnd->getArgType(sig, info.compCompHnd->getArgNext(sig->args), &argClass));
650         return !varTypeIsLong(JITtype2varType(corType));
651     }
652
653     return !varTypeIsLong(retType);
654 #else
655     return true;
656 #endif
657 }
658
659 //------------------------------------------------------------------------
660 // impIsTableDrivenHWIntrinsic:
661 //
662 // Arguments:
663 //    category - category of a HW intrinsic
664 //
665 // Return Value:
666 //    returns true if this category can be table-driven in the importer
667 //
668 static bool impIsTableDrivenHWIntrinsic(HWIntrinsicCategory category, HWIntrinsicFlag flags)
669 {
670     // HW_Flag_NoCodeGen implies this intrinsic should be manually morphed in the importer.
671     return category != HW_Category_Special && category != HW_Category_Scalar &&
672            ((flags & (HW_Flag_NoCodeGen | HW_Flag_SpecialImport)) == 0);
673 }
674
675 //------------------------------------------------------------------------
676 // impHWIntrinsic: dispatch hardware intrinsics to their own implementation
677 //
678 // Arguments:
679 //    intrinsic -- id of the intrinsic function.
680 //    method    -- method handle of the intrinsic function.
681 //    sig       -- signature of the intrinsic call
682 //
683 // Return Value:
684 //    the expanded intrinsic.
685 //
686 GenTree* Compiler::impHWIntrinsic(NamedIntrinsic        intrinsic,
687                                   CORINFO_METHOD_HANDLE method,
688                                   CORINFO_SIG_INFO*     sig,
689                                   bool                  mustExpand)
690 {
691     InstructionSet      isa      = isaOfHWIntrinsic(intrinsic);
692     HWIntrinsicCategory category = categoryOfHWIntrinsic(intrinsic);
693     HWIntrinsicFlag     flags    = flagsOfHWIntrinsic(intrinsic);
694     int                 numArgs  = sig->numArgs;
695     var_types           retType  = JITtype2varType(sig->retType);
696     var_types           baseType = TYP_UNKNOWN;
697
698     if ((retType == TYP_STRUCT) && featureSIMD)
699     {
700         unsigned int sizeBytes;
701         baseType = getBaseTypeAndSizeOfSIMDType(sig->retTypeSigClass, &sizeBytes);
702         retType  = getSIMDTypeForSize(sizeBytes);
703         assert(sizeBytes != 0);
704     }
705
706     // This intrinsic is supported if
707     // - the ISA is available on the underlying hardware (compSupports returns true)
708     // - the compiler supports this hardware intrinsics (compSupportsHWIntrinsic returns true)
709     // - intrinsics do not require 64-bit registers (r64) on 32-bit platforms (signatureTypeSupproted returns
710     // true)
711     bool issupported =
712         compSupports(isa) && compSupportsHWIntrinsic(isa) && hwIntrinsicSignatureTypeSupported(retType, sig, flags);
713
714     if (category == HW_Category_IsSupportedProperty)
715     {
716         return gtNewIconNode(issupported);
717     }
718     // - calling to unsupported intrinsics must throw PlatforNotSupportedException
719     else if (!issupported)
720     {
721         return impUnsupportedHWIntrinsic(CORINFO_HELP_THROW_PLATFORM_NOT_SUPPORTED, method, sig, mustExpand);
722     }
723     // Avoid checking stacktop for 0-op intrinsics
724     if (sig->numArgs > 0 && isImmHWIntrinsic(intrinsic, impStackTop().val))
725     {
726         GenTree* lastOp = impStackTop().val;
727         // The imm-HWintrinsics that do not accept all imm8 values may throw
728         // ArgumentOutOfRangeException when the imm argument is not in the valid range
729         if ((flags & HW_Flag_FullRangeIMM) == 0)
730         {
731             if (!mustExpand && lastOp->IsCnsIntOrI() &&
732                 lastOp->AsIntCon()->IconValue() > immUpperBoundOfHWIntrinsic(intrinsic))
733             {
734                 return nullptr;
735             }
736         }
737
738         if (!lastOp->IsCnsIntOrI())
739         {
740             if ((flags & HW_Flag_NoJmpTableIMM) == 0 && !mustExpand)
741             {
742                 // When the imm-argument is not a constant and we are not being forced to expand, we need to
743                 // return nullptr so a GT_CALL to the intrinsic method is emitted instead. The
744                 // intrinsic method is recursive and will be forced to expand, at which point
745                 // we emit some less efficient fallback code.
746                 return nullptr;
747             }
748             else if ((flags & HW_Flag_NoJmpTableIMM) != 0)
749             {
750                 return impNonConstFallback(intrinsic, retType, baseType);
751             }
752         }
753     }
754
755     bool isTableDriven = impIsTableDrivenHWIntrinsic(category, flags);
756
757     if (isTableDriven && ((category == HW_Category_MemoryStore) ||
758                           ((flags & (HW_Flag_BaseTypeFromFirstArg | HW_Flag_BaseTypeFromSecondArg)) != 0)))
759     {
760         if ((flags & HW_Flag_BaseTypeFromFirstArg) != 0)
761         {
762             baseType = getBaseTypeOfSIMDType(info.compCompHnd->getArgClass(sig, sig->args));
763         }
764         else
765         {
766             assert((category == HW_Category_MemoryStore) || ((flags & HW_Flag_BaseTypeFromSecondArg) != 0));
767             CORINFO_ARG_LIST_HANDLE secondArg      = info.compCompHnd->getArgNext(sig->args);
768             CORINFO_CLASS_HANDLE    secondArgClass = info.compCompHnd->getArgClass(sig, secondArg);
769             baseType                               = getBaseTypeOfSIMDType(secondArgClass);
770
771             if (baseType == TYP_UNKNOWN) // the second argument is not a vector
772             {
773                 baseType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, secondArg, &secondArgClass)));
774                 assert(baseType != TYP_STRUCT);
775             }
776         }
777
778         assert(baseType != TYP_UNKNOWN);
779     }
780
781     if (((flags & (HW_Flag_OneTypeGeneric | HW_Flag_TwoTypeGeneric)) != 0) && ((flags & HW_Flag_SpecialImport) == 0))
782     {
783         if (!varTypeIsArithmetic(baseType))
784         {
785             return impUnsupportedHWIntrinsic(CORINFO_HELP_THROW_TYPE_NOT_SUPPORTED, method, sig, mustExpand);
786         }
787
788         if ((flags & HW_Flag_TwoTypeGeneric) != 0)
789         {
790             // StaticCast<T, U> has two type parameters.
791             assert(numArgs == 1);
792             var_types srcType = getBaseTypeOfSIMDType(info.compCompHnd->getArgClass(sig, sig->args));
793             if (!varTypeIsArithmetic(srcType))
794             {
795                 return impUnsupportedHWIntrinsic(CORINFO_HELP_THROW_TYPE_NOT_SUPPORTED, method, sig, mustExpand);
796             }
797         }
798     }
799
800     if ((flags & HW_Flag_NoFloatingPointUsed) == 0)
801     {
802         // Set `compFloatingPointUsed` to cover the scenario where an intrinsic is being on SIMD fields, but
803         // where no SIMD local vars are in use. This is the same logic as is used for FEATURE_SIMD.
804         compFloatingPointUsed = true;
805     }
806
807     // table-driven importer of simple intrinsics
808     if (isTableDriven)
809     {
810         unsigned                simdSize = simdSizeOfHWIntrinsic(intrinsic, sig);
811         CORINFO_ARG_LIST_HANDLE argList  = sig->args;
812         CORINFO_CLASS_HANDLE    argClass;
813         var_types               argType = TYP_UNKNOWN;
814
815         assert(numArgs >= 0);
816         assert(insOfHWIntrinsic(intrinsic, baseType) != INS_invalid);
817         assert(simdSize == 32 || simdSize == 16);
818
819         GenTreeHWIntrinsic* retNode = nullptr;
820         GenTree*            op1     = nullptr;
821         GenTree*            op2     = nullptr;
822
823         switch (numArgs)
824         {
825             case 0:
826                 retNode = gtNewSimdHWIntrinsicNode(retType, intrinsic, baseType, simdSize);
827                 break;
828             case 1:
829                 argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass)));
830                 op1     = getArgForHWIntrinsic(argType, argClass);
831                 retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, baseType, simdSize);
832                 break;
833             case 2:
834                 argType = JITtype2varType(
835                     strip(info.compCompHnd->getArgType(sig, info.compCompHnd->getArgNext(argList), &argClass)));
836                 op2 = getArgForHWIntrinsic(argType, argClass);
837
838                 op2 = addRangeCheckIfNeeded(intrinsic, op2, mustExpand);
839
840                 argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass)));
841                 op1     = getArgForHWIntrinsic(argType, argClass);
842
843                 retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, baseType, simdSize);
844                 break;
845
846             case 3:
847             {
848                 CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(argList);
849                 CORINFO_ARG_LIST_HANDLE arg3 = info.compCompHnd->getArgNext(arg2);
850
851                 argType      = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg3, &argClass)));
852                 GenTree* op3 = getArgForHWIntrinsic(argType, argClass);
853
854                 op3 = addRangeCheckIfNeeded(intrinsic, op3, mustExpand);
855
856                 argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass)));
857                 op2     = getArgForHWIntrinsic(argType, argClass);
858
859                 argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass)));
860                 op1     = getArgForHWIntrinsic(argType, argClass);
861
862                 retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, intrinsic, baseType, simdSize);
863                 break;
864             }
865             default:
866                 unreached();
867         }
868
869         bool isMemoryStore = retNode->OperIsMemoryStore();
870         if (isMemoryStore || retNode->OperIsMemoryLoad())
871         {
872             if (isMemoryStore)
873             {
874                 // A MemoryStore operation is an assignment
875                 retNode->gtFlags |= GTF_ASG;
876             }
877
878             // This operation contains an implicit indirection
879             //   it could point into the gloabal heap or
880             //   it could throw a null reference exception.
881             //
882             retNode->gtFlags |= (GTF_GLOB_REF | GTF_EXCEPT);
883         }
884         return retNode;
885     }
886
887     // other intrinsics need special importation
888     switch (isa)
889     {
890         case InstructionSet_SSE:
891             return impSSEIntrinsic(intrinsic, method, sig, mustExpand);
892         case InstructionSet_SSE2:
893             return impSSE2Intrinsic(intrinsic, method, sig, mustExpand);
894         case InstructionSet_SSE42:
895             return impSSE42Intrinsic(intrinsic, method, sig, mustExpand);
896         case InstructionSet_AVX:
897         case InstructionSet_AVX2:
898             return impAvxOrAvx2Intrinsic(intrinsic, method, sig, mustExpand);
899
900         case InstructionSet_AES:
901             return impAESIntrinsic(intrinsic, method, sig, mustExpand);
902         case InstructionSet_BMI1:
903             return impBMI1Intrinsic(intrinsic, method, sig, mustExpand);
904         case InstructionSet_BMI2:
905             return impBMI2Intrinsic(intrinsic, method, sig, mustExpand);
906         case InstructionSet_FMA:
907             return impFMAIntrinsic(intrinsic, method, sig, mustExpand);
908         case InstructionSet_LZCNT:
909             return impLZCNTIntrinsic(intrinsic, method, sig, mustExpand);
910         case InstructionSet_PCLMULQDQ:
911             return impPCLMULQDQIntrinsic(intrinsic, method, sig, mustExpand);
912         case InstructionSet_POPCNT:
913             return impPOPCNTIntrinsic(intrinsic, method, sig, mustExpand);
914         default:
915             return nullptr;
916     }
917 }
918
919 GenTree* Compiler::impSSEIntrinsic(NamedIntrinsic        intrinsic,
920                                    CORINFO_METHOD_HANDLE method,
921                                    CORINFO_SIG_INFO*     sig,
922                                    bool                  mustExpand)
923 {
924     GenTree* retNode  = nullptr;
925     GenTree* op1      = nullptr;
926     GenTree* op2      = nullptr;
927     GenTree* op3      = nullptr;
928     GenTree* op4      = nullptr;
929     int      simdSize = simdSizeOfHWIntrinsic(intrinsic, sig);
930
931     // The Prefetch and StoreFence intrinsics don't take any SIMD operands
932     // and have a simdSize of 0
933     assert((simdSize == 16) || (simdSize == 0));
934
935     switch (intrinsic)
936     {
937         case NI_SSE_MoveMask:
938             assert(sig->numArgs == 1);
939             assert(JITtype2varType(sig->retType) == TYP_INT);
940             assert(getBaseTypeOfSIMDType(info.compCompHnd->getArgClass(sig, sig->args)) == TYP_FLOAT);
941             op1     = impSIMDPopStack(TYP_SIMD16);
942             retNode = gtNewSimdHWIntrinsicNode(TYP_INT, op1, intrinsic, TYP_FLOAT, simdSize);
943             break;
944
945         case NI_SSE_Prefetch0:
946         case NI_SSE_Prefetch1:
947         case NI_SSE_Prefetch2:
948         case NI_SSE_PrefetchNonTemporal:
949         {
950             assert(sig->numArgs == 1);
951             assert(JITtype2varType(sig->retType) == TYP_VOID);
952             op1     = impPopStack().val;
953             retNode = gtNewSimdHWIntrinsicNode(TYP_VOID, op1, intrinsic, TYP_UBYTE, 0);
954             break;
955         }
956
957         case NI_SSE_StoreFence:
958             assert(sig->numArgs == 0);
959             assert(JITtype2varType(sig->retType) == TYP_VOID);
960             retNode = gtNewSimdHWIntrinsicNode(TYP_VOID, intrinsic, TYP_VOID, 0);
961             break;
962
963         default:
964             JITDUMP("Not implemented hardware intrinsic");
965             break;
966     }
967     return retNode;
968 }
969
970 GenTree* Compiler::impSSE2Intrinsic(NamedIntrinsic        intrinsic,
971                                     CORINFO_METHOD_HANDLE method,
972                                     CORINFO_SIG_INFO*     sig,
973                                     bool                  mustExpand)
974 {
975     GenTree*  retNode  = nullptr;
976     GenTree*  op1      = nullptr;
977     GenTree*  op2      = nullptr;
978     int       ival     = -1;
979     int       simdSize = simdSizeOfHWIntrinsic(intrinsic, sig);
980     var_types baseType = TYP_UNKNOWN;
981     var_types retType  = TYP_UNKNOWN;
982
983     // The  fencing intrinsics don't take any operands and simdSize is 0
984     assert((simdSize == 16) || (simdSize == 0));
985
986     CORINFO_ARG_LIST_HANDLE argList = sig->args;
987     CORINFO_CLASS_HANDLE    argClass;
988     var_types               argType = TYP_UNKNOWN;
989
990     switch (intrinsic)
991     {
992         case NI_SSE2_CompareLessThan:
993         {
994             assert(sig->numArgs == 2);
995             op2      = impSIMDPopStack(TYP_SIMD16);
996             op1      = impSIMDPopStack(TYP_SIMD16);
997             baseType = getBaseTypeOfSIMDType(sig->retTypeSigClass);
998             if (baseType == TYP_DOUBLE)
999             {
1000                 retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, intrinsic, baseType, simdSize);
1001             }
1002             else
1003             {
1004                 retNode =
1005                     gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, op1, NI_SSE2_CompareGreaterThan, baseType, simdSize);
1006             }
1007             break;
1008         }
1009
1010         case NI_SSE2_LoadFence:
1011         case NI_SSE2_MemoryFence:
1012         {
1013             assert(sig->numArgs == 0);
1014             assert(JITtype2varType(sig->retType) == TYP_VOID);
1015             assert(simdSize == 0);
1016
1017             retNode = gtNewSimdHWIntrinsicNode(TYP_VOID, intrinsic, TYP_VOID, simdSize);
1018             break;
1019         }
1020
1021         case NI_SSE2_MoveMask:
1022         {
1023             assert(sig->numArgs == 1);
1024             retType = JITtype2varType(sig->retType);
1025             assert(retType == TYP_INT);
1026             op1      = impSIMDPopStack(TYP_SIMD16);
1027             baseType = getBaseTypeOfSIMDType(info.compCompHnd->getArgClass(sig, sig->args));
1028             retNode  = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, baseType, simdSize);
1029             break;
1030         }
1031
1032         case NI_SSE2_StoreNonTemporal:
1033         {
1034             assert(sig->numArgs == 2);
1035             assert(JITtype2varType(sig->retType) == TYP_VOID);
1036             op2     = impPopStack().val;
1037             op1     = impPopStack().val;
1038             retNode = gtNewSimdHWIntrinsicNode(TYP_VOID, op1, op2, NI_SSE2_StoreNonTemporal, op2->TypeGet(), 0);
1039             break;
1040         }
1041
1042         default:
1043             JITDUMP("Not implemented hardware intrinsic");
1044             break;
1045     }
1046     return retNode;
1047 }
1048
1049 GenTree* Compiler::impSSE42Intrinsic(NamedIntrinsic        intrinsic,
1050                                      CORINFO_METHOD_HANDLE method,
1051                                      CORINFO_SIG_INFO*     sig,
1052                                      bool                  mustExpand)
1053 {
1054     GenTree*  retNode  = nullptr;
1055     GenTree*  op1      = nullptr;
1056     GenTree*  op2      = nullptr;
1057     var_types callType = JITtype2varType(sig->retType);
1058
1059     CORINFO_ARG_LIST_HANDLE argList = sig->args;
1060     CORINFO_CLASS_HANDLE    argClass;
1061     CorInfoType             corType;
1062     switch (intrinsic)
1063     {
1064         case NI_SSE42_Crc32:
1065             assert(sig->numArgs == 2);
1066             op2     = impPopStack().val;
1067             op1     = impPopStack().val;
1068             argList = info.compCompHnd->getArgNext(argList);                        // the second argument
1069             corType = strip(info.compCompHnd->getArgType(sig, argList, &argClass)); // type of the second argument
1070
1071             retNode = gtNewScalarHWIntrinsicNode(callType, op1, op2, NI_SSE42_Crc32);
1072
1073             // TODO - currently we use the BaseType to bring the type of the second argument
1074             // to the code generator. May encode the overload info in other way.
1075             retNode->gtHWIntrinsic.gtSIMDBaseType = JITtype2varType(corType);
1076             break;
1077
1078         default:
1079             JITDUMP("Not implemented hardware intrinsic");
1080             break;
1081     }
1082     return retNode;
1083 }
1084
1085 //------------------------------------------------------------------------
1086 // normalizeAndGetHalfIndex: compute the half index of a Vector256<baseType>
1087 //                           and normalize the index to the specific range
1088 //
1089 // Arguments:
1090 //    indexPtr   -- OUT paramter, the pointer to the original index value
1091 //    baseType   -- the base type of the Vector256<T>
1092 //
1093 // Return Value:
1094 //    retuen the middle index of a Vector256<baseType>
1095 //    return the normalized index via indexPtr
1096 //
1097 static int normalizeAndGetHalfIndex(int* indexPtr, var_types baseType)
1098 {
1099     assert(varTypeIsArithmetic(baseType));
1100     // clear the unused bits to normalize the index into the range of [0, length of Vector256<baseType>)
1101     *indexPtr = (*indexPtr) & (32 / genTypeSize(baseType) - 1);
1102     return (16 / genTypeSize(baseType));
1103 }
1104
1105 GenTree* Compiler::impAvxOrAvx2Intrinsic(NamedIntrinsic        intrinsic,
1106                                          CORINFO_METHOD_HANDLE method,
1107                                          CORINFO_SIG_INFO*     sig,
1108                                          bool                  mustExpand)
1109 {
1110     GenTree*  retNode  = nullptr;
1111     GenTree*  op1      = nullptr;
1112     GenTree*  op2      = nullptr;
1113     var_types baseType = TYP_UNKNOWN;
1114     int       simdSize = simdSizeOfHWIntrinsic(intrinsic, sig);
1115
1116     switch (intrinsic)
1117     {
1118         case NI_AVX_Extract:
1119         {
1120             // Avx.Extract executes software implementation when the imm8 argument is not compile-time constant
1121             assert(!mustExpand);
1122
1123             GenTree* lastOp   = impPopStack().val;
1124             GenTree* vectorOp = impSIMDPopStack(TYP_SIMD32);
1125             assert(lastOp->IsCnsIntOrI());
1126             int ival          = (int)lastOp->AsIntCon()->IconValue();
1127             baseType          = getBaseTypeOfSIMDType(info.compCompHnd->getArgClass(sig, sig->args));
1128             var_types retType = JITtype2varType(sig->retType);
1129             assert(varTypeIsArithmetic(baseType));
1130
1131             int            midIndex         = normalizeAndGetHalfIndex(&ival, baseType);
1132             NamedIntrinsic extractIntrinsic = varTypeIsShort(baseType) ? NI_SSE2_Extract : NI_SSE41_Extract;
1133             GenTree*       half             = nullptr;
1134
1135             if (ival >= midIndex)
1136             {
1137                 half = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, gtNewIconNode(1), NI_AVX_ExtractVector128,
1138                                                 baseType, 32);
1139                 ival -= midIndex;
1140             }
1141             else
1142             {
1143                 half = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, NI_AVX_GetLowerHalf, baseType, 32);
1144             }
1145
1146             retNode = gtNewSimdHWIntrinsicNode(retType, half, gtNewIconNode(ival), extractIntrinsic, baseType, 16);
1147             break;
1148         }
1149
1150         case NI_AVX_Insert:
1151         {
1152             // Avx.Extract executes software implementation when the imm8 argument is not compile-time constant
1153             assert(!mustExpand);
1154
1155             GenTree* lastOp   = impPopStack().val;
1156             GenTree* dataOp   = impPopStack().val;
1157             GenTree* vectorOp = impSIMDPopStack(TYP_SIMD32);
1158             assert(lastOp->IsCnsIntOrI());
1159             int ival = (int)lastOp->AsIntCon()->IconValue();
1160             baseType = getBaseTypeOfSIMDType(sig->retTypeSigClass);
1161             assert(varTypeIsArithmetic(baseType));
1162
1163             int            midIndex        = normalizeAndGetHalfIndex(&ival, baseType);
1164             NamedIntrinsic insertIntrinsic = varTypeIsShort(baseType) ? NI_SSE2_Insert : NI_SSE41_Insert;
1165
1166             GenTree* clonedVectorOp;
1167             vectorOp =
1168                 impCloneExpr(vectorOp, &clonedVectorOp, info.compCompHnd->getArgClass(sig, sig->args),
1169                              (unsigned)CHECK_SPILL_ALL, nullptr DEBUGARG("AVX Insert clones the vector operand"));
1170
1171             if (ival >= midIndex)
1172             {
1173                 GenTree* halfVector = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, gtNewIconNode(1),
1174                                                                NI_AVX_ExtractVector128, baseType, 32);
1175                 GenTree* ModifiedHalfVector =
1176                     gtNewSimdHWIntrinsicNode(TYP_SIMD16, halfVector, dataOp, gtNewIconNode(ival - midIndex),
1177                                              insertIntrinsic, baseType, 16);
1178                 retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD32, clonedVectorOp, ModifiedHalfVector, gtNewIconNode(1),
1179                                                    NI_AVX_InsertVector128, baseType, 32);
1180             }
1181             else
1182             {
1183                 GenTree* halfVector = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, NI_AVX_GetLowerHalf, baseType, 32);
1184                 GenTree* ModifiedHalfVector =
1185                     gtNewSimdHWIntrinsicNode(TYP_SIMD32, halfVector, dataOp, gtNewIconNode(ival), insertIntrinsic,
1186                                              baseType, 16);
1187                 retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD32, clonedVectorOp, ModifiedHalfVector, gtNewIconNode(15),
1188                                                    NI_AVX_Blend, TYP_FLOAT, 32);
1189             }
1190             break;
1191         }
1192
1193         case NI_AVX_SetVector256:
1194         {
1195             // TODO-XARCH: support long/ulong on 32-bit platfroms (remove HW_Flag_SecondArgMaybe64Bit)
1196             int numArgs = sig->numArgs;
1197             assert(numArgs >= 4);
1198             assert(numArgs <= 32);
1199             baseType                  = getBaseTypeOfSIMDType(sig->retTypeSigClass);
1200             GenTree* higherHalfVector = gtNewSimdHWIntrinsicNode(TYP_SIMD16, NI_SSE_SetZeroVector128, TYP_FLOAT, 16);
1201             GenTree* lowerHalfVector  = gtNewSimdHWIntrinsicNode(TYP_SIMD16, NI_SSE_SetZeroVector128, TYP_FLOAT, 16);
1202             NamedIntrinsic insertIntrinsic = varTypeIsShort(baseType) ? NI_SSE2_Insert : NI_SSE41_Insert;
1203             int            ival            = 0;
1204
1205             if (baseType != TYP_DOUBLE)
1206             {
1207                 assert(varTypeIsIntegral(baseType) || baseType == TYP_FLOAT);
1208
1209                 for (int i = 0; i < numArgs / 2; i++)
1210                 {
1211                     GenTree* arg = impPopStack().val;
1212                     // SSE4.1 insertps has different semantics from integral insert
1213                     ival            = baseType == TYP_FLOAT ? i * 16 : i;
1214                     lowerHalfVector = gtNewSimdHWIntrinsicNode(TYP_SIMD16, lowerHalfVector, arg, gtNewIconNode(ival),
1215                                                                insertIntrinsic, baseType, 16);
1216                 }
1217
1218                 for (int i = 0; i < numArgs / 2; i++)
1219                 {
1220                     GenTree* arg = impPopStack().val;
1221                     // SSE4.1 insertps has different semantics from integral insert
1222                     ival             = baseType == TYP_FLOAT ? i * 16 : i;
1223                     higherHalfVector = gtNewSimdHWIntrinsicNode(TYP_SIMD16, higherHalfVector, arg, gtNewIconNode(ival),
1224                                                                 insertIntrinsic, baseType, 16);
1225                 }
1226             }
1227             else
1228             {
1229                 GenTree* op4     = impPopStack().val;
1230                 GenTree* op3     = impPopStack().val;
1231                 GenTree* op2     = impPopStack().val;
1232                 GenTree* op1     = impPopStack().val;
1233                 lowerHalfVector  = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op4, op3, NI_SSE2_UnpackLow, TYP_DOUBLE, 16);
1234                 higherHalfVector = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, op1, NI_SSE2_UnpackLow, TYP_DOUBLE, 16);
1235             }
1236
1237             retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD32, lowerHalfVector, higherHalfVector, gtNewIconNode(1),
1238                                                NI_AVX_InsertVector128, baseType, 32);
1239             break;
1240         }
1241
1242         case NI_AVX_SetAllVector256:
1243         {
1244             baseType = getBaseTypeOfSIMDType(sig->retTypeSigClass);
1245 #ifdef _TARGET_X86_
1246             // TODO-XARCH: support long/ulong on 32-bit platfroms
1247             if (varTypeIsLong(baseType))
1248             {
1249                 return impUnsupportedHWIntrinsic(CORINFO_HELP_THROW_PLATFORM_NOT_SUPPORTED, method, sig, mustExpand);
1250             }
1251 #endif
1252             GenTree* arg = impPopStack().val;
1253             retNode      = gtNewSimdHWIntrinsicNode(TYP_SIMD32, arg, NI_AVX_SetAllVector256, baseType, 32);
1254             break;
1255         }
1256
1257         case NI_AVX_SetHighLow:
1258         {
1259             baseType              = getBaseTypeOfSIMDType(sig->retTypeSigClass);
1260             GenTree* lowerVector  = impSIMDPopStack(TYP_SIMD16);
1261             GenTree* higherVector = impSIMDPopStack(TYP_SIMD16);
1262             retNode               = gtNewSimdHWIntrinsicNode(TYP_SIMD32, lowerVector, higherVector, gtNewIconNode(1),
1263                                                NI_AVX_InsertVector128, baseType, 32);
1264             break;
1265         }
1266
1267         case NI_AVX_ExtractVector128:
1268         case NI_AVX2_ExtractVector128:
1269         {
1270             GenTree* lastOp = impPopStack().val;
1271             assert(lastOp->IsCnsIntOrI() || mustExpand);
1272             GenTree* vectorOp = impSIMDPopStack(TYP_SIMD32);
1273             if (sig->numArgs == 2)
1274             {
1275                 baseType = getBaseTypeOfSIMDType(sig->retTypeSigClass);
1276                 if (!varTypeIsArithmetic(baseType))
1277                 {
1278                     retNode = impUnsupportedHWIntrinsic(CORINFO_HELP_THROW_TYPE_NOT_SUPPORTED, method, sig, mustExpand);
1279                 }
1280                 else
1281                 {
1282                     retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, lastOp, intrinsic, baseType, 32);
1283                 }
1284             }
1285             else
1286             {
1287                 assert(sig->numArgs == 3);
1288                 op1                                    = impPopStack().val;
1289                 CORINFO_ARG_LIST_HANDLE secondArg      = info.compCompHnd->getArgNext(sig->args);
1290                 CORINFO_CLASS_HANDLE    secondArgClass = info.compCompHnd->getArgClass(sig, secondArg);
1291                 baseType                               = getBaseTypeOfSIMDType(secondArgClass);
1292                 retNode = gtNewSimdHWIntrinsicNode(TYP_VOID, op1, vectorOp, lastOp, intrinsic, baseType, 32);
1293             }
1294             break;
1295         }
1296         default:
1297             JITDUMP("Not implemented hardware intrinsic");
1298             break;
1299     }
1300     return retNode;
1301 }
1302
1303 GenTree* Compiler::impAESIntrinsic(NamedIntrinsic        intrinsic,
1304                                    CORINFO_METHOD_HANDLE method,
1305                                    CORINFO_SIG_INFO*     sig,
1306                                    bool                  mustExpand)
1307 {
1308     return nullptr;
1309 }
1310
1311 GenTree* Compiler::impBMI1Intrinsic(NamedIntrinsic        intrinsic,
1312                                     CORINFO_METHOD_HANDLE method,
1313                                     CORINFO_SIG_INFO*     sig,
1314                                     bool                  mustExpand)
1315 {
1316     return nullptr;
1317 }
1318
1319 GenTree* Compiler::impBMI2Intrinsic(NamedIntrinsic        intrinsic,
1320                                     CORINFO_METHOD_HANDLE method,
1321                                     CORINFO_SIG_INFO*     sig,
1322                                     bool                  mustExpand)
1323 {
1324     return nullptr;
1325 }
1326
1327 GenTree* Compiler::impFMAIntrinsic(NamedIntrinsic        intrinsic,
1328                                    CORINFO_METHOD_HANDLE method,
1329                                    CORINFO_SIG_INFO*     sig,
1330                                    bool                  mustExpand)
1331 {
1332     return nullptr;
1333 }
1334
1335 GenTree* Compiler::impLZCNTIntrinsic(NamedIntrinsic        intrinsic,
1336                                      CORINFO_METHOD_HANDLE method,
1337                                      CORINFO_SIG_INFO*     sig,
1338                                      bool                  mustExpand)
1339 {
1340     assert(sig->numArgs == 1);
1341     var_types callType = JITtype2varType(sig->retType);
1342     return gtNewScalarHWIntrinsicNode(callType, impPopStack().val, NI_LZCNT_LeadingZeroCount);
1343 }
1344
1345 GenTree* Compiler::impPCLMULQDQIntrinsic(NamedIntrinsic        intrinsic,
1346                                          CORINFO_METHOD_HANDLE method,
1347                                          CORINFO_SIG_INFO*     sig,
1348                                          bool                  mustExpand)
1349 {
1350     return nullptr;
1351 }
1352
1353 GenTree* Compiler::impPOPCNTIntrinsic(NamedIntrinsic        intrinsic,
1354                                       CORINFO_METHOD_HANDLE method,
1355                                       CORINFO_SIG_INFO*     sig,
1356                                       bool                  mustExpand)
1357 {
1358     assert(sig->numArgs == 1);
1359     var_types callType = JITtype2varType(sig->retType);
1360     return gtNewScalarHWIntrinsicNode(callType, impPopStack().val, NI_POPCNT_PopCount);
1361 }
1362
1363 #endif // FEATURE_HW_INTRINSICS