Set isInternalRegDelayFree for several of the x86 hwintrinsics
[platform/upstream/coreclr.git] / src / jit / hwintrinsiccodegenxarch.cpp
1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
4
5 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
7 XX                                                                           XX
8 XX               Intel hardware intrinsic Code Generator                     XX
9 XX                                                                           XX
10 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
11 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
12 */
13 #include "jitpch.h"
14 #ifdef _MSC_VER
15 #pragma hdrstop
16 #endif
17
18 #ifdef FEATURE_HW_INTRINSICS
19
20 #include "emit.h"
21 #include "codegen.h"
22 #include "sideeffects.h"
23 #include "lower.h"
24 #include "gcinfo.h"
25 #include "gcinfoencoder.h"
26
27 //------------------------------------------------------------------------
28 // genIsTableDrivenHWIntrinsic:
29 //
30 // Arguments:
31 //    category - category of a HW intrinsic
32 //
33 // Return Value:
34 //    returns true if this category can be table-driven in CodeGen
35 //
36 static bool genIsTableDrivenHWIntrinsic(HWIntrinsicCategory category, HWIntrinsicFlag flags)
37 {
38     // TODO - make more categories to the table-driven framework
39     // HW_Category_Helper and HW_Flag_MultiIns usually need manual codegen
40     const bool tableDrivenCategory =
41         category != HW_Category_Special && category != HW_Category_Scalar && category != HW_Category_Helper;
42     const bool tableDrivenFlag = (flags & HW_Flag_MultiIns) == 0;
43     return tableDrivenCategory && tableDrivenFlag;
44 }
45
46 //------------------------------------------------------------------------
47 // genHWIntrinsic: Generates the code for a given hardware intrinsic node.
48 //
49 // Arguments:
50 //    node - The hardware intrinsic node
51 //
52 void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
53 {
54     NamedIntrinsic      intrinsicID = node->gtHWIntrinsicId;
55     InstructionSet      isa         = Compiler::isaOfHWIntrinsic(intrinsicID);
56     HWIntrinsicCategory category    = Compiler::categoryOfHWIntrinsic(intrinsicID);
57     HWIntrinsicFlag     flags       = Compiler::flagsOfHWIntrinsic(intrinsicID);
58     int                 ival        = Compiler::ivalOfHWIntrinsic(intrinsicID);
59     int                 numArgs     = Compiler::numArgsOfHWIntrinsic(intrinsicID);
60
61     assert((flags & HW_Flag_NoCodeGen) == 0);
62
63     if (genIsTableDrivenHWIntrinsic(category, flags))
64     {
65         GenTree*  op1        = node->gtGetOp1();
66         GenTree*  op2        = node->gtGetOp2();
67         regNumber targetReg  = node->gtRegNum;
68         var_types targetType = node->TypeGet();
69         var_types baseType   = node->gtSIMDBaseType;
70
71         regNumber op1Reg = REG_NA;
72         regNumber op2Reg = REG_NA;
73         emitter*  emit   = getEmitter();
74
75         assert(numArgs >= 0);
76         instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
77         assert(ins != INS_invalid);
78         emitAttr simdSize = EA_ATTR(node->gtSIMDSize);
79         assert(simdSize != 0);
80
81         switch (numArgs)
82         {
83             case 1:
84                 genConsumeOperands(node);
85                 op1Reg = op1->gtRegNum;
86                 if (category == HW_Category_MemoryLoad)
87                 {
88                     emit->emitIns_R_AR(ins, simdSize, targetReg, op1Reg, 0);
89                 }
90                 else if (category == HW_Category_SIMDScalar && (flags & HW_Flag_CopyUpperBits) != 0)
91                 {
92                     emit->emitIns_SIMD_R_R_R(ins, simdSize, targetReg, op1Reg, op1Reg);
93                 }
94                 else if ((ival != -1) && varTypeIsFloating(baseType))
95                 {
96                     emit->emitIns_R_R_I(ins, simdSize, targetReg, op1Reg, ival);
97                 }
98                 else
99                 {
100                     emit->emitIns_R_R(ins, simdSize, targetReg, op1Reg);
101                 }
102                 break;
103
104             case 2:
105                 genConsumeOperands(node);
106                 op1Reg = op1->gtRegNum;
107                 op2Reg = op2->gtRegNum;
108                 if (category == HW_Category_MemoryStore)
109                 {
110                     emit->emitIns_AR_R(ins, simdSize, op2Reg, op1Reg, 0);
111                 }
112                 else if ((ival != -1) && varTypeIsFloating(baseType))
113                 {
114                     genHWIntrinsic_R_R_RM_I(node, ins);
115                 }
116                 else if (category == HW_Category_MemoryLoad)
117                 {
118                     emit->emitIns_SIMD_R_R_AR(ins, simdSize, targetReg, op1Reg, op2Reg);
119                 }
120                 else if (Compiler::isImmHWIntrinsic(intrinsicID, op2))
121                 {
122                     auto emitSwCase = [&](unsigned i) {
123                         emit->emitIns_SIMD_R_R_I(ins, simdSize, targetReg, op1Reg, (int)i);
124                     };
125
126                     if (op2->IsCnsIntOrI())
127                     {
128                         ssize_t ival = op2->AsIntCon()->IconValue();
129                         emitSwCase((unsigned)ival);
130                     }
131                     else
132                     {
133                         // We emit a fallback case for the scenario when the imm-op is not a constant. This should
134                         // normally happen when the intrinsic is called indirectly, such as via Reflection. However, it
135                         // can also occur if the consumer calls it directly and just doesn't pass a constant value.
136                         regNumber baseReg = node->ExtractTempReg();
137                         regNumber offsReg = node->GetSingleTempReg();
138                         genHWIntrinsicJumpTableFallback(intrinsicID, op2Reg, baseReg, offsReg, emitSwCase);
139                     }
140                 }
141                 else
142                 {
143                     genHWIntrinsic_R_R_RM(node, ins);
144                 }
145                 break;
146             case 3:
147             {
148                 assert(op1->OperIsList());
149                 assert(op1->gtGetOp2()->OperIsList());
150                 assert(op1->gtGetOp2()->gtGetOp2()->OperIsList());
151
152                 GenTreeArgList* argList = op1->AsArgList();
153                 op1                     = argList->Current();
154                 genConsumeRegs(op1);
155                 op1Reg = op1->gtRegNum;
156
157                 argList = argList->Rest();
158                 op2     = argList->Current();
159                 genConsumeRegs(op2);
160                 op2Reg = op2->gtRegNum;
161
162                 argList      = argList->Rest();
163                 GenTree* op3 = argList->Current();
164                 genConsumeRegs(op3);
165                 regNumber op3Reg = op3->gtRegNum;
166
167                 if (Compiler::isImmHWIntrinsic(intrinsicID, op3))
168                 {
169                     auto emitSwCase = [&](unsigned i) {
170                         emit->emitIns_SIMD_R_R_R_I(ins, simdSize, targetReg, op1Reg, op2Reg, (int)i);
171                     };
172                     if (op3->IsCnsIntOrI())
173                     {
174                         ssize_t ival = op3->AsIntCon()->IconValue();
175                         emitSwCase((unsigned)ival);
176                     }
177                     else
178                     {
179                         // We emit a fallback case for the scenario when the imm-op is not a constant. This should
180                         // normally happen when the intrinsic is called indirectly, such as via Reflection. However, it
181                         // can also occur if the consumer calls it directly and just doesn't pass a constant value.
182                         regNumber baseReg = node->ExtractTempReg();
183                         regNumber offsReg = node->GetSingleTempReg();
184                         genHWIntrinsicJumpTableFallback(intrinsicID, op3Reg, baseReg, offsReg, emitSwCase);
185                     }
186                 }
187                 else
188                 {
189                     emit->emitIns_SIMD_R_R_R_R(ins, simdSize, targetReg, op1Reg, op2Reg, op3Reg);
190                 }
191                 break;
192             }
193
194             default:
195                 unreached();
196                 break;
197         }
198         genProduceReg(node);
199         return;
200     }
201
202     switch (isa)
203     {
204         case InstructionSet_SSE:
205             genSSEIntrinsic(node);
206             break;
207         case InstructionSet_SSE2:
208             genSSE2Intrinsic(node);
209             break;
210         case InstructionSet_SSE41:
211             genSSE41Intrinsic(node);
212             break;
213         case InstructionSet_SSE42:
214             genSSE42Intrinsic(node);
215             break;
216         case InstructionSet_AVX:
217             genAVXIntrinsic(node);
218             break;
219         case InstructionSet_AVX2:
220             genAVX2Intrinsic(node);
221             break;
222         case InstructionSet_AES:
223             genAESIntrinsic(node);
224             break;
225         case InstructionSet_BMI1:
226             genBMI1Intrinsic(node);
227             break;
228         case InstructionSet_BMI2:
229             genBMI2Intrinsic(node);
230             break;
231         case InstructionSet_FMA:
232             genFMAIntrinsic(node);
233             break;
234         case InstructionSet_LZCNT:
235             genLZCNTIntrinsic(node);
236             break;
237         case InstructionSet_PCLMULQDQ:
238             genPCLMULQDQIntrinsic(node);
239             break;
240         case InstructionSet_POPCNT:
241             genPOPCNTIntrinsic(node);
242             break;
243         default:
244             unreached();
245             break;
246     }
247 }
248
249 //------------------------------------------------------------------------
250 // genHWIntrinsic_R_R_RM: Generates the code for a hardware intrinsic node that takes a register operand, a
251 //                        register/memory operand, and that returns a value in register
252 //
253 // Arguments:
254 //    node - The hardware intrinsic node
255 //    ins  - The instruction being generated
256 //
257 void CodeGen::genHWIntrinsic_R_R_RM(GenTreeHWIntrinsic* node, instruction ins)
258 {
259     var_types targetType = node->TypeGet();
260     regNumber targetReg  = node->gtRegNum;
261     GenTree*  op1        = node->gtGetOp1();
262     GenTree*  op2        = node->gtGetOp2();
263     emitAttr  simdSize   = EA_ATTR(node->gtSIMDSize);
264     emitter*  emit       = getEmitter();
265
266     // TODO-XArch-CQ: Commutative operations can have op1 be contained
267     // TODO-XArch-CQ: Non-VEX encoded instructions can have both ops contained
268
269     regNumber op1Reg = op1->gtRegNum;
270
271     assert(targetReg != REG_NA);
272     assert(op1Reg != REG_NA);
273
274     if (op2->isContained() || op2->isUsedFromSpillTemp())
275     {
276         assert((Compiler::flagsOfHWIntrinsic(node->gtHWIntrinsicId) & HW_Flag_NoContainment) == 0);
277         assert(compiler->m_pLowering->IsContainableHWIntrinsicOp(node, op2) || op2->IsRegOptional());
278
279         TempDsc* tmpDsc = nullptr;
280         unsigned varNum = BAD_VAR_NUM;
281         unsigned offset = (unsigned)-1;
282
283         if (op2->isUsedFromSpillTemp())
284         {
285             assert(op2->IsRegOptional());
286
287             tmpDsc = getSpillTempDsc(op2);
288             varNum = tmpDsc->tdTempNum();
289             offset = 0;
290
291             compiler->tmpRlsTemp(tmpDsc);
292         }
293         else if (op2->OperIsHWIntrinsic())
294         {
295             emit->emitIns_SIMD_R_R_AR(ins, simdSize, targetReg, op1Reg, op2->gtGetOp1()->gtRegNum);
296             return;
297         }
298         else if (op2->isIndir())
299         {
300             GenTreeIndir* memIndir = op2->AsIndir();
301             GenTree*      memBase  = memIndir->gtOp1;
302
303             switch (memBase->OperGet())
304             {
305                 case GT_LCL_VAR_ADDR:
306                 {
307                     varNum = memBase->AsLclVarCommon()->GetLclNum();
308                     offset = 0;
309
310                     // Ensure that all the GenTreeIndir values are set to their defaults.
311                     assert(!memIndir->HasIndex());
312                     assert(memIndir->Scale() == 1);
313                     assert(memIndir->Offset() == 0);
314
315                     break;
316                 }
317
318                 case GT_CLS_VAR_ADDR:
319                 {
320                     emit->emitIns_SIMD_R_R_C(ins, simdSize, targetReg, op1Reg, memBase->gtClsVar.gtClsVarHnd, 0);
321                     return;
322                 }
323
324                 default:
325                 {
326                     emit->emitIns_SIMD_R_R_A(ins, simdSize, targetReg, op1Reg, memIndir);
327                     return;
328                 }
329             }
330         }
331         else
332         {
333             switch (op2->OperGet())
334             {
335                 case GT_LCL_FLD:
336                 {
337                     GenTreeLclFld* lclField = op2->AsLclFld();
338
339                     varNum = lclField->GetLclNum();
340                     offset = lclField->gtLclFld.gtLclOffs;
341                     break;
342                 }
343
344                 case GT_LCL_VAR:
345                 {
346                     assert(op2->IsRegOptional() || !compiler->lvaTable[op2->gtLclVar.gtLclNum].lvIsRegCandidate());
347                     varNum = op2->AsLclVar()->GetLclNum();
348                     offset = 0;
349                     break;
350                 }
351
352                 default:
353                     unreached();
354                     break;
355             }
356         }
357
358         // Ensure we got a good varNum and offset.
359         // We also need to check for `tmpDsc != nullptr` since spill temp numbers
360         // are negative and start with -1, which also happens to be BAD_VAR_NUM.
361         assert((varNum != BAD_VAR_NUM) || (tmpDsc != nullptr));
362         assert(offset != (unsigned)-1);
363
364         emit->emitIns_SIMD_R_R_S(ins, simdSize, targetReg, op1Reg, varNum, offset);
365     }
366     else
367     {
368         emit->emitIns_SIMD_R_R_R(ins, simdSize, targetReg, op1Reg, op2->gtRegNum);
369     }
370 }
371
372 //------------------------------------------------------------------------
373 // genHWIntrinsic_R_R_RM_I: Generates the code for a hardware intrinsic node that takes a register operand, a
374 //                        register/memory operand, an immediate operand, and that returns a value in register
375 //
376 // Arguments:
377 //    node - The hardware intrinsic node
378 //    ins  - The instruction being generated
379 //
380 void CodeGen::genHWIntrinsic_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins)
381 {
382     var_types targetType = node->TypeGet();
383     regNumber targetReg  = node->gtRegNum;
384     GenTree*  op1        = node->gtGetOp1();
385     GenTree*  op2        = node->gtGetOp2();
386     emitAttr  simdSize   = EA_ATTR(node->gtSIMDSize);
387     int       ival       = Compiler::ivalOfHWIntrinsic(node->gtHWIntrinsicId);
388     emitter*  emit       = getEmitter();
389
390     // TODO-XArch-CQ: Commutative operations can have op1 be contained
391     // TODO-XArch-CQ: Non-VEX encoded instructions can have both ops contained
392
393     regNumber op1Reg = op1->gtRegNum;
394
395     assert(targetReg != REG_NA);
396     assert(op1Reg != REG_NA);
397
398     if (op2->isContained() || op2->isUsedFromSpillTemp())
399     {
400         assert((Compiler::flagsOfHWIntrinsic(node->gtHWIntrinsicId) & HW_Flag_NoContainment) == 0);
401         assert(compiler->m_pLowering->IsContainableHWIntrinsicOp(node, op2) || op2->IsRegOptional());
402
403         TempDsc* tmpDsc = nullptr;
404         unsigned varNum = BAD_VAR_NUM;
405         unsigned offset = (unsigned)-1;
406
407         if (op2->isUsedFromSpillTemp())
408         {
409             assert(op2->IsRegOptional());
410
411             tmpDsc = getSpillTempDsc(op2);
412             varNum = tmpDsc->tdTempNum();
413             offset = 0;
414
415             compiler->tmpRlsTemp(tmpDsc);
416         }
417         else if (op2->OperIsHWIntrinsic())
418         {
419             emit->emitIns_SIMD_R_R_AR_I(ins, simdSize, targetReg, op1Reg, op2->gtGetOp1()->gtRegNum, ival);
420             return;
421         }
422         else if (op2->isIndir())
423         {
424             GenTreeIndir* memIndir = op2->AsIndir();
425             GenTree*      memBase  = memIndir->gtOp1;
426
427             switch (memBase->OperGet())
428             {
429                 case GT_LCL_VAR_ADDR:
430                 {
431                     varNum = memBase->AsLclVarCommon()->GetLclNum();
432                     offset = 0;
433
434                     // Ensure that all the GenTreeIndir values are set to their defaults.
435                     assert(!memIndir->HasIndex());
436                     assert(memIndir->Scale() == 1);
437                     assert(memIndir->Offset() == 0);
438
439                     break;
440                 }
441
442                 case GT_CLS_VAR_ADDR:
443                 {
444                     emit->emitIns_SIMD_R_R_C_I(ins, simdSize, targetReg, op1Reg, memBase->gtClsVar.gtClsVarHnd, 0,
445                                                ival);
446                     return;
447                 }
448
449                 default:
450                 {
451                     emit->emitIns_SIMD_R_R_A_I(ins, simdSize, targetReg, op1Reg, memIndir, ival);
452                     return;
453                 }
454             }
455         }
456         else
457         {
458             switch (op2->OperGet())
459             {
460                 case GT_LCL_FLD:
461                 {
462                     GenTreeLclFld* lclField = op2->AsLclFld();
463
464                     varNum = lclField->GetLclNum();
465                     offset = lclField->gtLclFld.gtLclOffs;
466                     break;
467                 }
468
469                 case GT_LCL_VAR:
470                 {
471                     assert(op2->IsRegOptional() || !compiler->lvaTable[op2->gtLclVar.gtLclNum].lvIsRegCandidate());
472                     varNum = op2->AsLclVar()->GetLclNum();
473                     offset = 0;
474                     break;
475                 }
476
477                 default:
478                     unreached();
479                     break;
480             }
481         }
482
483         // Ensure we got a good varNum and offset.
484         // We also need to check for `tmpDsc != nullptr` since spill temp numbers
485         // are negative and start with -1, which also happens to be BAD_VAR_NUM.
486         assert((varNum != BAD_VAR_NUM) || (tmpDsc != nullptr));
487         assert(offset != (unsigned)-1);
488
489         emit->emitIns_SIMD_R_R_S_I(ins, simdSize, targetReg, op1Reg, varNum, offset, ival);
490     }
491     else
492     {
493         emit->emitIns_SIMD_R_R_R_I(ins, simdSize, targetReg, op1Reg, op2->gtRegNum, ival);
494     }
495 }
496
497 // genHWIntrinsicJumpTableFallback : generate the jump-table fallback for imm-intrinsics
498 //                       with non-constant argument
499 //
500 // Arguments:
501 //    intrinsic      - intrinsic ID
502 //    nonConstImmReg - the register contains non-constant imm8 argument
503 //    baseReg        - a register for the start of the switch table
504 //    offsReg        - a register for the offset into the switch table
505 //    emitSwCase     - the lambda to generate siwtch-case
506 //
507 // Return Value:
508 //    generate the jump-table fallback for imm-intrinsics with non-constant argument.
509 // Note:
510 //    This function can be used for all imm-intrinsics (whether full-range or not),
511 //    The compiler front-end (i.e. importer) is responsible to insert a range-check IR
512 //    (GT_HW_INTRINSIC_CHK) for imm8 argument, so this function does not need to do range-check.
513 //
514 template <typename HWIntrinsicSwitchCaseBody>
515 void CodeGen::genHWIntrinsicJumpTableFallback(NamedIntrinsic            intrinsic,
516                                               regNumber                 nonConstImmReg,
517                                               regNumber                 baseReg,
518                                               regNumber                 offsReg,
519                                               HWIntrinsicSwitchCaseBody emitSwCase)
520 {
521     assert(nonConstImmReg != REG_NA);
522     emitter* emit = getEmitter();
523
524     const unsigned maxByte = (unsigned)Compiler::immUpperBoundOfHWIntrinsic(intrinsic) + 1;
525     assert(maxByte <= 256);
526     BasicBlock* jmpTable[256];
527
528     unsigned jmpTableBase = emit->emitBBTableDataGenBeg(maxByte, true);
529     unsigned jmpTableOffs = 0;
530
531     // Emit the jump table
532     for (unsigned i = 0; i < maxByte; i++)
533     {
534         jmpTable[i] = genCreateTempLabel();
535         emit->emitDataGenData(i, jmpTable[i]);
536     }
537
538     emit->emitDataGenEnd();
539
540     // Compute and jump to the appropriate offset in the switch table
541     emit->emitIns_R_C(INS_lea, emitTypeSize(TYP_I_IMPL), offsReg, compiler->eeFindJitDataOffs(jmpTableBase), 0);
542
543     emit->emitIns_R_ARX(INS_mov, EA_4BYTE, offsReg, offsReg, nonConstImmReg, 4, 0);
544     emit->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, compiler->fgFirstBB, baseReg);
545     emit->emitIns_R_R(INS_add, EA_PTRSIZE, offsReg, baseReg);
546     emit->emitIns_R(INS_i_jmp, emitTypeSize(TYP_I_IMPL), offsReg);
547
548     // Emit the switch table entries
549
550     BasicBlock* switchTableBeg = genCreateTempLabel();
551     BasicBlock* switchTableEnd = genCreateTempLabel();
552
553     genDefineTempLabel(switchTableBeg);
554
555     for (unsigned i = 0; i < maxByte; i++)
556     {
557         genDefineTempLabel(jmpTable[i]);
558         emitSwCase(i);
559         emit->emitIns_J(INS_jmp, switchTableEnd);
560     }
561
562     genDefineTempLabel(switchTableEnd);
563 }
564
565 //------------------------------------------------------------------------
566 // genSSEIntrinsic: Generates the code for an SSE hardware intrinsic node
567 //
568 // Arguments:
569 //    node - The hardware intrinsic node
570 //
571 void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node)
572 {
573     NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
574     GenTree*       op1         = node->gtGetOp1();
575     GenTree*       op2         = node->gtGetOp2();
576     GenTree*       op3         = nullptr;
577     GenTree*       op4         = nullptr;
578     regNumber      targetReg   = node->gtRegNum;
579     var_types      targetType  = node->TypeGet();
580     var_types      baseType    = node->gtSIMDBaseType;
581
582     regNumber op1Reg = REG_NA;
583     regNumber op2Reg = REG_NA;
584     regNumber op3Reg = REG_NA;
585     regNumber op4Reg = REG_NA;
586     emitter*  emit   = getEmitter();
587
588     if ((op1 != nullptr) && !op1->OperIsList())
589     {
590         op1Reg = op1->gtRegNum;
591         genConsumeOperands(node);
592     }
593
594     switch (intrinsicID)
595     {
596         case NI_SSE_ConvertScalarToVector128Single:
597         {
598             assert(node->TypeGet() == TYP_SIMD16);
599             assert(node->gtSIMDBaseType == TYP_FLOAT);
600             assert(Compiler::ivalOfHWIntrinsic(intrinsicID) == -1);
601
602             instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
603             genHWIntrinsic_R_R_RM(node, ins);
604             break;
605         }
606
607         case NI_SSE_CompareEqualOrderedScalar:
608         case NI_SSE_CompareEqualUnorderedScalar:
609         {
610             assert(baseType == TYP_FLOAT);
611             op2Reg             = op2->gtRegNum;
612             regNumber   tmpReg = node->GetSingleTempReg();
613             instruction ins    = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
614
615             // Ensure we aren't overwriting targetReg
616             assert(tmpReg != targetReg);
617
618             emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
619             emit->emitIns_R(INS_setpo, EA_1BYTE, targetReg);
620             emit->emitIns_R(INS_sete, EA_1BYTE, tmpReg);
621             emit->emitIns_R_R(INS_and, EA_1BYTE, tmpReg, targetReg);
622             emit->emitIns_R(INS_setne, EA_1BYTE, targetReg);
623             emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
624             break;
625         }
626
627         case NI_SSE_CompareGreaterThanOrderedScalar:
628         case NI_SSE_CompareGreaterThanUnorderedScalar:
629         {
630             assert(baseType == TYP_FLOAT);
631             op2Reg = op2->gtRegNum;
632
633             instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
634             emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
635             emit->emitIns_R(INS_seta, EA_1BYTE, targetReg);
636             emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
637             break;
638         }
639
640         case NI_SSE_CompareGreaterThanOrEqualOrderedScalar:
641         case NI_SSE_CompareGreaterThanOrEqualUnorderedScalar:
642         {
643             assert(baseType == TYP_FLOAT);
644             op2Reg = op2->gtRegNum;
645
646             instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
647             emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
648             emit->emitIns_R(INS_setae, EA_1BYTE, targetReg);
649             emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
650             break;
651         }
652
653         case NI_SSE_CompareLessThanOrderedScalar:
654         case NI_SSE_CompareLessThanUnorderedScalar:
655         {
656             assert(baseType == TYP_FLOAT);
657             op2Reg = op2->gtRegNum;
658
659             instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
660             emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op2Reg, op1Reg);
661             emit->emitIns_R(INS_seta, EA_1BYTE, targetReg);
662             emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
663             break;
664         }
665
666         case NI_SSE_CompareLessThanOrEqualOrderedScalar:
667         case NI_SSE_CompareLessThanOrEqualUnorderedScalar:
668         {
669             assert(baseType == TYP_FLOAT);
670             op2Reg = op2->gtRegNum;
671
672             instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
673             emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op2Reg, op1Reg);
674             emit->emitIns_R(INS_setae, EA_1BYTE, targetReg);
675             emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
676             break;
677         }
678
679         case NI_SSE_CompareNotEqualOrderedScalar:
680         case NI_SSE_CompareNotEqualUnorderedScalar:
681         {
682             assert(baseType == TYP_FLOAT);
683             op2Reg             = op2->gtRegNum;
684             regNumber   tmpReg = node->GetSingleTempReg();
685             instruction ins    = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
686
687             // Ensure we aren't overwriting targetReg
688             assert(tmpReg != targetReg);
689
690             emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
691             emit->emitIns_R(INS_setpe, EA_1BYTE, targetReg);
692             emit->emitIns_R(INS_setne, EA_1BYTE, tmpReg);
693             emit->emitIns_R_R(INS_or, EA_1BYTE, tmpReg, targetReg);
694             emit->emitIns_R(INS_setne, EA_1BYTE, targetReg);
695             emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
696             break;
697         }
698
699         case NI_SSE_ConvertToSingle:
700         case NI_SSE_StaticCast:
701         {
702             assert(op2 == nullptr);
703             if (op1Reg != targetReg)
704             {
705                 instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
706                 emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), targetReg, op1Reg);
707             }
708             break;
709         }
710
711         case NI_SSE_MoveMask:
712         {
713             assert(baseType == TYP_FLOAT);
714             assert(op2 == nullptr);
715
716             instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
717             emit->emitIns_R_R(ins, emitTypeSize(TYP_INT), targetReg, op1Reg);
718             break;
719         }
720
721         case NI_SSE_Prefetch0:
722         case NI_SSE_Prefetch1:
723         case NI_SSE_Prefetch2:
724         case NI_SSE_PrefetchNonTemporal:
725         {
726             assert(baseType == TYP_UBYTE);
727             assert(op2 == nullptr);
728
729             instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
730             emit->emitIns_AR(ins, emitTypeSize(baseType), op1Reg, 0);
731             break;
732         }
733
734         case NI_SSE_ReciprocalScalar:
735         case NI_SSE_ReciprocalSqrtScalar:
736         case NI_SSE_SqrtScalar:
737         {
738             assert(baseType == TYP_FLOAT);
739             instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
740
741             if (op2 == nullptr)
742             {
743                 emit->emitIns_SIMD_R_R_R(ins, emitTypeSize(TYP_SIMD16), targetReg, op1Reg, op1Reg);
744             }
745             else
746             {
747                 genHWIntrinsic_R_R_RM(node, ins);
748             }
749             break;
750         }
751
752         case NI_SSE_SetScalarVector128:
753         {
754             assert(baseType == TYP_FLOAT);
755             assert(op2 == nullptr);
756
757             if (op1Reg == targetReg)
758             {
759                 regNumber tmpReg = node->GetSingleTempReg();
760
761                 // Ensure we aren't overwriting targetReg
762                 assert(tmpReg != targetReg);
763
764                 emit->emitIns_R_R(INS_movaps, emitTypeSize(TYP_SIMD16), tmpReg, op1Reg);
765                 op1Reg = tmpReg;
766             }
767
768             emit->emitIns_SIMD_R_R_R(INS_xorps, emitTypeSize(TYP_SIMD16), targetReg, targetReg, targetReg);
769             emit->emitIns_SIMD_R_R_R(INS_movss, emitTypeSize(TYP_SIMD16), targetReg, targetReg, op1Reg);
770             break;
771         }
772
773         case NI_SSE_SetZeroVector128:
774         {
775             assert(baseType == TYP_FLOAT);
776             assert(op1 == nullptr);
777             assert(op2 == nullptr);
778             emit->emitIns_SIMD_R_R_R(INS_xorps, emitTypeSize(TYP_SIMD16), targetReg, targetReg, targetReg);
779             break;
780         }
781
782         case NI_SSE_StoreFence:
783         {
784             assert(baseType == TYP_VOID);
785             assert(op1 == nullptr);
786             assert(op2 == nullptr);
787             emit->emitIns(INS_sfence);
788             break;
789         }
790
791         default:
792             unreached();
793             break;
794     }
795
796     genProduceReg(node);
797 }
798
799 //------------------------------------------------------------------------
800 // genSSE2Intrinsic: Generates the code for an SSE2 hardware intrinsic node
801 //
802 // Arguments:
803 //    node - The hardware intrinsic node
804 //
805 void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node)
806 {
807     NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
808     GenTree*       op1         = node->gtGetOp1();
809     GenTree*       op2         = node->gtGetOp2();
810     regNumber      targetReg   = node->gtRegNum;
811     var_types      targetType  = node->TypeGet();
812     var_types      baseType    = node->gtSIMDBaseType;
813     regNumber      op1Reg      = REG_NA;
814     regNumber      op2Reg      = REG_NA;
815     emitter*       emit        = getEmitter();
816     int            ival        = -1;
817
818     if ((op1 != nullptr) && !op1->OperIsList())
819     {
820         op1Reg = op1->gtRegNum;
821         genConsumeOperands(node);
822     }
823
824     switch (intrinsicID)
825     {
826         // All integer overloads are handled by table codegen
827         case NI_SSE2_CompareLessThan:
828         {
829             assert(op1 != nullptr);
830             assert(op2 != nullptr);
831             assert(baseType == TYP_DOUBLE);
832             instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
833             op2Reg          = op2->gtRegNum;
834             ival            = Compiler::ivalOfHWIntrinsic(intrinsicID);
835             assert(ival != -1);
836             emit->emitIns_SIMD_R_R_R_I(ins, emitTypeSize(TYP_SIMD16), targetReg, op1Reg, op2Reg, ival);
837
838             break;
839         }
840
841         case NI_SSE2_CompareEqualOrderedScalar:
842         case NI_SSE2_CompareEqualUnorderedScalar:
843         {
844             assert(baseType == TYP_DOUBLE);
845             op2Reg             = op2->gtRegNum;
846             regNumber   tmpReg = node->GetSingleTempReg();
847             instruction ins    = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
848
849             // Ensure we aren't overwriting targetReg
850             assert(tmpReg != targetReg);
851
852             emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
853             emit->emitIns_R(INS_setpo, EA_1BYTE, targetReg);
854             emit->emitIns_R(INS_sete, EA_1BYTE, tmpReg);
855             emit->emitIns_R_R(INS_and, EA_1BYTE, tmpReg, targetReg);
856             emit->emitIns_R(INS_setne, EA_1BYTE, targetReg);
857             emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
858             break;
859         }
860
861         case NI_SSE2_CompareGreaterThanOrderedScalar:
862         case NI_SSE2_CompareGreaterThanUnorderedScalar:
863         {
864             assert(baseType == TYP_DOUBLE);
865             op2Reg          = op2->gtRegNum;
866             instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
867
868             emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
869             emit->emitIns_R(INS_seta, EA_1BYTE, targetReg);
870             emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
871             break;
872         }
873
874         case NI_SSE2_CompareGreaterThanOrEqualOrderedScalar:
875         case NI_SSE2_CompareGreaterThanOrEqualUnorderedScalar:
876         {
877             assert(baseType == TYP_DOUBLE);
878             op2Reg          = op2->gtRegNum;
879             instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
880
881             emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
882             emit->emitIns_R(INS_setae, EA_1BYTE, targetReg);
883             emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
884             break;
885         }
886
887         case NI_SSE2_CompareLessThanOrderedScalar:
888         case NI_SSE2_CompareLessThanUnorderedScalar:
889         {
890             assert(baseType == TYP_DOUBLE);
891             op2Reg          = op2->gtRegNum;
892             instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
893
894             emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op2Reg, op1Reg);
895             emit->emitIns_R(INS_seta, EA_1BYTE, targetReg);
896             emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
897             break;
898         }
899
900         case NI_SSE2_CompareLessThanOrEqualOrderedScalar:
901         case NI_SSE2_CompareLessThanOrEqualUnorderedScalar:
902         {
903             assert(baseType == TYP_DOUBLE);
904             op2Reg          = op2->gtRegNum;
905             instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
906
907             emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op2Reg, op1Reg);
908             emit->emitIns_R(INS_setae, EA_1BYTE, targetReg);
909             emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
910             break;
911         }
912
913         case NI_SSE2_CompareNotEqualOrderedScalar:
914         case NI_SSE2_CompareNotEqualUnorderedScalar:
915         {
916             assert(baseType == TYP_DOUBLE);
917             op2Reg             = op2->gtRegNum;
918             instruction ins    = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
919             regNumber   tmpReg = node->GetSingleTempReg();
920
921             // Ensure we aren't overwriting targetReg
922             assert(tmpReg != targetReg);
923
924             emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
925             emit->emitIns_R(INS_setpe, EA_1BYTE, targetReg);
926             emit->emitIns_R(INS_setne, EA_1BYTE, tmpReg);
927             emit->emitIns_R_R(INS_or, EA_1BYTE, tmpReg, targetReg);
928             emit->emitIns_R(INS_setne, EA_1BYTE, targetReg);
929             emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
930             break;
931         }
932
933         case NI_SSE2_ConvertScalarToVector128Double:
934         case NI_SSE2_ConvertScalarToVector128Single:
935         {
936             assert(baseType == TYP_INT || baseType == TYP_LONG || baseType == TYP_FLOAT || baseType == TYP_DOUBLE);
937             assert(op1 != nullptr);
938             assert(op2 != nullptr);
939             instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
940             genHWIntrinsic_R_R_RM(node, ins);
941             break;
942         }
943
944         case NI_SSE2_ConvertScalarToVector128Int64:
945         case NI_SSE2_ConvertScalarToVector128UInt64:
946         {
947             assert(baseType == TYP_LONG || baseType == TYP_ULONG);
948             assert(op1 != nullptr);
949             assert(op2 == nullptr);
950             instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
951             // TODO-XArch-CQ -> use of type size of TYP_SIMD16 leads to
952             // instruction register encoding errors for SSE legacy encoding
953             emit->emitIns_R_R(ins, emitTypeSize(baseType), targetReg, op1Reg);
954             break;
955         }
956
957         case NI_SSE2_ConvertToDouble:
958         {
959             assert(op2 == nullptr);
960             if (op1Reg != targetReg)
961             {
962                 instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
963                 emit->emitIns_R_R(ins, emitTypeSize(targetType), targetReg, op1Reg);
964             }
965             break;
966         }
967
968         case NI_SSE2_ConvertToInt32:
969         case NI_SSE2_ConvertToInt64:
970         case NI_SSE2_ConvertToUInt32:
971         case NI_SSE2_ConvertToUInt64:
972         {
973             assert(op2 == nullptr);
974             assert(baseType == TYP_DOUBLE || baseType == TYP_FLOAT || baseType == TYP_INT || baseType == TYP_UINT ||
975                    baseType == TYP_LONG || baseType == TYP_ULONG);
976             if (op1Reg != targetReg)
977             {
978                 instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
979                 if (baseType == TYP_DOUBLE || baseType == TYP_FLOAT)
980                 {
981                     emit->emitIns_R_R(ins, emitTypeSize(targetType), targetReg, op1Reg);
982                 }
983                 else
984                 {
985                     // TODO-XArch-Bug https://github.com/dotnet/coreclr/issues/16329
986                     // using hardcoded instruction as workaround for inexact type conversions
987                     emit->emitIns_R_R(INS_mov_xmm2i, emitActualTypeSize(baseType), op1Reg, targetReg);
988                 }
989             }
990             break;
991         }
992
993         case NI_SSE2_LoadFence:
994         {
995             assert(baseType == TYP_VOID);
996             assert(op1 == nullptr);
997             assert(op2 == nullptr);
998             emit->emitIns(INS_lfence);
999             break;
1000         }
1001
1002         case NI_SSE2_MemoryFence:
1003         {
1004             assert(baseType == TYP_VOID);
1005             assert(op1 == nullptr);
1006             assert(op2 == nullptr);
1007             emit->emitIns(INS_mfence);
1008             break;
1009         }
1010
1011         case NI_SSE2_MoveMask:
1012         {
1013             assert(op2 == nullptr);
1014             assert(baseType == TYP_BYTE || baseType == TYP_UBYTE || baseType == TYP_DOUBLE);
1015
1016             instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
1017             emit->emitIns_R_R(ins, emitTypeSize(TYP_INT), targetReg, op1Reg);
1018             break;
1019         }
1020
1021         case NI_SSE2_SetZeroVector128:
1022         {
1023             assert(baseType != TYP_FLOAT);
1024             assert(baseType >= TYP_BYTE && baseType <= TYP_DOUBLE);
1025             assert(op1 == nullptr);
1026             assert(op2 == nullptr);
1027
1028             instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
1029             emit->emitIns_SIMD_R_R_R(ins, emitTypeSize(TYP_SIMD16), targetReg, targetReg, targetReg);
1030             break;
1031         }
1032
1033         default:
1034             unreached();
1035             break;
1036     }
1037
1038     genProduceReg(node);
1039 }
1040
1041 //------------------------------------------------------------------------
1042 // genSSE41Intrinsic: Generates the code for an SSE4.1 hardware intrinsic node
1043 //
1044 // Arguments:
1045 //    node - The hardware intrinsic node
1046 //
1047 void CodeGen::genSSE41Intrinsic(GenTreeHWIntrinsic* node)
1048 {
1049     NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
1050     GenTree*       op1         = node->gtGetOp1();
1051     GenTree*       op2         = node->gtGetOp2();
1052     GenTree*       op3         = nullptr;
1053     GenTree*       op4         = nullptr;
1054     regNumber      targetReg   = node->gtRegNum;
1055     var_types      targetType  = node->TypeGet();
1056     var_types      baseType    = node->gtSIMDBaseType;
1057
1058     regNumber op1Reg = REG_NA;
1059     regNumber op2Reg = REG_NA;
1060     regNumber op3Reg = REG_NA;
1061     regNumber op4Reg = REG_NA;
1062     emitter*  emit   = getEmitter();
1063
1064     if ((op1 != nullptr) && !op1->OperIsList())
1065     {
1066         op1Reg = op1->gtRegNum;
1067         genConsumeOperands(node);
1068     }
1069
1070     switch (intrinsicID)
1071     {
1072         case NI_SSE41_CeilingScalar:
1073         case NI_SSE41_FloorScalar:
1074         case NI_SSE41_RoundCurrentDirectionScalar:
1075         case NI_SSE41_RoundToNearestIntegerScalar:
1076         case NI_SSE41_RoundToNegativeInfinityScalar:
1077         case NI_SSE41_RoundToPositiveInfinityScalar:
1078         case NI_SSE41_RoundToZeroScalar:
1079         {
1080             assert((baseType == TYP_FLOAT) || (baseType == TYP_DOUBLE));
1081             instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
1082
1083             if (op2 == nullptr)
1084             {
1085                 int ival = Compiler::ivalOfHWIntrinsic(intrinsicID);
1086                 emit->emitIns_SIMD_R_R_R_I(ins, emitTypeSize(TYP_SIMD16), targetReg, op1Reg, op1Reg, ival);
1087             }
1088             else
1089             {
1090                 genHWIntrinsic_R_R_RM_I(node, ins);
1091             }
1092             break;
1093         }
1094
1095         case NI_SSE41_TestAllOnes:
1096         {
1097             regNumber tmpReg = node->GetSingleTempReg();
1098             assert(Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType) == INS_ptest);
1099             emit->emitIns_SIMD_R_R_R(INS_pcmpeqd, emitTypeSize(TYP_SIMD16), tmpReg, tmpReg, tmpReg);
1100             emit->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg);
1101             emit->emitIns_R_R(INS_ptest, emitTypeSize(TYP_SIMD16), op1Reg, tmpReg);
1102             emit->emitIns_R(INS_setb, EA_1BYTE, targetReg);
1103             break;
1104         }
1105
1106         case NI_SSE41_TestAllZeros:
1107         case NI_SSE41_TestZ:
1108         {
1109             assert(Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType) == INS_ptest);
1110             emit->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg);
1111             emit->emitIns_R_R(INS_ptest, emitTypeSize(TYP_SIMD16), op1Reg, op2->gtRegNum);
1112             emit->emitIns_R(INS_sete, EA_1BYTE, targetReg);
1113             break;
1114         }
1115
1116         case NI_SSE41_TestC:
1117         {
1118             assert(Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType) == INS_ptest);
1119             emit->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg);
1120             emit->emitIns_R_R(INS_ptest, emitTypeSize(TYP_SIMD16), op1Reg, op2->gtRegNum);
1121             emit->emitIns_R(INS_setb, EA_1BYTE, targetReg);
1122             break;
1123         }
1124
1125         case NI_SSE41_TestMixOnesZeros:
1126         case NI_SSE41_TestNotZAndNotC:
1127         {
1128             assert(Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType) == INS_ptest);
1129             emit->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg);
1130             emit->emitIns_R_R(INS_ptest, emitTypeSize(TYP_SIMD16), op1Reg, op2->gtRegNum);
1131             emit->emitIns_R(INS_seta, EA_1BYTE, targetReg);
1132             break;
1133         }
1134
1135         default:
1136             unreached();
1137             break;
1138     }
1139
1140     genProduceReg(node);
1141 }
1142
1143 //------------------------------------------------------------------------
1144 // genSSE42Intrinsic: Generates the code for an SSE4.2 hardware intrinsic node
1145 //
1146 // Arguments:
1147 //    node - The hardware intrinsic node
1148 //
1149 void CodeGen::genSSE42Intrinsic(GenTreeHWIntrinsic* node)
1150 {
1151     NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
1152     GenTree*       op1         = node->gtGetOp1();
1153     GenTree*       op2         = node->gtGetOp2();
1154     regNumber      targetReg   = node->gtRegNum;
1155     assert(targetReg != REG_NA);
1156     var_types targetType = node->TypeGet();
1157     var_types baseType   = node->gtSIMDBaseType;
1158
1159     regNumber op1Reg = op1->gtRegNum;
1160     regNumber op2Reg = op2->gtRegNum;
1161     genConsumeOperands(node);
1162
1163     switch (intrinsicID)
1164     {
1165         case NI_SSE42_Crc32:
1166             if (op1Reg != targetReg)
1167             {
1168                 inst_RV_RV(INS_mov, targetReg, op1Reg, targetType, emitTypeSize(targetType));
1169             }
1170
1171             if (baseType == TYP_UBYTE || baseType == TYP_USHORT) // baseType is the type of the second argument
1172             {
1173                 assert(targetType == TYP_INT);
1174                 inst_RV_RV(INS_crc32, targetReg, op2Reg, baseType, emitTypeSize(baseType));
1175             }
1176             else
1177             {
1178                 assert(op1->TypeGet() == op2->TypeGet());
1179                 assert(targetType == TYP_INT || targetType == TYP_LONG);
1180                 inst_RV_RV(INS_crc32, targetReg, op2Reg, targetType, emitTypeSize(targetType));
1181             }
1182
1183             break;
1184         default:
1185             unreached();
1186             break;
1187     }
1188     genProduceReg(node);
1189 }
1190
1191 //------------------------------------------------------------------------
1192 // genAVXIntrinsic: Generates the code for an AVX hardware intrinsic node
1193 //
1194 // Arguments:
1195 //    node - The hardware intrinsic node
1196 //
1197 void CodeGen::genAVXIntrinsic(GenTreeHWIntrinsic* node)
1198 {
1199     NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
1200     var_types      baseType    = node->gtSIMDBaseType;
1201     emitAttr       attr        = EA_ATTR(node->gtSIMDSize);
1202     var_types      targetType  = node->TypeGet();
1203     instruction    ins         = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
1204     GenTree*       op1         = node->gtGetOp1();
1205     GenTree*       op2         = node->gtGetOp2();
1206     regNumber      targetReg   = node->gtRegNum;
1207     emitter*       emit        = getEmitter();
1208
1209     genConsumeOperands(node);
1210
1211     switch (intrinsicID)
1212     {
1213         case NI_AVX_SetZeroVector256:
1214         {
1215             assert(op1 == nullptr);
1216             assert(op2 == nullptr);
1217             // SetZeroVector256 will generate pxor with integral base-typ, but pxor is a AVX2 instruction, so we
1218             // generate xorps on AVX machines.
1219             if (!compiler->compSupports(InstructionSet_AVX2) && varTypeIsIntegral(baseType))
1220             {
1221                 emit->emitIns_SIMD_R_R_R(INS_xorps, attr, targetReg, targetReg, targetReg);
1222             }
1223             else
1224             {
1225                 emit->emitIns_SIMD_R_R_R(ins, attr, targetReg, targetReg, targetReg);
1226             }
1227             break;
1228         }
1229         default:
1230             unreached();
1231             break;
1232     }
1233
1234     genProduceReg(node);
1235 }
1236
1237 //------------------------------------------------------------------------
1238 // genAVX2Intrinsic: Generates the code for an AVX2 hardware intrinsic node
1239 //
1240 // Arguments:
1241 //    node - The hardware intrinsic node
1242 //
1243 void CodeGen::genAVX2Intrinsic(GenTreeHWIntrinsic* node)
1244 {
1245     NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
1246     var_types      baseType    = node->gtSIMDBaseType;
1247     instruction    ins         = INS_invalid;
1248
1249     genConsumeOperands(node);
1250
1251     switch (intrinsicID)
1252     {
1253         default:
1254             unreached();
1255             break;
1256     }
1257
1258     genProduceReg(node);
1259 }
1260
1261 //------------------------------------------------------------------------
1262 // genAESIntrinsic: Generates the code for an AES hardware intrinsic node
1263 //
1264 // Arguments:
1265 //    node - The hardware intrinsic node
1266 //
1267 void CodeGen::genAESIntrinsic(GenTreeHWIntrinsic* node)
1268 {
1269     NYI("Implement AES intrinsic code generation");
1270 }
1271
1272 //------------------------------------------------------------------------
1273 // genBMI1Intrinsic: Generates the code for a BMI1 hardware intrinsic node
1274 //
1275 // Arguments:
1276 //    node - The hardware intrinsic node
1277 //
1278 void CodeGen::genBMI1Intrinsic(GenTreeHWIntrinsic* node)
1279 {
1280     NYI("Implement BMI1 intrinsic code generation");
1281 }
1282
1283 //------------------------------------------------------------------------
1284 // genBMI2Intrinsic: Generates the code for a BMI2 hardware intrinsic node
1285 //
1286 // Arguments:
1287 //    node - The hardware intrinsic node
1288 //
1289 void CodeGen::genBMI2Intrinsic(GenTreeHWIntrinsic* node)
1290 {
1291     NYI("Implement BMI2 intrinsic code generation");
1292 }
1293
1294 //------------------------------------------------------------------------
1295 // genFMAIntrinsic: Generates the code for an FMA hardware intrinsic node
1296 //
1297 // Arguments:
1298 //    node - The hardware intrinsic node
1299 //
1300 void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node)
1301 {
1302     NYI("Implement FMA intrinsic code generation");
1303 }
1304
1305 //------------------------------------------------------------------------
1306 // genLZCNTIntrinsic: Generates the code for a LZCNT hardware intrinsic node
1307 //
1308 // Arguments:
1309 //    node - The hardware intrinsic node
1310 //
1311 void CodeGen::genLZCNTIntrinsic(GenTreeHWIntrinsic* node)
1312 {
1313     NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
1314     GenTree*       op1         = node->gtGetOp1();
1315     regNumber      targetReg   = node->gtRegNum;
1316     assert(targetReg != REG_NA);
1317     var_types targetType = node->TypeGet();
1318     regNumber op1Reg     = op1->gtRegNum;
1319     genConsumeOperands(node);
1320
1321     assert(intrinsicID == NI_LZCNT_LeadingZeroCount);
1322
1323     inst_RV_RV(INS_lzcnt, targetReg, op1Reg, targetType, emitTypeSize(targetType));
1324
1325     genProduceReg(node);
1326 }
1327
1328 //------------------------------------------------------------------------
1329 // genPCLMULQDQIntrinsic: Generates the code for a PCLMULQDQ hardware intrinsic node
1330 //
1331 // Arguments:
1332 //    node - The hardware intrinsic node
1333 //
1334 void CodeGen::genPCLMULQDQIntrinsic(GenTreeHWIntrinsic* node)
1335 {
1336     NYI("Implement PCLMULQDQ intrinsic code generation");
1337 }
1338
1339 //------------------------------------------------------------------------
1340 // genPOPCNTIntrinsic: Generates the code for a POPCNT hardware intrinsic node
1341 //
1342 // Arguments:
1343 //    node - The hardware intrinsic node
1344 //
1345 void CodeGen::genPOPCNTIntrinsic(GenTreeHWIntrinsic* node)
1346 {
1347     NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
1348     GenTree*       op1         = node->gtGetOp1();
1349     regNumber      targetReg   = node->gtRegNum;
1350     assert(targetReg != REG_NA);
1351     var_types targetType = node->TypeGet();
1352     regNumber op1Reg     = op1->gtRegNum;
1353     genConsumeOperands(node);
1354
1355     assert(intrinsicID == NI_POPCNT_PopCount);
1356
1357     inst_RV_RV(INS_popcnt, targetReg, op1Reg, targetType, emitTypeSize(targetType));
1358
1359     genProduceReg(node);
1360 }
1361
1362 #endif // FEATURE_HW_INTRINSICS