1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
8 XX Intel hardware intrinsic Code Generator XX
10 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
11 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
18 #ifdef FEATURE_HW_INTRINSICS
22 #include "sideeffects.h"
25 #include "gcinfoencoder.h"
27 //------------------------------------------------------------------------
28 // genIsTableDrivenHWIntrinsic: Tells whether an intrinsic can be emitted by the generic
//    table-driven path instead of hand-written codegen.
//
// Arguments:
31 //    category - category of a HW intrinsic
//    flags    - flags of the same HW intrinsic
//
// Return Value:
34 //    returns true if this category can be table-driven in CodeGen, i.e. the category is not
//    Special, Scalar or Helper and neither HW_Flag_MultiIns nor HW_Flag_SpecialCodeGen is set
36 static bool genIsTableDrivenHWIntrinsic(HWIntrinsicCategory category, HWIntrinsicFlag flags)
38 // TODO - move more categories over to the table-driven framework
39 // HW_Category_Helper and HW_Flag_MultiIns/HW_Flag_SpecialCodeGen usually need manual codegen
40 const bool tableDrivenCategory =
41 category != HW_Category_Special && category != HW_Category_Scalar && category != HW_Category_Helper;
42 const bool tableDrivenFlag = (flags & (HW_Flag_MultiIns | HW_Flag_SpecialCodeGen)) == 0;
43 return tableDrivenCategory && tableDrivenFlag;
46 //------------------------------------------------------------------------
47 // genHWIntrinsic: Generates the code for a given hardware intrinsic node.
//
// Intrinsics accepted by genIsTableDrivenHWIntrinsic are emitted directly from the instruction
// returned by Compiler::insOfHWIntrinsic; the InstructionSet_* cases at the end forward the node
// to the matching gen<ISA>Intrinsic routine.
//
// Arguments:
50 //    node - The hardware intrinsic node
52 void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
54 NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
55 InstructionSet isa = Compiler::isaOfHWIntrinsic(intrinsicID);
56 HWIntrinsicCategory category = Compiler::categoryOfHWIntrinsic(intrinsicID);
57 HWIntrinsicFlag flags = Compiler::flagsOfHWIntrinsic(intrinsicID);
58 int ival = Compiler::ivalOfHWIntrinsic(intrinsicID);
59 int numArgs = Compiler::numArgsOfHWIntrinsic(node);
// Intrinsics flagged HW_Flag_NoCodeGen are not expected to reach this function.
61 assert((flags & HW_Flag_NoCodeGen) == 0);
63 if (genIsTableDrivenHWIntrinsic(category, flags))
65 GenTree* op1 = node->gtGetOp1();
66 GenTree* op2 = node->gtGetOp2();
67 regNumber targetReg = node->gtRegNum;
68 var_types targetType = node->TypeGet();
69 var_types baseType = node->gtSIMDBaseType;
71 regNumber op1Reg = REG_NA;
72 regNumber op2Reg = REG_NA;
73 emitter* emit = getEmitter();
// The instruction is looked up from the intrinsic ID and the SIMD base type.
76 instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
77 assert(ins != INS_invalid);
78 emitAttr simdSize = EA_ATTR(node->gtSIMDSize);
79 assert(simdSize != 0);
// Forms that read only op1.
85 genConsumeOperands(node);
86 op1Reg = op1->gtRegNum;
87 if (category == HW_Category_MemoryLoad)
// NOTE(review): presumably op1Reg holds the address and the trailing 0 is the displacement - confirm.
89 emit->emitIns_R_AR(ins, simdSize, targetReg, op1Reg, 0);
91 else if (category == HW_Category_SIMDScalar && (flags & HW_Flag_CopyUpperBits) != 0)
// op1Reg is passed as both source operands.
93 emit->emitIns_SIMD_R_R_R(ins, simdSize, targetReg, op1Reg, op1Reg);
95 else if ((ival != -1) && varTypeIsFloating(baseType))
// ival (read from the intrinsic table above) is forwarded as the trailing immediate argument.
97 emit->emitIns_R_R_I(ins, simdSize, targetReg, op1Reg, ival);
101 emit->emitIns_R_R(ins, simdSize, targetReg, op1Reg);
// Forms that read op1 and op2.
108 genConsumeOperands(node);
110 op1Reg = op1->gtRegNum;
111 op2Reg = op2->gtRegNum;
113 if ((op1Reg != targetReg) && (op2Reg == targetReg) && node->isRMWHWIntrinsic(compiler))
115 // We have "reg2 = reg1 op reg2" where "reg1 != reg2" on a RMW intrinsic.
117 // For non-commutative intrinsics, we should have ensured that op2 was marked
118 // delay free in order to prevent it from getting assigned the same register
119 // as target. However, for commutative intrinsics, we can just swap the operands
120 // in order to have "reg2 = reg2 op reg1" which will end up producing the right code.
122 noway_assert(node->OperIsCommutative());
127 if (category == HW_Category_MemoryStore)
// NOTE(review): presumably op1Reg is the destination address and op2Reg the value stored - confirm.
129 emit->emitIns_AR_R(ins, simdSize, op2Reg, op1Reg, 0);
131 else if ((ival != -1) && varTypeIsFloating(baseType))
133 genHWIntrinsic_R_R_RM_I(node, ins);
135 else if (category == HW_Category_MemoryLoad)
137 emit->emitIns_SIMD_R_R_AR(ins, simdSize, targetReg, op1Reg, op2Reg);
139 else if (Compiler::isImmHWIntrinsic(intrinsicID, op2))
141 if (intrinsicID == NI_SSE2_Extract)
143 // extract instructions return to GP-registers, so it needs int size as the emitsize
144 simdSize = emitTypeSize(TYP_INT);
// Emits the instruction for one concrete immediate value; it is called directly for a constant
// op2 and handed to genHWIntrinsicJumpTableFallback for a non-constant op2.
146 auto emitSwCase = [&](unsigned i) {
147 emit->emitIns_SIMD_R_R_I(ins, simdSize, targetReg, op1Reg, (int)i);
150 if (op2->IsCnsIntOrI())
// This local ival hides the table-provided ival declared at the top of the function.
152 ssize_t ival = op2->AsIntCon()->IconValue();
153 emitSwCase((unsigned)ival);
157 // We emit a fallback case for the scenario when the imm-op is not a constant. This should
158 // normally happen when the intrinsic is called indirectly, such as via Reflection. However, it
159 // can also occur if the consumer calls it directly and just doesn't pass a constant value.
// Two temporary registers are taken from the node: one via ExtractTempReg, the other via GetSingleTempReg.
160 regNumber baseReg = node->ExtractTempReg();
161 regNumber offsReg = node->GetSingleTempReg();
162 genHWIntrinsicJumpTableFallback(intrinsicID, op2Reg, baseReg, offsReg, emitSwCase);
167 genHWIntrinsic_R_R_RM(node, ins);
// Forms with three operands: op1 is a chain of three list nodes that is unpacked into op1/op2/op3.
174 assert(op1->OperIsList());
175 assert(op1->gtGetOp2()->OperIsList());
176 assert(op1->gtGetOp2()->gtGetOp2()->OperIsList());
178 GenTreeArgList* argList = op1->AsArgList();
179 op1 = argList->Current();
181 op1Reg = op1->gtRegNum;
183 argList = argList->Rest();
184 op2 = argList->Current();
186 op2Reg = op2->gtRegNum;
188 argList = argList->Rest();
189 GenTree* op3 = argList->Current();
191 regNumber op3Reg = op3->gtRegNum;
193 if (Compiler::isImmHWIntrinsic(intrinsicID, op3))
// Emits the instruction for one concrete immediate value (constant op3 or one jump-table case).
195 auto emitSwCase = [&](unsigned i) {
196 emit->emitIns_SIMD_R_R_R_I(ins, simdSize, targetReg, op1Reg, op2Reg, (int)i);
198 if (op3->IsCnsIntOrI())
// This local ival hides the table-provided ival declared at the top of the function.
200 ssize_t ival = op3->AsIntCon()->IconValue();
201 emitSwCase((unsigned)ival);
205 // We emit a fallback case for the scenario when the imm-op is not a constant. This should
206 // normally happen when the intrinsic is called indirectly, such as via Reflection. However, it
207 // can also occur if the consumer calls it directly and just doesn't pass a constant value.
208 regNumber baseReg = node->ExtractTempReg();
209 regNumber offsReg = node->GetSingleTempReg();
210 genHWIntrinsicJumpTableFallback(intrinsicID, op3Reg, baseReg, offsReg, emitSwCase);
213 else if (category == HW_Category_MemoryStore)
// NI_SSE2_MaskMove is the only three-operand memory store expected here, and it produces no register result.
215 assert(intrinsicID == NI_SSE2_MaskMove);
216 assert(targetReg == REG_NA);
218 // SSE2 MaskMove hardcodes the destination (op3) in DI/EDI/RDI
219 if (op3Reg != REG_EDI)
221 emit->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_EDI, op3Reg);
223 emit->emitIns_R_R(ins, simdSize, op1Reg, op2Reg);
227 emit->emitIns_SIMD_R_R_R_R(ins, simdSize, targetReg, op1Reg, op2Reg, op3Reg);
// Each InstructionSet_* case below forwards the node to the generator for that instruction set.
242 case InstructionSet_SSE:
243 genSSEIntrinsic(node);
245 case InstructionSet_SSE2:
246 genSSE2Intrinsic(node);
248 case InstructionSet_SSE41:
249 genSSE41Intrinsic(node);
251 case InstructionSet_SSE42:
252 genSSE42Intrinsic(node);
254 case InstructionSet_AVX:
255 genAVXIntrinsic(node);
257 case InstructionSet_AVX2:
258 genAVX2Intrinsic(node);
260 case InstructionSet_AES:
261 genAESIntrinsic(node);
263 case InstructionSet_BMI1:
264 genBMI1Intrinsic(node);
266 case InstructionSet_BMI2:
267 genBMI2Intrinsic(node);
269 case InstructionSet_FMA:
270 genFMAIntrinsic(node);
272 case InstructionSet_LZCNT:
273 genLZCNTIntrinsic(node);
275 case InstructionSet_PCLMULQDQ:
276 genPCLMULQDQIntrinsic(node);
278 case InstructionSet_POPCNT:
279 genPOPCNTIntrinsic(node);
287 //------------------------------------------------------------------------
288 // genHWIntrinsic_R_R_RM: Generates the code for a hardware intrinsic node that takes a register operand, a
289 //                        register/memory operand, and that returns a value in register
//
// op1 is always read from its register. op2 is read from memory when it is contained or used
// from a spill temp, and from its register otherwise.
//
// Arguments:
292 //    node - The hardware intrinsic node
293 //    ins  - The instruction being generated
295 void CodeGen::genHWIntrinsic_R_R_RM(GenTreeHWIntrinsic* node, instruction ins)
297 var_types targetType = node->TypeGet();
298 regNumber targetReg = node->gtRegNum;
299 GenTree* op1 = node->gtGetOp1();
300 GenTree* op2 = node->gtGetOp2();
301 emitAttr simdSize = EA_ATTR(node->gtSIMDSize);
302 emitter* emit = getEmitter();
304 // TODO-XArch-CQ: Commutative operations can have op1 be contained
305 // TODO-XArch-CQ: Non-VEX encoded instructions can have both ops contained
307 regNumber op1Reg = op1->gtRegNum;
309 assert(targetReg != REG_NA);
310 assert(op1Reg != REG_NA);
// Memory-operand forms.
312 if (op2->isContained() || op2->isUsedFromSpillTemp())
// Containment must be permitted for this intrinsic, and op2 must be containable or reg-optional.
314 assert((Compiler::flagsOfHWIntrinsic(node->gtHWIntrinsicId) & HW_Flag_NoContainment) == 0);
315 assert(compiler->m_pLowering->IsContainableHWIntrinsicOp(node, op2) || op2->IsRegOptional());
// varNum/offset start out as "invalid" sentinels; the stack-based paths below fill them in.
317 TempDsc* tmpDsc = nullptr;
318 unsigned varNum = BAD_VAR_NUM;
319 unsigned offset = (unsigned)-1;
// op2 lives in a spill temp: address it through the temp's number.
321 if (op2->isUsedFromSpillTemp())
323 assert(op2->IsRegOptional());
325 tmpDsc = getSpillTempDsc(op2);
326 varNum = tmpDsc->tdTempNum();
// tmpRlsTemp is called on the temp descriptor as soon as its number has been captured.
329 compiler->tmpRlsTemp(tmpDsc);
331 else if (op2->OperIsHWIntrinsic())
// NOTE(review): presumably op2 is a contained load intrinsic whose address is in its op1 register - confirm.
333 emit->emitIns_SIMD_R_R_AR(ins, simdSize, targetReg, op1Reg, op2->gtGetOp1()->gtRegNum);
// op2 is an indirection: the emitted form depends on the kind of its address operand.
336 else if (op2->isIndir())
338 GenTreeIndir* memIndir = op2->AsIndir();
339 GenTree* memBase = memIndir->gtOp1;
341 switch (memBase->OperGet())
343 case GT_LCL_VAR_ADDR:
345 varNum = memBase->AsLclVarCommon()->GetLclNum();
348 // Ensure that all the GenTreeIndir values are set to their defaults.
349 assert(!memIndir->HasIndex());
350 assert(memIndir->Scale() == 1);
351 assert(memIndir->Offset() == 0);
356 case GT_CLS_VAR_ADDR:
358 emit->emitIns_SIMD_R_R_C(ins, simdSize, targetReg, op1Reg, memBase->gtClsVar.gtClsVarHnd, 0);
364 emit->emitIns_SIMD_R_R_A(ins, simdSize, targetReg, op1Reg, memIndir);
// op2 is a local: take the local number (and, for a local field, its offset) from the node.
371 switch (op2->OperGet())
375 GenTreeLclFld* lclField = op2->AsLclFld();
377 varNum = lclField->GetLclNum();
378 offset = lclField->gtLclFld.gtLclOffs;
384 assert(op2->IsRegOptional() || !compiler->lvaTable[op2->gtLclVar.gtLclNum].lvIsRegCandidate());
385 varNum = op2->AsLclVar()->GetLclNum();
396 // Ensure we got a good varNum and offset.
397 // We also need to check for `tmpDsc != nullptr` since spill temp numbers
398 // are negative and start with -1, which also happens to be BAD_VAR_NUM.
399 assert((varNum != BAD_VAR_NUM) || (tmpDsc != nullptr));
400 assert(offset != (unsigned)-1);
402 emit->emitIns_SIMD_R_R_S(ins, simdSize, targetReg, op1Reg, varNum, offset);
// Register-operand form.
406 regNumber op2Reg = op2->gtRegNum;
408 if ((op1Reg != targetReg) && (op2Reg == targetReg) && node->isRMWHWIntrinsic(compiler))
410 // We have "reg2 = reg1 op reg2" where "reg1 != reg2" on a RMW intrinsic.
412 // For non-commutative intrinsics, we should have ensured that op2 was marked
413 // delay free in order to prevent it from getting assigned the same register
414 // as target. However, for commutative intrinsics, we can just swap the operands
415 // in order to have "reg2 = reg2 op reg1" which will end up producing the right code.
417 noway_assert(node->OperIsCommutative());
422 emit->emitIns_SIMD_R_R_R(ins, simdSize, targetReg, op1Reg, op2Reg);
426 //------------------------------------------------------------------------
427 // genHWIntrinsic_R_R_RM_I: Generates the code for a hardware intrinsic node that takes a register operand, a
428 //                          register/memory operand, an immediate operand, and that returns a value in register
//
// The immediate is not an operand of the node: it is read from the intrinsic table through
// Compiler::ivalOfHWIntrinsic and appended to every emitted form.
//
// Arguments:
431 //    node - The hardware intrinsic node
432 //    ins  - The instruction being generated
434 void CodeGen::genHWIntrinsic_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins)
436 var_types targetType = node->TypeGet();
437 regNumber targetReg = node->gtRegNum;
438 GenTree* op1 = node->gtGetOp1();
439 GenTree* op2 = node->gtGetOp2();
440 emitAttr simdSize = EA_ATTR(node->gtSIMDSize);
441 int ival = Compiler::ivalOfHWIntrinsic(node->gtHWIntrinsicId);
442 emitter* emit = getEmitter();
444 // TODO-XArch-CQ: Commutative operations can have op1 be contained
445 // TODO-XArch-CQ: Non-VEX encoded instructions can have both ops contained
447 regNumber op1Reg = op1->gtRegNum;
449 assert(targetReg != REG_NA);
450 assert(op1Reg != REG_NA);
// Memory-operand forms.
452 if (op2->isContained() || op2->isUsedFromSpillTemp())
// Containment must be permitted for this intrinsic, and op2 must be containable or reg-optional.
454 assert((Compiler::flagsOfHWIntrinsic(node->gtHWIntrinsicId) & HW_Flag_NoContainment) == 0);
455 assert(compiler->m_pLowering->IsContainableHWIntrinsicOp(node, op2) || op2->IsRegOptional());
// varNum/offset start out as "invalid" sentinels; the stack-based paths below fill them in.
457 TempDsc* tmpDsc = nullptr;
458 unsigned varNum = BAD_VAR_NUM;
459 unsigned offset = (unsigned)-1;
// op2 lives in a spill temp: address it through the temp's number.
461 if (op2->isUsedFromSpillTemp())
463 assert(op2->IsRegOptional());
465 tmpDsc = getSpillTempDsc(op2);
466 varNum = tmpDsc->tdTempNum();
// tmpRlsTemp is called on the temp descriptor as soon as its number has been captured.
469 compiler->tmpRlsTemp(tmpDsc);
471 else if (op2->OperIsHWIntrinsic())
// NOTE(review): presumably op2 is a contained load intrinsic whose address is in its op1 register - confirm.
473 emit->emitIns_SIMD_R_R_AR_I(ins, simdSize, targetReg, op1Reg, op2->gtGetOp1()->gtRegNum, ival);
// op2 is an indirection: the emitted form depends on the kind of its address operand.
476 else if (op2->isIndir())
478 GenTreeIndir* memIndir = op2->AsIndir();
479 GenTree* memBase = memIndir->gtOp1;
481 switch (memBase->OperGet())
483 case GT_LCL_VAR_ADDR:
485 varNum = memBase->AsLclVarCommon()->GetLclNum();
488 // Ensure that all the GenTreeIndir values are set to their defaults.
489 assert(!memIndir->HasIndex());
490 assert(memIndir->Scale() == 1);
491 assert(memIndir->Offset() == 0);
496 case GT_CLS_VAR_ADDR:
498 emit->emitIns_SIMD_R_R_C_I(ins, simdSize, targetReg, op1Reg, memBase->gtClsVar.gtClsVarHnd, 0,
505 emit->emitIns_SIMD_R_R_A_I(ins, simdSize, targetReg, op1Reg, memIndir, ival);
// op2 is a local: take the local number (and, for a local field, its offset) from the node.
512 switch (op2->OperGet())
516 GenTreeLclFld* lclField = op2->AsLclFld();
518 varNum = lclField->GetLclNum();
519 offset = lclField->gtLclFld.gtLclOffs;
525 assert(op2->IsRegOptional() || !compiler->lvaTable[op2->gtLclVar.gtLclNum].lvIsRegCandidate());
526 varNum = op2->AsLclVar()->GetLclNum();
537 // Ensure we got a good varNum and offset.
538 // We also need to check for `tmpDsc != nullptr` since spill temp numbers
539 // are negative and start with -1, which also happens to be BAD_VAR_NUM.
540 assert((varNum != BAD_VAR_NUM) || (tmpDsc != nullptr));
541 assert(offset != (unsigned)-1);
543 emit->emitIns_SIMD_R_R_S_I(ins, simdSize, targetReg, op1Reg, varNum, offset, ival);
// Register-operand form.
547 regNumber op2Reg = op2->gtRegNum;
549 if ((op1Reg != targetReg) && (op2Reg == targetReg) && node->isRMWHWIntrinsic(compiler))
551 // We have "reg2 = reg1 op reg2" where "reg1 != reg2" on a RMW intrinsic.
553 // For non-commutative intrinsics, we should have ensured that op2 was marked
554 // delay free in order to prevent it from getting assigned the same register
555 // as target. However, for commutative intrinsics, we can just swap the operands
556 // in order to have "reg2 = reg2 op reg1" which will end up producing the right code.
558 noway_assert(node->OperIsCommutative());
563 emit->emitIns_SIMD_R_R_R_I(ins, simdSize, targetReg, op1Reg, op2Reg, ival);
567 // genHWIntrinsicJumpTableFallback : generate the jump-table fallback for imm-intrinsics
568 //                                   with non-constant argument
//
// Arguments:
571 //    intrinsic      - intrinsic ID
572 //    nonConstImmReg - the register that contains the non-constant imm8 argument
573 //    baseReg        - a register for the start of the switch table
574 //    offsReg        - a register for the offset into the switch table
575 //    emitSwCase     - the lambda that generates one switch case
//
// Notes:
578 //    Generates the jump-table fallback for imm-intrinsics with a non-constant argument.
580 //    This function can be used for all imm-intrinsics (whether full-range or not).
581 //    The compiler front-end (i.e. importer) is responsible for inserting a range-check IR
582 //    (GT_HW_INTRINSIC_CHK) for the imm8 argument, so this function does not need to do a range-check.
584 template <typename HWIntrinsicSwitchCaseBody>
585 void CodeGen::genHWIntrinsicJumpTableFallback(NamedIntrinsic intrinsic,
586 regNumber nonConstImmReg,
589 HWIntrinsicSwitchCaseBody emitSwCase)
591 assert(nonConstImmReg != REG_NA);
592 emitter* emit = getEmitter();
// Number of table entries: one per immediate value from 0 up to the intrinsic's upper bound.
// The assert keeps it within the fixed-size jmpTable array declared below.
594 const unsigned maxByte = (unsigned)Compiler::immUpperBoundOfHWIntrinsic(intrinsic) + 1;
595 assert(maxByte <= 256);
596 BasicBlock* jmpTable[256];
598 unsigned jmpTableBase = emit->emitBBTableDataGenBeg(maxByte, true);
599 unsigned jmpTableOffs = 0;
601 // Emit the jump table
// One temp label is created per immediate value and recorded as data entry i.
602 for (unsigned i = 0; i < maxByte; i++)
604 jmpTable[i] = genCreateTempLabel();
605 emit->emitDataGenData(i, jmpTable[i]);
608 emit->emitDataGenEnd();
610 // Compute and jump to the appropriate offset in the switch table
// lea offsReg <- address of the jump table data.
611 emit->emitIns_R_C(INS_lea, emitTypeSize(TYP_I_IMPL), offsReg, compiler->eeFindJitDataOffs(jmpTableBase), 0);
// NOTE(review): presumably base=offsReg, index=nonConstImmReg, scale=4, disp=0, i.e. this loads the
// 4-byte table entry selected by the immediate - confirm against emitIns_R_ARX.
613 emit->emitIns_R_ARX(INS_mov, EA_4BYTE, offsReg, offsReg, nonConstImmReg, 4, 0);
// NOTE(review): the loaded entry is added to the address of fgFirstBB, so the entries are presumably
// offsets relative to the first block (the 'true' passed to emitBBTableDataGenBeg) - confirm.
614 emit->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, compiler->fgFirstBB, baseReg);
615 emit->emitIns_R_R(INS_add, EA_PTRSIZE, offsReg, baseReg);
616 emit->emitIns_R(INS_i_jmp, emitTypeSize(TYP_I_IMPL), offsReg);
618 // Emit the switch table entries
620 BasicBlock* switchTableBeg = genCreateTempLabel();
621 BasicBlock* switchTableEnd = genCreateTempLabel();
623 genDefineTempLabel(switchTableBeg);
// Each case starts at its own label and ends with a jump to the common end label.
625 for (unsigned i = 0; i < maxByte; i++)
627 genDefineTempLabel(jmpTable[i]);
629 emit->emitIns_J(INS_jmp, switchTableEnd);
632 genDefineTempLabel(switchTableEnd);
635 //------------------------------------------------------------------------
636 // genSSEIntrinsic: Generates the code for an SSE hardware intrinsic node
//
// Handles the SSE intrinsics that need hand-written sequences: scalar compares that produce a
// 0/1 result, register moves, MoveMask, prefetches, vector initialization and the store fence.
//
// Arguments:
639 //    node - The hardware intrinsic node
641 void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node)
643 NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
644 GenTree* op1 = node->gtGetOp1();
645 GenTree* op2 = node->gtGetOp2();
646 GenTree* op3 = nullptr;
647 GenTree* op4 = nullptr;
648 regNumber targetReg = node->gtRegNum;
649 var_types targetType = node->TypeGet();
650 var_types baseType = node->gtSIMDBaseType;
652 regNumber op1Reg = REG_NA;
653 regNumber op2Reg = REG_NA;
654 regNumber op3Reg = REG_NA;
655 regNumber op4Reg = REG_NA;
656 emitter* emit = getEmitter();
// Operands are consumed here only when op1 exists and is not an argument list.
658 if ((op1 != nullptr) && !op1->OperIsList())
660 op1Reg = op1->gtRegNum;
661 genConsumeOperands(node);
// Equal: targetReg = (parity flag clear) AND (zero flag set), zero-extended from the low byte.
666 case NI_SSE_CompareEqualOrderedScalar:
667 case NI_SSE_CompareEqualUnorderedScalar:
669 assert(baseType == TYP_FLOAT);
670 op2Reg = op2->gtRegNum;
671 regNumber tmpReg = node->GetSingleTempReg();
672 instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
674 // Ensure we aren't overwriting targetReg
675 assert(tmpReg != targetReg);
677 emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
678 emit->emitIns_R(INS_setpo, EA_1BYTE, targetReg);
679 emit->emitIns_R(INS_sete, EA_1BYTE, tmpReg);
680 emit->emitIns_R_R(INS_and, EA_1BYTE, tmpReg, targetReg);
681 emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, tmpReg);
// GreaterThan: compare (op1, op2), then seta.
685 case NI_SSE_CompareGreaterThanOrderedScalar:
686 case NI_SSE_CompareGreaterThanUnorderedScalar:
688 assert(baseType == TYP_FLOAT);
689 op2Reg = op2->gtRegNum;
691 instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
692 emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
693 emit->emitIns_R(INS_seta, EA_1BYTE, targetReg);
694 emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
// GreaterThanOrEqual: compare (op1, op2), then setae.
698 case NI_SSE_CompareGreaterThanOrEqualOrderedScalar:
699 case NI_SSE_CompareGreaterThanOrEqualUnorderedScalar:
701 assert(baseType == TYP_FLOAT);
702 op2Reg = op2->gtRegNum;
704 instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
705 emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
706 emit->emitIns_R(INS_setae, EA_1BYTE, targetReg);
707 emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
// LessThan: the operands are swapped (op2, op1) so that "op1 < op2" is tested as "op2 > op1" with seta.
711 case NI_SSE_CompareLessThanOrderedScalar:
712 case NI_SSE_CompareLessThanUnorderedScalar:
714 assert(baseType == TYP_FLOAT);
715 op2Reg = op2->gtRegNum;
717 instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
718 emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op2Reg, op1Reg);
719 emit->emitIns_R(INS_seta, EA_1BYTE, targetReg);
720 emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
// LessThanOrEqual: the operands are swapped (op2, op1) so that "op1 <= op2" is tested as "op2 >= op1" with setae.
724 case NI_SSE_CompareLessThanOrEqualOrderedScalar:
725 case NI_SSE_CompareLessThanOrEqualUnorderedScalar:
727 assert(baseType == TYP_FLOAT);
728 op2Reg = op2->gtRegNum;
730 instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
731 emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op2Reg, op1Reg);
732 emit->emitIns_R(INS_setae, EA_1BYTE, targetReg);
733 emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
// NotEqual: targetReg = (parity flag set) OR (zero flag clear), zero-extended from the low byte.
737 case NI_SSE_CompareNotEqualOrderedScalar:
738 case NI_SSE_CompareNotEqualUnorderedScalar:
740 assert(baseType == TYP_FLOAT);
741 op2Reg = op2->gtRegNum;
742 regNumber tmpReg = node->GetSingleTempReg();
743 instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
745 // Ensure we aren't overwriting targetReg
746 assert(tmpReg != targetReg);
748 emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
749 emit->emitIns_R(INS_setpe, EA_1BYTE, targetReg);
750 emit->emitIns_R(INS_setne, EA_1BYTE, tmpReg);
751 emit->emitIns_R_R(INS_or, EA_1BYTE, tmpReg, targetReg);
752 emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, tmpReg);
// Register-to-register move; nothing is emitted when op1 is already in targetReg.
756 case NI_SSE_ConvertToSingle:
757 case NI_SSE_StaticCast:
759 assert(op2 == nullptr);
760 if (op1Reg != targetReg)
762 instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
763 emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), targetReg, op1Reg);
// MoveMask is emitted with int emit size rather than SIMD size.
768 case NI_SSE_MoveMask:
770 assert(baseType == TYP_FLOAT);
771 assert(op2 == nullptr);
773 instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
774 emit->emitIns_R_R(ins, emitTypeSize(TYP_INT), targetReg, op1Reg);
// Prefetches take a single address-register operand (op1Reg) with a trailing 0.
778 case NI_SSE_Prefetch0:
779 case NI_SSE_Prefetch1:
780 case NI_SSE_Prefetch2:
781 case NI_SSE_PrefetchNonTemporal:
783 assert(baseType == TYP_UBYTE);
784 assert(op2 == nullptr);
786 instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
787 emit->emitIns_AR(ins, emitTypeSize(baseType), op1Reg, 0);
// SetScalarVector128: zero targetReg with xorps, then movss the scalar into it.
791 case NI_SSE_SetScalarVector128:
793 assert(baseType == TYP_FLOAT);
794 assert(op2 == nullptr);
// Zeroing targetReg would destroy op1 when they share a register, so op1 is copied to a temp first.
796 if (op1Reg == targetReg)
798 regNumber tmpReg = node->GetSingleTempReg();
800 // Ensure we aren't overwriting targetReg
801 assert(tmpReg != targetReg);
803 emit->emitIns_R_R(INS_movaps, emitTypeSize(TYP_SIMD16), tmpReg, op1Reg);
807 emit->emitIns_SIMD_R_R_R(INS_xorps, emitTypeSize(TYP_SIMD16), targetReg, targetReg, targetReg);
808 emit->emitIns_SIMD_R_R_R(INS_movss, emitTypeSize(TYP_SIMD16), targetReg, targetReg, op1Reg);
// SetZeroVector128: xorps of targetReg with itself.
812 case NI_SSE_SetZeroVector128:
814 assert(baseType == TYP_FLOAT);
815 assert(op1 == nullptr);
816 assert(op2 == nullptr);
817 emit->emitIns_SIMD_R_R_R(INS_xorps, emitTypeSize(TYP_SIMD16), targetReg, targetReg, targetReg);
// StoreFence takes no operands and emits a bare sfence.
821 case NI_SSE_StoreFence:
823 assert(baseType == TYP_VOID);
824 assert(op1 == nullptr);
825 assert(op2 == nullptr);
826 emit->emitIns(INS_sfence);
838 //------------------------------------------------------------------------
839 // genSSE2Intrinsic: Generates the code for an SSE2 hardware intrinsic node
//
// Handles the SSE2 intrinsics that need hand-written sequences: the double-precision compares,
// scalar conversions and moves, MoveMask, vector initialization, fences and the non-temporal
// store. genProduceReg is called for the node once the instructions have been emitted.
//
// Arguments:
842 //    node - The hardware intrinsic node
844 void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node)
846 NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
847 GenTree* op1 = node->gtGetOp1();
848 GenTree* op2 = node->gtGetOp2();
849 regNumber targetReg = node->gtRegNum;
850 var_types targetType = node->TypeGet();
851 var_types baseType = node->gtSIMDBaseType;
852 regNumber op1Reg = REG_NA;
853 regNumber op2Reg = REG_NA;
854 emitter* emit = getEmitter();
// Operands are consumed here only when op1 exists and is not an argument list.
857 if ((op1 != nullptr) && !op1->OperIsList())
859 op1Reg = op1->gtRegNum;
860 genConsumeOperands(node);
865 // All integer overloads are handled by table codegen
// Only the TYP_DOUBLE overload is expected here; it is emitted with the table-provided immediate.
866 case NI_SSE2_CompareLessThan:
868 assert(op1 != nullptr);
869 assert(op2 != nullptr);
870 assert(baseType == TYP_DOUBLE);
871 instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
872 op2Reg = op2->gtRegNum;
873 ival = Compiler::ivalOfHWIntrinsic(intrinsicID);
875 emit->emitIns_SIMD_R_R_R_I(ins, emitTypeSize(TYP_SIMD16), targetReg, op1Reg, op2Reg, ival);
// Equal: targetReg = (parity flag clear) AND (zero flag set), zero-extended from the low byte.
880 case NI_SSE2_CompareEqualOrderedScalar:
881 case NI_SSE2_CompareEqualUnorderedScalar:
883 assert(baseType == TYP_DOUBLE);
884 op2Reg = op2->gtRegNum;
885 regNumber tmpReg = node->GetSingleTempReg();
886 instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
888 // Ensure we aren't overwriting targetReg
889 assert(tmpReg != targetReg);
891 emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
892 emit->emitIns_R(INS_setpo, EA_1BYTE, targetReg);
893 emit->emitIns_R(INS_sete, EA_1BYTE, tmpReg);
894 emit->emitIns_R_R(INS_and, EA_1BYTE, tmpReg, targetReg);
895 emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, tmpReg);
// GreaterThan: compare (op1, op2), then seta.
899 case NI_SSE2_CompareGreaterThanOrderedScalar:
900 case NI_SSE2_CompareGreaterThanUnorderedScalar:
902 assert(baseType == TYP_DOUBLE);
903 op2Reg = op2->gtRegNum;
904 instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
906 emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
907 emit->emitIns_R(INS_seta, EA_1BYTE, targetReg);
908 emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
// GreaterThanOrEqual: compare (op1, op2), then setae.
912 case NI_SSE2_CompareGreaterThanOrEqualOrderedScalar:
913 case NI_SSE2_CompareGreaterThanOrEqualUnorderedScalar:
915 assert(baseType == TYP_DOUBLE);
916 op2Reg = op2->gtRegNum;
917 instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
919 emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
920 emit->emitIns_R(INS_setae, EA_1BYTE, targetReg);
921 emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
// LessThan: the operands are swapped (op2, op1) so that "op1 < op2" is tested as "op2 > op1" with seta.
925 case NI_SSE2_CompareLessThanOrderedScalar:
926 case NI_SSE2_CompareLessThanUnorderedScalar:
928 assert(baseType == TYP_DOUBLE);
929 op2Reg = op2->gtRegNum;
930 instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
932 emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op2Reg, op1Reg);
933 emit->emitIns_R(INS_seta, EA_1BYTE, targetReg);
934 emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
// LessThanOrEqual: the operands are swapped (op2, op1) so that "op1 <= op2" is tested as "op2 >= op1" with setae.
938 case NI_SSE2_CompareLessThanOrEqualOrderedScalar:
939 case NI_SSE2_CompareLessThanOrEqualUnorderedScalar:
941 assert(baseType == TYP_DOUBLE);
942 op2Reg = op2->gtRegNum;
943 instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
945 emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op2Reg, op1Reg);
946 emit->emitIns_R(INS_setae, EA_1BYTE, targetReg);
947 emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg);
// NotEqual: targetReg = (parity flag set) OR (zero flag clear), zero-extended from the low byte.
951 case NI_SSE2_CompareNotEqualOrderedScalar:
952 case NI_SSE2_CompareNotEqualUnorderedScalar:
954 assert(baseType == TYP_DOUBLE);
955 op2Reg = op2->gtRegNum;
956 instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
957 regNumber tmpReg = node->GetSingleTempReg();
959 // Ensure we aren't overwriting targetReg
960 assert(tmpReg != targetReg);
962 emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg);
963 emit->emitIns_R(INS_setpe, EA_1BYTE, targetReg);
964 emit->emitIns_R(INS_setne, EA_1BYTE, tmpReg);
965 emit->emitIns_R_R(INS_or, EA_1BYTE, tmpReg, targetReg);
966 emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, tmpReg);
// Two-operand scalar conversions reuse the generic register / register-or-memory emitter.
970 case NI_SSE2_ConvertScalarToVector128Double:
971 case NI_SSE2_ConvertScalarToVector128Single:
973 assert(baseType == TYP_INT || baseType == TYP_LONG || baseType == TYP_FLOAT || baseType == TYP_DOUBLE);
974 assert(op1 != nullptr);
975 assert(op2 != nullptr);
976 instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
977 genHWIntrinsic_R_R_RM(node, ins);
// Single-operand 64-bit conversions are emitted with the base type's size.
981 case NI_SSE2_ConvertScalarToVector128Int64:
982 case NI_SSE2_ConvertScalarToVector128UInt64:
984 assert(baseType == TYP_LONG || baseType == TYP_ULONG);
985 assert(op1 != nullptr);
986 assert(op2 == nullptr);
987 instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
988 emit->emitIns_R_R(ins, emitTypeSize(baseType), targetReg, op1Reg);
// Register-to-register move; nothing is emitted when op1 is already in targetReg.
992 case NI_SSE2_ConvertToDouble:
994 assert(op2 == nullptr);
995 if (op1Reg != targetReg)
997 instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
998 emit->emitIns_R_R(ins, emitTypeSize(targetType), targetReg, op1Reg);
1003 case NI_SSE2_ConvertToInt32:
1004 case NI_SSE2_ConvertToInt64:
1005 case NI_SSE2_ConvertToUInt32:
1006 case NI_SSE2_ConvertToUInt64:
1008 assert(op2 == nullptr);
1009 assert(baseType == TYP_DOUBLE || baseType == TYP_FLOAT || baseType == TYP_INT || baseType == TYP_UINT ||
1010 baseType == TYP_LONG || baseType == TYP_ULONG);
1011 instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
1012 if (baseType == TYP_DOUBLE || baseType == TYP_FLOAT)
1014 emit->emitIns_R_R(ins, emitTypeSize(targetType), targetReg, op1Reg);
// NOTE(review): for integer base types the registers are passed as (op1Reg, targetReg), the reverse of
// the floating-point path - presumably required by the move instruction selected here; confirm.
1018 emit->emitIns_R_R(ins, emitActualTypeSize(baseType), op1Reg, targetReg);
// LoadFence takes no operands and emits a bare lfence.
1023 case NI_SSE2_LoadFence:
1025 assert(baseType == TYP_VOID);
1026 assert(op1 == nullptr);
1027 assert(op2 == nullptr);
1028 emit->emitIns(INS_lfence);
// MemoryFence takes no operands and emits a bare mfence.
1032 case NI_SSE2_MemoryFence:
1034 assert(baseType == TYP_VOID);
1035 assert(op1 == nullptr);
1036 assert(op2 == nullptr);
1037 emit->emitIns(INS_mfence);
// MoveMask is emitted with int emit size rather than SIMD size.
1041 case NI_SSE2_MoveMask:
1043 assert(op2 == nullptr);
1044 assert(baseType == TYP_BYTE || baseType == TYP_UBYTE || baseType == TYP_DOUBLE);
1046 instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
1047 emit->emitIns_R_R(ins, emitTypeSize(TYP_INT), targetReg, op1Reg);
// SetScalarVector128: zero targetReg with xorpd, then move the scalar into it with the table instruction.
1051 case NI_SSE2_SetScalarVector128:
1053 assert(baseType == TYP_DOUBLE);
1054 assert(op2 == nullptr);
1056 instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
// Zeroing targetReg would destroy op1 when they share a register, so op1 is copied to a temp first.
1057 if (op1Reg == targetReg)
1059 regNumber tmpReg = node->GetSingleTempReg();
1061 // Ensure we aren't overwriting targetReg
1062 assert(tmpReg != targetReg);
1064 emit->emitIns_R_R(INS_movapd, emitTypeSize(TYP_SIMD16), tmpReg, op1Reg);
1068 emit->emitIns_SIMD_R_R_R(INS_xorpd, emitTypeSize(TYP_SIMD16), targetReg, targetReg, targetReg);
1069 emit->emitIns_SIMD_R_R_R(ins, emitTypeSize(TYP_SIMD16), targetReg, targetReg, op1Reg);
// SetZeroVector128: the table instruction is applied to targetReg with itself; TYP_FLOAT is excluded by the assert.
1073 case NI_SSE2_SetZeroVector128:
1075 assert(baseType != TYP_FLOAT);
1076 assert(baseType >= TYP_BYTE && baseType <= TYP_DOUBLE);
1077 assert(op1 == nullptr);
1078 assert(op2 == nullptr);
1080 instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
1081 emit->emitIns_SIMD_R_R_R(ins, emitTypeSize(TYP_SIMD16), targetReg, targetReg, targetReg);
// NOTE(review): presumably op1Reg is the destination address and op2Reg the value stored - confirm.
1085 case NI_SSE2_StoreNonTemporal:
1087 assert(baseType == TYP_INT || baseType == TYP_UINT || baseType == TYP_LONG || baseType == TYP_ULONG);
1088 assert(op1 != nullptr);
1089 assert(op2 != nullptr);
1091 op2Reg = op2->gtRegNum;
1092 instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
1093 emit->emitIns_AR_R(ins, emitTypeSize(baseType), op2Reg, op1Reg, 0);
1102 genProduceReg(node);
1105 //------------------------------------------------------------------------
1106 // genSSE41Intrinsic: Generates the code for an SSE4.1 hardware intrinsic node
//
// Handles the ptest-based Test* intrinsics, which turn a flag into a 0/1 integer result, and
// Extract, whose immediate may be non-constant. genProduceReg is called for the node once the
// instructions have been emitted.
//
// Arguments:
1109 //    node - The hardware intrinsic node
1111 void CodeGen::genSSE41Intrinsic(GenTreeHWIntrinsic* node)
1113 NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
1114 GenTree* op1 = node->gtGetOp1();
1115 GenTree* op2 = node->gtGetOp2();
1116 GenTree* op3 = nullptr;
1117 GenTree* op4 = nullptr;
1118 regNumber targetReg = node->gtRegNum;
1119 var_types targetType = node->TypeGet();
1120 var_types baseType = node->gtSIMDBaseType;
1122 regNumber op1Reg = REG_NA;
1123 regNumber op2Reg = REG_NA;
1124 regNumber op3Reg = REG_NA;
1125 regNumber op4Reg = REG_NA;
1126 emitter* emit = getEmitter();
// Operands are consumed here only when op1 exists and is not an argument list.
1128 if ((op1 != nullptr) && !op1->OperIsList())
1130 op1Reg = op1->gtRegNum;
1131 genConsumeOperands(node);
1134 switch (intrinsicID)
// In every Test* case targetReg is cleared with xor before ptest, because xor itself changes the
// flags; the set<cc> that follows then writes only the low byte.
// TestAllOnes: tmpReg is filled with all ones (pcmpeqd of a register with itself), op1 is
// ptest-ed against it and the carry flag is read with setb.
1136 case NI_SSE41_TestAllOnes:
1138 regNumber tmpReg = node->GetSingleTempReg();
1139 assert(Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType) == INS_ptest);
1140 emit->emitIns_SIMD_R_R_R(INS_pcmpeqd, emitTypeSize(TYP_SIMD16), tmpReg, tmpReg, tmpReg);
1141 emit->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg);
1142 emit->emitIns_R_R(INS_ptest, emitTypeSize(TYP_SIMD16), op1Reg, tmpReg);
1143 emit->emitIns_R(INS_setb, EA_1BYTE, targetReg);
// TestAllZeros / TestZ: result is the zero flag after ptest (sete).
1147 case NI_SSE41_TestAllZeros:
1148 case NI_SSE41_TestZ:
1150 assert(Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType) == INS_ptest);
1151 emit->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg);
1152 emit->emitIns_R_R(INS_ptest, emitTypeSize(TYP_SIMD16), op1Reg, op2->gtRegNum);
1153 emit->emitIns_R(INS_sete, EA_1BYTE, targetReg);
// TestC: result is the carry flag after ptest (setb).
1157 case NI_SSE41_TestC:
1159 assert(Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType) == INS_ptest);
1160 emit->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg);
1161 emit->emitIns_R_R(INS_ptest, emitTypeSize(TYP_SIMD16), op1Reg, op2->gtRegNum);
1162 emit->emitIns_R(INS_setb, EA_1BYTE, targetReg);
// TestMixOnesZeros / TestNotZAndNotC: result is 1 when both the zero and carry flags are clear (seta).
1166 case NI_SSE41_TestMixOnesZeros:
1167 case NI_SSE41_TestNotZAndNotC:
1169 assert(Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType) == INS_ptest);
1170 emit->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg);
1171 emit->emitIns_R_R(INS_ptest, emitTypeSize(TYP_SIMD16), op1Reg, op2->gtRegNum);
1172 emit->emitIns_R(INS_seta, EA_1BYTE, targetReg);
// Extract: the element index in op2 is an immediate that may or may not be a constant.
1176 case NI_SSE41_Extract:
1178 regNumber tmpTargetReg = REG_NA;
1179 instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
// For TYP_FLOAT an extra temp register receives the extracted value before it is moved to targetReg.
1180 if (baseType == TYP_FLOAT)
1182 tmpTargetReg = node->ExtractTempReg();
// Emits the extract for one concrete immediate value (constant op2 or one jump-table case).
1184 auto emitSwCase = [&](unsigned i) {
1185 if (baseType == TYP_FLOAT)
1187 // extract instructions return to GP-registers, so it needs int size as the emitsize
// NOTE(review): the registers are passed as (op1Reg, tmpTargetReg) here but as (targetReg, op1Reg)
// in the non-float path below - presumably an operand-order quirk of this instruction; confirm.
1188 emit->emitIns_SIMD_R_R_I(ins, emitTypeSize(TYP_INT), op1Reg, tmpTargetReg, (int)i);
1189 emit->emitIns_R_R(INS_mov_i2xmm, EA_4BYTE, targetReg, tmpTargetReg);
1193 emit->emitIns_SIMD_R_R_I(ins, emitTypeSize(TYP_INT), targetReg, op1Reg, (int)i);
1197 if (op2->IsCnsIntOrI())
1199 ssize_t ival = op2->AsIntCon()->IconValue();
1200 emitSwCase((unsigned)ival);
1204 // We emit a fallback case for the scenario when the imm-op is not a constant. This should
1205 // normally happen when the intrinsic is called indirectly, such as via Reflection. However, it
1206 // can also occur if the consumer calls it directly and just doesn't pass a constant value.
1207 regNumber baseReg = node->ExtractTempReg();
1208 regNumber offsReg = node->GetSingleTempReg();
1209 genHWIntrinsicJumpTableFallback(intrinsicID, op2->gtRegNum, baseReg, offsReg, emitSwCase);
1219 genProduceReg(node);
1222 //------------------------------------------------------------------------
1223 // genSSE42Intrinsic: Generates the code for an SSE4.2 hardware intrinsic node
1226 // node - The hardware intrinsic node
1228 void CodeGen::genSSE42Intrinsic(GenTreeHWIntrinsic* node)
1230 NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
1231 GenTree* op1 = node->gtGetOp1();
1232 GenTree* op2 = node->gtGetOp2();
1233 regNumber targetReg = node->gtRegNum;
1234 assert(targetReg != REG_NA);
1235 var_types targetType = node->TypeGet();
1236 var_types baseType = node->gtSIMDBaseType;
1238 regNumber op1Reg = op1->gtRegNum;
1239 regNumber op2Reg = op2->gtRegNum;
1240 genConsumeOperands(node);
1242 switch (intrinsicID)
1244 case NI_SSE42_Crc32:
1245 if (op1Reg != targetReg)
1247 assert(op2Reg != targetReg);
1248 inst_RV_RV(INS_mov, targetReg, op1Reg, targetType, emitTypeSize(targetType));
1251 if (baseType == TYP_UBYTE || baseType == TYP_USHORT) // baseType is the type of the second argument
1253 assert(targetType == TYP_INT);
1254 inst_RV_RV(INS_crc32, targetReg, op2Reg, baseType, emitTypeSize(baseType));
1258 assert(op1->TypeGet() == op2->TypeGet());
1259 assert(targetType == TYP_INT || targetType == TYP_LONG);
1260 inst_RV_RV(INS_crc32, targetReg, op2Reg, targetType, emitTypeSize(targetType));
1268 genProduceReg(node);
1271 //------------------------------------------------------------------------
1272 // genAVXIntrinsic: Generates the code for an AVX hardware intrinsic node
1275 // node - The hardware intrinsic node
1277 void CodeGen::genAVXIntrinsic(GenTreeHWIntrinsic* node)
1279 NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
1280 var_types baseType = node->gtSIMDBaseType;
1281 emitAttr attr = EA_ATTR(node->gtSIMDSize);
1282 var_types targetType = node->TypeGet();
1283 instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
1284 GenTree* op1 = node->gtGetOp1();
1285 GenTree* op2 = node->gtGetOp2();
1286 regNumber targetReg = node->gtRegNum;
1287 emitter* emit = getEmitter();
1289 genConsumeOperands(node);
1291 switch (intrinsicID)
1293 case NI_AVX_SetZeroVector256:
1295 assert(op1 == nullptr);
1296 assert(op2 == nullptr);
1297 // SetZeroVector256 will generate pxor with integral base-typ, but pxor is a AVX2 instruction, so we
1298 // generate xorps on AVX machines.
1299 if (!compiler->compSupports(InstructionSet_AVX2) && varTypeIsIntegral(baseType))
1301 emit->emitIns_SIMD_R_R_R(INS_xorps, attr, targetReg, targetReg, targetReg);
1305 emit->emitIns_SIMD_R_R_R(ins, attr, targetReg, targetReg, targetReg);
1310 case NI_AVX_ExtendToVector256:
1312 // ExtendToVector256 has zero-extend semantics in order to ensure it is deterministic
1313 // We always emit a move to the target register, even when op1Reg == targetReg, in order
1314 // to ensure that Bits MAXVL-1:128 are zeroed.
1316 assert(op2 == nullptr);
1317 regNumber op1Reg = op1->gtRegNum;
1318 emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), targetReg, op1Reg);
1322 case NI_AVX_GetLowerHalf:
1323 case NI_AVX_StaticCast:
1325 assert(op2 == nullptr);
1326 regNumber op1Reg = op1->gtRegNum;
1328 if (op1Reg != targetReg)
1330 instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
1331 emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD32), targetReg, op1Reg);
1338 emit->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg);
1339 emit->emitIns_R_R(ins, attr, op1->gtRegNum, op2->gtRegNum);
1340 emit->emitIns_R(INS_setb, EA_1BYTE, targetReg);
1344 case NI_AVX_TestNotZAndNotC:
1346 emit->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg);
1347 emit->emitIns_R_R(ins, attr, op1->gtRegNum, op2->gtRegNum);
1348 emit->emitIns_R(INS_seta, EA_1BYTE, targetReg);
1354 emit->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg);
1355 emit->emitIns_R_R(ins, attr, op1->gtRegNum, op2->gtRegNum);
1356 emit->emitIns_R(INS_sete, EA_1BYTE, targetReg);
1365 genProduceReg(node);
1368 //------------------------------------------------------------------------
1369 // genAVX2Intrinsic: Generates the code for an AVX2 hardware intrinsic node
1372 // node - The hardware intrinsic node
1374 void CodeGen::genAVX2Intrinsic(GenTreeHWIntrinsic* node)
1376 NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
1377 var_types baseType = node->gtSIMDBaseType;
1378 instruction ins = INS_invalid;
1380 genConsumeOperands(node);
1382 switch (intrinsicID)
1389 genProduceReg(node);
1392 //------------------------------------------------------------------------
1393 // genAESIntrinsic: Generates the code for an AES hardware intrinsic node
1396 // node - The hardware intrinsic node
1398 void CodeGen::genAESIntrinsic(GenTreeHWIntrinsic* node)
1400 NYI("Implement AES intrinsic code generation");
1403 //------------------------------------------------------------------------
1404 // genBMI1Intrinsic: Generates the code for a BMI1 hardware intrinsic node
1407 // node - The hardware intrinsic node
1409 void CodeGen::genBMI1Intrinsic(GenTreeHWIntrinsic* node)
1411 NYI("Implement BMI1 intrinsic code generation");
1414 //------------------------------------------------------------------------
1415 // genBMI2Intrinsic: Generates the code for a BMI2 hardware intrinsic node
1418 // node - The hardware intrinsic node
1420 void CodeGen::genBMI2Intrinsic(GenTreeHWIntrinsic* node)
1422 NYI("Implement BMI2 intrinsic code generation");
1425 //------------------------------------------------------------------------
1426 // genFMAIntrinsic: Generates the code for an FMA hardware intrinsic node
1429 // node - The hardware intrinsic node
1431 void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node)
1433 NYI("Implement FMA intrinsic code generation");
1436 //------------------------------------------------------------------------
1437 // genLZCNTIntrinsic: Generates the code for a LZCNT hardware intrinsic node
1440 // node - The hardware intrinsic node
1442 void CodeGen::genLZCNTIntrinsic(GenTreeHWIntrinsic* node)
1444 NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
1445 GenTree* op1 = node->gtGetOp1();
1446 regNumber targetReg = node->gtRegNum;
1447 assert(targetReg != REG_NA);
1448 var_types targetType = node->TypeGet();
1449 regNumber op1Reg = op1->gtRegNum;
1450 genConsumeOperands(node);
1452 assert(intrinsicID == NI_LZCNT_LeadingZeroCount);
1454 inst_RV_RV(INS_lzcnt, targetReg, op1Reg, targetType, emitTypeSize(targetType));
1456 genProduceReg(node);
1459 //------------------------------------------------------------------------
1460 // genPCLMULQDQIntrinsic: Generates the code for a PCLMULQDQ hardware intrinsic node
1463 // node - The hardware intrinsic node
1465 void CodeGen::genPCLMULQDQIntrinsic(GenTreeHWIntrinsic* node)
1467 NYI("Implement PCLMULQDQ intrinsic code generation");
1470 //------------------------------------------------------------------------
1471 // genPOPCNTIntrinsic: Generates the code for a POPCNT hardware intrinsic node
1474 // node - The hardware intrinsic node
1476 void CodeGen::genPOPCNTIntrinsic(GenTreeHWIntrinsic* node)
1478 NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
1479 GenTree* op1 = node->gtGetOp1();
1480 regNumber targetReg = node->gtRegNum;
1481 assert(targetReg != REG_NA);
1482 var_types targetType = node->TypeGet();
1483 regNumber op1Reg = op1->gtRegNum;
1484 genConsumeOperands(node);
1486 assert(intrinsicID == NI_POPCNT_PopCount);
1488 inst_RV_RV(INS_popcnt, targetReg, op1Reg, targetType, emitTypeSize(targetType));
1490 genProduceReg(node);
1493 #endif // FEATURE_HW_INTRINSICS