1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
5 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
8 XX Register Requirements for AMD64 XX
10 XX This encapsulates all the logic for setting register requirements for XX
11 XX the AMD64 architecture. XX
14 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
15 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
23 #ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator
28 #include "sideeffects.h"
31 //------------------------------------------------------------------------
32 // BuildNode: Set register requirements for a node
35 //    treeNode - the node of interest
39 // LSRA has been initialized and there is a TreeNodeInfo node
40 // already allocated and initialized for every tree in the IR.
42 // Every TreeNodeInfo instance has the right annotations on register
43 // requirements needed by LSRA to build the Interval Table (source,
44 // destination and internal [temp] register counts).
46 void LinearScan::BuildNode(GenTree* tree)
48 TreeNodeInfo* info = currentNodeInfo;
49 assert(!tree->isContained());
54 if (tree->IsUnusedValue())
56 info->isLocalDefUse = true;
64 // A floating type generates AVX instructions (vmovss etc.), so set the flag
65 SetContainsAVXFlags(varTypeIsFloating(tree->TypeGet()));
66 switch (tree->OperGet())
73 // Because we do containment analysis before we redo dataflow and identify register
74 // candidates, the containment analysis only uses !lvDoNotEnregister to estimate register candidates.
76 // If a lclVar that was estimated to be a register candidate turns out not to be, and
77 // it was marked regOptional, it should now be marked contained instead.
78 // TODO-XArch-CQ: When this is being called while RefPositions are being created,
79 // use lvLRACandidate here instead.
80 if (tree->IsRegOptional())
82 if (!compiler->lvaTable[tree->AsLclVarCommon()->gtLclNum].lvTracked ||
83 compiler->lvaTable[tree->AsLclVarCommon()->gtLclNum].lvDoNotEnregister)
85 tree->ClearRegOptional();
97 // Need an additional register to read upper 4 bytes of Vector3.
98 if (tree->TypeGet() == TYP_SIMD12)
100 // We need an internal register different from targetReg in which 'tree' produces its result
101 // because both targetReg and internal reg will be in use at the same time.
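// For illustration only -- an assumed sketch (not taken from the emitter) of a
// TYP_SIMD12 load, showing why the internal register must stay live alongside targetReg:
//    movsd  targetReg, qword ptr [addr]      ; load the lower 8 bytes
//    movss  tmpReg,    dword ptr [addr+8]    ; load the upper 4 bytes
//    shufps targetReg, tmpReg, <imm>         ; combine both into targetReg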
102 info->internalFloatCount = 1;
103 info->isInternalRegDelayFree = true;
104 info->setInternalCandidates(this, allSIMDRegs());
109 case GT_STORE_LCL_FLD:
110 case GT_STORE_LCL_VAR:
111 BuildStoreLoc(tree->AsLclVarCommon());
121 assert(info->dstCount == 0);
126 assert(info->dstCount == 1);
129 #if !defined(_TARGET_64BIT_)
132 assert(tree->IsUnusedValue()); // Contained nodes are already processed, only unused GT_LONG can reach here.
133 // An unused GT_LONG node needs to consume its sources, but need not produce a register.
134 tree->gtType = TYP_VOID;
135 tree->ClearUnusedValue();
136 info->isLocalDefUse = false;
139 appendLocationInfoToList(tree->gtGetOp1());
140 appendLocationInfoToList(tree->gtGetOp2());
143 #endif // !defined(_TARGET_64BIT_)
150 assert(info->dstCount == 0);
159 assert(info->dstCount == 0);
160 if (tree->TypeGet() == TYP_VOID)
166 assert(tree->TypeGet() == TYP_INT);
170 info->setSrcCandidates(this, RBM_INTRET);
171 LocationInfoListNode* locationInfo = getLocationInfo(tree->gtOp.gtOp1);
172 locationInfo->info.setSrcCandidates(this, RBM_INTRET);
173 useList.Append(locationInfo);
177 // A GT_NOP is a passthrough if it is void or if it has
178 // a child, but it must be considered to produce a dummy value if it
179 // has a type but no child.
182 assert((tree->gtOp.gtOp1 == nullptr) || tree->isContained());
183 if (tree->TypeGet() != TYP_VOID && tree->gtOp.gtOp1 == nullptr)
185 assert(info->dstCount == 1);
189 assert(info->dstCount == 0);
196 assert(info->dstCount == 0);
197 GenTree* cmp = tree->gtGetOp1();
198 assert(!cmp->IsValue());
204 assert(info->dstCount == 0);
209 assert(info->dstCount == 1);
211 info->setDstCandidates(this, RBM_BYTE_REGS);
212 #endif // _TARGET_X86_
217 assert(info->dstCount == 0);
221 // This should never occur since switch nodes must not be visible at this point in the JIT.
224 noway_assert(!"Switch must be lowered at this point");
229 assert(info->dstCount == 1);
232 case GT_SWITCH_TABLE:
233 info->internalIntCount = 1;
234 assert(info->dstCount == 0);
235 info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
236 assert(info->srcCount == 2);
240 noway_assert(!"We should never hit any assignment operator in lowering");
244 #if !defined(_TARGET_64BIT_)
256 info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
260 // This just turns into a compare of its child with an int + a conditional call.
261 info->srcCount = GetOperandInfo(tree->gtOp.gtOp1);
262 assert(info->dstCount == 0);
263 info->internalIntCount = 1;
264 info->setInternalCandidates(this, allRegs(TYP_INT));
271 BuildModDiv(tree->AsOp());
276 #if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
279 BuildMul(tree->AsOp());
283 BuildIntrinsic(tree->AsOp());
288 BuildSIMD(tree->AsSIMD());
290 #endif // FEATURE_SIMD
292 #ifdef FEATURE_HW_INTRINSICS
294 BuildHWIntrinsic(tree->AsHWIntrinsic());
296 #endif // FEATURE_HW_INTRINSICS
304 LocationInfoListNode* locationInfo = getLocationInfo(tree->gtOp.gtOp1);
305 locationInfo->info.isTgtPref = true;
306 useList.Append(locationInfo);
313 info->srcCount = GetOperandInfo(tree->gtOp.gtOp1);
316 // SSE instruction set doesn't have an instruction to negate a number.
317 // The recommended way is to xor the float/double number with a bitmask.
318 // The only way to xor is using xorps or xorpd both of which operate on
319 // 128-bit operands. To hold the bit-mask we would need another xmm
320 // register or a 16-byte aligned 128-bit data constant. Right now emitter
321 // lacks the support for emitting such constants or instruction with mem
322 // addressing mode referring to a 128-bit operand. For now we use an
323 // internal xmm register to load 32/64-bit bitmask from data section.
324 // Note that by trading additional data section memory (128-bit) we can
325 // save on the need for an internal register and also a memory-to-reg move.
328 // Note: another option to avoid internal register requirement is by
329 // lowering as GT_SUB(0, src). This will generate code different from
330 // Jit64 and could possibly result in compat issues (?).
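// For illustration only -- an assumed expansion (the label name is hypothetical):
//    movss xmm1, dword ptr [@NegMask]    ; load the 0x80000000 sign-bit mask into the internal reg
//    xorps xmm0, xmm1                    ; flip the sign bit of the value being negated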
331 if (varTypeIsFloating(tree))
333 info->internalFloatCount = 1;
334 info->setInternalCandidates(this, internalFloatRegCandidates());
339 info->srcCount = GetOperandInfo(tree->gtOp.gtOp1);
351 (void)BuildShiftRotate(tree);
367 appendLocationInfoToList(tree->gtOp.gtOp1);
369 assert(info->dstCount == 1);
370 info->internalIntCount = 1;
376 assert(info->dstCount == 1);
378 // The comparand is preferenced to RAX.
379 // The remaining two operands can be in any reg other than RAX.
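// For illustration only -- the hardware semantics that drive these constraints:
//    lock cmpxchg [locationReg], valueReg  ; implicitly compares RAX (the comparand)
//                                          ; with [locationReg]; stores valueReg on a
//                                          ; match, else loads the old value into RAX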
380 LocationInfoListNode* locationInfo = getLocationInfo(tree->gtCmpXchg.gtOpLocation);
381 locationInfo->info.setSrcCandidates(this, allRegs(TYP_INT) & ~RBM_RAX);
382 useList.Append(locationInfo);
383 LocationInfoListNode* valueInfo = getLocationInfo(tree->gtCmpXchg.gtOpValue);
384 valueInfo->info.setSrcCandidates(this, allRegs(TYP_INT) & ~RBM_RAX);
385 useList.Append(valueInfo);
386 info->setDstCandidates(this, RBM_RAX);
387 LocationInfoListNode* comparandInfo = getLocationInfo(tree->gtCmpXchg.gtOpComparand);
388 comparandInfo->info.setSrcCandidates(this, RBM_RAX);
389 useList.Append(comparandInfo);
394 info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
395 assert(info->dstCount == ((tree->TypeGet() == TYP_VOID) ? 0 : 1));
399 BuildPutArgReg(tree->AsUnOp());
403 BuildCall(tree->AsCall());
408 // For a GT_ADDR, the child node should not be evaluated into a register
409 GenTree* child = tree->gtOp.gtOp1;
410 assert(!isCandidateLocalRef(child));
411 assert(child->isContained());
412 assert(info->dstCount == 1);
417 #if !defined(FEATURE_PUT_STRUCT_ARG_STK)
422 // These should all be eliminated prior to Lowering.
423 assert(!"Non-store block node in Lowering");
427 #ifdef FEATURE_PUT_STRUCT_ARG_STK
429 BuildPutArgStk(tree->AsPutArgStk());
431 #endif // FEATURE_PUT_STRUCT_ARG_STK
435 case GT_STORE_DYN_BLK:
436 BuildBlockStore(tree->AsBlk());
440 // Always a passthrough of its child's value.
441 assert(!"INIT_VAL should always be contained");
448 case GT_ARR_BOUNDS_CHECK:
451 #endif // FEATURE_SIMD
452 // Consumes arrLen & index - has no result
454 assert(info->dstCount == 0);
455 info->srcCount = GetOperandInfo(tree->AsBoundsChk()->gtIndex);
456 info->srcCount += GetOperandInfo(tree->AsBoundsChk()->gtArrLen);
460 // These must have been lowered to GT_ARR_INDEX
461 noway_assert(!"We should never see a GT_ARR_ELEM after Lowering.");
468 assert(info->dstCount == 1);
469 assert(!tree->AsArrIndex()->ArrObj()->isContained());
470 assert(!tree->AsArrIndex()->IndexExpr()->isContained());
471 // For GT_ARR_INDEX, the lifetime of the arrObj must be extended because it is actually used multiple
472 // times while the result is being computed.
473 LocationInfoListNode* arrObjInfo = getLocationInfo(tree->AsArrIndex()->ArrObj());
474 arrObjInfo->info.isDelayFree = true;
475 useList.Append(arrObjInfo);
476 useList.Append(getLocationInfo(tree->AsArrIndex()->IndexExpr()));
477 info->hasDelayFreeSrc = true;
482 // This consumes the offset, if any, the arrObj and the effective index,
483 // and produces the flattened offset for this dimension.
484 assert(info->dstCount == 1);
485 if (tree->gtArrOffs.gtOffset->isContained())
491 // Here we simply need an internal register, which must be different
492 // from any of the operand's registers, but may be the same as targetReg.
494 info->internalIntCount = 1;
495 appendLocationInfoToList(tree->AsArrOffs()->gtOffset);
497 appendLocationInfoToList(tree->AsArrOffs()->gtIndex);
498 appendLocationInfoToList(tree->AsArrOffs()->gtArrObj);
502 // The LEA usually passes its operands through to the GT_IND, in which case it will
503 // be contained, but we may be instantiating an address, in which case we set them here.
505 assert(info->dstCount == 1);
506 if (tree->AsAddrMode()->HasBase())
509 appendLocationInfoToList(tree->AsAddrMode()->Base());
511 if (tree->AsAddrMode()->HasIndex())
514 appendLocationInfoToList(tree->AsAddrMode()->Index());
519 if (compiler->codeGen->gcInfo.gcIsWriteBarrierAsgNode(tree))
521 BuildGCWriteBarrier(tree);
524 BuildIndir(tree->AsIndir());
528 assert(info->dstCount == 0);
529 appendLocationInfoToList(tree->gtOp.gtOp1);
534 BuildIndir(tree->AsIndir());
535 assert(info->dstCount == 1);
540 assert(info->dstCount == 1);
541 info->setDstCandidates(this, RBM_EXCEPTION_OBJECT);
544 #if !FEATURE_EH_FUNCLETS
547 assert(info->dstCount == 0);
552 // These nodes are eliminated by rationalizer.
553 JITDUMP("Unexpected node %s in Lower.\n", GenTree::OpName(tree->OperGet()));
558 assert(info->dstCount == 1);
559 info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
561 if (tree->AsIndexAddr()->Index()->TypeGet() == TYP_I_IMPL)
563 info->internalIntCount = 1;
567 switch (tree->AsIndexAddr()->gtElemSize)
576 info->internalIntCount = 1;
581 } // end switch (tree->OperGet())
583 // If op2 of a binary-op gets marked as contained, then binary-op srcCount will be 1.
584 // Even then we would like to set isTgtPref on Op1.
585 if (tree->OperIsBinary() && info->srcCount >= 1)
587 if (isRMWRegOper(tree))
589 GenTree* op1 = tree->gtOp.gtOp1;
590 GenTree* op2 = tree->gtOp.gtOp2;
592 // Commutative opers like add/mul/and/or/xor could reverse the order of
593 // operands if it is safe to do so. In such a case we would like op2 to be
594 // target preferenced instead of op1.
595 if (tree->OperIsCommutative() && op1->isContained() && op2 != nullptr)
598 op2 = tree->gtOp.gtOp1;
601 // If we have a read-modify-write operation, we want to preference op1 to the target,
602 // if it is not contained.
603 if (!op1->isContained() && !op1->OperIs(GT_LIST))
605 useList.GetTreeNodeInfo(op1).isTgtPref = true;
608 // Is this a non-commutative operator, or is op2 a contained memory op?
609 // In either case, we need to make op2 remain live until the op is complete, by marking
610 // the source(s) associated with op2 as "delayFree".
611 // Note that if op2 of a binary RMW operator is a memory op, even if the operator
612 // is commutative, codegen cannot reverse them.
613 // TODO-XArch-CQ: This is not actually the case for all RMW binary operators, but there's
614 // more work to be done to correctly reverse the operands if they involve memory
615 // operands. Also, we may need to handle more cases than GT_IND, especially once
616 // we've modified the register allocator to not require all nodes to be assigned
617 // a register (e.g. a spilled lclVar can often be referenced directly from memory).
618 // Note that we may have a null op2, even with 2 sources, if op1 is a base/index memory op.
620 GenTree* delayUseSrc = nullptr;
621 // TODO-XArch-Cleanup: We should make the indirection explicit on these nodes so that we don't have
622 // to special case them.
623 if (tree->OperGet() == GT_XADD || tree->OperGet() == GT_XCHG || tree->OperGet() == GT_LOCKADD)
625 // These tree nodes will have their op1 marked as isDelayFree=true.
626 // Hence these tree nodes should have a Def position so that op1's reg
627 // gets freed at DefLoc+1.
628 if (tree->TypeGet() == TYP_VOID)
630 // Right now a GT_XADD node could be morphed into a
631 // GT_LOCKADD of TYP_VOID. See gtExtractSideEffList().
632 // Note that it is advantageous to use GT_LOCKADD
633 // instead of GT_XADD, as the former uses 'lock add',
634 // which allows its second operand to be a contained
635 // immediate, whereas the xadd instruction requires its
636 // second operand to be in a register.
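// For illustration only -- assumed forms of the two instructions:
//    lock add  dword ptr [addrReg], 1      ; GT_LOCKADD: the immediate can be contained
//    lock xadd dword ptr [addrReg], valReg ; GT_XADD: the second operand must be a register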
637 assert(info->dstCount == 0);
639 // Give it an artificial type and mark it as an unused value.
640 // This results in a Def position created but not considered consumed by its parent node.
641 tree->gtType = TYP_INT;
643 info->isLocalDefUse = true;
644 tree->SetUnusedValue();
648 assert(info->dstCount != 0);
653 else if ((op2 != nullptr) && (!tree->OperIsCommutative() || (op2->isContained() && !op2->IsCnsIntOrI())))
657 if ((delayUseSrc != nullptr) && CheckAndSetDelayFree(delayUseSrc))
659 info->hasDelayFreeSrc = true;
664 BuildCheckByteable(tree);
666 // We need to be sure that we've set info->srcCount and info->dstCount appropriately
667 assert((info->dstCount < 2) || (tree->IsMultiRegCall() && info->dstCount == MAX_RET_REG_COUNT));
668 assert(info->isLocalDefUse == (tree->IsValue() && tree->IsUnusedValue()));
669 assert(!tree->IsUnusedValue() || (info->dstCount != 0));
670 assert(info->dstCount == tree->GetRegisterDstCount());
673 //---------------------------------------------------------------------
674 // CheckAndSetDelayFree - Set isDelayFree on the given operand or its child(ren), if appropriate
677 //    delayUseSrc - a node that may have a delayed use
680 //    True iff the node or one of its children has been marked isDelayFree
683 //    Only register operands should be marked isDelayFree, not contained immediates or memory.
685 bool LinearScan::CheckAndSetDelayFree(GenTree* delayUseSrc)
687 // If delayUseSrc is an indirection and it doesn't produce a result, then we need to set "delayFree"
688 // on the base & index, if any.
689 // Otherwise, we set it on delayUseSrc itself.
690 bool returnValue = false;
691 if (delayUseSrc->isContained())
693 // If delayUseSrc is a non-Indir contained node (e.g. a local) there's no register use to delay.
694 if (delayUseSrc->isIndir())
696 GenTree* base = delayUseSrc->AsIndir()->Base();
697 GenTree* index = delayUseSrc->AsIndir()->Index();
698 if ((base != nullptr) && !base->isContained())
700 useList.GetTreeNodeInfo(base).isDelayFree = true;
703 if (index != nullptr)
705 assert(!index->isContained());
706 useList.GetTreeNodeInfo(index).isDelayFree = true;
713 useList.GetTreeNodeInfo(delayUseSrc).isDelayFree = true;
719 //------------------------------------------------------------------------
720 // BuildCheckByteable: Check the tree to see if "byte-able" registers are
721 // required, and set the tree node info accordingly.
724 //    tree - The node of interest
729 void LinearScan::BuildCheckByteable(GenTree* tree)
732 TreeNodeInfo* info = currentNodeInfo;
733 // Exclude RBM_NON_BYTE_REGS from dst candidates of tree node and src candidates of operands
734 // if the tree node is a byte type.
736 // Though this looks conservative in theory, in practice we could not think of a case where
737 // the logic below leads to a conservative register specification. If we find such a case
738 // in the future, this logic will need to be fine-tuned for it.
740 if (ExcludeNonByteableRegisters(tree))
743 if (info->dstCount > 0)
745 regMask = info->getDstCandidates(this);
746 assert(regMask != RBM_NONE);
747 info->setDstCandidates(this, regMask & ~RBM_NON_BYTE_REGS);
750 if (tree->OperIsSimple())
752 GenTree* op = tree->gtOp.gtOp1;
755 // No need to set src candidates on a contained child operand.
756 if (!op->isContained())
758 TreeNodeInfo& op1Info = useList.GetTreeNodeInfo(op);
759 regMask = op1Info.getSrcCandidates(this);
760 assert(regMask != RBM_NONE);
761 op1Info.setSrcCandidates(this, regMask & ~RBM_NON_BYTE_REGS);
765 if (tree->OperIsBinary() && (tree->gtOp.gtOp2 != nullptr))
767 op = tree->gtOp.gtOp2;
768 if (!op->isContained())
770 TreeNodeInfo& op2Info = useList.GetTreeNodeInfo(op);
771 regMask = op2Info.getSrcCandidates(this);
772 assert(regMask != RBM_NONE);
773 op2Info.setSrcCandidates(this, regMask & ~RBM_NON_BYTE_REGS);
778 #endif //_TARGET_X86_
781 //------------------------------------------------------------------------------
782 // isRMWRegOper: Can this binary tree node be used in a Read-Modify-Write format
785 //    tree - a binary tree node
788 //    Returns true if we can use the read-modify-write instruction form
791 //    This is used to determine whether to preference the source to the destination register.
793 bool LinearScan::isRMWRegOper(GenTree* tree)
795 // TODO-XArch-CQ: Make this more accurate.
796 // For now, we assume that most binary operators are of the RMW form.
797 assert(tree->OperIsBinary());
799 if (tree->OperIsCompare() || tree->OperIs(GT_CMP))
804 switch (tree->OperGet())
806 // These opers either support a three op form (e.g. GT_LEA), or do not read/write their first operand
814 // x86/x64 does support a three op multiply when op1 or op2 is a contained immediate
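// For illustration only -- the two multiply forms this distinguishes:
//    imul eax, ebx, 8    ; three op form with a contained immediate: not RMW
//    imul eax, ebx       ; two op form: reads and writes its first operand (RMW)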
816 return (!tree->gtOp.gtOp2->isContainedIntOrIImmed() && !tree->gtOp.gtOp1->isContainedIntOrIImmed());
823 //------------------------------------------------------------------------
824 // BuildShiftRotate: Set the NodeInfo for a shift or rotate.
827 //    tree - The node of interest
832 int LinearScan::BuildShiftRotate(GenTree* tree)
834 TreeNodeInfo* info = currentNodeInfo;
835 // For shift operations, we need the number
836 // of bits to shift by to be in CL, in case
837 // the number of bits to shift is not a constant.
839 GenTree* shiftBy = tree->gtOp.gtOp2;
840 GenTree* source = tree->gtOp.gtOp1;
841 LocationInfoListNode* shiftByInfo = nullptr;
842 // x64 can encode 8 bits of shift count, but hardware will use only 5 or 6 of them (the others are masked off).
843 // We will allow whatever can be encoded - hope you know what you are doing.
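// For illustration only -- an assumed expansion of a variable-count shift:
//    mov ecx, countReg   ; the shift count must be in CL
//    shl eax, cl         ; hardware masks the count to 5 (32-bit) or 6 (64-bit) bits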
844 if (shiftBy->isContained())
846 srcCount += GetOperandInfo(source);
851 shiftByInfo = getLocationInfo(shiftBy);
852 shiftByInfo->info.setSrcCandidates(this, RBM_RCX);
853 info->setDstCandidates(this, allRegs(TYP_INT) & ~RBM_RCX);
854 LocationInfoListNode* sourceInfo;
855 srcCount += GetOperandInfo(source, &sourceInfo);
856 for (; sourceInfo != nullptr; sourceInfo = sourceInfo->Next())
858 sourceInfo->info.setSrcCandidates(this, allRegs(TYP_INT) & ~RBM_RCX);
862 // Note that Rotate Left/Right instructions don't set ZF and SF flags.
864 // If the operand being shifted is 32 bits, then the upper three bits of the shift count
865 // are masked off by hardware to get the actual count. Similarly, for 64-bit operands the
866 // shift count is narrowed to [0..63]. If the resulting shift count is zero,
867 // then the shift operation won't modify the flags.
869 // TODO-CQ-XARCH: We can optimize generating 'test' instruction for GT_EQ/NE(shift, 0)
870 // if the shift count is known to be non-zero and in the range depending on the
872 CLANG_FORMAT_COMMENT_ANCHOR;
875 // The first operand of a GT_LSH_HI and GT_RSH_LO oper is a GT_LONG so that
876 // we can have a three operand form. Increment the srcCount.
877 if (tree->OperGet() == GT_LSH_HI || tree->OperGet() == GT_RSH_LO)
879 assert((source->OperGet() == GT_LONG) && source->isContained());
881 GenTree* sourceLo = source->gtOp.gtOp1;
882 LocationInfoListNode* sourceLoInfo = useList.Begin();
883 LocationInfoListNode* sourceHiInfo = useList.GetSecond(INDEBUG(source->gtGetOp2()));
885 info->hasDelayFreeSrc = true;
886 if (tree->OperGet() == GT_LSH_HI)
888 sourceLoInfo->info.isDelayFree = true;
892 sourceHiInfo->info.isDelayFree = true;
896 if (shiftByInfo != nullptr)
898 if (tree->IsReverseOp())
900 useList.Prepend(shiftByInfo);
904 useList.Append(shiftByInfo);
907 if (!tree->isContained())
909 info->srcCount = srcCount;
914 //------------------------------------------------------------------------
915 // BuildCall: Set the NodeInfo for a call.
918 //    call - The call node of interest
923 void LinearScan::BuildCall(GenTreeCall* call)
925 TreeNodeInfo* info = currentNodeInfo;
926 bool hasMultiRegRetVal = false;
927 ReturnTypeDesc* retTypeDesc = nullptr;
929 assert(!call->isContained());
931 if (call->TypeGet() != TYP_VOID)
933 hasMultiRegRetVal = call->HasMultiRegRetVal();
934 if (hasMultiRegRetVal)
936 // dst count = number of registers in which the value is returned by call
937 retTypeDesc = call->GetReturnTypeDesc();
938 info->dstCount = retTypeDesc->GetReturnRegCount();
942 assert(info->dstCount == 1);
947 assert(info->dstCount == 0);
950 GenTree* ctrlExpr = call->gtControlExpr;
951 LocationInfoListNode* ctrlExprInfo = nullptr;
952 if (call->gtCallType == CT_INDIRECT)
954 ctrlExpr = call->gtCallAddr;
957 // If this is a varargs call, we will clear the internal candidates in case we need
958 // to reserve some integer registers for copying float args.
959 // We have to do this because otherwise the default candidates are allRegs, and adding
960 // the individual specific registers will have no effect.
961 if (call->IsVarargs())
963 info->setInternalCandidates(this, RBM_NONE);
966 RegisterType registerType = call->TypeGet();
968 // Set destination candidates for return value of the call.
969 CLANG_FORMAT_COMMENT_ANCHOR;
972 if (call->IsHelperCall(compiler, CORINFO_HELP_INIT_PINVOKE_FRAME))
974 // The x86 CORINFO_HELP_INIT_PINVOKE_FRAME helper uses a custom calling convention that returns with
975 // TCB in REG_PINVOKE_TCB. AMD64/ARM64 use the standard calling convention. fgMorphCall() sets the
976 // correct argument registers.
977 info->setDstCandidates(this, RBM_PINVOKE_TCB);
980 #endif // _TARGET_X86_
981 if (hasMultiRegRetVal)
983 assert(retTypeDesc != nullptr);
984 info->setDstCandidates(this, retTypeDesc->GetABIReturnRegs());
986 else if (varTypeIsFloating(registerType))
989 // The return value will be on the X87 stack, and we will need to move it.
990 info->setDstCandidates(this, allRegs(registerType));
991 #else // !_TARGET_X86_
992 info->setDstCandidates(this, RBM_FLOATRET);
993 #endif // !_TARGET_X86_
995 else if (registerType == TYP_LONG)
997 info->setDstCandidates(this, RBM_LNGRET);
1001 info->setDstCandidates(this, RBM_INTRET);
1004 // Number of args to a call =
1005 //    callRegArgs + (callArgs - placeholders, setup, etc.);
1006 // there is an explicit thisPtr, but it is redundant.
1008 bool callHasFloatRegArgs = false;
1009 bool isVarArgs = call->IsVarargs();
1011 // First, count reg args
1012 for (GenTree* list = call->gtCallLateArgs; list; list = list->MoveNext())
1014 assert(list->OperIsList());
1016 // By this point, lowering has ensured that all call arguments are one of the following:
1017 // - an arg setup store
1018 // - an arg placeholder
1024 // Note that this property is statically checked by LinearScan::CheckBlock.
1025 GenTree* argNode = list->Current();
1027 // Each register argument corresponds to one source.
1028 if (argNode->OperIsPutArgReg())
1031 HandleFloatVarArgs(call, argNode, &callHasFloatRegArgs);
1032 appendLocationInfoToList(argNode);
1034 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
1035 else if (argNode->OperGet() == GT_FIELD_LIST)
1037 for (GenTreeFieldList* entry = argNode->AsFieldList(); entry != nullptr; entry = entry->Rest())
1039 assert(entry->Current()->OperIsPutArgReg());
1041 HandleFloatVarArgs(call, argNode, &callHasFloatRegArgs);
1042 appendLocationInfoToList(entry->Current());
1045 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
1048 // In DEBUG only, check validity with respect to the arg table entry.
1050 fgArgTabEntry* curArgTabEntry = compiler->gtArgEntryByNode(call, argNode);
1051 assert(curArgTabEntry);
1053 if (curArgTabEntry->regNum == REG_STK)
1055 // late arg that is not passed in a register
1056 assert(argNode->gtOper == GT_PUTARG_STK);
1058 #ifdef FEATURE_PUT_STRUCT_ARG_STK
1059 // If the node is TYP_STRUCT and it is put on stack with
1060 // putarg_stk operation, we consume and produce no registers.
1061 // In this case the embedded Obj node should not produce
1062 // registers either, since it is contained.
1063 // Note that if it is a SIMD type the argument will be in a register.
1064 if (argNode->TypeGet() == TYP_STRUCT)
1066 assert(argNode->gtOp.gtOp1 != nullptr && argNode->gtOp.gtOp1->OperGet() == GT_OBJ);
1067 assert(argNode->gtOp.gtOp1->isContained());
1069 #endif // FEATURE_PUT_STRUCT_ARG_STK
1072 #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
1073 if (argNode->OperGet() == GT_FIELD_LIST)
1075 assert(argNode->isContained());
1076 assert(varTypeIsStruct(argNode) || curArgTabEntry->isStruct);
1079 for (GenTreeFieldList* entry = argNode->AsFieldList(); entry != nullptr; entry = entry->Rest())
1081 const regNumber argReg = (i == 0) ? curArgTabEntry->regNum : curArgTabEntry->otherRegNum;
1082 assert(entry->Current()->gtRegNum == argReg);
1088 #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
1090 const regNumber argReg = curArgTabEntry->regNum;
1091 assert(argNode->gtRegNum == argReg);
1096 // Now, count stack args
1097 // Note that these need to be computed into a register, but then
1098 // they're just stored to the stack - so the reg doesn't
1099 // need to remain live until the call. In fact, it must not
1100 // because the code generator doesn't actually consider it live,
1101 // so it can't be spilled.
1103 GenTree* args = call->gtCallArgs;
1106 GenTree* arg = args->gtOp.gtOp1;
1107 if (!(arg->gtFlags & GTF_LATE_ARG))
1109 if (arg->IsValue() && !arg->isContained())
1111 // argInfo->isLocalDefUse = true;
1112 assert(arg->IsUnusedValue());
1114 // assert(argInfo->dstCount == 0);
1116 args = args->gtOp.gtOp2;
1119 // Set reg requirements on the call target, represented as a control sequence.
1120 if (ctrlExpr != nullptr)
1123 int ctrlExprCount = GetOperandInfo(ctrlExpr);
1124 if (ctrlExprCount != 0)
1126 assert(ctrlExprCount == 1);
1127 ctrlExprInfo = useList.Last();
1131 // In the case of a fast tail call implemented as a jmp, make sure that gtControlExpr is
1132 // computed into a register.
1133 if (call->IsFastTailCall())
1135 assert(!ctrlExpr->isContained() && ctrlExprInfo != nullptr);
1136 // Fast tail call - make sure that call target is always computed in RAX
1137 // so that epilog sequence can generate "jmp rax" to achieve fast tail call.
1138 ctrlExprInfo->info.setSrcCandidates(this, RBM_RAX);
1141 else if (call->IsVirtualStub() && (call->gtCallType == CT_INDIRECT))
1143 // On x86, we need to generate a very specific pattern for indirect VSD calls:
1146 // call dword ptr [eax]
1148 // Where EAX is also used as an argument to the stub dispatch helper. Make
1149 // sure that the call target address is computed into EAX in this case.
1150 assert(ctrlExprInfo != nullptr);
1151 assert(ctrlExpr->isIndir() && ctrlExpr->isContained());
1152 ctrlExprInfo->info.setSrcCandidates(this, RBM_VIRTUAL_STUB_TARGET);
1154 #endif // _TARGET_X86_
1157 // If it is a fast tail call, it is already preferenced to use RAX.
1158 // Therefore, there is no need to set src candidates on the call target again.
1159 if (call->IsVarargs() && callHasFloatRegArgs && !call->IsFastTailCall() && (ctrlExprInfo != nullptr))
1161 // Don't assign the call target to any of the argument registers because
1162 // we will use them to also pass floating point arguments as required
1164 ctrlExprInfo->info.setSrcCandidates(this, allRegs(TYP_INT) & ~(RBM_ARG_REGS));
1166 #endif // !FEATURE_VARARG
1170 //------------------------------------------------------------------------
1171 // BuildBlockStore: Set the NodeInfo for a block store.
1174 //    blkNode - The block store node of interest
1179 void LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
1181 TreeNodeInfo* info = currentNodeInfo;
1182 GenTree* dstAddr = blkNode->Addr();
1183 unsigned size = blkNode->gtBlkSize;
1184 GenTree* source = blkNode->Data();
1186 LocationInfoListNode* dstAddrInfo = nullptr;
1187 LocationInfoListNode* sourceInfo = nullptr;
1188 LocationInfoListNode* sizeInfo = nullptr;
1190 // Sources are dest address, initVal or source.
1191 // We may require an additional source or temp register for the size.
1192 if (!dstAddr->isContained())
1195 dstAddrInfo = getLocationInfo(dstAddr);
1197 assert(info->dstCount == 0);
1198 info->setInternalCandidates(this, RBM_NONE);
1199 GenTree* srcAddrOrFill = nullptr;
1200 bool isInitBlk = blkNode->OperIsInitBlkOp();
1202 regMaskTP dstAddrRegMask = RBM_NONE;
1203 regMaskTP sourceRegMask = RBM_NONE;
1204 regMaskTP blkSizeRegMask = RBM_NONE;
1208 GenTree* initVal = source;
1209 if (initVal->OperIsInitVal())
1211 assert(initVal->isContained());
1212 initVal = initVal->gtGetOp1();
1214 srcAddrOrFill = initVal;
1215 if (!initVal->isContained())
1218 sourceInfo = getLocationInfo(initVal);
1221 switch (blkNode->gtBlkOpKind)
1223 case GenTreeBlk::BlkOpKindUnroll:
1224 assert(initVal->IsCnsIntOrI());
1225 if (size >= XMM_REGSIZE_BYTES)
1227 // Reserve an XMM register to fill it with a pack of 16 init value constants.
1228 info->internalFloatCount = 1;
1229 info->setInternalCandidates(this, internalFloatRegCandidates());
1230 // Use an XMM register to fill with constants; this may be an AVX instruction, so set the flag.
1231 SetContainsAVXFlags();
1234 if ((size & 1) != 0)
1236 // On x86, you can't address the lower byte of ESI, EDI, ESP, or EBP when doing
1237 // a "mov byte ptr [dest], val". If the fill size is odd, we will try to do this
1238 // when unrolling, so only allow byteable registers as the source value. (We could
1239 // consider just using BlkOpKindRepInstr instead.)
1240 sourceRegMask = RBM_BYTE_REGS;
1242 #endif // _TARGET_X86_
1245 case GenTreeBlk::BlkOpKindRepInstr:
1246 // rep stos has the following register requirements:
1247 // a) The destination address has to be in RDI.
1248 // b) The fill value has to be in RAX.
1249 // c) The buffer size will go in RCX.
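// For illustration only -- an assumed init-block expansion using these registers:
//    mov rdi, dstAddrReg ; destination
//    mov rax, fillValReg ; fill value
//    mov rcx, sizeReg    ; byte count
//    rep stosb           ; store AL to [RDI], RCX times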
1250 dstAddrRegMask = RBM_RDI;
1251 sourceRegMask = RBM_RAX;
1252 blkSizeRegMask = RBM_RCX;
1255 case GenTreeBlk::BlkOpKindHelper:
1256 #ifdef _TARGET_AMD64_
1257 // The helper follows the regular AMD64 ABI.
1258 dstAddrRegMask = RBM_ARG_0;
1259 sourceRegMask = RBM_ARG_1;
1260 blkSizeRegMask = RBM_ARG_2;
1261 #else // !_TARGET_AMD64_
1262 dstAddrRegMask = RBM_RDI;
1263 sourceRegMask = RBM_RAX;
1264 blkSizeRegMask = RBM_RCX;
1265 #endif // !_TARGET_AMD64_
1274 // CopyObj or CopyBlk
1275 if (source->gtOper == GT_IND)
1277 assert(source->isContained());
1278 srcAddrOrFill = source->gtGetOp1();
1279 if (!srcAddrOrFill->isContained())
1281 sourceInfo = getLocationInfo(srcAddrOrFill);
1285 if (blkNode->OperGet() == GT_STORE_OBJ)
1287 if (blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindRepInstr)
1289 // We need the size of the contiguous Non-GC-region to be in RCX to call rep movsq.
1290 blkSizeRegMask = RBM_RCX;
1292 // The srcAddr must be in a register. If it was under a GT_IND, we need to subsume all of its
1294 sourceRegMask = RBM_RSI;
1295 dstAddrRegMask = RBM_RDI;
1299 switch (blkNode->gtBlkOpKind)
1301 case GenTreeBlk::BlkOpKindUnroll:
1302 // If we have a remainder smaller than XMM_REGSIZE_BYTES, we need an integer temp reg.
1304 // x86 specific note: if the size is odd, the last copy operation would be of size 1 byte.
1305 // But on x86 only RBM_BYTE_REGS could be used as byte registers. Therefore, exclude
1306 // RBM_NON_BYTE_REGS from internal candidates.
1307 if ((size & (XMM_REGSIZE_BYTES - 1)) != 0)
1309 info->internalIntCount++;
1310 regMaskTP regMask = allRegs(TYP_INT);
1313 if ((size & 1) != 0)
1315 regMask &= ~RBM_NON_BYTE_REGS;
1318 info->setInternalCandidates(this, regMask);
1321 if (size >= XMM_REGSIZE_BYTES)
1323 // If we have a buffer larger than XMM_REGSIZE_BYTES,
1324 // reserve an XMM register to use it for a
1325 // series of 16-byte loads and stores.
1326 info->internalFloatCount = 1;
1327 info->addInternalCandidates(this, internalFloatRegCandidates());
1328 // This uses an XMM reg for the loads and stores, so check whether AVX
1329 // instructions are used for codegen and set the ContainsAVX flag.
1330 SetContainsAVXFlags();
1334 case GenTreeBlk::BlkOpKindRepInstr:
1335 // rep movs has the following register requirements:
1336 // a) The dest address has to be in RDI.
1337 // b) The src address has to be in RSI.
1338 // c) The buffer size will go in RCX.
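// For illustration only -- an assumed copy-block expansion using these registers:
//    mov rdi, dstAddrReg ; destination
//    mov rsi, srcAddrReg ; source
//    mov rcx, countReg   ; element count
//    rep movsq           ; copy 8 bytes from [RSI] to [RDI], RCX times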
1339 dstAddrRegMask = RBM_RDI;
1340 sourceRegMask = RBM_RSI;
1341 blkSizeRegMask = RBM_RCX;
1344 case GenTreeBlk::BlkOpKindHelper:
1345 #ifdef _TARGET_AMD64_
1346 // The helper follows the regular AMD64 ABI.
1347 dstAddrRegMask = RBM_ARG_0;
1348 sourceRegMask = RBM_ARG_1;
1349 blkSizeRegMask = RBM_ARG_2;
1350 #else // !_TARGET_AMD64_
1351 dstAddrRegMask = RBM_RDI;
1352 sourceRegMask = RBM_RAX;
1353 blkSizeRegMask = RBM_RCX;
1354 #endif // !_TARGET_AMD64_
1363 if (dstAddrInfo != nullptr)
1365 if (dstAddrRegMask != RBM_NONE)
1367 dstAddrInfo->info.setSrcCandidates(this, dstAddrRegMask);
1369 useList.Append(dstAddrInfo);
1371 if (sourceRegMask != RBM_NONE)
1373 if (sourceInfo != nullptr)
1375 sourceInfo->info.setSrcCandidates(this, sourceRegMask);
1379 // This is a local source; we'll use a temp register for its address.
1380 info->addInternalCandidates(this, sourceRegMask);
1381 info->internalIntCount++;
1384 if (sourceInfo != nullptr)
1386 useList.Add(sourceInfo, blkNode->IsReverseOp());
1389 if (blkNode->OperIs(GT_STORE_DYN_BLK))
1391 // The block size argument is a third argument to GT_STORE_DYN_BLK
1394 GenTree* blockSize = blkNode->AsDynBlk()->gtDynamicSize;
1395 sizeInfo = getLocationInfo(blockSize);
1396 useList.Add(sizeInfo, blkNode->AsDynBlk()->gtEvalSizeFirst);
1399 if (blkSizeRegMask != RBM_NONE)
1403 // Reserve a temp register for the block size argument.
1404 info->addInternalCandidates(this, blkSizeRegMask);
1405 info->internalIntCount++;
1409 // The block size argument is a third argument to GT_STORE_DYN_BLK
1410 assert((blkNode->gtOper == GT_STORE_DYN_BLK) && (sizeInfo != nullptr));
1411 info->setSrcCount(3);
1412 sizeInfo->info.setSrcCandidates(this, blkSizeRegMask);
1417 #ifdef FEATURE_PUT_STRUCT_ARG_STK
1418 //------------------------------------------------------------------------
1419 // BuildPutArgStk: Set the NodeInfo for a GT_PUTARG_STK.
1422 //    tree - The node of interest
1427 void LinearScan::BuildPutArgStk(GenTreePutArgStk* putArgStk)
1429 TreeNodeInfo* info = currentNodeInfo;
1431 assert(info->dstCount == 0);
1433 if (putArgStk->gtOp1->gtOper == GT_FIELD_LIST)
1435 putArgStk->gtOp1->SetContained();
1438 unsigned fieldCount = 0;
1439 bool needsByteTemp = false;
1440 bool needsSimdTemp = false;
1441 unsigned prevOffset = putArgStk->getArgSize();
1442 for (GenTreeFieldList* current = putArgStk->gtOp1->AsFieldList(); current != nullptr; current = current->Rest())
1444 GenTree* const fieldNode = current->Current();
1445 const var_types fieldType = fieldNode->TypeGet();
1446 const unsigned fieldOffset = current->gtFieldOffset;
1447 assert(fieldType != TYP_LONG);
1449 #if defined(FEATURE_SIMD)
1450 // Note that we need to check the GT_FIELD_LIST type, not 'fieldType'. This is because the
1451 // GT_FIELD_LIST will be TYP_SIMD12 whereas the fieldType might be TYP_SIMD16 for lclVar, where
1452 // we "round up" to 16.
1453 if (current->gtFieldType == TYP_SIMD12)
1455 needsSimdTemp = true;
1457 #endif // defined(FEATURE_SIMD)
1459 // We can treat as a slot any field that is stored at a slot boundary, where the previous
1460 // field is not in the same slot. (Note that we store the fields in reverse order.)
1461 const bool fieldIsSlot = ((fieldOffset % 4) == 0) && ((prevOffset - fieldOffset) >= 4);
1464 if (varTypeIsByte(fieldType))
1466 // If this field is a slot--i.e. it is an integer field that is 4-byte aligned and takes up 4 bytes
1467 // (including padding)--we can store the whole value rather than just the byte. Otherwise, we will
1468 // need a byte-addressable register for the store. We will enforce this requirement on an internal
1469 // register, which we can use to copy multiple byte values.
1470 needsByteTemp = true;
1474 if (varTypeIsGC(fieldType))
1476 putArgStk->gtNumberReferenceSlots++;
1478 prevOffset = fieldOffset;
1480 if (!fieldNode->isContained())
1482 appendLocationInfoToList(fieldNode);
1487 if (putArgStk->gtPutArgStkKind == GenTreePutArgStk::Kind::Push)
1489 // If any of the fields cannot be stored with an actual push, we may need a temporary
1490 // register to load the value before storing it to the stack location.
1491 info->internalIntCount = 1;
1492 regMaskTP regMask = allRegs(TYP_INT);
1495 regMask &= ~RBM_NON_BYTE_REGS;
1497 info->setInternalCandidates(this, regMask);
1500 #if defined(FEATURE_SIMD)
1501 // For PutArgStk of a TYP_SIMD12, we need a SIMD temp register.
1504 assert(info->dstCount == 0);
1505 info->internalFloatCount += 1;
1506 info->addInternalCandidates(this, allSIMDRegs());
1508 #endif // defined(FEATURE_SIMD)
1511 #endif // _TARGET_X86_
1514 GenTree* src = putArgStk->gtOp1;
1515 var_types type = src->TypeGet();
1517 #if defined(FEATURE_SIMD) && defined(_TARGET_X86_)
1518 // For PutArgStk of a TYP_SIMD12, we need an extra register.
1519 if (putArgStk->isSIMD12())
1521 appendLocationInfoToList(putArgStk->gtOp1);
1523 info->internalFloatCount = 1;
1524 info->setInternalCandidates(this, allSIMDRegs());
1527 #endif // defined(FEATURE_SIMD) && defined(_TARGET_X86_)
1529 if (type != TYP_STRUCT)
1531 BuildSimple(putArgStk);
1535 GenTree* dst = putArgStk;
1536 GenTree* srcAddr = nullptr;
1538 info->srcCount = GetOperandInfo(src);
1540 // If we have a buffer between XMM_REGSIZE_BYTES and CPBLK_UNROLL_LIMIT bytes, we'll use SSE2.
1541 // Structs and buffers with sizes <= CPBLK_UNROLL_LIMIT bytes occur in more than 95% of
1542 // our framework assemblies, so this is the main code generation scheme we'll use.
1543 ssize_t size = putArgStk->gtNumSlots * TARGET_POINTER_SIZE;
1544 switch (putArgStk->gtPutArgStkKind)
1546 case GenTreePutArgStk::Kind::Push:
1547 case GenTreePutArgStk::Kind::PushAllSlots:
1548 case GenTreePutArgStk::Kind::Unroll:
1549 // If we have a remainder smaller than XMM_REGSIZE_BYTES, we need an integer temp reg.
1551 // x86 specific note: if the size is odd, the last copy operation would be of size 1 byte.
1552 // But on x86 only RBM_BYTE_REGS could be used as byte registers. Therefore, exclude
1553 // RBM_NON_BYTE_REGS from internal candidates.
1554 if ((putArgStk->gtNumberReferenceSlots == 0) && (size & (XMM_REGSIZE_BYTES - 1)) != 0)
1556 info->internalIntCount++;
1557 regMaskTP regMask = allRegs(TYP_INT);
1560 if ((size % 2) != 0)
1562 regMask &= ~RBM_NON_BYTE_REGS;
1565 info->setInternalCandidates(this, regMask);
1570 #else // !_TARGET_X86_
1571 if (size >= XMM_REGSIZE_BYTES)
1572 #endif // !_TARGET_X86_
1574 // If we have a buffer larger than or equal to XMM_REGSIZE_BYTES on x64/ux,
1575 // or larger than or equal to 8 bytes on x86, reserve an XMM register to use it for a
1576 // series of 16-byte loads and stores.
1577 info->internalFloatCount = 1;
1578 info->addInternalCandidates(this, internalFloatRegCandidates());
1579 SetContainsAVXFlags();
1583 case GenTreePutArgStk::Kind::RepInstr:
1584 info->internalIntCount += 3;
1585 info->setInternalCandidates(this, (RBM_RDI | RBM_RCX | RBM_RSI));
1592 #endif // FEATURE_PUT_STRUCT_ARG_STK
1594 //------------------------------------------------------------------------
1595 // BuildLclHeap: Set the NodeInfo for a GT_LCLHEAP.
1598 //    tree - The node of interest
1603 void LinearScan::BuildLclHeap(GenTree* tree)
1605 TreeNodeInfo* info = currentNodeInfo;
1607 assert(info->dstCount == 1);
1609 // Need a variable number of temp regs (see genLclHeap() in codegenamd64.cpp):
1610 // Here '-' means don't care.
1612 //     Size?                     Init Memory?   # temp regs
1613 //     0                         -              0 (returns 0)
1614 //     const and <=6 reg words   -              0 (pushes '0')
1615 //     const and >6 reg words    Yes            0 (pushes '0')
1616 //     const and <PageSize       No             0 (amd64), 1 (x86)
1617 //                                                (x86: tmpReg for subtracting from esp)
1618 //     const and >=PageSize      No             2 (regCnt and tmpReg for subtracting from sp)
1619 //     Non-const                 Yes            0 (regCnt=targetReg and pushes '0')
1620 //     Non-const                 No             2 (regCnt and tmpReg for subtracting from sp)
1622 // Note: Here we don't need internal register to be different from targetReg.
1623 // Rather, require it to be different from operand's reg.
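// For illustration only -- an assumed expansion for a constant size of <=6 reg-sized
// words, which needs no temp registers because each push both zeroes and allocates:
//    push 0              ; repeated once per reg-sized word of the allocation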
1625 GenTree* size = tree->gtOp.gtOp1;
1626 if (size->IsCnsIntOrI())
1628 assert(size->isContained());
1630 size_t sizeVal = size->gtIntCon.gtIconVal;
1634 info->internalIntCount = 0;
1638 // Round the requested size up to STACK_ALIGN.
1639 // Note: The GenTree node is not updated here as it is cheap to recompute the stack-aligned size.
1640 // This should also help in debugging as we can examine the original size specified with localloc.
1641 sizeVal = AlignUp(sizeVal, STACK_ALIGN);
1643 // For small allocations up to 6 pointer sized words (i.e. 48 bytes of localloc)
1644 // we will generate 'push 0'.
1645 assert((sizeVal % REGSIZE_BYTES) == 0);
1646 size_t cntRegSizedWords = sizeVal / REGSIZE_BYTES;
1647 if (cntRegSizedWords <= 6)
1649 info->internalIntCount = 0;
1651 else if (!compiler->info.compInitMem)
1653 // No need to initialize allocated stack space.
1654 if (sizeVal < compiler->eeGetPageSize())
1657 info->internalIntCount = 1; // x86 needs a register here to avoid generating "sub" on ESP.
1658 #else // !_TARGET_X86_
1659 info->internalIntCount = 0;
1660 #endif // !_TARGET_X86_
1664 // We need two registers: regCnt and RegTmp
1665 info->internalIntCount = 2;
1670 // >6 reg-sized words, and we need to zero-initialize the allocated stack space.
1671 info->internalIntCount = 0;
1677 appendLocationInfoToList(size);
1678 if (!compiler->info.compInitMem)
1680 info->internalIntCount = 2;
1684 info->internalIntCount = 0;
1689 //------------------------------------------------------------------------
1690 // BuildModDiv: Set the NodeInfo for GT_MOD/GT_DIV/GT_UMOD/GT_UDIV.
1693 //    tree - The node of interest
1698 void LinearScan::BuildModDiv(GenTree* tree)
1700 TreeNodeInfo* info = currentNodeInfo;
1701 GenTree* op1 = tree->gtGetOp1();
1702 GenTree* op2 = tree->gtGetOp2();
1704 assert(info->dstCount == 1);
1706 if (varTypeIsFloating(tree->TypeGet()))
1708 info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
1712 // Amd64 div/idiv instructions:
1713 // take the dividend in RDX:RAX and compute
1714 // the quotient in RAX and the remainder in RDX.
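// For illustration only -- an assumed expansion of a signed 32-bit division:
//    mov  eax, dividendReg   ; the dividend must be in EAX
//    cdq                     ; sign-extend EAX into EDX (dividend is EDX:EAX)
//    idiv divisorReg         ; quotient -> EAX, remainder -> EDX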
1716 if (tree->OperGet() == GT_MOD || tree->OperGet() == GT_UMOD)
1718 // We are interested in just the remainder.
1719 // RAX is used as a trashable register during computation of remainder.
1720 info->setDstCandidates(this, RBM_RDX);
1724 // We are interested in just the quotient.
1725 // RDX gets used as trashable register during computation of quotient
1726 info->setDstCandidates(this, RBM_RAX);
1730 if (op1->OperGet() == GT_LONG)
1732 assert(op1->isContained());
1734 // To avoid a reg move, we would like to have op1's low part in RAX and its high part in RDX.
1735 GenTree* loVal = op1->gtGetOp1();
1736 GenTree* hiVal = op1->gtGetOp2();
1738 assert(op2->IsCnsIntOrI());
1739 assert(tree->OperGet() == GT_UMOD);
1741 // This situation also requires an internal register.
1742 info->internalIntCount = 1;
1743 info->setInternalCandidates(this, allRegs(TYP_INT));
1745 LocationInfoListNode* loValInfo = getLocationInfo(loVal);
1746 LocationInfoListNode* hiValInfo = getLocationInfo(hiVal);
1747 loValInfo->info.setSrcCandidates(this, RBM_EAX);
1748 hiValInfo->info.setSrcCandidates(this, RBM_EDX);
1749 useList.Append(loValInfo);
1750 useList.Append(hiValInfo);
1756 // If possible, we would like to have op1 in RAX to avoid a register move.
1757 LocationInfoListNode* op1Info = getLocationInfo(op1);
1758 op1Info->info.setSrcCandidates(this, RBM_RAX);
1759 useList.Append(op1Info);
1763 LocationInfoListNode* op2Info;
1764 info->srcCount += GetOperandInfo(op2, &op2Info);
1765 for (; op2Info != nullptr; op2Info = op2Info->Next())
1767 op2Info->info.setSrcCandidates(this, allRegs(TYP_INT) & ~(RBM_RAX | RBM_RDX));
1771 //------------------------------------------------------------------------
1772 // BuildIntrinsic: Set the NodeInfo for a GT_INTRINSIC.
1775 //    tree - The node of interest
1780 void LinearScan::BuildIntrinsic(GenTree* tree)
1782 TreeNodeInfo* info = currentNodeInfo;
1783 // Both the operand and its result must be of floating point type.
1784 GenTree* op1 = tree->gtGetOp1();
1785 assert(varTypeIsFloating(op1));
1786 assert(op1->TypeGet() == tree->TypeGet());
1788 info->srcCount = GetOperandInfo(op1);
1789 assert(info->dstCount == 1);
1791 switch (tree->gtIntrinsic.gtIntrinsicId)
1793 case CORINFO_INTRINSIC_Sqrt:
1796 case CORINFO_INTRINSIC_Abs:
1797 // Abs(float x)  = x & 0x7fffffff
1798 // Abs(double x) = x & 0x7fffffffffffffff
1800 // In the case of Abs, we need an internal register to hold the mask.
1802 // TODO-XArch-CQ: avoid using an internal register for the mask.
1803 // Both andps and andpd operate on 128-bit operands.
1804 // The data section constant that holds the mask is only 64 bits in size.
1805 // Therefore, we need both the operand and the mask to be in an
1806 // xmm register. When we add support in the emitter for emitting 128-bit
1807 // data constants and instructions that operate on 128-bit
1808 // memory operands, we can avoid the need for an internal register.
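// For illustration only -- an assumed expansion (the label name is hypothetical):
//    movsd xmm1, qword ptr [@AbsMask]    ; load 0x7fffffffffffffff into the internal reg
//    andps xmm0, xmm1                    ; clear the sign bit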
1809 if (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Abs)
1811 info->internalFloatCount = 1;
1812 info->setInternalCandidates(this, internalFloatRegCandidates());
1817 case CORINFO_INTRINSIC_Cos:
1818 case CORINFO_INTRINSIC_Sin:
1819 NYI_X86("Math intrinsics Cos and Sin");
1821 #endif // _TARGET_X86_
1823 case CORINFO_INTRINSIC_Round:
1824 case CORINFO_INTRINSIC_Ceiling:
1825 case CORINFO_INTRINSIC_Floor:
1826 #if defined(LEGACY_BACKEND)
1827 NYI_X86("Math intrinsics Round, Ceiling, and Floor");
1828 #endif // LEGACY_BACKEND
1832 // Right now only Sqrt/Abs are treated as math intrinsics
1833 noway_assert(!"Unsupported math intrinsic");
1840 //------------------------------------------------------------------------
1841 // BuildSIMD: Set the NodeInfo for a GT_SIMD tree.
1844 //    tree - The GT_SIMD node of interest
1849 void LinearScan::BuildSIMD(GenTreeSIMD* simdTree)
1851 TreeNodeInfo* info = currentNodeInfo;
1852 // Only SIMDIntrinsicInit can be contained. Other than that,
1853 // only SIMDIntrinsicOpEquality and SIMDIntrinsicOpInEquality can have 0 dstCount.
1854 if (simdTree->isContained())
1856 assert(simdTree->gtSIMDIntrinsicID == SIMDIntrinsicInit);
1858 else if (info->dstCount != 1)
1860 assert((simdTree->gtSIMDIntrinsicID == SIMDIntrinsicOpEquality) ||
1861 (simdTree->gtSIMDIntrinsicID == SIMDIntrinsicOpInEquality));
1863 SetContainsAVXFlags(true, simdTree->gtSIMDSize);
1864 GenTree* op1 = simdTree->gtOp.gtOp1;
1865 GenTree* op2 = simdTree->gtOp.gtOp2;
1867 if (!op1->OperIs(GT_LIST))
1869 info->srcCount += GetOperandInfo(op1);
1871 if ((op2 != nullptr) && !op2->isContained())
1873 info->srcCount += GetOperandInfo(op2);
1876 switch (simdTree->gtSIMDIntrinsicID)
1878 case SIMDIntrinsicInit:
1880 // This sets all fields of a SIMD struct to the given value.
1881 // Mark op1 as contained if it is either zero or an int constant of all 1's,
1882 // or a float constant with a 16 or 32 byte simdType (AVX case)
1884 // Should never see small int base type vectors except for zero initialization.
1885 assert(!varTypeIsSmallInt(simdTree->gtSIMDBaseType) || op1->IsIntegralConst(0));
1887 #if !defined(_TARGET_64BIT_)
1888 if (op1->OperGet() == GT_LONG)
1890 assert(op1->isContained());
1891 GenTree* op1lo = op1->gtGetOp1();
1892 GenTree* op1hi = op1->gtGetOp2();
1894 if (op1lo->isContained())
1896 assert(op1hi->isContained());
1897 assert((op1lo->IsIntegralConst(0) && op1hi->IsIntegralConst(0)) ||
1898 (op1lo->IsIntegralConst(-1) && op1hi->IsIntegralConst(-1)));
1899 assert(info->srcCount == 0);
1903 assert(info->srcCount == 2);
1904 info->internalFloatCount = 1;
1905 info->setInternalCandidates(this, allSIMDRegs());
1906 info->isInternalRegDelayFree = true;
1909 #endif // !defined(_TARGET_64BIT_)
1913 case SIMDIntrinsicInitN:
1915 var_types baseType = simdTree->gtSIMDBaseType;
1916 info->srcCount = (short)(simdTree->gtSIMDSize / genTypeSize(baseType));
1918 for (GenTree* list = op1; list != nullptr; list = list->gtGetOp2())
1920 assert(list->OperGet() == GT_LIST);
1921 GenTree* listItem = list->gtGetOp1();
1922 assert(listItem->TypeGet() == baseType);
1923 assert(!listItem->isContained());
1924 appendLocationInfoToList(listItem);
1927 assert(initCount == info->srcCount);
1929 // Need an internal register to stitch together all the values into a single vector in a SIMD reg.
1930 info->internalFloatCount = 1;
1931 info->setInternalCandidates(this, allSIMDRegs());
1935 case SIMDIntrinsicInitArray:
1936 // We have an array and an index, which may be contained.
1937 assert(info->srcCount == (simdTree->gtGetOp2()->isContained() ? 1 : 2));
1940 case SIMDIntrinsicDiv:
1941 // SSE2 has no instruction support for division on integer vectors
1942 noway_assert(varTypeIsFloating(simdTree->gtSIMDBaseType));
1943 assert(info->srcCount == 2);
1946 case SIMDIntrinsicAbs:
1947 // float/double vectors: This gets implemented as a bitwise-And operation
1948 // with a mask, and hence we should never see it here.
1950 // Must be a Vector<int>, Vector<short> or Vector<sbyte>
1951 assert(simdTree->gtSIMDBaseType == TYP_INT || simdTree->gtSIMDBaseType == TYP_SHORT ||
1952 simdTree->gtSIMDBaseType == TYP_BYTE);
1953 assert(compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported);
1954 assert(info->srcCount == 1);
1957 case SIMDIntrinsicSqrt:
1958 // SSE2 has no instruction support for sqrt on integer vectors.
1959 noway_assert(varTypeIsFloating(simdTree->gtSIMDBaseType));
1960 assert(info->srcCount == 1);
1963 case SIMDIntrinsicAdd:
1964 case SIMDIntrinsicSub:
1965 case SIMDIntrinsicMul:
1966 case SIMDIntrinsicBitwiseAnd:
1967 case SIMDIntrinsicBitwiseAndNot:
1968 case SIMDIntrinsicBitwiseOr:
1969 case SIMDIntrinsicBitwiseXor:
1970 case SIMDIntrinsicMin:
1971 case SIMDIntrinsicMax:
1972 assert(info->srcCount == 2);
1974 // SSE2 32-bit integer multiplication requires two temp regs
1975 if (simdTree->gtSIMDIntrinsicID == SIMDIntrinsicMul && simdTree->gtSIMDBaseType == TYP_INT &&
1976 compiler->getSIMDSupportLevel() == SIMD_SSE2_Supported)
1978 info->internalFloatCount = 2;
1979 info->setInternalCandidates(this, allSIMDRegs());
1983 case SIMDIntrinsicEqual:
1984 assert(info->srcCount == 2);
1987 // SSE2 doesn't support < and <= directly on int vectors.
1988 // Instead we need to use > and >= with swapped operands.
1989 case SIMDIntrinsicLessThan:
1990 case SIMDIntrinsicLessThanOrEqual:
1991 assert(info->srcCount == 2);
1992 noway_assert(!varTypeIsIntegral(simdTree->gtSIMDBaseType));
1995 // SIMDIntrinsicEqual is supported only on non-floating point base type vectors.
1996 // SSE2 cmpps/pd doesn't support > and >= directly on float/double vectors.
1997 // Instead we need to use < and <= with swapped operands.
1998 case SIMDIntrinsicGreaterThan:
1999 noway_assert(!varTypeIsFloating(simdTree->gtSIMDBaseType));
2000 assert(info->srcCount == 2);
2003 case SIMDIntrinsicOpEquality:
2004 case SIMDIntrinsicOpInEquality:
2005 if (simdTree->gtGetOp2()->isContained())
2007 // If the second operand is contained then ContainCheckSIMD has determined
2008 // that PTEST can be used. We only need a single source register and no
2009 // internal registers.
2010 assert(info->srcCount == 1);
2014 // Can't use PTEST so we need 2 source registers, 1 internal SIMD register
2015 // (to hold the result of PCMPEQD or other similar SIMD compare instruction)
2016 // and one internal INT register (to hold the result of PMOVMSKB).
2017 assert(info->srcCount == 2);
2018 info->internalFloatCount = 1;
2019 info->setInternalCandidates(this, allSIMDRegs());
2020 info->internalIntCount = 1;
2021 info->addInternalCandidates(this, allRegs(TYP_INT));
2023 // These SIMD nodes only set the condition flags.
2027 case SIMDIntrinsicDotProduct:
2028 // Float/Double vectors:
2029 // For SSE, or AVX with 32-byte vectors, we also need an internal register
2030 // as scratch. Further we need the targetReg and internal reg to be distinct
2031 // registers. Note that if this is a TYP_SIMD16 or smaller on AVX, then we
2032 // don't need a tmpReg.
2034 // 32-byte integer vector on SSE4/AVX:
2035 // will take advantage of phaddd, which operates only on 128-bit xmm reg.
2036 // This will need 1 (in case of SSE4) or 2 (in case of AVX) internal
2037 // registers since targetReg is an int type register.
2039 // See genSIMDIntrinsicDotProduct() for details on code sequence generated
2040 // and the need for scratch registers.
2041 if (varTypeIsFloating(simdTree->gtSIMDBaseType))
2043 if ((compiler->getSIMDSupportLevel() == SIMD_SSE2_Supported) ||
2044 (simdTree->gtOp.gtOp1->TypeGet() == TYP_SIMD32))
2046 info->internalFloatCount = 1;
2047 info->isInternalRegDelayFree = true;
2048 info->setInternalCandidates(this, allSIMDRegs());
2050 // else don't need scratch reg(s).
2054 assert(simdTree->gtSIMDBaseType == TYP_INT && compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported);
2056 // No need to set isInternalRegDelayFree since targetReg is
2057 // an int type reg and is guaranteed to be different from xmm/ymm
2059 info->internalFloatCount = (compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported) ? 2 : 1;
2060 info->setInternalCandidates(this, allSIMDRegs());
2062 assert(info->srcCount == 2);
2065 case SIMDIntrinsicGetItem:
2067 // This implements get_Item method. The sources are:
2068 // - the source SIMD struct
2069 // - index (which element to get)
2070 // The result is baseType of SIMD struct.
2071 // op1 may be a contained memory op, but if so we will consume its address.
2072 // op2 may be a contained constant.
2073 op1 = simdTree->gtOp.gtOp1;
2074 op2 = simdTree->gtOp.gtOp2;
2076 if (!op1->isContained())
2078 // If the index is not a constant, we will use the SIMD temp location to store the vector.
2079 // Otherwise, if the baseType is floating point, the targetReg will be a xmm reg and we
2080 // can use that in the process of extracting the element.
2082 // If the index is a constant and the base type is a small int, we can use pextrw, but on AVX
2083 // we will need a temp if we are indexing into the upper half of the AVX register.
2084 // In all other cases with constant index, we need a temp xmm register to extract the
2085 // element if index is other than zero.
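// For illustration only -- an assumed extraction of element 2 of a Vector<int>
// using a temp xmm register (since the index is a non-zero constant):
//    pshufd xmm1, xmm0, 0xAA   ; broadcast element 2 into every lane of the temp
//    movd   eax, xmm1          ; move the selected element to the int targetReg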
2087 if (!op2->IsCnsIntOrI())
2089 (void)compiler->getSIMDInitTempVarNum();
2091 else if (!varTypeIsFloating(simdTree->gtSIMDBaseType))
2094 if (varTypeIsSmallInt(simdTree->gtSIMDBaseType) &&
2095 (compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported))
2097 int byteShiftCnt = (int)op2->AsIntCon()->gtIconVal * genTypeSize(simdTree->gtSIMDBaseType);
2098 needFloatTemp = (byteShiftCnt >= 16);
2102 needFloatTemp = !op2->IsIntegralConst(0);
2107 info->internalFloatCount = 1;
2108 info->setInternalCandidates(this, allSIMDRegs());
2115 case SIMDIntrinsicSetX:
2116 case SIMDIntrinsicSetY:
2117 case SIMDIntrinsicSetZ:
2118 case SIMDIntrinsicSetW:
2119 assert(info->srcCount == 2);
2121 // We need an internal integer register for SSE2 codegen
2122 if (compiler->getSIMDSupportLevel() == SIMD_SSE2_Supported)
2124 info->internalIntCount = 1;
2125 info->setInternalCandidates(this, allRegs(TYP_INT));

        case SIMDIntrinsicCast:
            assert(info->srcCount == 1);
            break;

        case SIMDIntrinsicConvertToSingle:
            assert(info->srcCount == 1);
            if (simdTree->gtSIMDBaseType == TYP_UINT)
            {
                // We need an internal register different from targetReg.
                info->isInternalRegDelayFree = true;
                info->internalIntCount       = 1;
                info->internalFloatCount     = 2;
                info->setInternalCandidates(this, allSIMDRegs() | allRegs(TYP_INT));
            }
            break;

        case SIMDIntrinsicConvertToInt32:
            assert(info->srcCount == 1);
            break;

        case SIMDIntrinsicWidenLo:
        case SIMDIntrinsicWidenHi:
            assert(info->srcCount == 1);
            if (varTypeIsIntegral(simdTree->gtSIMDBaseType))
            {
                // We need an internal register different from targetReg.
                info->isInternalRegDelayFree = true;
                info->internalFloatCount     = 1;
                info->setInternalCandidates(this, allSIMDRegs());
            }
            break;

        case SIMDIntrinsicConvertToInt64:
            assert(info->srcCount == 1);
            // We need an internal register different from targetReg.
            info->isInternalRegDelayFree = true;
            info->internalIntCount       = 1;
            if (compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported)
            {
                info->internalFloatCount = 2;
            }
            else
            {
                info->internalFloatCount = 1;
            }
            info->setInternalCandidates(this, allSIMDRegs() | allRegs(TYP_INT));
            break;

        case SIMDIntrinsicConvertToDouble:
            assert(info->srcCount == 1);
            // We need an internal register different from targetReg.
            info->isInternalRegDelayFree = true;
            info->internalIntCount       = 1;
#ifdef _TARGET_X86_
            if (simdTree->gtSIMDBaseType == TYP_LONG)
            {
                info->internalFloatCount = 3;
            }
            else
#endif
            if ((compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported) || (simdTree->gtSIMDBaseType == TYP_ULONG))
            {
                info->internalFloatCount = 2;
            }
            else
            {
                info->internalFloatCount = 1;
            }
            info->setInternalCandidates(this, allSIMDRegs() | allRegs(TYP_INT));
            break;

        case SIMDIntrinsicNarrow:
            assert(info->srcCount == 2);
            // We need an internal register different from targetReg.
            info->isInternalRegDelayFree = true;
            if ((compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported) && (simdTree->gtSIMDBaseType != TYP_DOUBLE))
            {
                info->internalFloatCount = 2;
            }
            else
            {
                info->internalFloatCount = 1;
            }
            info->setInternalCandidates(this, allSIMDRegs());
            break;

        case SIMDIntrinsicShuffleSSE2:
            assert(info->srcCount == 1);
            // The second operand is an integer constant and marked as contained.
            assert(simdTree->gtOp.gtOp2->isContainedIntOrIImmed());
            break;

        case SIMDIntrinsicGetX:
        case SIMDIntrinsicGetY:
        case SIMDIntrinsicGetZ:
        case SIMDIntrinsicGetW:
        case SIMDIntrinsicGetOne:
        case SIMDIntrinsicGetZero:
        case SIMDIntrinsicGetCount:
        case SIMDIntrinsicGetAllOnes:
            assert(!"Get intrinsics should not be seen during Lowering.");
            unreached();

        default:
            noway_assert(!"Unimplemented SIMD node type.");
            unreached();
    }
}
#endif // FEATURE_SIMD

#ifdef FEATURE_HW_INTRINSICS
//------------------------------------------------------------------------
// BuildHWIntrinsic: Set the NodeInfo for a GT_HWIntrinsic tree.
//
// Arguments:
//    intrinsicTree - The GT_HWIntrinsic node of interest
//
// Return Value:
//    None.
//
void LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
{
    TreeNodeInfo*  info        = currentNodeInfo;
    NamedIntrinsic intrinsicID = intrinsicTree->gtHWIntrinsicId;
    InstructionSet isa         = Compiler::isaOfHWIntrinsic(intrinsicID);
    if (isa == InstructionSet_AVX || isa == InstructionSet_AVX2)
    {
        SetContainsAVXFlags(true, 32);
    }
    GenTree* op1 = intrinsicTree->gtOp.gtOp1;
    GenTree* op2 = intrinsicTree->gtOp.gtOp2;

    info->srcCount = 0;
    if (op1 != nullptr)
    {
        if (op1->OperIsList())
        {
            for (GenTreeArgList* list = op1->AsArgList(); list != nullptr; list = list->Rest())
            {
                info->srcCount += GetOperandInfo(list->Current());
            }
        }
        else
        {
            info->srcCount += GetOperandInfo(op1);
        }
    }

    if (op2 != nullptr)
    {
        info->srcCount += GetOperandInfo(op2);
    }

    switch (intrinsicID)
    {
        case NI_SSE_CompareEqualOrderedScalar:
        case NI_SSE_CompareEqualUnorderedScalar:
        case NI_SSE_CompareNotEqualOrderedScalar:
        case NI_SSE_CompareNotEqualUnorderedScalar:
        case NI_SSE2_CompareEqualOrderedScalar:
        case NI_SSE2_CompareEqualUnorderedScalar:
        case NI_SSE2_CompareNotEqualOrderedScalar:
        case NI_SSE2_CompareNotEqualUnorderedScalar:
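            // These compares materialize their result with a setcc into a temp register,
            // and setcc can only write a byteable register, hence the RBM_BYTE_REGS
            // candidates below. (Descriptive note; the HW intrinsic codegen is the
            // authoritative source.)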
            info->internalIntCount = 1;
            info->setInternalCandidates(this, RBM_BYTE_REGS);
            break;

        case NI_SSE_SetScalarVector128:
            // Need an internal register to stitch together all the values into a single vector in a SIMD reg.
            info->internalFloatCount = 1;
            info->setInternalCandidates(this, allSIMDRegs());
            break;

        case NI_SSE_Shuffle:
        {
            assert(op1->OperIsList());
            GenTree* op3 = op1->AsArgList()->Rest()->Rest()->Current();

            if (!op3->isContainedIntOrIImmed())
            {
                assert(!op3->IsCnsIntOrI());

                // We need two extra registers when op3 isn't a constant so
                // that the offset into the jump table for the fallback path
                // can be computed.

                info->internalIntCount = 2;
                info->setInternalCandidates(this, allRegs(TYP_INT));
            }
            break;
        }

        case NI_SSE_ConvertToSingle:
        case NI_SSE_StaticCast:
        case NI_SSE2_ConvertToDouble:
            assert(info->srcCount == 1);
            assert(info->dstCount == 1);
            useList.Last()->info.isTgtPref = true;
            break;

        case NI_SSE41_BlendVariable:
            if (!compiler->canUseVexEncoding())
            {
                // SSE4.1 blendv* instructions hardcode the mask vector (op3) in XMM0.
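                // (For reference: the non-VEX blendvps/blendvpd/pblendvb forms take XMM0 as
                // an implicit third operand, while the VEX forms encode the mask explicitly,
                // which is why no fixup is needed when VEX encoding is available.)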
                LocationInfoListNode* op2Info = useList.Begin()->Next();
                LocationInfoListNode* op3Info = op2Info->Next();
                op2Info->info.isDelayFree     = true;
                op3Info->info.isDelayFree     = true;
                op3Info->info.setSrcCandidates(this, RBM_XMM0);
                info->hasDelayFreeSrc = true;
            }
            break;

#ifdef _TARGET_X86_
        case NI_SSE42_Crc32:
        {
            // CRC32 may operate over "byte", but on x86 only RBM_BYTE_REGS can be used as byte registers.
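            // (That is, only EAX/EBX/ECX/EDX have addressable low-byte forms on x86;
            // ESI and EDI do not, so they must be excluded for a byte-sized operand.)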
            //
            // TODO - currently we use the BaseType to bring the type of the second argument
            // to the code generator. We may encode the overload info in another way.
            var_types srcType = intrinsicTree->gtSIMDBaseType;
            if (varTypeIsByte(srcType))
            {
                LocationInfoListNode* op2Info = useList.GetSecond(INDEBUG(intrinsicTree->gtGetOp2()));
                op2Info->info.setSrcCandidates(this, RBM_BYTE_REGS);
            }
            break;
        }
#endif // _TARGET_X86_

        default:
            assert((intrinsicID > NI_HW_INTRINSIC_START) && (intrinsicID < NI_HW_INTRINSIC_END));
            break;
    }
}
#endif // FEATURE_HW_INTRINSICS

//------------------------------------------------------------------------
// BuildCast: Set the NodeInfo for a GT_CAST.
//
// Arguments:
//    tree - The node of interest
//
// Return Value:
//    None.
//
void LinearScan::BuildCast(GenTree* tree)
{
    TreeNodeInfo* info = currentNodeInfo;
    // TODO-XArch-CQ: Int-To-Int conversions - castOp cannot be a memory op and must have an assigned register.
    //         see CodeGen::genIntToIntCast()

    // Non-overflow casts to/from float/double are done using SSE2 instructions,
    // which allow the source operand to be either a reg or a memop. Given the
    // fact that casts from small int to float/double are done as two-level casts,
    // the source operand is always guaranteed to be of size 4 or 8 bytes.
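    // (For instance — illustrative only — an int-to-float cast can be emitted as
    //     cvtsi2ss xmm, r/m32
    // so the integer source may be consumed directly from memory.)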
    var_types castToType = tree->CastToType();
    GenTree*  castOp     = tree->gtCast.CastOp();
    var_types castOpType = castOp->TypeGet();

    info->srcCount = GetOperandInfo(castOp);
    assert(info->dstCount == 1);
    if (tree->gtFlags & GTF_UNSIGNED)
    {
        castOpType = genUnsignedType(castOpType);
    }

    // Some overflow checks need a temp reg:
    //  - GT_CAST from INT64/UINT64 to UINT32
    if (tree->gtOverflow() && (castToType == TYP_UINT))
    {
        if (genTypeSize(castOpType) == 8)
        {
            // Here we don't need the internal register to be different from targetReg;
            // rather, it is required to be different from the operand's reg.
            info->internalIntCount = 1;
        }
    }
}

//-----------------------------------------------------------------------------------------
// BuildIndir: Specify register requirements for the address expression of an indirection operation.
//
// Arguments:
//    indirTree - GT_IND or GT_STOREIND gentree node
//
void LinearScan::BuildIndir(GenTreeIndir* indirTree)
{
    TreeNodeInfo* info = currentNodeInfo;
    // If this is the rhs of a block copy (i.e. non-enregisterable struct),
    // it has no register requirements.
    if (indirTree->TypeGet() == TYP_STRUCT)
    {
        return;
    }

    int indirSrcCount = GetIndirInfo(indirTree);
    if (indirTree->gtOper == GT_STOREIND)
    {
        GenTree* source = indirTree->gtOp.gtOp2;
        if (indirTree->AsStoreInd()->IsRMWMemoryOp())
        {
            // Because 'source' is contained, we haven't yet determined its special register requirements, if any.
            // As it happens, the Shift or Rotate cases are the only ones with special requirements.
            assert(source->isContained() && source->OperIsRMWMemOp());
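            // For example (illustrative): GT_STOREIND(addr, GT_ADD(GT_IND(addr), x)) is an
            // RMW store where the indir is op1 of the ADD, so the only source that may still
            // need a register is 'x'.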
            GenTree* nonMemSource = nullptr;

            if (source->OperIsShiftOrRotate())
            {
                info->srcCount += BuildShiftRotate(source);
            }
            else
            {
                info->srcCount += appendBinaryLocationInfoToList(source->AsOp());
            }
            if (indirTree->AsStoreInd()->IsRMWDstOp1())
            {
                if (source->OperIsBinary())
                {
                    nonMemSource = source->gtOp.gtOp2;
                }
            }
            else if (indirTree->AsStoreInd()->IsRMWDstOp2())
            {
                nonMemSource = source->gtOp.gtOp1;
            }
            if (nonMemSource != nullptr)
            {
                assert(!nonMemSource->isContained() || (!nonMemSource->isMemoryOp() && !nonMemSource->IsLocal()));
#ifdef _TARGET_X86_
                if (varTypeIsByte(indirTree) && !nonMemSource->isContained())
                {
                    // If the storeInd is of TYP_BYTE, restrict the source to byteable registers.
                    TreeNodeInfo& nonMemSourceInfo = useList.GetTreeNodeInfo(nonMemSource);
                    regMaskTP     regMask          = nonMemSourceInfo.getSrcCandidates(this);
                    regMask &= ~RBM_NON_BYTE_REGS;
                    assert(regMask != RBM_NONE);
                    nonMemSourceInfo.setSrcCandidates(this, regMask);
                }
#endif
            }
        }
        else
        {
#ifdef _TARGET_X86_
            if (varTypeIsByte(indirTree) && !source->isContained())
            {
                // If the storeInd is of TYP_BYTE, restrict the source to byteable registers.
                LocationInfoListNode* sourceInfo = getLocationInfo(source);
                regMaskTP             regMask    = sourceInfo->info.getSrcCandidates(this);
                regMask &= ~RBM_NON_BYTE_REGS;
                assert(regMask != RBM_NONE);
                sourceInfo->info.setSrcCandidates(this, regMask);
                useList.Append(sourceInfo);
                info->srcCount++;
            }
            else
#endif
            {
                info->srcCount += GetOperandInfo(source);
            }
        }
    }
    info->srcCount += indirSrcCount;

#ifdef FEATURE_SIMD
    if (indirTree->TypeGet() == TYP_SIMD12)
    {
        // If indirTree is of TYP_SIMD12, addr is not contained. See the comment in LowerIndir().
        assert(!indirTree->Addr()->isContained());

        // Vector3 is read/written as two reads/writes: an 8 byte one and a 4 byte one.
        // To assemble the vector properly we would need an additional
        // xmm register.
        info->internalFloatCount = 1;
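
        // (Roughly, and only as a sketch: the load is an 8-byte movsd of the low half
        // plus a 4-byte movss of the upper element, combined with a shuffle; the store
        // is the mirror image. See the TYP_SIMD12 load/store paths in codegen for the
        // real sequences.)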

        // In the case of GT_IND we need an internal register different from targetReg,
        // since both registers are in use at the same time.
        if (indirTree->OperGet() == GT_IND)
        {
            info->isInternalRegDelayFree = true;
        }

        info->setInternalCandidates(this, allSIMDRegs());

        return;
    }
#endif // FEATURE_SIMD

    assert(indirTree->Addr()->gtOper != GT_ARR_ELEM);
}

//------------------------------------------------------------------------
// BuildMul: Set the NodeInfo for a multiply.
//
// Arguments:
//    tree - The node of interest
//
// Return Value:
//    None.
//
void LinearScan::BuildMul(GenTree* tree)
{
    TreeNodeInfo* info = currentNodeInfo;
#if defined(_TARGET_X86_)
    assert(tree->OperIs(GT_MUL, GT_MULHI, GT_MUL_LONG));
#else
    assert(tree->OperIs(GT_MUL, GT_MULHI));
#endif
    GenTree* op1   = tree->gtOp.gtOp1;
    GenTree* op2   = tree->gtOp.gtOp2;
    info->srcCount = appendBinaryLocationInfoToList(tree->AsOp());
    assert(info->dstCount == 1);

    // Case of float/double mul.
    if (varTypeIsFloating(tree->TypeGet()))
    {
        return;
    }

    bool isUnsignedMultiply    = ((tree->gtFlags & GTF_UNSIGNED) != 0);
    bool requiresOverflowCheck = tree->gtOverflowEx();

    // There are three forms of x86 multiply:
    // one-op form:     RDX:RAX = RAX * r/m
    // two-op form:     reg *= r/m
    // three-op form:   reg = r/m * imm
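    //
    // For illustration, these correspond to instruction shapes like:
    //     mul  rcx            ; one-op:   RDX:RAX = RAX * RCX
    //     imul rax, rcx       ; two-op:   RAX = RAX * RCX
    //     imul rax, rcx, 17   ; three-op: RAX = RCX * 17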

    // This special widening 32x32->64 MUL is not used on x64.
    CLANG_FORMAT_COMMENT_ANCHOR;
#if defined(_TARGET_X86_)
    if (tree->OperGet() != GT_MUL_LONG)
#endif
    {
        assert((tree->gtFlags & GTF_MUL_64RSLT) == 0);
    }

    // We do use the widening multiply to implement
    // the overflow checking for unsigned multiply.
    //
    if (isUnsignedMultiply && requiresOverflowCheck)
    {
        // The only encoding provided is RDX:RAX = RAX * rm.
        //
        // Here we set RAX as the only destination candidate;
        // in LSRA we set the kill set for this operation to RBM_RAX|RBM_RDX.
        //
        info->setDstCandidates(this, RBM_RAX);
    }
    else if (tree->OperGet() == GT_MULHI)
    {
        // Have to use the encoding RDX:RAX = RAX * rm. Since we only care about the
        // upper half of the result, set the destination candidate to RDX.
        info->setDstCandidates(this, RBM_RDX);
    }
#if defined(_TARGET_X86_)
    else if (tree->OperGet() == GT_MUL_LONG)
    {
        // Have to use the encoding RDX:RAX = RAX * rm.
        info->setDstCandidates(this, RBM_RAX);
    }
#endif
    GenTree* containedMemOp = nullptr;
    if (op1->isContained() && !op1->IsCnsIntOrI())
    {
        assert(!op2->isContained() || op2->IsCnsIntOrI());
        containedMemOp = op1;
    }
    else if (op2->isContained() && !op2->IsCnsIntOrI())
    {
        containedMemOp = op2;
    }
    if ((containedMemOp != nullptr) && CheckAndSetDelayFree(containedMemOp))
    {
        info->hasDelayFreeSrc = true;
    }
}

//------------------------------------------------------------------------------
// SetContainsAVXFlags: Set the ContainsAVX flag when the type is a floating point
// type, and set the Contains256bitAVX flag when the SIMD vector size is 32 bytes.
//
// Arguments:
//    isFloatingPointType - true if it is a floating point type
//    sizeOfSIMDVector    - SIMD vector size
//
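// Notes:
//    (Descriptive note:) the emitter consumes these flags to track whether AVX
//    state may be live, e.g. when deciding where a vzeroupper is needed to avoid
//    AVX-SSE transition penalties; the emitter logic is the authoritative source.
//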
void LinearScan::SetContainsAVXFlags(bool isFloatingPointType /* = true */, unsigned sizeOfSIMDVector /* = 0 */)
{
    if (isFloatingPointType && compiler->canUseVexEncoding())
    {
        compiler->getEmitter()->SetContainsAVX(true);
        if (sizeOfSIMDVector == 32)
        {
            compiler->getEmitter()->SetContains256bitAVX(true);
        }
    }
}

#ifdef _TARGET_X86_
//------------------------------------------------------------------------
// ExcludeNonByteableRegisters: Determines if we need to exclude non-byteable
// registers for various reasons.
//
// Arguments:
//    tree - The node of interest
//
// Return Value:
//    true if we need to exclude non-byteable registers; false otherwise.
//
bool LinearScan::ExcludeNonByteableRegisters(GenTree* tree)
{
    // Example1: GT_STOREIND(byte, addr, op2) - a storeind of a byte-sized value from op2 into mem 'addr'.
    // The storeind itself will not produce any value and hence dstCount = 0. But op2 could be a TYP_INT
    // value. In this case we need to exclude esi/edi from the src candidates of op2.
    if (varTypeIsByte(tree))
    {
        return true;
    }
    // Example2: GT_CAST(int <- bool <- int) - here the type of the GT_CAST node is int and castToType is bool.
    else if ((tree->OperGet() == GT_CAST) && varTypeIsByte(tree->CastToType()))
    {
        return true;
    }
    else if (tree->OperIsCompare() || tree->OperIs(GT_CMP))
    {
        GenTree* op1 = tree->gtGetOp1();
        GenTree* op2 = tree->gtGetOp2();

        // Example3: GT_EQ(int, op1 of type ubyte, op2 of type ubyte) - in this case codegen uses
        // ubyte as the result of the comparison and, if the result needs to be materialized into a reg,
        // simply zero extends it to TYP_INT size. Here is an example of generated code:
        //         cmp dl, byte ptr[addr mode]
        //         movzx edx, dl
        if (varTypeIsByte(op1) && varTypeIsByte(op2))
        {
            return true;
        }
        // Example4: GT_EQ(int, op1 of type ubyte, op2 is GT_CNS_INT) - in this case codegen uses
        // ubyte as the result of the comparison and, if the result needs to be materialized into a reg,
        // simply zero extends it to TYP_INT size.
        else if (varTypeIsByte(op1) && op2->IsCnsIntOrI())
        {
            return true;
        }
        // Example5: GT_EQ(int, op1 is GT_CNS_INT, op2 of type ubyte) - in this case codegen uses
        // ubyte as the result of the comparison and, if the result needs to be materialized into a reg,
        // simply zero extends it to TYP_INT size.
        else if (op1->IsCnsIntOrI() && varTypeIsByte(op2))
        {
            return true;
        }
        else
        {
            return false;
        }
    }
#ifdef FEATURE_SIMD
    else if (tree->OperGet() == GT_SIMD)
    {
        GenTreeSIMD* simdNode = tree->AsSIMD();
        switch (simdNode->gtSIMDIntrinsicID)
        {
            case SIMDIntrinsicOpEquality:
            case SIMDIntrinsicOpInEquality:
                // We manifest it into a byte register, so the target must be byteable.
                return true;

            case SIMDIntrinsicGetItem:
            {
                // This logic is duplicated from genSIMDIntrinsicGetItem().
                // When we generate code for a SIMDIntrinsicGetItem, under certain circumstances we need to
                // generate a movzx/movsx. On x86, these require byteable registers. So figure out which
                // cases will require this, so the non-byteable registers can be excluded.

                GenTree*  op1      = simdNode->gtGetOp1();
                GenTree*  op2      = simdNode->gtGetOp2();
                var_types baseType = simdNode->gtSIMDBaseType;
                if (!isContainableMemoryOp(op1) && op2->IsCnsIntOrI() && varTypeIsSmallInt(baseType))
                {
                    bool     ZeroOrSignExtnReqd = true;
                    unsigned baseSize           = genTypeSize(baseType);
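                    // (Hedged reading of genSIMDIntrinsicGetItem: pextrw extracts a full,
                    // zero-extended 16-bit word, so the extension is only avoidable when the
                    // element is exactly that word (ushort) or its shifted-out high byte
                    // (ubyte at an odd index); signed types always need a movsx.)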
                    if (baseSize == 1)
                    {
                        if ((op2->gtIntCon.gtIconVal % 2) == 1)
                        {
                            ZeroOrSignExtnReqd = (baseType == TYP_BYTE);
                        }
                    }
                    else
                    {
                        assert(baseSize == 2);
                        ZeroOrSignExtnReqd = (baseType == TYP_SHORT);
                    }
                    return ZeroOrSignExtnReqd;
                }
                break;
            }

            default:
                break;
        }
        return false;
    }
#endif // FEATURE_SIMD
    else
    {
        return false;
    }
}
#endif // _TARGET_X86_

#endif // _TARGET_XARCH_

#endif // !LEGACY_BACKEND