From 8abf567fb31d01129ab44a955b9c3bd1fe4b3633 Mon Sep 17 00:00:00 2001 From: Jarret Shook Date: Thu, 14 Jun 2018 13:16:45 -0700 Subject: [PATCH] [Windows|Arm64|Vararg] Add FEATURE_ARG_SPLIT (dotnet/coreclr#18346) * [ARM64|Windows|Vararg] Add FEATURE_ARG_SPLIT Enable splitting >8 byte <= 16 byte structs for arm64 varargs between x7 and virtual stack slot 0. * Force notHfa for vararg methods * Correctly pass isVararg * Correct var name Commit migrated from https://github.com/dotnet/coreclr/commit/35c84dfc414a8817ef74df2c2cd0486740fc11f4 --- src/coreclr/src/jit/codegenarmarch.cpp | 14 +++++++----- src/coreclr/src/jit/codegencommon.cpp | 9 ++------ src/coreclr/src/jit/codegenlinear.cpp | 14 +++++++----- src/coreclr/src/jit/codegenlinear.h | 8 +++---- src/coreclr/src/jit/compiler.h | 10 ++++----- src/coreclr/src/jit/compiler.hpp | 4 ++-- src/coreclr/src/jit/gentree.cpp | 28 ++++++++++++++---------- src/coreclr/src/jit/gentree.h | 8 +++---- src/coreclr/src/jit/gtlist.h | 4 ++-- src/coreclr/src/jit/gtstructs.h | 6 ++--- src/coreclr/src/jit/jit.h | 16 +++++++++++--- src/coreclr/src/jit/lclvars.cpp | 34 +++++++++++++++++++++++++++++ src/coreclr/src/jit/lower.cpp | 10 +++++---- src/coreclr/src/jit/lsra.cpp | 16 +++++++++----- src/coreclr/src/jit/lsra.h | 4 ++-- src/coreclr/src/jit/lsraarm64.cpp | 6 +++++ src/coreclr/src/jit/lsraarmarch.cpp | 14 +++++++----- src/coreclr/src/jit/morph.cpp | 40 ++++++++++++++++++++++++---------- 18 files changed, 163 insertions(+), 82 deletions(-) diff --git a/src/coreclr/src/jit/codegenarmarch.cpp b/src/coreclr/src/jit/codegenarmarch.cpp index 7fc11dd..f5053f7 100644 --- a/src/coreclr/src/jit/codegenarmarch.cpp +++ b/src/coreclr/src/jit/codegenarmarch.cpp @@ -325,11 +325,11 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) genPutArgReg(treeNode->AsOp()); break; -#ifdef _TARGET_ARM_ +#if FEATURE_ARG_SPLIT case GT_PUTARG_SPLIT: genPutArgSplit(treeNode->AsPutArgSplit()); break; -#endif // _TARGET_ARM_ +#endif // FEATURE_ARG_SPLIT case 
GT_CALL: genCallInstruction(treeNode->AsCall()); @@ -961,7 +961,7 @@ void CodeGen::genPutArgReg(GenTreeOp* tree) genProduceReg(tree); } -#ifdef _TARGET_ARM_ +#if FEATURE_ARG_SPLIT //--------------------------------------------------------------------- // genPutArgSplit - generate code for a GT_PUTARG_SPLIT node // @@ -1008,6 +1008,7 @@ void CodeGen::genPutArgSplit(GenTreePutArgSplit* treeNode) { var_types type = treeNode->GetRegType(regIndex); regNumber argReg = treeNode->GetRegNumByIdx(regIndex); +#ifdef _TARGET_ARM_ if (type == TYP_LONG) { // We should only see long fields for DOUBLEs passed in 2 integer registers, via bitcast. @@ -1025,6 +1026,7 @@ void CodeGen::genPutArgSplit(GenTreePutArgSplit* treeNode) assert(argReg == treeNode->GetRegNumByIdx(regIndex)); fieldReg = nextArgNode->AsMultiRegOp()->GetRegNumByIdx(1); } +#endif // _TARGET_ARM_ // If child node is not already in the register we need, move it if (argReg != fieldReg) @@ -1175,7 +1177,7 @@ void CodeGen::genPutArgSplit(GenTreePutArgSplit* treeNode) } genProduceReg(treeNode); } -#endif // _TARGET_ARM_ +#endif // FEATURE_ARG_SPLIT //---------------------------------------------------------------------------------- // genMultiRegCallStoreToLocal: store multi-reg return value of a call node to a local @@ -2203,7 +2205,7 @@ void CodeGen::genCallInstruction(GenTreeCall* call) #endif // _TARGET_ARM_ } } -#ifdef _TARGET_ARM_ +#if FEATURE_ARG_SPLIT else if (curArgTabEntry->isSplit) { assert(curArgTabEntry->numRegs >= 1); @@ -2218,7 +2220,7 @@ void CodeGen::genCallInstruction(GenTreeCall* call) } } } -#endif +#endif // FEATURE_ARG_SPLIT else { regNumber argReg = curArgTabEntry->regNum; diff --git a/src/coreclr/src/jit/codegencommon.cpp b/src/coreclr/src/jit/codegencommon.cpp index 52c3587..08480cf 100644 --- a/src/coreclr/src/jit/codegencommon.cpp +++ b/src/coreclr/src/jit/codegencommon.cpp @@ -3298,14 +3298,14 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere } #endif -#ifdef 
_TARGET_ARM64_ +#if defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_) if (compiler->info.compIsVarArgs) { // We've already saved all int registers at the top of stack in the prolog. // No need further action. return; } -#endif +#endif // defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_) unsigned argMax; // maximum argNum value plus 1, (including the RetBuffArg) unsigned argNum; // current argNum, always in [0..argMax-1] @@ -4986,11 +4986,6 @@ void CodeGen::genPushCalleeSavedRegisters() regMaskTP maskSaveRegsFloat = rsPushRegs & RBM_ALLFLOAT; regMaskTP maskSaveRegsInt = rsPushRegs & ~maskSaveRegsFloat; - if (compiler->info.compIsVarArgs) - { - assert(maskSaveRegsFloat == RBM_NONE); - } - int frameType = 0; // This number is arbitrary, is defined below, and corresponds to one of the frame styles we // generate based on various sizes. int calleeSaveSPDelta = 0; diff --git a/src/coreclr/src/jit/codegenlinear.cpp b/src/coreclr/src/jit/codegenlinear.cpp index 8e1efa9..21972f9 100644 --- a/src/coreclr/src/jit/codegenlinear.cpp +++ b/src/coreclr/src/jit/codegenlinear.cpp @@ -998,7 +998,7 @@ void CodeGen::genUnspillRegIfNeeded(GenTree* tree) unspillTree->gtFlags &= ~GTF_SPILLED; } -#ifdef _TARGET_ARM_ +#if FEATURE_ARG_SPLIT else if (unspillTree->OperIsPutArgSplit()) { GenTreePutArgSplit* splitArg = unspillTree->AsPutArgSplit(); @@ -1026,6 +1026,7 @@ void CodeGen::genUnspillRegIfNeeded(GenTree* tree) unspillTree->gtFlags &= ~GTF_SPILLED; } +#ifdef _TARGET_ARM_ else if (unspillTree->OperIsMultiRegOp()) { GenTreeMultiRegOp* multiReg = unspillTree->AsMultiRegOp(); @@ -1052,7 +1053,8 @@ void CodeGen::genUnspillRegIfNeeded(GenTree* tree) unspillTree->gtFlags &= ~GTF_SPILLED; } -#endif +#endif //_TARGET_ARM_ +#endif // FEATURE_ARG_SPLIT else { TempDsc* t = regSet.rsUnspillInPlace(unspillTree, unspillTree->gtRegNum); @@ -1442,7 +1444,7 @@ void CodeGen::genConsumePutStructArgStk(GenTreePutArgStk* putArgNode, } #endif // FEATURE_PUT_STRUCT_ARG_STK -#ifdef _TARGET_ARM_ +#if 
FEATURE_ARG_SPLIT //------------------------------------------------------------------------ // genConsumeArgRegSplit: Consume register(s) in Call node to set split struct argument. // Liveness update for the PutArgSplit node is not needed @@ -1465,7 +1467,7 @@ void CodeGen::genConsumeArgSplitStruct(GenTreePutArgSplit* putArgNode) genCheckConsumeNode(putArgNode); } -#endif +#endif // FEATURE_ARG_SPLIT //------------------------------------------------------------------------ // genPutArgStkFieldList: Generate code for a putArgStk whose source is a GT_FIELD_LIST @@ -1700,7 +1702,7 @@ void CodeGen::genProduceReg(GenTree* tree) } } } -#ifdef _TARGET_ARM_ +#if FEATURE_ARG_SPLIT else if (tree->OperIsPutArgSplit()) { GenTreePutArgSplit* argSplit = tree->AsPutArgSplit(); @@ -1717,6 +1719,7 @@ void CodeGen::genProduceReg(GenTree* tree) } } } +#ifdef _TARGET_ARM_ else if (tree->OperIsMultiRegOp()) { GenTreeMultiRegOp* multiReg = tree->AsMultiRegOp(); @@ -1734,6 +1737,7 @@ void CodeGen::genProduceReg(GenTree* tree) } } #endif // _TARGET_ARM_ +#endif // FEATURE_ARG_SPLIT else { regSet.rsSpillTree(tree->gtRegNum, tree); diff --git a/src/coreclr/src/jit/codegenlinear.h b/src/coreclr/src/jit/codegenlinear.h index 140bc98..6383985 100644 --- a/src/coreclr/src/jit/codegenlinear.h +++ b/src/coreclr/src/jit/codegenlinear.h @@ -43,9 +43,9 @@ void genCodeForCompare(GenTreeOp* tree); void genIntrinsic(GenTree* treeNode); void genPutArgStk(GenTreePutArgStk* treeNode); void genPutArgReg(GenTreeOp* tree); -#ifdef _TARGET_ARM_ +#if FEATURE_ARG_SPLIT void genPutArgSplit(GenTreePutArgSplit* treeNode); -#endif +#endif // FEATURE_ARG_SPLIT #if defined(_TARGET_XARCH_) unsigned getBaseVarForPutArgStk(GenTree* treeNode); @@ -194,9 +194,9 @@ void genConsumeBlockOp(GenTreeBlk* blkNode, regNumber dstReg, regNumber srcReg, #ifdef FEATURE_PUT_STRUCT_ARG_STK void genConsumePutStructArgStk(GenTreePutArgStk* putArgStkNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg); #endif // 
FEATURE_PUT_STRUCT_ARG_STK -#ifdef _TARGET_ARM_ +#if FEATURE_ARG_SPLIT void genConsumeArgSplitStruct(GenTreePutArgSplit* putArgNode); -#endif +#endif // FEATURE_ARG_SPLIT void genConsumeRegs(GenTree* tree); void genConsumeOperands(GenTreeOp* tree); diff --git a/src/coreclr/src/jit/compiler.h b/src/coreclr/src/jit/compiler.h index e08fa1f..2e8d27d 100644 --- a/src/coreclr/src/jit/compiler.h +++ b/src/coreclr/src/jit/compiler.h @@ -1165,9 +1165,9 @@ public: // to be on the stack despite its arg list position. bool isStruct : 1; // True if this is a struct arg bool _isVararg : 1; // True if the argument is in a vararg context. -#ifdef _TARGET_ARM_ +#ifdef FEATURE_ARG_SPLIT bool _isSplit : 1; // True when this argument is split between the registers and OutArg area -#endif +#endif // FEATURE_ARG_SPLIT #ifdef FEATURE_HFA bool _isHfaRegArg : 1; // True when the argument is passed as a HFA in FP registers. bool _isDoubleHfa : 1; // True when the argument is passed as an HFA, with an element type of DOUBLE. 
@@ -1201,15 +1201,15 @@ public: __declspec(property(get = getIsSplit, put = setIsSplit)) bool isSplit; bool getIsSplit() { -#ifdef _TARGET_ARM_ +#ifdef FEATURE_ARG_SPLIT return _isSplit; -#else +#else // FEATURE_ARG_SPLIT return false; #endif } void setIsSplit(bool value) { -#ifdef _TARGET_ARM_ +#ifdef FEATURE_ARG_SPLIT _isSplit = value; #endif } diff --git a/src/coreclr/src/jit/compiler.hpp b/src/coreclr/src/jit/compiler.hpp index f7c7b3d..51c0299 100644 --- a/src/coreclr/src/jit/compiler.hpp +++ b/src/coreclr/src/jit/compiler.hpp @@ -4682,9 +4682,9 @@ void GenTree::VisitOperands(TVisitor visitor) case GT_NULLCHECK: case GT_PUTARG_REG: case GT_PUTARG_STK: -#if defined(_TARGET_ARM_) +#if FEATURE_ARG_SPLIT case GT_PUTARG_SPLIT: -#endif +#endif // FEATURE_ARG_SPLIT case GT_RETURNTRAP: visitor(this->AsUnOp()->gtOp1); return; diff --git a/src/coreclr/src/jit/gentree.cpp b/src/coreclr/src/jit/gentree.cpp index 1bc28e7..5b0e654 100644 --- a/src/coreclr/src/jit/gentree.cpp +++ b/src/coreclr/src/jit/gentree.cpp @@ -289,9 +289,9 @@ void GenTree::InitNodeSize() // TODO-Throughput: This should not need to be a large node. The object info should be // obtained from the child node. GenTree::s_gtNodeSizes[GT_PUTARG_STK] = TREE_NODE_SZ_LARGE; -#if defined(_TARGET_ARM_) +#if FEATURE_ARG_SPLIT GenTree::s_gtNodeSizes[GT_PUTARG_SPLIT] = TREE_NODE_SZ_LARGE; -#endif +#endif // FEATURE_ARG_SPLIT #endif // FEATURE_PUT_STRUCT_ARG_STK assert(GenTree::s_gtNodeSizes[GT_RETURN] == GenTree::s_gtNodeSizes[GT_ASG]); @@ -352,9 +352,9 @@ void GenTree::InitNodeSize() // TODO-Throughput: This should not need to be a large node. The object info should be // obtained from the child node. 
static_assert_no_msg(sizeof(GenTreePutArgStk) <= TREE_NODE_SZ_LARGE); -#if defined(_TARGET_ARM_) +#if FEATURE_ARG_SPLIT static_assert_no_msg(sizeof(GenTreePutArgSplit) <= TREE_NODE_SZ_LARGE); -#endif +#endif // FEATURE_ARG_SPLIT #endif // FEATURE_PUT_STRUCT_ARG_STK #ifdef FEATURE_SIMD @@ -742,7 +742,7 @@ int GenTree::GetRegisterDstCount() const { return gtGetOp1()->GetRegisterDstCount(); } -#if defined(_TARGET_ARM_) +#if FEATURE_ARG_SPLIT else if (OperIsPutArgSplit()) { return (const_cast(this))->AsPutArgSplit()->gtNumRegs; @@ -751,10 +751,14 @@ int GenTree::GetRegisterDstCount() const // either for all double parameters w/SoftFP or for varargs). else { +#ifdef _TARGET_ARM_ assert(OperIsMultiRegOp()); return (TypeGet() == TYP_LONG) ? 2 : 1; +#else + unreached(); +#endif // _TARGET_ARM_ } -#endif // defined(_TARGET_ARM_) +#endif // FEATURE_ARG_SPLIT assert(!"Unexpected multi-reg node"); return 0; } @@ -800,7 +804,7 @@ regMaskTP GenTree::gtGetRegMask() const } } } -#if defined(_TARGET_ARM_) +#if FEATURE_ARG_SPLIT else if (OperIsPutArgSplit()) { GenTree* tree = const_cast(this); @@ -815,7 +819,7 @@ regMaskTP GenTree::gtGetRegMask() const resultMask |= genRegMask(reg); } } -#endif +#endif // FEATURE_ARG_SPLIT else { resultMask = genRegMask(gtRegNum); @@ -5150,7 +5154,7 @@ bool GenTree::TryGetUse(GenTree* def, GenTree*** use) case GT_FIELD_LIST: return TryGetUseList(def, use); -#if defined(_TARGET_ARM_) +#if FEATURE_ARG_SPLIT case GT_PUTARG_SPLIT: if (this->AsUnOp()->gtOp1->gtOper == GT_FIELD_LIST) { @@ -5162,7 +5166,7 @@ bool GenTree::TryGetUse(GenTree* def, GenTree*** use) return true; } return false; -#endif // _TARGET_ARM_ +#endif // FEATURE_ARG_SPLIT #ifdef FEATURE_SIMD case GT_SIMD: @@ -8772,9 +8776,9 @@ GenTreeUseEdgeIterator::GenTreeUseEdgeIterator(GenTree* node) case GT_NULLCHECK: case GT_PUTARG_REG: case GT_PUTARG_STK: -#if defined(_TARGET_ARM_) +#if FEATURE_ARG_SPLIT case GT_PUTARG_SPLIT: -#endif // _TARGET_ARM_ +#endif // FEATURE_ARG_SPLIT case 
GT_RETURNTRAP: m_edge = &m_node->AsUnOp()->gtOp1; assert(*m_edge != nullptr); diff --git a/src/coreclr/src/jit/gentree.h b/src/coreclr/src/jit/gentree.h index 009646d..4baed55 100644 --- a/src/coreclr/src/jit/gentree.h +++ b/src/coreclr/src/jit/gentree.h @@ -1182,9 +1182,9 @@ public: bool OperIsPutArgSplit() const { -#if defined(_TARGET_ARM_) +#if FEATURE_ARG_SPLIT return gtOper == GT_PUTARG_SPLIT; -#else +#else // !FEATURE_ARG_SPLIT return false; #endif } @@ -5195,7 +5195,7 @@ struct GenTreePutArgStk : public GenTreeUnOp #endif }; -#if defined(_TARGET_ARM_) +#if FEATURE_ARG_SPLIT // Represent the struct argument: split value in register(s) and stack struct GenTreePutArgSplit : public GenTreePutArgStk { @@ -5398,7 +5398,7 @@ struct GenTreePutArgSplit : public GenTreePutArgStk } #endif }; -#endif // _TARGET_ARM_ +#endif // FEATURE_ARG_SPLIT // Represents GT_COPY or GT_RELOAD node struct GenTreeCopyOrReload : public GenTreeUnOp diff --git a/src/coreclr/src/jit/gtlist.h b/src/coreclr/src/jit/gtlist.h index bbd9bcb..dd972b6 100644 --- a/src/coreclr/src/jit/gtlist.h +++ b/src/coreclr/src/jit/gtlist.h @@ -294,9 +294,9 @@ GTNODE(PUTARG_REG , GenTreeMultiRegOp ,0,GTK_UNOP) GTNODE(PUTARG_REG , GenTreeOp ,0,GTK_UNOP) // operator that places outgoing arg in register #endif GTNODE(PUTARG_STK , GenTreePutArgStk ,0,GTK_UNOP|GTK_NOVALUE) // operator that places outgoing arg in stack -#if defined(_TARGET_ARM_) +#if FEATURE_ARG_SPLIT GTNODE(PUTARG_SPLIT , GenTreePutArgSplit ,0,GTK_UNOP) // operator that places outgoing arg in registers with stack (split struct in ARM32) -#endif // _TARGET_ARM_ +#endif // FEATURE_ARG_SPLIT GTNODE(RETURNTRAP , GenTreeOp ,0,GTK_UNOP|GTK_NOVALUE) // a conditional call to wait on gc GTNODE(SWAP , GenTreeOp ,0,GTK_BINOP|GTK_NOVALUE) // op1 and op2 swap (registers) GTNODE(IL_OFFSET , GenTreeStmt ,0,GTK_LEAF|GTK_NOVALUE) // marks an IL offset for debugging purposes diff --git a/src/coreclr/src/jit/gtstructs.h b/src/coreclr/src/jit/gtstructs.h index 
fe67a2f..bc64a34 100644 --- a/src/coreclr/src/jit/gtstructs.h +++ b/src/coreclr/src/jit/gtstructs.h @@ -105,12 +105,12 @@ GTSTRUCT_1(Qmark , GT_QMARK) GTSTRUCT_1(PhiArg , GT_PHI_ARG) GTSTRUCT_1(StoreInd , GT_STOREIND) GTSTRUCT_N(Indir , GT_STOREIND, GT_IND, GT_NULLCHECK, GT_BLK, GT_STORE_BLK, GT_OBJ, GT_STORE_OBJ, GT_DYN_BLK, GT_STORE_DYN_BLK) -#if defined(_TARGET_ARM_) +#if FEATURE_ARG_SPLIT GTSTRUCT_2_SPECIAL(PutArgStk, GT_PUTARG_STK, GT_PUTARG_SPLIT) GTSTRUCT_1(PutArgSplit , GT_PUTARG_SPLIT) -#else // !defined(_TARGET_ARM_) +#else // !FEATURE_ARG_SPLIT GTSTRUCT_1(PutArgStk , GT_PUTARG_STK) -#endif // !defined(_TARGET_ARM_) +#endif // !FEATURE_ARG_SPLIT GTSTRUCT_1(PhysReg , GT_PHYSREG) #ifdef FEATURE_SIMD GTSTRUCT_1(SIMD , GT_SIMD) diff --git a/src/coreclr/src/jit/jit.h b/src/coreclr/src/jit/jit.h index 75efc7d..0188608 100644 --- a/src/coreclr/src/jit/jit.h +++ b/src/coreclr/src/jit/jit.h @@ -276,14 +276,15 @@ #define UNIX_AMD64_ABI_ONLY(x) #endif // defined(UNIX_AMD64_ABI) -#if defined(UNIX_AMD64_ABI) || !defined(_TARGET_64BIT_) +#if defined(UNIX_AMD64_ABI) || !defined(_TARGET_64BIT_) || (defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_)) #define FEATURE_PUT_STRUCT_ARG_STK 1 #define PUT_STRUCT_ARG_STK_ONLY_ARG(x) , x #define PUT_STRUCT_ARG_STK_ONLY(x) x -#else // !(defined(UNIX_AMD64_ABI) || !defined(_TARGET_64BIT_)) +#else // !(defined(UNIX_AMD64_ABI) && defined(_TARGET_64BIT_) && !(defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_)) #define PUT_STRUCT_ARG_STK_ONLY_ARG(x) #define PUT_STRUCT_ARG_STK_ONLY(x) -#endif // !(defined(UNIX_AMD64_ABI)|| !defined(_TARGET_64BIT_)) +#endif // !(defined(UNIX_AMD64_ABI) && defined(_TARGET_64BIT_) && !(defined(_TARGET_WINDOWS_) && + // defined(_TARGET_ARM64_)) #if defined(UNIX_AMD64_ABI) #define UNIX_AMD64_ABI_ONLY_ARG(x) , x @@ -303,6 +304,15 @@ #define MULTIREG_HAS_SECOND_GC_RET_ONLY(x) #endif // defined(UNIX_AMD64_ABI) +// Arm64 Windows supports FEATURE_ARG_SPLIT, note this is different from +// the official Arm64 
ABI. +// Case: splitting 16 byte struct between x7 and stack +#if (defined(_TARGET_ARM_) || (defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_))) +#define FEATURE_ARG_SPLIT 1 +#else +#define FEATURE_ARG_SPLIT 0 +#endif // (defined(_TARGET_ARM_) || (defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_))) + // To get rid of warning 4701 : local variable may be used without being initialized #define DUMMY_INIT(x) (x) diff --git a/src/coreclr/src/jit/lclvars.cpp b/src/coreclr/src/jit/lclvars.cpp index 93933ef..e0e8f74 100644 --- a/src/coreclr/src/jit/lclvars.cpp +++ b/src/coreclr/src/jit/lclvars.cpp @@ -626,6 +626,24 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo) // it enregistered, as long as we can split the rest onto the stack. unsigned cSlotsToEnregister = cSlots; +#if defined(_TARGET_ARM64_) && FEATURE_ARG_SPLIT + + // On arm64 Windows we will need to properly handle the case where a >8byte <=16byte + // struct is split between register r7 and virtual stack slot s[0] + // We will only do this for calls to vararg methods on Windows Arm64 + // + // !!This does not affect the normal arm64 calling convention or Unix Arm64!! + if (this->info.compIsVarArgs && argType == TYP_STRUCT) + { + if (varDscInfo->canEnreg(TYP_INT, 1) && // The beginning of the struct can go in a register + !varDscInfo->canEnreg(TYP_INT, cSlots)) // The end of the struct can't fit in a register + { + cSlotsToEnregister = 1; // Force the split + } + } + +#endif // defined(_TARGET_ARM64_) && FEATURE_ARG_SPLIT + #ifdef _TARGET_ARM_ // On ARM we pass the first 4 words of integer arguments and non-HFA structs in registers. // But we pre-spill user arguments in varargs methods and structs. @@ -643,6 +661,8 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo) // arguments passed in the integer registers but get homed immediately after the prolog. if (!isHfaArg) { + // TODO-Arm32-Windows: vararg struct should be forced to split like + // ARM64 above. 
cSlotsToEnregister = 1; // HFAs must be totally enregistered or not, but other structs can be split. preSpill = true; } @@ -5210,6 +5230,20 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, argOffs += TARGET_POINTER_SIZE; #elif defined(_TARGET_ARM64_) // Register arguments on ARM64 only take stack space when they have a frame home. +// Unless on windows and in a vararg method. +#if FEATURE_ARG_SPLIT + if (this->info.compIsVarArgs) + { + if (varDsc->lvType == TYP_STRUCT && varDsc->lvOtherArgReg >= MAX_REG_ARG && varDsc->lvOtherArgReg != REG_NA) + { + // This is a split struct. It will account for an extra (8 bytes) + // of alignment. + varDsc->lvStkOffs += TARGET_POINTER_SIZE; + argOffs += TARGET_POINTER_SIZE; + } + } +#endif // FEATURE_ARG_SPLIT + #elif defined(_TARGET_ARM_) // On ARM we spill the registers in codeGen->regSet.rsMaskPreSpillRegArg // in the prolog, so we have to fill in lvStkOffs here diff --git a/src/coreclr/src/jit/lower.cpp b/src/coreclr/src/jit/lower.cpp index defc34c..ce04c28 100644 --- a/src/coreclr/src/jit/lower.cpp +++ b/src/coreclr/src/jit/lower.cpp @@ -1039,7 +1039,7 @@ GenTree* Lowering::NewPutArg(GenTreeCall* call, GenTree* arg, fgArgTabEntry* inf } #endif -#ifdef _TARGET_ARM_ +#if FEATURE_ARG_SPLIT // Struct can be split into register(s) and stack on ARM if (info->isSplit) { @@ -1047,7 +1047,9 @@ GenTree* Lowering::NewPutArg(GenTreeCall* call, GenTree* arg, fgArgTabEntry* inf // TODO: Need to check correctness for FastTailCall if (call->IsFastTailCall()) { +#ifdef _TARGET_ARM_ NYI_ARM("lower: struct argument by fast tail call"); +#endif // _TARGET_ARM_ } putArg = new (comp, GT_PUTARG_SPLIT) @@ -1100,7 +1102,7 @@ GenTree* Lowering::NewPutArg(GenTreeCall* call, GenTree* arg, fgArgTabEntry* inf } } else -#endif // _TARGET_ARM_ +#endif // FEATURE_ARG_SPLIT { if (!isOnStack) { @@ -5715,9 +5717,9 @@ void Lowering::ContainCheckNode(GenTree* node) break; case GT_PUTARG_REG: case GT_PUTARG_STK: -#ifdef _TARGET_ARM_ +#if
FEATURE_ARG_SPLIT case GT_PUTARG_SPLIT: -#endif +#endif // FEATURE_ARG_SPLIT // The regNum must have been set by the lowering of the call. assert(node->gtRegNum != REG_NA); break; diff --git a/src/coreclr/src/jit/lsra.cpp b/src/coreclr/src/jit/lsra.cpp index a5e676f..27d29df 100644 --- a/src/coreclr/src/jit/lsra.cpp +++ b/src/coreclr/src/jit/lsra.cpp @@ -130,13 +130,15 @@ void lsraAssignRegToTree(GenTree* tree, regNumber reg, unsigned regIdx) { tree->gtRegNum = reg; } -#if defined(_TARGET_ARM_) +#if FEATURE_ARG_SPLIT +#ifdef _TARGET_ARM_ else if (tree->OperIsMultiRegOp()) { assert(regIdx == 1); GenTreeMultiRegOp* mul = tree->AsMultiRegOp(); mul->gtOtherReg = reg; } +#endif // _TARGET_ARM_ else if (tree->OperGet() == GT_COPY) { assert(regIdx == 1); @@ -148,7 +150,7 @@ void lsraAssignRegToTree(GenTree* tree, regNumber reg, unsigned regIdx) GenTreePutArgSplit* putArg = tree->AsPutArgSplit(); putArg->SetRegNumByIdx(reg, regIdx); } -#endif // _TARGET_ARM_ +#endif // FEATURE_ARG_SPLIT else { assert(tree->IsMultiRegCall()); @@ -6442,7 +6444,7 @@ void LinearScan::updateMaxSpill(RefPosition* refPosition) ReturnTypeDesc* retTypeDesc = treeNode->AsCall()->GetReturnTypeDesc(); typ = retTypeDesc->GetReturnRegType(refPosition->getMultiRegIdx()); } -#ifdef _TARGET_ARM_ +#if FEATURE_ARG_SPLIT else if (treeNode->OperIsPutArgSplit()) { typ = treeNode->AsPutArgSplit()->GetRegType(refPosition->getMultiRegIdx()); @@ -6454,7 +6456,7 @@ void LinearScan::updateMaxSpill(RefPosition* refPosition) var_types typNode = treeNode->TypeGet(); typ = (typNode == TYP_LONG) ? 
TYP_INT : typNode; } -#endif // _TARGET_ARM_ +#endif // FEATURE_ARG_SPLIT else { typ = treeNode->TypeGet(); @@ -6771,18 +6773,20 @@ void LinearScan::resolveRegisters() GenTreeCall* call = treeNode->AsCall(); call->SetRegSpillFlagByIdx(GTF_SPILL, currentRefPosition->getMultiRegIdx()); } -#ifdef _TARGET_ARM_ +#if FEATURE_ARG_SPLIT else if (treeNode->OperIsPutArgSplit()) { GenTreePutArgSplit* splitArg = treeNode->AsPutArgSplit(); splitArg->SetRegSpillFlagByIdx(GTF_SPILL, currentRefPosition->getMultiRegIdx()); } +#ifdef _TARGET_ARM_ else if (treeNode->OperIsMultiRegOp()) { GenTreeMultiRegOp* multiReg = treeNode->AsMultiRegOp(); multiReg->SetRegSpillFlagByIdx(GTF_SPILL, currentRefPosition->getMultiRegIdx()); } -#endif +#endif // _TARGET_ARM_ +#endif // FEATURE_ARG_SPLIT } // If the value is reloaded or moved to a different register, we need to insert diff --git a/src/coreclr/src/jit/lsra.h b/src/coreclr/src/jit/lsra.h index 3b54c7b..84ff2e2 100644 --- a/src/coreclr/src/jit/lsra.h +++ b/src/coreclr/src/jit/lsra.h @@ -1668,9 +1668,9 @@ private: #endif // FEATURE_HW_INTRINSICS int BuildPutArgStk(GenTreePutArgStk* argNode); -#ifdef _TARGET_ARM_ +#if FEATURE_ARG_SPLIT int BuildPutArgSplit(GenTreePutArgSplit* tree); -#endif +#endif // FEATURE_ARG_SPLIT int BuildLclHeap(GenTree* tree); }; diff --git a/src/coreclr/src/jit/lsraarm64.cpp b/src/coreclr/src/jit/lsraarm64.cpp index 9cc17ca..7117c1f 100644 --- a/src/coreclr/src/jit/lsraarm64.cpp +++ b/src/coreclr/src/jit/lsraarm64.cpp @@ -481,6 +481,12 @@ int LinearScan::BuildNode(GenTree* tree) } break; +#if FEATURE_ARG_SPLIT + case GT_PUTARG_SPLIT: + BuildPutArgSplit(tree->AsPutArgSplit()); + break; +#endif // FEATURE_ARG_SPLIT + case GT_PUTARG_STK: srcCount = BuildPutArgStk(tree->AsPutArgStk()); break; diff --git a/src/coreclr/src/jit/lsraarmarch.cpp b/src/coreclr/src/jit/lsraarmarch.cpp index e0886eb..1804d4c 100644 --- a/src/coreclr/src/jit/lsraarmarch.cpp +++ b/src/coreclr/src/jit/lsraarmarch.cpp @@ -276,7 +276,7 @@ int
LinearScan::BuildCall(GenTreeCall* call) srcCount++; } } -#ifdef _TARGET_ARM_ +#if FEATURE_ARG_SPLIT else if (argNode->OperGet() == GT_PUTARG_SPLIT) { unsigned regCount = argNode->AsPutArgSplit()->gtNumRegs; @@ -287,7 +287,7 @@ int LinearScan::BuildCall(GenTreeCall* call) } srcCount += regCount; } -#endif +#endif // FEATURE_ARG_SPLIT else { assert(argNode->OperIs(GT_PUTARG_REG)); @@ -332,11 +332,11 @@ int LinearScan::BuildCall(GenTreeCall* call) fgArgTabEntry* curArgTabEntry = compiler->gtArgEntryByNode(call, arg); assert(curArgTabEntry); #endif -#ifdef _TARGET_ARM_ +#if FEATURE_ARG_SPLIT // PUTARG_SPLIT nodes must be in the gtCallLateArgs list, since they // define registers used by the call. assert(arg->OperGet() != GT_PUTARG_SPLIT); -#endif +#endif // FEATURE_ARG_SPLIT if (arg->gtOper == GT_PUTARG_STK) { assert(curArgTabEntry->regNum == REG_STK); @@ -453,7 +453,7 @@ int LinearScan::BuildPutArgStk(GenTreePutArgStk* argNode) return srcCount; } -#ifdef _TARGET_ARM_ +#if FEATURE_ARG_SPLIT //------------------------------------------------------------------------ // BuildPutArgSplit: Set the NodeInfo for a GT_PUTARG_SPLIT node // @@ -501,11 +501,13 @@ int LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode) assert(!node->isContained()); // The only multi-reg nodes we should see are OperIsMultiRegOp() unsigned currentRegCount; +#ifdef _TARGET_ARM_ if (node->OperIsMultiRegOp()) { currentRegCount = node->AsMultiRegOp()->GetRegCount(); } else +#endif // _TARGET_ARM_ { assert(!node->IsMultiRegNode()); currentRegCount = 1; @@ -550,7 +552,7 @@ int LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode) BuildDefs(argNode, dstCount, argMask); return srcCount; } -#endif // _TARGET_ARM_ +#endif // FEATURE_ARG_SPLIT //------------------------------------------------------------------------ // BuildBlockStore: Set the NodeInfo for a block store.
diff --git a/src/coreclr/src/jit/morph.cpp b/src/coreclr/src/jit/morph.cpp index 94b176e..f9c8181 100644 --- a/src/coreclr/src/jit/morph.cpp +++ b/src/coreclr/src/jit/morph.cpp @@ -1473,13 +1473,13 @@ void fgArgInfo::ArgsComplete() continue; #endif } -#if defined(_TARGET_ARM_) +#if FEATURE_ARG_SPLIT else if (curArgTabEntry->isSplit) { hasStructRegArg = true; hasStackArgs = true; } -#endif // _TARGET_ARM_ +#endif // FEATURE_ARG_SPLIT else // we have a register argument, next we look for a struct type. { if (varTypeIsStruct(argx) UNIX_AMD64_ABI_ONLY(|| curArgTabEntry->isStruct)) @@ -1599,7 +1599,7 @@ void fgArgInfo::ArgsComplete() { prevArgTabEntry->needPlace = true; } -#if defined(_TARGET_ARM_) +#if FEATURE_ARG_SPLIT else if (prevArgTabEntry->isSplit) { prevArgTabEntry->needPlace = true; @@ -3323,6 +3323,12 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) } } +#if defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_) + // Make sure for vararg methods isHfaArg is not + // true. + isHfaArg = callIsVararg ? false : isHfaArg; +#endif // defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_) + if (isHfaArg) { // If we have a HFA struct it's possible we transition from a method that originally @@ -3972,13 +3978,25 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) // if (!isRegArg && (size > 1)) { - // We also must update intArgRegNum so that we no longer try to - // allocate any new general purpose registers for args - // - intArgRegNum = maxRegArgs; +#if defined(_TARGET_WINDOWS_) + // Arm64 windows native varargs allows splitting a 16 byte struct between stack + // and the last general purpose register. + if (callIsVararg) + { + // Override the decision and force a split. 
+ isRegArg = isRegArg = (intArgRegNum + (size - 1)) <= maxRegArgs; + } + else +#endif // defined(_TARGET_WINDOWS_) + { + // We also must update intArgRegNum so that we no longer try to + // allocate any new general purpose registers for args + // + intArgRegNum = maxRegArgs; + } } } -#else // not _TARGET_ARM_ or _TARGET_ARM64_ +#else // not _TARGET_ARM_ or _TARGET_ARM64_ #if defined(UNIX_AMD64_ABI) @@ -4177,7 +4195,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) else #endif // defined(UNIX_AMD64_ABI) { -#ifdef _TARGET_ARM_ +#if FEATURE_ARG_SPLIT // Check for a split (partially enregistered) struct if (!passUsingFloatRegs && (intArgRegNum + size) > MAX_REG_ARG) { @@ -4189,7 +4207,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) call->fgArgInfo->SplitArg(argIndex, numRegsPartial, size - numRegsPartial); fgPtrArgCntCur += size - numRegsPartial; } -#endif // _TARGET_ARM_ +#endif // FEATURE_ARG_SPLIT newArgEntry->SetMultiRegNums(); if (passUsingFloatRegs) @@ -4230,7 +4248,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) else { // This is a stack argument - put it in the table - call->fgArgInfo->AddStkArg(argIndex, argx, args, size, argAlign, isStructArg); + call->fgArgInfo->AddStkArg(argIndex, argx, args, size, argAlign, isStructArg, callIsVararg); } } -- 2.7.4