From 0b0c0983b2f609b571bed3c352c76897679108e6 Mon Sep 17 00:00:00 2001 From: Hanjoung Lee Date: Mon, 19 Jun 2017 16:50:02 +0900 Subject: [PATCH] [RyuJIT/armel] Support `double` argument passing - Fix for putting `double` arguments between Lowering and Codegen phase - Rename GenTreeMulLong to GenTreeMultiRegOp GT_PUTARG_REG could be GenTreeMultiRegOp on RyuJIT/arm Fix dotnet/coreclr#12293 Commit migrated from https://github.com/dotnet/coreclr/commit/55eede4c2ad0c5f4849fda9544138f9096847ba4 --- src/coreclr/src/jit/codegenarm.cpp | 5 +++-- src/coreclr/src/jit/codegenarmarch.cpp | 10 ++++------ src/coreclr/src/jit/codegenlinear.h | 2 +- src/coreclr/src/jit/compiler.h | 2 ++ src/coreclr/src/jit/gentree.cpp | 33 +++++++++++++++++++++++++++++++++ src/coreclr/src/jit/gentree.h | 12 ++++++++---- src/coreclr/src/jit/gtlist.h | 6 +++++- src/coreclr/src/jit/gtstructs.h | 2 +- src/coreclr/src/jit/lower.cpp | 9 +++++---- src/coreclr/src/jit/lsra.cpp | 25 +++++++++++++++++++++---- src/coreclr/src/jit/lsraarm.cpp | 31 +++++++++++++++++++++++++++++-- src/coreclr/src/jit/lsraarmarch.cpp | 12 +++++++++++- 12 files changed, 123 insertions(+), 26 deletions(-) diff --git a/src/coreclr/src/jit/codegenarm.cpp b/src/coreclr/src/jit/codegenarm.cpp index 66924c8..5b87311 100644 --- a/src/coreclr/src/jit/codegenarm.cpp +++ b/src/coreclr/src/jit/codegenarm.cpp @@ -1800,7 +1800,7 @@ void CodeGen::genStoreLongLclVar(GenTree* treeNode) { assert((op1->gtFlags & GTF_MUL_64RSLT) != 0); - GenTreeMulLong* mul = op1->AsMulLong(); + GenTreeMultiRegOp* mul = op1->AsMultiRegOp(); // Stack store getEmitter()->emitIns_S_R(ins_Store(TYP_INT), emitTypeSize(TYP_INT), mul->gtRegNum, lclNum, 0); @@ -1818,8 +1818,9 @@ void CodeGen::genStoreLongLclVar(GenTree* treeNode) // Return Value: // None. // -void CodeGen::genCodeForMulLong(GenTreeMulLong* node) +void CodeGen::genCodeForMulLong(GenTreeMultiRegOp* node) { + assert(node->OperGet() == GT_MUL_LONG); genConsumeOperands(node); GenTree* src1 = node->gtOp1; GenTree* src2 = node->gtOp2; diff --git a/src/coreclr/src/jit/codegenarmarch.cpp b/src/coreclr/src/jit/codegenarmarch.cpp index 86dec5e..5338a04 100644 --- a/src/coreclr/src/jit/codegenarmarch.cpp +++ b/src/coreclr/src/jit/codegenarmarch.cpp @@ -191,7 +191,7 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode) #ifdef _TARGET_ARM_ case GT_MUL_LONG: - genCodeForMulLong(treeNode->AsMulLong()); + genCodeForMulLong(treeNode->AsMultiRegOp()); break; #endif // _TARGET_ARM_ @@ -1661,11 +1661,9 @@ void CodeGen::genRegCopy(GenTree* treeNode) } else { - // TODO-Arm-Bug: We cannot assume the second destination be the next of targetReg - // since LSRA doesn't know that register is used. So we cannot write code like: - // - // inst_RV_RV_RV(INS_vmov_d2i, targetReg, REG_NEXT(targetReg), genConsumeReg(op1), EA_8BYTE); - NYI_ARM("genRegCopy from 'double' to 'int'+'int'"); + regNumber otherReg = (regNumber)treeNode->AsCopyOrReload()->gtOtherRegs[0]; + assert(otherReg != REG_NA); + inst_RV_RV_RV(INS_vmov_d2i, targetReg, otherReg, genConsumeReg(op1), EA_8BYTE); } } #endif // !_TARGET_ARM64_ diff --git a/src/coreclr/src/jit/codegenlinear.h b/src/coreclr/src/jit/codegenlinear.h index 64d5c4f..9cd3040 100644 --- a/src/coreclr/src/jit/codegenlinear.h +++ b/src/coreclr/src/jit/codegenlinear.h @@ -29,7 +29,7 @@ void genScaledAdd(emitAttr attr, regNumber targetReg, regNumber baseReg, regNumb #endif // _TARGET_ARMARCH_ #if defined(_TARGET_ARM_) -void genCodeForMulLong(GenTreeMulLong* treeNode); +void genCodeForMulLong(GenTreeMultiRegOp* treeNode); #endif // _TARGET_ARM_ #if !defined(_TARGET_64BIT_) diff --git a/src/coreclr/src/jit/compiler.h b/src/coreclr/src/jit/compiler.h index de2d99e..99f9551 100644 --- a/src/coreclr/src/jit/compiler.h +++ b/src/coreclr/src/jit/compiler.h @@ -2025,6 +2025,8 @@ public: GenTree* gtNewBlkOpNode(GenTreePtr dst, GenTreePtr srcOrFillVal, unsigned size, bool isVolatile, bool isCopyBlock); + GenTree* gtNewPutArgReg(var_types type, GenTreePtr arg); + protected: void gtBlockOpInit(GenTreePtr result, GenTreePtr dst, GenTreePtr srcOrFillVal, bool isVolatile); diff --git a/src/coreclr/src/jit/gentree.cpp b/src/coreclr/src/jit/gentree.cpp index ace7e13..aa5f007 100644 --- a/src/coreclr/src/jit/gentree.cpp +++ b/src/coreclr/src/jit/gentree.cpp @@ -6998,6 +6998,39 @@ GenTree* Compiler::gtNewBlkOpNode( return result; } +//------------------------------------------------------------------------ +// gtNewPutArgReg: Creates a new PutArgReg node. +// +// Arguments: +// type - The actual type of the argument +// arg - The argument node +// +// Return Value: +// Returns the newly created PutArgReg node. +// +// Notes: +// The node is generated as GenTreeMultiRegOp on armel, as GenTreeOp on all the other archs +// +GenTreePtr Compiler::gtNewPutArgReg(var_types type, GenTreePtr arg) +{ + assert(arg != nullptr); + + GenTreePtr node = nullptr; +#if !defined(LEGACY_BACKEND) && defined(_TARGET_ARM_) + // A PUTARG_REG could be a MultiRegOp on armel since we could move a double register to two int registers. + if (opts.compUseSoftFP) + { + node = new (this, GT_PUTARG_REG) GenTreeMultiRegOp(GT_PUTARG_REG, type, arg, nullptr); + } + else +#endif + { + node = gtNewOperNode(GT_PUTARG_REG, type, arg); + } + + return node; +} + /***************************************************************************** * * Clones the given tree value and returns a copy of the given tree. diff --git a/src/coreclr/src/jit/gentree.h b/src/coreclr/src/jit/gentree.h index 25fc621..5383ffd 100644 --- a/src/coreclr/src/jit/gentree.h +++ b/src/coreclr/src/jit/gentree.h @@ -1637,6 +1637,9 @@ public: #ifdef FEATURE_SIMD case GT_SIMD: #endif // !FEATURE_SIMD +#if !defined(LEGACY_BACKEND) && _TARGET_ARM_ + case GT_PUTARG_REG: +#endif // !defined(LEGACY_BACKEND) && _TARGET_ARM_ return true; default: return false; @@ -3818,16 +3821,17 @@ struct GenTreeCmpXchg : public GenTree }; #if !defined(LEGACY_BACKEND) && defined(_TARGET_ARM_) -struct GenTreeMulLong : public GenTreeOp +struct GenTreeMultiRegOp : public GenTreeOp { regNumber gtOtherReg; - GenTreeMulLong(var_types type, GenTreePtr op1, GenTreePtr op2) : GenTreeOp(GT_MUL_LONG, type, op1, op2) + GenTreeMultiRegOp(genTreeOps oper, var_types type, GenTreePtr op1, GenTreePtr op2) + : GenTreeOp(oper, type, op1, op2), gtOtherReg(REG_NA) { } #if DEBUGGABLE_GENTREE - GenTreeMulLong() : GenTreeOp() + GenTreeMultiRegOp() : GenTreeOp() { } #endif @@ -5694,7 +5698,7 @@ inline bool GenTree::IsMultiRegNode() const } #if !defined(LEGACY_BACKEND) && defined(_TARGET_ARM_) - if (gtOper == GT_MUL_LONG || OperIsPutArgSplit()) + if (gtOper == GT_MUL_LONG || gtOper == GT_PUTARG_REG || gtOper == GT_COPY || OperIsPutArgSplit()) { return true; } diff --git a/src/coreclr/src/jit/gtlist.h b/src/coreclr/src/jit/gtlist.h index 35c25b9..fdc2158 100644 --- a/src/coreclr/src/jit/gtlist.h +++ b/src/coreclr/src/jit/gtlist.h @@ -194,7 +194,7 @@ GTNODE(MOD_HI , GenTreeOp ,0,GTK_BINOP) #if defined(_TARGET_X86_) GTNODE(MUL_LONG , GenTreeOp ,1,GTK_BINOP) #elif defined (_TARGET_ARM_) -GTNODE(MUL_LONG , GenTreeMulLong ,1,GTK_BINOP) +GTNODE(MUL_LONG , GenTreeMultiRegOp ,1,GTK_BINOP) #endif // The following are nodes that specify shifts that take a GT_LONG op1. The GT_LONG @@ -293,7 +293,11 @@ GTNODE(PHYSREGDST , GenTreeOp ,0,GTK_UNOP|GTK_NOVALUE) // write GTNODE(EMITNOP , GenTree ,0,GTK_LEAF|GTK_NOVALUE) // emitter-placed nop GTNODE(PINVOKE_PROLOG , GenTree ,0,GTK_LEAF|GTK_NOVALUE) // pinvoke prolog seq GTNODE(PINVOKE_EPILOG , GenTree ,0,GTK_LEAF|GTK_NOVALUE) // pinvoke epilog seq +#if !defined(LEGACY_BACKEND) && defined(_TARGET_ARM_) +GTNODE(PUTARG_REG , GenTreeMultiRegOp ,0,GTK_UNOP) // operator that places outgoing arg in register +#else GTNODE(PUTARG_REG , GenTreeOp ,0,GTK_UNOP) // operator that places outgoing arg in register +#endif GTNODE(PUTARG_STK , GenTreePutArgStk ,0,GTK_UNOP|GTK_NOVALUE) // operator that places outgoing arg in stack #if !defined(LEGACY_BACKEND) && defined(_TARGET_ARM_) GTNODE(PUTARG_SPLIT , GenTreePutArgSplit ,0,GTK_UNOP) // operator that places outgoing arg in registers with stack (split struct in ARM32) diff --git a/src/coreclr/src/jit/gtstructs.h b/src/coreclr/src/jit/gtstructs.h index fa462ad..bb3f82b 100644 --- a/src/coreclr/src/jit/gtstructs.h +++ b/src/coreclr/src/jit/gtstructs.h @@ -106,7 +106,7 @@ GTSTRUCT_1(SIMD , GT_SIMD) GTSTRUCT_1(AllocObj , GT_ALLOCOBJ) GTSTRUCT_2(CC , GT_JCC, GT_SETCC) #if !defined(LEGACY_BACKEND) && defined(_TARGET_ARM_) -GTSTRUCT_1(MulLong , GT_MUL_LONG) +GTSTRUCT_2(MultiRegOp , GT_MUL_LONG, GT_PUTARG_REG) #endif /*****************************************************************************/ #undef GTSTRUCT_0 diff --git a/src/coreclr/src/jit/lower.cpp b/src/coreclr/src/jit/lower.cpp index 0479b78..d82df0f 100644 --- a/src/coreclr/src/jit/lower.cpp +++ b/src/coreclr/src/jit/lower.cpp @@ -1008,7 +1008,7 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP #endif // FEATURE_MULTIREG_ARGS #endif // not defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) { - putArg = comp->gtNewOperNode(GT_PUTARG_REG, type, arg); + putArg = comp->gtNewPutArgReg(type, arg); } } else @@ -1224,12 +1224,13 @@ void Lowering::LowerArg(GenTreeCall* call, GenTreePtr* ppArg) { #ifdef _TARGET_ARMARCH_ - // For vararg call, reg args should be all integer. + // For vararg call or on armel, reg args should be all integer. // Insert a copy to move float value to integer register. if ((call->IsVarargs() || comp->opts.compUseSoftFP) && varTypeIsFloating(type)) { - var_types intType = (type == TYP_DOUBLE) ? TYP_LONG : TYP_INT; - GenTreePtr intArg = comp->gtNewOperNode(GT_COPY, intType, arg); + var_types intType = (type == TYP_DOUBLE) ? TYP_LONG : TYP_INT; + + GenTreePtr intArg = new (comp, GT_COPY) GenTreeCopyOrReload(GT_COPY, intType, arg); info->node = intArg; ReplaceArgWithPutArgOrCopy(ppArg, intArg); diff --git a/src/coreclr/src/jit/lsra.cpp b/src/coreclr/src/jit/lsra.cpp index 2af8251..4331d63 100644 --- a/src/coreclr/src/jit/lsra.cpp +++ b/src/coreclr/src/jit/lsra.cpp @@ -134,11 +134,17 @@ void lsraAssignRegToTree(GenTreePtr tree, regNumber reg, unsigned regIdx) tree->gtRegNum = reg; } #if defined(_TARGET_ARM_) - else if (tree->OperGet() == GT_MUL_LONG) + else if (tree->OperGet() == GT_MUL_LONG || tree->OperGet() == GT_PUTARG_REG) { assert(regIdx == 1); - GenTreeMulLong* mul = tree->AsMulLong(); - mul->gtOtherReg = reg; + GenTreeMultiRegOp* mul = tree->AsMultiRegOp(); + mul->gtOtherReg = reg; + } + else if (tree->OperGet() == GT_COPY) + { + assert(regIdx == 1); + GenTreeCopyOrReload* copy = tree->AsCopyOrReload(); + copy->gtOtherRegs[0] = reg; } else if (tree->OperGet() == GT_PUTARG_SPLIT) { @@ -4139,6 +4145,9 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, // push defs LocationInfoList locationInfoList; LsraLocation defLocation = currentLoc + 1; +#ifdef ARM_SOFTFP + regMaskTP remainingUseCandidates = useCandidates; +#endif for (int i = 0; i < produce; i++) { regMaskTP currCandidates = candidates; @@ -4160,7 +4169,15 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, currCandidates = genFindLowestReg(candidates); candidates &= ~currCandidates; } -#endif +#ifdef ARM_SOFTFP + // If oper is GT_PUTARG_REG, set bits in useCandidates must be in sequential order. + else if (tree->OperGet() == GT_PUTARG_REG || tree->OperGet() == GT_COPY) + { + useCandidates = genFindLowestReg(remainingUseCandidates); + remainingUseCandidates &= ~useCandidates; + } +#endif // ARM_SOFTFP +#endif // _TARGET_ARM_ if (interval == nullptr) { diff --git a/src/coreclr/src/jit/lsraarm.cpp b/src/coreclr/src/jit/lsraarm.cpp index ee356d5..dcfd6c4 100644 --- a/src/coreclr/src/jit/lsraarm.cpp +++ b/src/coreclr/src/jit/lsraarm.cpp @@ -727,6 +727,35 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) info->internalIntCount = 1; break; + case GT_COPY: + info->srcCount = 1; + info->dstCount = 1; +#ifdef ARM_SOFTFP + // This case currently only occurs for double types that are passed as TYP_LONG; + // actual long types would have been decomposed by now. + if (tree->TypeGet() == TYP_LONG) + { + info->dstCount = 2; + } +#endif + break; + + case GT_PUTARG_REG: +#ifdef ARM_SOFTFP + // This case currently only occurs for double types that are passed as TYP_LONG; + // actual long types would have been decomposed by now. + if (tree->TypeGet() == TYP_LONG) + { + info->srcCount = 2; + } + else +#endif + { + info->srcCount = 1; + } + info->dstCount = info->srcCount; + break; + default: #ifdef DEBUG char message[256]; @@ -744,7 +773,6 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) case GT_CLS_VAR_ADDR: case GT_IL_OFFSET: case GT_CNS_INT: - case GT_PUTARG_REG: case GT_PUTARG_STK: case GT_LABEL: case GT_PINVOKE_PROLOG: @@ -752,7 +780,6 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) case GT_SETCC: case GT_MEMORYBARRIER: case GT_OBJ: - case GT_COPY: case GT_PUTARG_SPLIT: info->dstCount = tree->IsValue() ? 1 : 0; if (kind & (GTK_CONST | GTK_LEAF)) diff --git a/src/coreclr/src/jit/lsraarmarch.cpp b/src/coreclr/src/jit/lsraarmarch.cpp index e361478..e65cdfe 100644 --- a/src/coreclr/src/jit/lsraarmarch.cpp +++ b/src/coreclr/src/jit/lsraarmarch.cpp @@ -403,7 +403,17 @@ void Lowering::TreeNodeInfoInitPutArgReg( info.srcCount++; // Set the register requirements for the node. - const regMaskTP argMask = genRegMask(argReg); + regMaskTP argMask = genRegMask(argReg); +#ifdef ARM_SOFTFP + // If type of node is `long` then it is actually `double`. + // The actual `long` types must have been transformed as a field list with two fields. + if (node->TypeGet() == TYP_LONG) + { + info.srcCount++; + assert(genRegArgNext(argReg) == REG_NEXT(argReg)); + argMask |= genRegMask(REG_NEXT(argReg)); + } +#endif // ARM_SOFTFP node->gtLsraInfo.setDstCandidates(m_lsra, argMask); node->gtLsraInfo.setSrcCandidates(m_lsra, argMask); -- 2.7.4