From 8acbaf9c77a19ba833e5150b24c79449f31221fb Mon Sep 17 00:00:00 2001 From: Carol Eidt Date: Tue, 10 Jan 2017 13:55:47 -0800 Subject: [PATCH] Fix handling of PutArgStk GT_PUTARG_STK doesn't produce a value, so it should have the GTK_NOVALUE flag set. Although the dstCount was being set to zero by the parent call, localDefUse was also being set, causing a register to be allocated. Fixing this produces a number of improvements due to reuse of constant registers that were otherwise unnecessarily "overwritten" by the localDefUse. Also on x86, GT_LONG shouldn't be used to pass a long, since GT_LONG should always be a value-producing node. Instead, use the existing GT_FIELD_LIST approach. --- src/jit/codegenxarch.cpp | 9 ++---- src/jit/gtlist.h | 4 +-- src/jit/lir.cpp | 10 +++++-- src/jit/lower.cpp | 43 ++++++++++------------------- src/jit/lowerxarch.cpp | 71 +++++++++++++++++++----------------------------- src/jit/rationalize.cpp | 2 ++ 6 files changed, 56 insertions(+), 83 deletions(-) diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp index 8e0af48..45995cc 100644 --- a/src/jit/codegenxarch.cpp +++ b/src/jit/codegenxarch.cpp @@ -4858,11 +4858,6 @@ void CodeGen::genCallInstruction(GenTreePtr node) if (arg->OperGet() != GT_ARGPLACE && !(arg->gtFlags & GTF_LATE_ARG)) { #if defined(_TARGET_X86_) - assert((arg->OperGet() == GT_PUTARG_STK) || (arg->OperGet() == GT_LONG)); - if (arg->OperGet() == GT_LONG) - { - assert((arg->gtGetOp1()->OperGet() == GT_PUTARG_STK) && (arg->gtGetOp2()->OperGet() == GT_PUTARG_STK)); - } if ((arg->OperGet() == GT_PUTARG_STK) && (arg->gtGetOp1()->OperGet() == GT_FIELD_LIST)) { fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, arg); @@ -7782,8 +7777,8 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* putArgStk) GenTreePtr data = putArgStk->gtOp1; - // On a 32-bit target, all of the long arguments have been decomposed into - // a separate putarg_stk for each of the upper and lower halves. + // On a 32-bit target, all of the long arguments are handled with GT_FIELD_LIST, + // and the type of the putArgStk is TYP_VOID. noway_assert(targetType != TYP_LONG); const unsigned argSize = putArgStk->getArgSize(); diff --git a/src/jit/gtlist.h b/src/jit/gtlist.h index 92265a7..d0837c1 100644 --- a/src/jit/gtlist.h +++ b/src/jit/gtlist.h @@ -46,7 +46,7 @@ GTNODE(CNS_STR , "sconst" ,GenTreeStrCon ,0,GTK_LEAF|GTK_CON //----------------------------------------------------------------------------- GTNODE(NOT , "~" ,GenTreeOp ,0,GTK_UNOP) -GTNODE(NOP , "nop" ,GenTree ,0,GTK_UNOP) +GTNODE(NOP , "nop" ,GenTree ,0,GTK_UNOP|GTK_NOVALUE) GTNODE(NEG , "unary -" ,GenTreeOp ,0,GTK_UNOP) GTNODE(COPY , "copy" ,GenTreeCopyOrReload,0,GTK_UNOP) // Copies a variable from its current location to a register that satisfies // code generation constraints. The child is the actual lclVar node. @@ -269,7 +269,7 @@ GTNODE(EMITNOP , "emitnop" ,GenTree ,0,GTK_LEAF|GTK_NOV GTNODE(PINVOKE_PROLOG ,"pinvoke_prolog",GenTree ,0,GTK_LEAF|GTK_NOVALUE) // pinvoke prolog seq GTNODE(PINVOKE_EPILOG ,"pinvoke_epilog",GenTree ,0,GTK_LEAF|GTK_NOVALUE) // pinvoke epilog seq GTNODE(PUTARG_REG , "putarg_reg" ,GenTreeOp ,0,GTK_UNOP) // operator that places outgoing arg in register -GTNODE(PUTARG_STK , "putarg_stk" ,GenTreePutArgStk ,0,GTK_UNOP) // operator that places outgoing arg in stack +GTNODE(PUTARG_STK , "putarg_stk" ,GenTreePutArgStk ,0,GTK_UNOP|GTK_NOVALUE) // operator that places outgoing arg in stack GTNODE(RETURNTRAP , "returnTrap" ,GenTreeOp ,0,GTK_UNOP|GTK_NOVALUE) // a conditional call to wait on gc GTNODE(SWAP , "swap" ,GenTreeOp ,0,GTK_BINOP|GTK_NOVALUE) // op1 and op2 swap (registers) GTNODE(IL_OFFSET , "il_offset" ,GenTreeStmt ,0,GTK_LEAF|GTK_NOVALUE) // marks an IL offset for debugging purposes diff --git a/src/jit/lir.cpp b/src/jit/lir.cpp index 35dd181..6eb8a49 100644 --- a/src/jit/lir.cpp +++ b/src/jit/lir.cpp @@ -1494,9 +1494,13 @@ bool LIR::Range::CheckLIR(Compiler* compiler, bool checkUnusedValues) const } else if (!def->IsValue()) { - // Calls may contain "uses" of nodes that do not produce a value. This is an artifact of - // the HIR and should probably be fixed, but doing so is an unknown amount of work. - assert(node->OperGet() == GT_CALL); + // Stack arguments do not produce a value, but they are considered children of the call. + // It may be useful to remove these from being call operands, but that may also impact + // other code that relies on being able to reach all the operands from a call node. + // The GT_NOP case is because sometimes we eliminate stack argument stores as dead, but + // instead of removing them we replace with a NOP. + assert((node->OperGet() == GT_CALL) && + (def->OperIsStore() || (def->OperGet() == GT_PUTARG_STK) || (def->OperGet() == GT_NOP))); continue; } diff --git a/src/jit/lower.cpp b/src/jit/lower.cpp index a6e50b3..d1ecf7c 100644 --- a/src/jit/lower.cpp +++ b/src/jit/lower.cpp @@ -757,16 +757,6 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP GenTreePtr putArg = nullptr; bool updateArgTable = true; -#if !defined(_TARGET_64BIT_) - if (varTypeIsLong(type)) - { - // For TYP_LONG, we leave the GT_LONG as the arg, and put the putArg below it. - // Therefore, we don't update the arg table entry. - updateArgTable = false; - type = TYP_INT; - } -#endif // !defined(_TARGET_64BIT_) - bool isOnStack = true; #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING if (varTypeIsStruct(type)) @@ -1084,25 +1074,22 @@ void Lowering::LowerArg(GenTreeCall* call, GenTreePtr* ppArg) NYI("Lowering of long register argument"); } - // For longs, we will create two PUTARG_STKs below the GT_LONG. The hi argument needs to - // be pushed first, so the hi PUTARG_STK will precede the lo PUTARG_STK in execution order. + // For longs, we will replace the GT_LONG with a GT_FIELD_LIST, and put that under a PUTARG_STK. + // Although the hi argument needs to be pushed first, that will be handled by the general case, + // in which the fields will be reversed. noway_assert(arg->OperGet() == GT_LONG); - GenTreePtr argLo = arg->gtGetOp1(); - GenTreePtr argHi = arg->gtGetOp2(); - - GenTreePtr putArgLo = NewPutArg(call, argLo, info, type); - GenTreePtr putArgHi = NewPutArg(call, argHi, info, type); - - arg->gtOp.gtOp1 = putArgLo; - arg->gtOp.gtOp2 = putArgHi; - - BlockRange().InsertBefore(arg, putArgHi, putArgLo); - - // The execution order now looks like this: - // argLoPrev <-> argLoFirst ... argLo <-> argHiFirst ... argHi <-> putArgHi <-> putArgLo <-> arg(GT_LONG) - - assert((arg->gtFlags & GTF_REVERSE_OPS) == 0); - arg->gtFlags |= GTF_REVERSE_OPS; // We consume the high arg (op2) first. + assert(info->numSlots == 2); + GenTreePtr argLo = arg->gtGetOp1(); + GenTreePtr argHi = arg->gtGetOp2(); + GenTreeFieldList* fieldList = new (comp, GT_FIELD_LIST) GenTreeFieldList(argLo, 0, TYP_INT, nullptr); + // Only the first fieldList node (GTF_FIELD_LIST_HEAD) is in the instruction sequence. + (void)new (comp, GT_FIELD_LIST) GenTreeFieldList(argHi, 4, TYP_INT, fieldList); + putArg = NewPutArg(call, fieldList, info, TYP_VOID); + + // We can't call ReplaceArgWithPutArgOrCopy here because it presumes that we are keeping the original arg. + BlockRange().InsertBefore(arg, fieldList, putArg); + BlockRange().Remove(arg); + *ppArg = putArg; } else #endif // !defined(_TARGET_64BIT_) diff --git a/src/jit/lowerxarch.cpp b/src/jit/lowerxarch.cpp index bf5d29c..50ddf69 100644 --- a/src/jit/lowerxarch.cpp +++ b/src/jit/lowerxarch.cpp @@ -1024,7 +1024,7 @@ void Lowering::TreeNodeInfoInitSimple(GenTree* tree) { TreeNodeInfo* info = &(tree->gtLsraInfo); unsigned kind = tree->OperKind(); - info->dstCount = (tree->TypeGet() == TYP_VOID) ? 0 : 1; + info->dstCount = tree->IsValue() ? 1 : 0; if (kind & (GTK_CONST | GTK_LEAF)) { info->srcCount = 0; @@ -1589,53 +1589,38 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call) if (!(args->gtFlags & GTF_LATE_ARG)) { TreeNodeInfo* argInfo = &(arg->gtLsraInfo); -#if !defined(_TARGET_64BIT_) - if (arg->TypeGet() == TYP_LONG) + if (argInfo->dstCount != 0) { - assert(arg->OperGet() == GT_LONG); - GenTreePtr loArg = arg->gtGetOp1(); - GenTreePtr hiArg = arg->gtGetOp2(); - assert((loArg->OperGet() == GT_PUTARG_STK) && (hiArg->OperGet() == GT_PUTARG_STK)); - assert((loArg->gtLsraInfo.dstCount == 1) && (hiArg->gtLsraInfo.dstCount == 1)); - loArg->gtLsraInfo.isLocalDefUse = true; - hiArg->gtLsraInfo.isLocalDefUse = true; + argInfo->isLocalDefUse = true; } - else -#endif // !defined(_TARGET_64BIT_) - { - if (argInfo->dstCount != 0) - { - argInfo->isLocalDefUse = true; - } - // If the child of GT_PUTARG_STK is a constant, we don't need a register to - // move it to memory (stack location). - // - // On AMD64, we don't want to make 0 contained, because we can generate smaller code - // by zeroing a register and then storing it. E.g.: - // xor rdx, rdx - // mov gword ptr [rsp+28H], rdx - // is 2 bytes smaller than: - // mov gword ptr [rsp+28H], 0 - // - // On x86, we push stack arguments; we don't use 'mov'. So: - // push 0 - // is 1 byte smaller than: - // xor rdx, rdx - // push rdx - - argInfo->dstCount = 0; - if (arg->gtOper == GT_PUTARG_STK) - { - GenTree* op1 = arg->gtOp.gtOp1; - if (IsContainableImmed(arg, op1) + // If the child of GT_PUTARG_STK is a constant, we don't need a register to + // move it to memory (stack location). + // + // On AMD64, we don't want to make 0 contained, because we can generate smaller code + // by zeroing a register and then storing it. E.g.: + // xor rdx, rdx + // mov gword ptr [rsp+28H], rdx + // is 2 bytes smaller than: + // mov gword ptr [rsp+28H], 0 + // + // On x86, we push stack arguments; we don't use 'mov'. So: + // push 0 + // is 1 byte smaller than: + // xor rdx, rdx + // push rdx + + argInfo->dstCount = 0; + if (arg->gtOper == GT_PUTARG_STK) + { + GenTree* op1 = arg->gtOp.gtOp1; + if (IsContainableImmed(arg, op1) #if defined(_TARGET_AMD64_) - && !op1->IsIntegralConst(0) + && !op1->IsIntegralConst(0) #endif // _TARGET_AMD64_ - ) - { - MakeSrcContained(arg, op1); - } + ) + { + MakeSrcContained(arg, op1); } } } diff --git a/src/jit/rationalize.cpp b/src/jit/rationalize.cpp index 7c635ca..c648ab2 100644 --- a/src/jit/rationalize.cpp +++ b/src/jit/rationalize.cpp @@ -7,6 +7,7 @@ #pragma hdrstop #endif +#ifndef LEGACY_BACKEND // state carried over the tree walk, to be used in making // a splitting decision. struct SplitData @@ -1054,3 +1055,4 @@ void Rationalizer::DoPhase() comp->compRationalIRForm = true; } +#endif // LEGACY_BACKEND -- 2.7.4