From: Carol Eidt Date: Thu, 28 Mar 2019 18:23:10 +0000 (-0700) Subject: Struct & SIMD improvements (#22255) X-Git-Tag: accepted/tizen/unified/20190813.215958~54^2~53 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=3d4a1d5cea0ae71eed1482990ce6e575049829d8;p=platform%2Fupstream%2Fcoreclr.git Struct & SIMD improvements (#22255) * [WIP] Struct & SIMD improvements - Enable CSE of struct values when handle is available (and add code to get the handle of HW SIMD types) - Don't require block nodes for SIMD assignments - Don't set `GTF_GLOB_REF` on `GT_OBJ` if it is local - Set `lvRegStruct` on promoted SIMD fields - Add tests for #19910 (fixed with this PR) and #3539 & #19438 (fixed with #21314) - Additional cleanup Fix #19910 --- diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp index 8f31deb..03e171b 100644 --- a/src/jit/codegencommon.cpp +++ b/src/jit/codegencommon.cpp @@ -4534,15 +4534,7 @@ void CodeGen::genCheckUseBlockInit() unless they are untracked GC type or structs that contain GC pointers */ CLANG_FORMAT_COMMENT_ANCHOR; -#if FEATURE_SIMD - // TODO-1stClassStructs - // This is here to duplicate previous behavior, where TYP_SIMD8 locals - // were not being re-typed correctly. - if ((!varDsc->lvTracked || (varDsc->lvType == TYP_STRUCT) || (varDsc->lvType == TYP_SIMD8)) && -#else // !FEATURE_SIMD - if ((!varDsc->lvTracked || (varDsc->lvType == TYP_STRUCT)) && -#endif // !FEATURE_SIMD - varDsc->lvOnFrame && + if ((!varDsc->lvTracked || (varDsc->lvType == TYP_STRUCT)) && varDsc->lvOnFrame && (!varDsc->lvIsTemp || varTypeIsGC(varDsc->TypeGet()) || (varDsc->lvStructGcCount > 0))) { varDsc->lvMustInit = true; diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp index 2b65206..93de32a 100644 --- a/src/jit/gentree.cpp +++ b/src/jit/gentree.cpp @@ -16529,15 +16529,45 @@ CORINFO_CLASS_HANDLE Compiler::gtGetStructHandleIfPresent(GenTree* tree) if (varTypeIsSIMD(tree)) { structHnd = gtGetStructHandleForSIMD(tree->gtType, TYP_FLOAT); +#ifdef FEATURE_HW_INTRINSICS + if (structHnd == NO_CLASS_HANDLE) + { + structHnd = gtGetStructHandleForHWSIMD(tree->gtType, TYP_FLOAT); + } +#endif } else #endif { + // Attempt to find a handle for this expression. + // We can do this for an array element indirection, or for a field indirection. ArrayInfo arrInfo; if (TryGetArrayInfo(tree->AsIndir(), &arrInfo)) { structHnd = EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType); } + else + { + GenTree* addr = tree->AsIndir()->Addr(); + if ((addr->OperGet() == GT_ADD) && addr->gtGetOp2()->OperIs(GT_CNS_INT)) + { + FieldSeqNode* fieldSeq = addr->gtGetOp2()->AsIntCon()->gtFieldSeq; + + if (fieldSeq != nullptr) + { + while (fieldSeq->m_next != nullptr) + { + fieldSeq = fieldSeq->m_next; + } + if (fieldSeq != FieldSeqStore::NotAField() && !fieldSeq->IsPseudoField()) + { + CORINFO_FIELD_HANDLE fieldHnd = fieldSeq->m_fieldHnd; + CorInfoType fieldCorType = info.compCompHnd->getFieldType(fieldHnd, &structHnd); + assert(fieldCorType == CORINFO_TYPE_VALUECLASS); + } + } + } + } } break; #ifdef FEATURE_SIMD diff --git a/src/jit/gentree.h b/src/jit/gentree.h index ef3bca2..6893c35 100644 --- a/src/jit/gentree.h +++ b/src/jit/gentree.h @@ -4712,22 +4712,28 @@ struct GenTreeObj : public GenTreeBlk } } + void Init() + { + // By default, an OBJ is assumed to be a global reference, unless it is local. 
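+        // (When the address is a local with no other global side effects, GTF_GLOB_REF is left off,
+        // so the struct value can be handled like any other local value by later optimizations.)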
+ GenTreeLclVarCommon* lcl = Addr()->IsLocalAddrExpr(); + if ((lcl == nullptr) || ((lcl->gtFlags & GTF_GLOB_EFFECT) != 0)) + { + gtFlags |= GTF_GLOB_REF; + } + noway_assert(gtClass != NO_CLASS_HANDLE); + _gtGcPtrCount = UINT32_MAX; + } + GenTreeObj(var_types type, GenTree* addr, CORINFO_CLASS_HANDLE cls, unsigned size) : GenTreeBlk(GT_OBJ, type, addr, size), gtClass(cls) { - // By default, an OBJ is assumed to be a global reference. - gtFlags |= GTF_GLOB_REF; - noway_assert(cls != NO_CLASS_HANDLE); - _gtGcPtrCount = UINT32_MAX; + Init(); } GenTreeObj(var_types type, GenTree* addr, GenTree* data, CORINFO_CLASS_HANDLE cls, unsigned size) : GenTreeBlk(GT_STORE_OBJ, type, addr, data, size), gtClass(cls) { - // By default, an OBJ is assumed to be a global reference. - gtFlags |= GTF_GLOB_REF; - noway_assert(cls != NO_CLASS_HANDLE); - _gtGcPtrCount = UINT32_MAX; + Init(); } #if DEBUGGABLE_GENTREE diff --git a/src/jit/importer.cpp b/src/jit/importer.cpp index ab99a5c..74ec369 100644 --- a/src/jit/importer.cpp +++ b/src/jit/importer.cpp @@ -618,21 +618,8 @@ inline void Compiler::impAppendStmt(GenTree* stmt, unsigned chkLevel) // Since we don't know what it assigns to, we need to spill global refs. spillGlobEffects = true; } - else if (!expr->OperIsBlkOp()) + else if ((lhs->gtFlags & GTF_GLOB_REF) != 0) { - // If we are assigning to a global ref, we have to spill global refs on stack - if ((lhs->gtFlags & GTF_GLOB_REF) != 0) - { - spillGlobEffects = true; - } - } - else if ((lhs->OperIsBlk() && !lhs->AsBlk()->HasGCPtr()) || - ((lhs->OperGet() == GT_LCL_VAR) && - (lvaTable[lhs->AsLclVarCommon()->gtLclNum].lvStructGcCount == 0))) - { - // TODO-1stClassStructs: Previously, spillGlobEffects was set to true for - // GT_INITBLK and GT_COPYBLK, but this is overly conservative, and should be - // revisited. (Note that it was NOT set to true for GT_COPYOBJ.) spillGlobEffects = true; } } @@ -1389,8 +1376,6 @@ GenTree* Compiler::impAssignStructPtr(GenTree* destAddr, } if (dest == nullptr) { - // TODO-1stClassStructs: We shouldn't really need a block node as the destination - // if this is a known struct type. if (asgType == TYP_STRUCT) { dest = gtNewObjNode(structHnd, destAddr); @@ -1401,10 +1386,6 @@ GenTree* Compiler::impAssignStructPtr(GenTree* destAddr, dest->gtFlags &= ~GTF_GLOB_REF; dest->gtFlags |= (destAddr->gtFlags & GTF_GLOB_REF); } - else if (varTypeIsStruct(asgType)) - { - dest = new (this, GT_BLK) GenTreeBlk(GT_BLK, asgType, destAddr, genTypeSize(asgType)); - } else { dest = gtNewOperNode(GT_IND, asgType, destAddr); @@ -14527,9 +14508,8 @@ void Compiler::impImportBlockCode(BasicBlock* block) assert(!"Unexpected fieldAccessor"); } - // Create the member assignment, unless we have a struct. - // TODO-1stClassStructs: This could be limited to TYP_STRUCT, to avoid extra copies. - bool deferStructAssign = varTypeIsStruct(lclTyp); + // Create the member assignment, unless we have a TYP_STRUCT. + bool deferStructAssign = (lclTyp == TYP_STRUCT); if (!deferStructAssign) { @@ -16220,10 +16200,6 @@ GenTree* Compiler::impAssignSmallStructTypeToVar(GenTree* op, CORINFO_CLASS_HAND unsigned tmpNum = lvaGrabTemp(true DEBUGARG("Return value temp for small struct return.")); impAssignTempGen(tmpNum, op, hClass, (unsigned)CHECK_SPILL_ALL); GenTree* ret = gtNewLclvNode(tmpNum, lvaTable[tmpNum].lvType); - - // TODO-1stClassStructs: Handle constant propagation and CSE-ing of small struct returns. 
- ret->gtFlags |= GTF_DONT_CSE; - return ret; } diff --git a/src/jit/lclvars.cpp b/src/jit/lclvars.cpp index 722e18f..0f6cdf2 100644 --- a/src/jit/lclvars.cpp +++ b/src/jit/lclvars.cpp @@ -2175,6 +2175,9 @@ void Compiler::StructPromotionHelper::PromoteStructVar(unsigned lclNum) // Set size to zero so that lvaSetStruct will appropriately set the SIMD-relevant fields. fieldVarDsc->lvExactSize = 0; compiler->lvaSetStruct(varNum, pFieldInfo->fldTypeHnd, false, true); + // We will not recursively promote this, so mark it as 'lvRegStruct' (note that we wouldn't + // be promoting this if we didn't think it could be enregistered. + fieldVarDsc->lvRegStruct = true; } #endif // FEATURE_SIMD @@ -7047,8 +7050,6 @@ void Compiler::lvaDumpEntry(unsigned lclNum, FrameLayoutState curState, size_t r printf(" V%02u.%s(offs=0x%02x)", varDsc->lvParentLcl, eeGetFieldName(fldHnd), varDsc->lvFldOffset); lvaPromotionType promotionType = lvaGetPromotionType(parentvarDsc); - // We should never have lvIsStructField set if it is a reg-sized non-field-addressed struct. - assert(!varDsc->lvRegStruct); switch (promotionType) { case PROMOTION_TYPE_NONE: diff --git a/src/jit/liveness.cpp b/src/jit/liveness.cpp index 2fb225e..99f4db7 100644 --- a/src/jit/liveness.cpp +++ b/src/jit/liveness.cpp @@ -2160,14 +2160,14 @@ bool Compiler::fgRemoveDeadStore(GenTree** pTree, addrNode = tree; } - // Next, find the assignment. + // Next, find the assignment (i.e. if we didn't have a LocalStore) if (asgNode == nullptr) { if (addrNode == nullptr) { asgNode = nextNode; } - else if (asgNode == nullptr) + else { // This may be followed by GT_IND/assign or GT_STOREIND. if (nextNode == nullptr) @@ -2180,19 +2180,22 @@ bool Compiler::fgRemoveDeadStore(GenTree** pTree, assert(nextNode->OperGet() != GT_NULLCHECK); if (nextNode->OperIsStore()) { + // This is a store, which takes a location and a value to be stored. + // It's 'rhsNode' is the value to be stored. asgNode = nextNode; if (asgNode->OperIsBlk()) { rhsNode = asgNode->AsBlk()->Data(); } - // TODO-1stClassStructs: There should be an else clause here to handle - // the non-block forms of store ops (GT_STORE_LCL_VAR, etc.) for which - // rhsNode is op1. (This isn't really a 1stClassStructs item, but the - // above was added to catch what used to be dead block ops, and that - // made this omission apparent.) + else + { + // This is a non-block store. + rhsNode = asgNode->gtGetOp2(); + } } else { + // This is a non-store indirection, and the assignment will com after it. asgNode = nextNode->gtNext; } } diff --git a/src/jit/lower.cpp b/src/jit/lower.cpp index fc507c4..01f181f 100644 --- a/src/jit/lower.cpp +++ b/src/jit/lower.cpp @@ -1765,11 +1765,7 @@ void Lowering::CheckVSQuirkStackPaddingNeeded(GenTreeCall* call) if (op1->OperGet() == GT_LCL_VAR_ADDR) { unsigned lclNum = op1->AsLclVarCommon()->GetLclNum(); - // TODO-1stClassStructs: This is here to duplicate previous behavior, - // but is not needed because the scenario being quirked did not involve - // a SIMD or enregisterable struct. - // if(comp->lvaTable[lclNum].TypeGet() == TYP_STRUCT) - if (varTypeIsStruct(comp->lvaTable[lclNum].TypeGet())) + if (comp->lvaGetDesc(lclNum)->TypeGet() == TYP_STRUCT) { // First arg is addr of a struct local. paddingNeeded = true; diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp index fb6b3f4..78f43eb 100644 --- a/src/jit/morph.cpp +++ b/src/jit/morph.cpp @@ -2153,16 +2153,10 @@ GenTree* Compiler::fgMakeTmpArgNode(fgArgTabEntry* curArgTabEntry) // Create an Obj of the temp to use it as a call argument. 
arg = gtNewObjNode(lvaGetStruct(tmpVarNum), arg); - - // TODO-1stClassStructs: We should not need to set the GTF_DONT_CSE flag here; - // this is only to preserve former behavior (though some CSE'ing of struct - // values can be pessimizing, so enabling this may require some additional tuning). - arg->gtFlags |= GTF_DONT_CSE; } #else // Always create an Obj of the temp to use it as a call argument. arg = gtNewObjNode(lvaGetStruct(tmpVarNum), arg); - arg->gtFlags |= GTF_DONT_CSE; #endif // !_TARGET_ARM64_ #endif // FEATURE_MULTIREG_ARGS } @@ -6615,14 +6609,6 @@ GenTree* Compiler::fgMorphField(GenTree* tree, MorphAddrContext* mac) GenTree* res = fgMorphSmpOp(tree); - // If we have a struct type, this node would previously have been under a GT_ADDR, - // and therefore would have been marked GTF_DONT_CSE. - // TODO-1stClassStructs: revisit this. - if ((res->TypeGet() == TYP_STRUCT) && !objIsLocal) - { - res->gtFlags |= GTF_DONT_CSE; - } - if (fldOffset == 0 && res->OperGet() == GT_IND) { GenTree* addr = res->gtOp.gtOp1; @@ -9000,24 +8986,8 @@ GenTree* Compiler::fgMorphOneAsgBlockOp(GenTree* tree) bool isCopyBlock = asg->OperIsCopyBlkOp(); bool isInitBlock = !isCopyBlock; - unsigned size; + unsigned size = 0; CORINFO_CLASS_HANDLE clsHnd = NO_CLASS_HANDLE; -#ifdef FEATURE_SIMD - // importer introduces cpblk nodes with src = GT_ADDR(GT_SIMD/GT_HWIntrinsic) - // The SIMD type in question could be Vector2f which is 8-bytes in size. - // The below check is to make sure that we don't turn that copyblk - // into a assignment, since rationalizer logic will transform the - // copyblk appropriately. Otherwise, the transformation made in this - // routine will prevent rationalizer logic and we might end up with - // GT_ADDR(GT_SIMD/GT_HWIntrinsic) node post rationalization, leading to a noway assert - // in codegen. - // TODO-1stClassStructs: This is here to preserve old behavior. - // It should be eliminated. - if (src->OperIsSimdOrHWintrinsic()) - { - return nullptr; - } -#endif if (dest->gtEffectiveVal()->OperIsBlk()) { @@ -9037,24 +9007,43 @@ GenTree* Compiler::fgMorphOneAsgBlockOp(GenTree* tree) { // Is this an enregisterable struct that is already a simple assignment? // This can happen if we are re-morphing. - if ((dest->OperGet() == GT_IND) && (dest->TypeGet() != TYP_STRUCT) && isCopyBlock) + // Note that we won't do this straightaway if this is a SIMD type, since it + // may be a promoted lclVar (sometimes we promote the individual float fields of + // fixed-size SIMD). 
+ if (dest->OperGet() == GT_IND) { - return tree; + noway_assert(asgType != TYP_STRUCT); + if (varTypeIsStruct(asgType)) + { + destLclVarTree = fgIsIndirOfAddrOfLocal(dest); + } + if (isCopyBlock && destLclVarTree == nullptr && !src->OperIs(GT_LCL_VAR)) + { + fgMorphBlockOperand(src, asgType, genTypeSize(asgType), false /*isDest*/); + return tree; + } } - noway_assert(dest->OperIsLocal()); - destLclVarTree = dest; - destVarNum = destLclVarTree->AsLclVarCommon()->gtLclNum; - destVarDsc = &(lvaTable[destVarNum]); - if (isCopyBlock) + else { - clsHnd = destVarDsc->lvVerTypeInfo.GetClassHandle(); - size = info.compCompHnd->getClassSize(clsHnd); + noway_assert(dest->OperIsLocal()); + destLclVarTree = dest; } - else + if (destLclVarTree != nullptr) + { + destVarNum = destLclVarTree->AsLclVarCommon()->gtLclNum; + destVarDsc = &(lvaTable[destVarNum]); + if (asgType == TYP_STRUCT) + { + clsHnd = destVarDsc->lvVerTypeInfo.GetClassHandle(); + size = destVarDsc->lvExactSize; + } + } + if (asgType != TYP_STRUCT) { - size = destVarDsc->lvExactSize; + size = genTypeSize(asgType); } } + noway_assert((size != 0) || dest->OperIs(GT_DYN_BLK)); // // See if we can do a simple transformation: @@ -9161,7 +9150,7 @@ GenTree* Compiler::fgMorphOneAsgBlockOp(GenTree* tree) // holes, whose contents could be meaningful in unsafe code. If we decide that's a valid // concern, then we could compromise, and say that address-exposed + fields do not completely cover the // memory of the struct prevent field-wise assignments. Same situation exists for the "src" decision. - if (varTypeIsStruct(destLclVarTree) && (destVarDsc->lvPromoted || destVarDsc->lvIsSIMDType())) + if (varTypeIsStruct(destLclVarTree) && destVarDsc->lvPromoted) { // Let fgMorphInitBlock handle it. (Since we'll need to do field-var-wise assignments.) return nullptr; @@ -9256,11 +9245,9 @@ GenTree* Compiler::fgMorphOneAsgBlockOp(GenTree* tree) else { // The source argument of the copyblk can potentially be accessed only through indir(addr(lclVar)) - // or indir(lclVarAddr) in rational form and liveness won't account for these uses. That said, - // we have to mark this local as address exposed so we don't delete it as a dead store later on. - unsigned lclVarNum = srcLclVarTree->gtLclVarCommon.gtLclNum; - lvaTable[lclVarNum].lvAddrExposed = true; - lvaSetVarDoNotEnregister(lclVarNum DEBUGARG(DNER_AddrExposed)); + // or indir(lclVarAddr) so it must be on the stack. + unsigned lclVarNum = srcLclVarTree->gtLclVarCommon.gtLclNum; + lvaSetVarDoNotEnregister(lclVarNum DEBUGARG(DNER_BlockOp)); GenTree* srcAddr; if (src == srcLclVarTree) { @@ -9289,9 +9276,8 @@ GenTree* Compiler::fgMorphOneAsgBlockOp(GenTree* tree) src->SetIndirExceptionFlags(this); } } - else + else // InitBlk { -// InitBlk #if FEATURE_SIMD if (varTypeIsSIMD(asgType)) { @@ -10022,11 +10008,17 @@ GenTree* Compiler::fgMorphBlockOperand(GenTree* tree, var_types asgType, unsigne { if (indirTree != nullptr) { - // We should never find a struct indirection on the lhs of an assignment. - assert(!isDest || indirTree->OperIsBlk()); - if (!isDest && indirTree->OperIsBlk()) + if (indirTree->OperIsBlk()) + { + if (!isDest || (asgType != TYP_STRUCT)) + { + (void)fgMorphBlkToInd(effectiveVal->AsBlk(), asgType); + } + } + else { - (void)fgMorphBlkToInd(effectiveVal->AsBlk(), asgType); + // We should never find a TYP_STRUCT GT_IND on the lhs of an assignment. 
+ assert(!isDest || (asgType != TYP_STRUCT)); } } else @@ -10047,14 +10039,6 @@ GenTree* Compiler::fgMorphBlockOperand(GenTree* tree, var_types asgType, unsigne { gtSetObjGcInfo(newTree->AsObj()); } - if (effectiveVal->IsLocal() && ((effectiveVal->gtFlags & GTF_GLOB_EFFECT) == 0)) - { - // This is not necessarily a global reference, though gtNewObjNode always assumes it is. - // TODO-1stClassStructs: This check should be done in the GenTreeObj constructor, - // where it currently sets GTF_GLOB_EFFECT unconditionally, but it is handled - // separately now to avoid excess diffs. - newTree->gtFlags &= ~(GTF_GLOB_EFFECT); - } } } else @@ -10530,11 +10514,11 @@ GenTree* Compiler::fgMorphCopyBlock(GenTree* tree) } } + var_types asgType = dest->TypeGet(); if (requiresCopyBlock) { - var_types asgType = dest->TypeGet(); - dest = fgMorphBlockOperand(dest, asgType, blockWidth, true /*isDest*/); - asg->gtOp.gtOp1 = dest; + dest = fgMorphBlockOperand(dest, asgType, blockWidth, true /*isDest*/); + asg->gtOp.gtOp1 = dest; asg->gtFlags |= (dest->gtFlags & GTF_ALL_EFFECT); // Note that the unrolling of CopyBlk is only implemented on some platforms. @@ -10557,21 +10541,6 @@ GenTree* Compiler::fgMorphCopyBlock(GenTree* tree) rhs = fgMorphBlockOperand(rhs, asgType, blockWidth, false /*!isDest*/); asg->gtOp.gtOp2 = rhs; - // Formerly, liveness did not consider copyblk arguments of simple types as being - // a use or def, so these variables were marked as address-exposed. - // TODO-1stClassStructs: This should no longer be needed. - if (srcLclNum != BAD_VAR_NUM && !varTypeIsStruct(srcLclVar)) - { - JITDUMP("Non-struct copyBlk src V%02d is addr exposed\n", srcLclNum); - lvaTable[srcLclNum].lvAddrExposed = true; - } - - if (destLclNum != BAD_VAR_NUM && !varTypeIsStruct(destLclVar)) - { - JITDUMP("Non-struct copyBlk dest V%02d is addr exposed\n", destLclNum); - lvaTable[destLclNum].lvAddrExposed = true; - } - goto _Done; } @@ -10601,7 +10570,7 @@ GenTree* Compiler::fgMorphCopyBlock(GenTree* tree) else if (destDoFldAsg) { fieldCnt = destLclVar->lvFieldCnt; - rhs = fgMorphBlockOperand(rhs, TYP_STRUCT, blockWidth, false /*isDest*/); + rhs = fgMorphBlockOperand(rhs, asgType, blockWidth, false /*isDest*/); if (srcAddr == nullptr) { srcAddr = fgMorphGetStructAddr(&rhs, destLclVar->lvVerTypeInfo.GetClassHandle(), true /* rValue */); @@ -10611,7 +10580,7 @@ GenTree* Compiler::fgMorphCopyBlock(GenTree* tree) { assert(srcDoFldAsg); fieldCnt = srcLclVar->lvFieldCnt; - dest = fgMorphBlockOperand(dest, TYP_STRUCT, blockWidth, true /*isDest*/); + dest = fgMorphBlockOperand(dest, asgType, blockWidth, true /*isDest*/); if (dest->OperIsBlk()) { (void)fgMorphBlkToInd(dest->AsBlk(), TYP_STRUCT); @@ -10692,6 +10661,9 @@ GenTree* Compiler::fgMorphCopyBlock(GenTree* tree) if (addrSpill != nullptr) { + // Simplify the address if possible, and mark as DONT_CSE as needed.. + addrSpill = fgMorphTree(addrSpill); + // Spill the (complex) address to a BYREF temp. // Note, at most one address may need to be spilled. addrSpillTemp = lvaGrabTemp(true DEBUGARG("BlockOp address local")); @@ -10816,9 +10788,6 @@ GenTree* Compiler::fgMorphCopyBlock(GenTree* tree) noway_assert(srcLclVarTree != nullptr); src->gtFlags |= srcLclVarTree->gtFlags & ~GTF_NODE_MASK; - // TODO-1stClassStructs: These should not need to be marked GTF_DONT_CSE, - // but they are when they are under a GT_ADDR. 
- src->gtFlags |= GTF_DONT_CSE; } else { @@ -18936,12 +18905,8 @@ bool Compiler::fgMorphCombineSIMDFieldAssignments(BasicBlock* block, GenTree* st } #endif - // TODO-1stClassStructs: we should be able to simply use a GT_IND here. - GenTree* blkNode = gtNewBlockVal(copyBlkDst, simdSize); - blkNode->gtType = simdType; - tree = gtNewBlkOpNode(blkNode, simdStructNode, simdSize, - false, // not volatile - true); // copyBlock + GenTree* dstNode = gtNewOperNode(GT_IND, simdType, copyBlkDst); + tree = gtNewAssignNode(dstNode, simdStructNode); stmt->gtStmt.gtStmtExpr = tree; diff --git a/src/jit/optcse.cpp b/src/jit/optcse.cpp index 8ef5ae7..fd20c77 100644 --- a/src/jit/optcse.cpp +++ b/src/jit/optcse.cpp @@ -1415,8 +1415,8 @@ public: if (!varTypeIsFloating(varTyp)) { - // TODO-1stClassStructs: Remove this; it is here to duplicate previous behavior. - // Note that this makes genTypeStSz return 1. + // TODO-1stClassStructs: Revisit this; it is here to duplicate previous behavior. + // Note that this makes genTypeStSz return 1, but undoing it pessimizes some code. if (varTypeIsStruct(varTyp)) { varTyp = TYP_STRUCT; @@ -1754,6 +1754,22 @@ public: // Each CSE Def will contain two Refs and each CSE Use will have one Ref of this new LclVar unsigned cseRefCnt = (candidate->DefCount() * 2) + candidate->UseCount(); + bool canEnregister = true; + unsigned slotCount = 1; + var_types cseLclVarTyp = genActualType(candidate->Expr()->TypeGet()); + if (candidate->Expr()->TypeGet() == TYP_STRUCT) + { + // This is a non-enregisterable struct. + canEnregister = false; + GenTree* value = candidate->Expr(); + CORINFO_CLASS_HANDLE structHnd = m_pCompiler->gtGetStructHandleIfPresent(candidate->Expr()); + assert((structHnd != NO_CLASS_HANDLE) || (cseLclVarTyp != TYP_STRUCT)); + unsigned size = m_pCompiler->info.compCompHnd->getClassSize(structHnd); + // Note that the slotCount is used to estimate the reference cost, but it may overestimate this + // because it doesn't take into account that we might use a vector register for struct copies. 
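+            // For example, on a 64-bit target a 24-byte struct is costed as 3 pointer-sized slots.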
+ slotCount = (size + TARGET_POINTER_SIZE - 1) / TARGET_POINTER_SIZE; + } + if (CodeOptKind() == Compiler::SMALL_CODE) { if (cseRefCnt >= aggressiveRefCnt) @@ -1764,10 +1780,10 @@ public: printf("Aggressive CSE Promotion (%u >= %u)\n", cseRefCnt, aggressiveRefCnt); } #endif - cse_def_cost = 1; - cse_use_cost = 1; + cse_def_cost = slotCount; + cse_use_cost = slotCount; - if (candidate->LiveAcrossCall() != 0) + if (candidate->LiveAcrossCall() || !canEnregister) { if (largeFrame) { @@ -1796,13 +1812,13 @@ public: #else // _TARGET_ARM_ if (hugeFrame) { - cse_def_cost = 12; // movw/movt r10 and str reg,[sp+r10] - cse_use_cost = 12; + cse_def_cost = 10 + (2 * slotCount); // movw/movt r10 and str reg,[sp+r10] + cse_use_cost = 10 + (2 * slotCount); } else { - cse_def_cost = 8; // movw r10 and str reg,[sp+r10] - cse_use_cost = 8; + cse_def_cost = 6 + (2 * slotCount); // movw r10 and str reg,[sp+r10] + cse_use_cost = 6 + (2 * slotCount); } #endif } @@ -1816,17 +1832,17 @@ public: #endif #ifdef _TARGET_XARCH_ /* The following formula is good choice when optimizing CSE for SMALL_CODE */ - cse_def_cost = 3; // mov [EBP-1C],reg - cse_use_cost = 2; // [EBP-1C] -#else // _TARGET_ARM_ - cse_def_cost = 2; // str reg,[sp+0x9c] - cse_use_cost = 2; // ldr reg,[sp+0x9c] + cse_def_cost = 3 * slotCount; // mov [EBP-1C],reg + cse_use_cost = 2 * slotCount; // [EBP-1C] +#else // _TARGET_ARM_ + cse_def_cost = 2 * slotCount; // str reg,[sp+0x9c] + cse_use_cost = 2 * slotCount; // ldr reg,[sp+0x9c] #endif } } else // not SMALL_CODE ... { - if (cseRefCnt >= aggressiveRefCnt) + if ((cseRefCnt >= aggressiveRefCnt) && canEnregister) { #ifdef DEBUG if (m_pCompiler->verbose) @@ -1834,13 +1850,13 @@ public: printf("Aggressive CSE Promotion (%u >= %u)\n", cseRefCnt, aggressiveRefCnt); } #endif - cse_def_cost = 1; - cse_use_cost = 1; + cse_def_cost = slotCount; + cse_use_cost = slotCount; } else if (cseRefCnt >= moderateRefCnt) { - if (candidate->LiveAcrossCall() == 0) + if (!candidate->LiveAcrossCall() && canEnregister) { #ifdef DEBUG if (m_pCompiler->verbose) @@ -1852,7 +1868,7 @@ public: cse_def_cost = 2; cse_use_cost = 1; } - else // candidate is live across call + else // candidate is live across call or not enregisterable. 
{ #ifdef DEBUG if (m_pCompiler->verbose) @@ -1860,15 +1876,15 @@ public: printf("Moderate CSE Promotion (%u >= %u)\n", cseRefCnt, moderateRefCnt); } #endif - cse_def_cost = 2; - cse_use_cost = 2; + cse_def_cost = 2 * slotCount; + cse_use_cost = 2 * slotCount; extra_yes_cost = BB_UNITY_WEIGHT * 2; // Extra cost in case we have to spill/restore a caller // saved register } } else // Conservative CSE promotion { - if (candidate->LiveAcrossCall() == 0) + if (!candidate->LiveAcrossCall() && canEnregister) { #ifdef DEBUG if (m_pCompiler->verbose) @@ -1888,8 +1904,8 @@ public: printf("Conservative CSE Promotion (%u < %u)\n", cseRefCnt, moderateRefCnt); } #endif - cse_def_cost = 3; - cse_use_cost = 3; + cse_def_cost = 3 * slotCount; + cse_use_cost = 3 * slotCount; extra_yes_cost = BB_UNITY_WEIGHT * 4; // Extra cost in case we have to spill/restore a caller // saved register } @@ -1897,8 +1913,8 @@ public: // If we have maxed out lvaTrackedCount then this CSE may end up as an untracked variable if (m_pCompiler->lvaTrackedCount == lclMAX_TRACKED) { - cse_def_cost++; - cse_use_cost++; + cse_def_cost += slotCount; + cse_use_cost += slotCount; } } @@ -2031,7 +2047,20 @@ public: var_types cseLclVarTyp = genActualType(successfulCandidate->Expr()->TypeGet()); if (varTypeIsStruct(cseLclVarTyp)) { - m_pCompiler->lvaSetStruct(cseLclVarNum, m_pCompiler->gtGetStructHandle(successfulCandidate->Expr()), false); + // After call args have been morphed, we don't need a handle for SIMD types. + // They are only required where the size is not implicit in the type and/or there are GC refs. + CORINFO_CLASS_HANDLE structHnd = m_pCompiler->gtGetStructHandleIfPresent(successfulCandidate->Expr()); + assert((structHnd != NO_CLASS_HANDLE) || (cseLclVarTyp != TYP_STRUCT)); + if (structHnd != NO_CLASS_HANDLE) + { + m_pCompiler->lvaSetStruct(cseLclVarNum, structHnd, false); + } +#ifdef FEATURE_SIMD + else if (varTypeIsSIMD(cseLclVarTyp)) + { + m_pCompiler->lvaGetDesc(cseLclVarNum)->lvSIMDType = true; + } +#endif // FEATURE_SIMD } m_pCompiler->lvaTable[cseLclVarNum].lvType = cseLclVarTyp; m_pCompiler->lvaTable[cseLclVarNum].lvIsCSE = true; @@ -2305,14 +2334,26 @@ public: GenTree* val = exp; /* Create an assignment of the value to the temp */ - GenTree* asg = m_pCompiler->gtNewTempAssign(cseLclVarNum, val); + GenTree* asg = m_pCompiler->gtNewTempAssign(cseLclVarNum, val); + GenTree* origAsg = asg; + + if (!asg->OperIs(GT_ASG)) + { + // This can only be the case for a struct in which the 'val' was a COMMA, so + // the assignment is sunk below it. 
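+            // That is, 'origAsg' has the shape GT_COMMA(<side effects>, GT_ASG(tmp, ...)), and
+            // gtEffectiveVal(true) walks below the comma(s) to the underlying GT_ASG.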
+ asg = asg->gtEffectiveVal(true); + noway_assert(origAsg->OperIs(GT_COMMA) && (origAsg == val)); + } + else + { + noway_assert(asg->gtOp.gtOp2 == val); + } // assign the proper Value Numbers asg->gtVNPair.SetBoth(ValueNumStore::VNForVoid()); // The GT_ASG node itself is $VN.Void asg->gtOp.gtOp1->gtVNPair = val->gtVNPair; // The dest op is the same as 'val' noway_assert(asg->gtOp.gtOp1->gtOper == GT_LCL_VAR); - noway_assert(asg->gtOp.gtOp2 == val); /* Create a reference to the CSE temp */ GenTree* ref = m_pCompiler->gtNewLclvNode(cseLclVarNum, cseLclVarTyp); @@ -2325,7 +2366,7 @@ public: } /* Create a comma node for the CSE assignment */ - cse = m_pCompiler->gtNewOperNode(GT_COMMA, expTyp, asg, ref); + cse = m_pCompiler->gtNewOperNode(GT_COMMA, expTyp, origAsg, ref); cse->gtVNPair = ref->gtVNPair; // The comma's value is the same as 'val' // as the assignment to the CSE LclVar // cannot add any new exceptions @@ -2536,16 +2577,17 @@ bool Compiler::optIsCSEcandidate(GenTree* tree) return false; } - /* The only reason a TYP_STRUCT tree might occur is as an argument to - GT_ADDR. It will never be actually materialized. So ignore them. - Also TYP_VOIDs */ - var_types type = tree->TypeGet(); genTreeOps oper = tree->OperGet(); - // TODO-1stClassStructs: Enable CSE for struct types (depends on either transforming - // to use regular assignments, or handling copyObj. - if (varTypeIsStruct(type) || type == TYP_VOID) + if (type == TYP_VOID) + { + return false; + } + + // If this is a struct type, we can only consider it for CSE-ing if we can get at + // its handle, so that we can create a temp. + if ((type == TYP_STRUCT) && (gtGetStructHandleIfPresent(tree) == NO_CLASS_HANDLE)) { return false; } diff --git a/src/jit/rationalize.cpp b/src/jit/rationalize.cpp index 1cdb54f..d51634b 100644 --- a/src/jit/rationalize.cpp +++ b/src/jit/rationalize.cpp @@ -552,7 +552,7 @@ void Rationalizer::RewriteAddress(LIR::Use& use) JITDUMP("\n"); } -Compiler::fgWalkResult Rationalizer::RewriteNode(GenTree** useEdge, ArrayStack& parentStack) +Compiler::fgWalkResult Rationalizer::RewriteNode(GenTree** useEdge, Compiler::GenTreeStack& parentStack) { assert(useEdge != nullptr); @@ -755,39 +755,21 @@ Compiler::fgWalkResult Rationalizer::RewriteNode(GenTree** useEdge, ArrayStackIsTargetIntrinsic(node->gtIntrinsic.gtIntrinsicId)); break; -#ifdef FEATURE_SIMD case GT_BLK: + // We should only see GT_BLK for TYP_STRUCT. + assert(node->TypeGet() == TYP_STRUCT); + break; + case GT_OBJ: - { - // TODO-1stClassStructs: These should have been transformed to GT_INDs, but in order - // to preserve existing behavior, we will keep this as a block node if this is the - // lhs of a block assignment, and either: - // - It is a "generic" TYP_STRUCT assignment, OR - // - It is an initblk, OR - // - Neither the lhs or rhs are known to be of SIMD type. - - GenTree* parent = use.User(); - bool keepBlk = false; - if ((parent->OperGet() == GT_ASG) && (node == parent->gtGetOp1())) + assert(node->TypeGet() == TYP_STRUCT || !use.User()->OperIsInitBlkOp()); + if (varTypeIsSIMD(node)) { - if ((node->TypeGet() == TYP_STRUCT) || parent->OperIsInitBlkOp()) - { - keepBlk = true; - } - else if (!comp->isAddrOfSIMDType(node->AsBlk()->Addr())) - { - GenTree* dataSrc = parent->gtGetOp2(); - if (!dataSrc->IsLocal() && (dataSrc->OperGet() != GT_SIMD) && (!dataSrc->OperIsHWIntrinsic())) - { - noway_assert(dataSrc->OperIsIndir()); - keepBlk = !comp->isAddrOfSIMDType(dataSrc->AsIndir()->Addr()); - } - } + // Rewrite these as GT_IND. 
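+                // Passing false for 'keepBlk', since SIMD-typed OBJ nodes no longer need to be
+                // preserved as block nodes.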
+ RewriteSIMDOperand(use, false); } - RewriteSIMDOperand(use, keepBlk); - } - break; + break; +#ifdef FEATURE_SIMD case GT_SIMD: { noway_assert(comp->featureSIMD); diff --git a/src/jit/rationalize.h b/src/jit/rationalize.h index 41404a2..297db36 100644 --- a/src/jit/rationalize.h +++ b/src/jit/rationalize.h @@ -47,14 +47,14 @@ private: #endif GenTreeArgList* args); - void RewriteIntrinsicAsUserCall(GenTree** use, ArrayStack& parents); + void RewriteIntrinsicAsUserCall(GenTree** use, Compiler::GenTreeStack& parents); // Other transformations void RewriteAssignment(LIR::Use& use); void RewriteAddress(LIR::Use& use); // Root visitor - Compiler::fgWalkResult RewriteNode(GenTree** useEdge, ArrayStack& parents); + Compiler::fgWalkResult RewriteNode(GenTree** useEdge, Compiler::GenTreeStack& parents); }; inline Rationalizer::Rationalizer(Compiler* _comp) : Phase(_comp, "IR Rationalize", PHASE_RATIONALIZE) diff --git a/src/jit/simd.cpp b/src/jit/simd.cpp index 1281184..ea15a9d 100644 --- a/src/jit/simd.cpp +++ b/src/jit/simd.cpp @@ -2967,6 +2967,10 @@ GenTree* Compiler::impSIMDIntrinsic(OPCODE opcode, op2 = gtCloneExpr(index); } + // For the non-constant case, we don't want to CSE the SIMD value, as we will just need to store + // it to the stack to do the indexing anyway. + op1->gtFlags |= GTF_DONT_CSE; + GenTree* lengthNode = new (this, GT_CNS_INT) GenTreeIntCon(TYP_INT, vectorLength); GenTreeBoundsChk* simdChk = new (this, GT_SIMD_CHK) GenTreeBoundsChk(GT_SIMD_CHK, TYP_VOID, index, lengthNode, SCK_RNGCHK_FAIL); diff --git a/tests/src/JIT/Regression/JitBlue/GitHub_19438/GitHub_19438.cs b/tests/src/JIT/Regression/JitBlue/GitHub_19438/GitHub_19438.cs new file mode 100644 index 0000000..b57cae9 --- /dev/null +++ b/tests/src/JIT/Regression/JitBlue/GitHub_19438/GitHub_19438.cs @@ -0,0 +1,126 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Collections.Generic; +using System.Diagnostics; + +// This test executes the same computation on a wrapped Vector4 ('float4') and a +// (not wrapped) Vector4. The code should be similar. +// This was a perf regression issue, so this test is mostly useful for running +// asm diffs. 
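+// It always returns 100 (success); the timing output is informational only.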
+
+namespace GitHub_19438
+{
+    class Program
+    {
+        struct float4
+        {
+            public Vector4 v;
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public float4(float _x, float _y, float _z, float _w)
+            {
+                v = new Vector4(_x, _y, _z, _w);
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public float4(Vector4 _v)
+            {
+                v = _v;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static float4 operator +(float4 a, float4 b)
+            {
+                return new float4(a.v + b.v);
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static float4 operator -(float4 a, float4 b)
+            {
+                return new float4(a.v - b.v);
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static float4 operator *(float4 a, float4 b)
+            {
+                return new float4(a.v * b.v);
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static float4 operator /(float4 a, float4 b)
+            {
+                return new float4(a.v / b.v);
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public override string ToString()
+            {
+                return v.ToString();
+            }
+        }
+
+        static int Main(string[] args)
+        {
+            const int iterationCount = 10;
+            const int itemCount = 1000000;
+
+            long totalTaskTime = 0;
+
+            // First, use a wrapped Vector4.
+            List<float4> items = new List<float4>(itemCount);
+            for (int iteration = 0; iteration < iterationCount; ++iteration)
+            {
+                var taskTimer = Stopwatch.StartNew();
+
+                for (int item = 0; item < itemCount; ++item)
+                {
+                    float4 v0 = new float4(1.0f, 2.0f, 3.0f, 4.0f);
+                    float4 v1 = new float4(5.0f, 6.0f, 7.0f, 8.0f);
+                    float4 v2 = (v0 * v1) - (v1 / v0 + v1);
+                    float4 v3 = (v2 * v0) - (v2 / v0 + v1);
+
+                    items.Add(v2 * v3);
+                }
+
+                taskTimer.Stop();
+                totalTaskTime += taskTimer.ElapsedMilliseconds;
+
+                items.Clear();
+                GC.Collect();
+            }
+            Console.WriteLine("Wrapped Average Time: " + totalTaskTime / iterationCount + "ms");
+
+            // Now, a plain Vector4
+            totalTaskTime = 0;
+            List<Vector4> items2 = new List<Vector4>(itemCount);
+            for (int iteration = 0; iteration < iterationCount; ++iteration)
+            {
+                var taskTimer = Stopwatch.StartNew();
+
+                for (int item = 0; item < itemCount; ++item)
+                {
+                    Vector4 v0 = new Vector4(1.0f, 2.0f, 3.0f, 4.0f);
+                    Vector4 v1 = new Vector4(5.0f, 6.0f, 7.0f, 8.0f);
+                    Vector4 v2 = (v0 * v1) - (v1 / v0 + v1);
+                    Vector4 v3 = (v2 * v0) - (v2 / v0 + v1);
+
+                    items2.Add(v2 * v3);
+                }
+
+                taskTimer.Stop();
+                totalTaskTime += taskTimer.ElapsedMilliseconds;
+
+                items2.Clear();
+                GC.Collect();
+            }
+            Console.WriteLine("Vector4 Average Time: " + totalTaskTime / iterationCount + "ms");
+
+            return 100;
+        }
+    }
+}
diff --git a/tests/src/JIT/Regression/JitBlue/GitHub_19438/GitHub_19438.csproj b/tests/src/JIT/Regression/JitBlue/GitHub_19438/GitHub_19438.csproj
new file mode 100644
index 0000000..d86ed9f
--- /dev/null
+++ b/tests/src/JIT/Regression/JitBlue/GitHub_19438/GitHub_19438.csproj
@@ -0,0 +1,17 @@
+
+
+
+    Release
+    AnyCPU
+    $(MSBuildProjectName)
+    Exe
+
+    True
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/tests/src/JIT/Regression/JitBlue/GitHub_19910/GitHub_19910.cs b/tests/src/JIT/Regression/JitBlue/GitHub_19910/GitHub_19910.cs
new file mode 100644
index 0000000..bb2d536
--- /dev/null
+++ b/tests/src/JIT/Regression/JitBlue/GitHub_19910/GitHub_19910.cs
@@ -0,0 +1,40 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
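+
+// Regression test for GitHub issue #19910: swap two Vector128<uint> values twice (once in-place and
+// once through a small inlinable helper) and verify that they end up with their original values.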
+
+using System;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using System.Runtime.CompilerServices;
+
+namespace GitHub_19910
+{
+    class Program
+    {
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        static void SwapNonGeneric(ref Vector128<uint> a, ref Vector128<uint> b)
+        {
+            Vector128<uint> tmp = a; a = b; b = tmp;
+        }
+
+        [MethodImpl(MethodImplOptions.NoInlining)]
+        static int Main()
+        {
+            if (Sse2.IsSupported)
+            {
+                Vector128<uint> a = Sse2.ConvertScalarToVector128UInt32(0xA);
+                Vector128<uint> b = Sse2.ConvertScalarToVector128UInt32(0xB);
+
+                Vector128<uint> tmp = a; a = b; b = tmp;    // in-place version
+                SwapNonGeneric(ref a, ref b);               // inlined version
+
+                if ((Sse2.ConvertToUInt32(a) != 0xA) || (Sse2.ConvertToUInt32(b) != 0xB))
+                {
+                    Console.WriteLine("A={0}, B={1}", Sse2.ConvertToUInt32(a), Sse2.ConvertToUInt32(b));
+                    return -1;
+                }
+            }
+            return 100;
+        }
+    }
+}
diff --git a/tests/src/JIT/Regression/JitBlue/GitHub_19910/GitHub_19910.csproj b/tests/src/JIT/Regression/JitBlue/GitHub_19910/GitHub_19910.csproj
new file mode 100644
index 0000000..d86ed9f
--- /dev/null
+++ b/tests/src/JIT/Regression/JitBlue/GitHub_19910/GitHub_19910.csproj
@@ -0,0 +1,17 @@
+
+
+
+    Release
+    AnyCPU
+    $(MSBuildProjectName)
+    Exe
+
+    True
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/tests/src/JIT/Regression/JitBlue/GitHub_3539/GitHub_3539.cs b/tests/src/JIT/Regression/JitBlue/GitHub_3539/GitHub_3539.cs
new file mode 100644
index 0000000..8fc6c68
--- /dev/null
+++ b/tests/src/JIT/Regression/JitBlue/GitHub_3539/GitHub_3539.cs
@@ -0,0 +1,62 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+using System.Runtime.CompilerServices;
+
+namespace GitHub_19910
+
+{
+    class Program
+    {
+        public struct Bgr { public byte B; public byte G; public byte R; }
+
+        public class BasicReadWriteBenchmark<T>
+            where T : struct
+        {
+
+            // NOTE: This includes cost of stack alloc
+            [MethodImpl(MethodImplOptions.NoInlining)]
+            static public void ReadFromStack()
+            {
+                unsafe
+                {
+                    void* stackPtr = stackalloc byte[Unsafe.SizeOf<T>()];
+
+                    var value = Unsafe.Read<T>(stackPtr);
+                }
+            }
+
+            // NOTE: This includes cost of stack alloc
+            [MethodImpl(MethodImplOptions.NoInlining)]
+            static public void WriteToStack()
+            {
+                unsafe
+                {
+                    void* stackPtr = stackalloc byte[Unsafe.SizeOf<T>()];
+
+                    T value = default(T);
+                    Unsafe.Write(stackPtr, value);
+                }
+            }
+        }
+
+        public class BasicReadWriteBenchmarkBgr : BasicReadWriteBenchmark<Bgr> { }
+
+        static int Main()
+        {
+            try
+            {
+                BasicReadWriteBenchmarkBgr.ReadFromStack();
+                BasicReadWriteBenchmarkBgr.WriteToStack();
+            }
+            catch (Exception e)
+            {
+                Console.WriteLine("Failed with Exception: " + e.Message);
+                return -1;
+            }
+            return 100;
+        }
+    }
+}
diff --git a/tests/src/JIT/Regression/JitBlue/GitHub_3539/GitHub_3539.csproj b/tests/src/JIT/Regression/JitBlue/GitHub_3539/GitHub_3539.csproj
new file mode 100644
index 0000000..fb6ae36
--- /dev/null
+++ b/tests/src/JIT/Regression/JitBlue/GitHub_3539/GitHub_3539.csproj
@@ -0,0 +1,18 @@
+
+
+
+    Release
+    AnyCPU
+    $(MSBuildProjectName)
+    Exe
+
+    True
+    True
+
+
+
+
+
+
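
A minimal, illustrative C# sketch (not part of this commit; the names are made up) of the kind of expression the CSE change is aimed at: with struct and SIMD values now eligible for CSE, the repeated Vector4 subexpression below can plausibly be evaluated once into a SIMD-typed temporary rather than recomputed.

    using System.Numerics;

    static class CseSketch
    {
        static Vector4 SquareOfSum(Vector4 a, Vector4 b)
        {
            // (a + b) occurs twice; the JIT may now keep it in a single SIMD-typed CSE temp.
            return (a + b) * (a + b);
        }
    }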