Struct & SIMD improvements (#22255)
authorCarol Eidt <carol.eidt@microsoft.com>
Thu, 28 Mar 2019 18:23:10 +0000 (11:23 -0700)
committerGitHub <noreply@github.com>
Thu, 28 Mar 2019 18:23:10 +0000 (11:23 -0700)
* [WIP] Struct & SIMD improvements

- Enable CSE of struct values when handle is available (and add code to get the handle of HW SIMD types)
- Don't require block nodes for SIMD assignments
- Don't set `GTF_GLOB_REF` on `GT_OBJ` if it is local
- Set `lvRegStruct` on promoted SIMD fields
- Add tests for #19910 (fixed with this PR) and #3539 & #19438 (fixed with #21314)
- Additional cleanup

Fix #19910

18 files changed:
src/jit/codegencommon.cpp
src/jit/gentree.cpp
src/jit/gentree.h
src/jit/importer.cpp
src/jit/lclvars.cpp
src/jit/liveness.cpp
src/jit/lower.cpp
src/jit/morph.cpp
src/jit/optcse.cpp
src/jit/rationalize.cpp
src/jit/rationalize.h
src/jit/simd.cpp
tests/src/JIT/Regression/JitBlue/GitHub_19438/GitHub_19438.cs [new file with mode: 0644]
tests/src/JIT/Regression/JitBlue/GitHub_19438/GitHub_19438.csproj [new file with mode: 0644]
tests/src/JIT/Regression/JitBlue/GitHub_19910/GitHub_19910.cs [new file with mode: 0644]
tests/src/JIT/Regression/JitBlue/GitHub_19910/GitHub_19910.csproj [new file with mode: 0644]
tests/src/JIT/Regression/JitBlue/GitHub_3539/GitHub_3539.cs [new file with mode: 0644]
tests/src/JIT/Regression/JitBlue/GitHub_3539/GitHub_3539.csproj [new file with mode: 0644]

index 8f31deb..03e171b 100644 (file)
@@ -4534,15 +4534,7 @@ void CodeGen::genCheckUseBlockInit()
                 unless they are untracked GC type or structs that contain GC pointers */
             CLANG_FORMAT_COMMENT_ANCHOR;
 
-#if FEATURE_SIMD
-            // TODO-1stClassStructs
-            // This is here to duplicate previous behavior, where TYP_SIMD8 locals
-            // were not being re-typed correctly.
-            if ((!varDsc->lvTracked || (varDsc->lvType == TYP_STRUCT) || (varDsc->lvType == TYP_SIMD8)) &&
-#else  // !FEATURE_SIMD
-            if ((!varDsc->lvTracked || (varDsc->lvType == TYP_STRUCT)) &&
-#endif // !FEATURE_SIMD
-                varDsc->lvOnFrame &&
+            if ((!varDsc->lvTracked || (varDsc->lvType == TYP_STRUCT)) && varDsc->lvOnFrame &&
                 (!varDsc->lvIsTemp || varTypeIsGC(varDsc->TypeGet()) || (varDsc->lvStructGcCount > 0)))
             {
                 varDsc->lvMustInit = true;
index 2b65206..93de32a 100644 (file)
@@ -16529,15 +16529,45 @@ CORINFO_CLASS_HANDLE Compiler::gtGetStructHandleIfPresent(GenTree* tree)
                 if (varTypeIsSIMD(tree))
                 {
                     structHnd = gtGetStructHandleForSIMD(tree->gtType, TYP_FLOAT);
+#ifdef FEATURE_HW_INTRINSICS
+                    if (structHnd == NO_CLASS_HANDLE)
+                    {
+                        structHnd = gtGetStructHandleForHWSIMD(tree->gtType, TYP_FLOAT);
+                    }
+#endif
                 }
                 else
 #endif
                 {
+                    // Attempt to find a handle for this expression.
+                    // We can do this for an array element indirection, or for a field indirection.
                     ArrayInfo arrInfo;
                     if (TryGetArrayInfo(tree->AsIndir(), &arrInfo))
                     {
                         structHnd = EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType);
                     }
+                    else
+                    {
+                        GenTree* addr = tree->AsIndir()->Addr();
+                        if ((addr->OperGet() == GT_ADD) && addr->gtGetOp2()->OperIs(GT_CNS_INT))
+                        {
+                            FieldSeqNode* fieldSeq = addr->gtGetOp2()->AsIntCon()->gtFieldSeq;
+
+                            if (fieldSeq != nullptr)
+                            {
+                                while (fieldSeq->m_next != nullptr)
+                                {
+                                    fieldSeq = fieldSeq->m_next;
+                                }
+                                if (fieldSeq != FieldSeqStore::NotAField() && !fieldSeq->IsPseudoField())
+                                {
+                                    CORINFO_FIELD_HANDLE fieldHnd = fieldSeq->m_fieldHnd;
+                                    CorInfoType fieldCorType = info.compCompHnd->getFieldType(fieldHnd, &structHnd);
+                                    assert(fieldCorType == CORINFO_TYPE_VALUECLASS);
+                                }
+                            }
+                        }
+                    }
                 }
                 break;
 #ifdef FEATURE_SIMD
index ef3bca2..6893c35 100644 (file)
@@ -4712,22 +4712,28 @@ struct GenTreeObj : public GenTreeBlk
         }
     }
 
+    void Init()
+    {
+        // By default, an OBJ is assumed to be a global reference, unless it is local.
+        GenTreeLclVarCommon* lcl = Addr()->IsLocalAddrExpr();
+        if ((lcl == nullptr) || ((lcl->gtFlags & GTF_GLOB_EFFECT) != 0))
+        {
+            gtFlags |= GTF_GLOB_REF;
+        }
+        noway_assert(gtClass != NO_CLASS_HANDLE);
+        _gtGcPtrCount = UINT32_MAX;
+    }
+
     GenTreeObj(var_types type, GenTree* addr, CORINFO_CLASS_HANDLE cls, unsigned size)
         : GenTreeBlk(GT_OBJ, type, addr, size), gtClass(cls)
     {
-        // By default, an OBJ is assumed to be a global reference.
-        gtFlags |= GTF_GLOB_REF;
-        noway_assert(cls != NO_CLASS_HANDLE);
-        _gtGcPtrCount = UINT32_MAX;
+        Init();
     }
 
     GenTreeObj(var_types type, GenTree* addr, GenTree* data, CORINFO_CLASS_HANDLE cls, unsigned size)
         : GenTreeBlk(GT_STORE_OBJ, type, addr, data, size), gtClass(cls)
     {
-        // By default, an OBJ is assumed to be a global reference.
-        gtFlags |= GTF_GLOB_REF;
-        noway_assert(cls != NO_CLASS_HANDLE);
-        _gtGcPtrCount = UINT32_MAX;
+        Init();
     }
 
 #if DEBUGGABLE_GENTREE
index ab99a5c..74ec369 100644 (file)
@@ -618,21 +618,8 @@ inline void Compiler::impAppendStmt(GenTree* stmt, unsigned chkLevel)
                     // Since we don't know what it assigns to, we need to spill global refs.
                     spillGlobEffects = true;
                 }
-                else if (!expr->OperIsBlkOp())
+                else if ((lhs->gtFlags & GTF_GLOB_REF) != 0)
                 {
-                    // If we are assigning to a global ref, we have to spill global refs on stack
-                    if ((lhs->gtFlags & GTF_GLOB_REF) != 0)
-                    {
-                        spillGlobEffects = true;
-                    }
-                }
-                else if ((lhs->OperIsBlk() && !lhs->AsBlk()->HasGCPtr()) ||
-                         ((lhs->OperGet() == GT_LCL_VAR) &&
-                          (lvaTable[lhs->AsLclVarCommon()->gtLclNum].lvStructGcCount == 0)))
-                {
-                    // TODO-1stClassStructs: Previously, spillGlobEffects was set to true for
-                    // GT_INITBLK and GT_COPYBLK, but this is overly conservative, and should be
-                    // revisited. (Note that it was NOT set to true for GT_COPYOBJ.)
                     spillGlobEffects = true;
                 }
             }
@@ -1389,8 +1376,6 @@ GenTree* Compiler::impAssignStructPtr(GenTree*             destAddr,
     }
     if (dest == nullptr)
     {
-        // TODO-1stClassStructs: We shouldn't really need a block node as the destination
-        // if this is a known struct type.
         if (asgType == TYP_STRUCT)
         {
             dest = gtNewObjNode(structHnd, destAddr);
@@ -1401,10 +1386,6 @@ GenTree* Compiler::impAssignStructPtr(GenTree*             destAddr,
             dest->gtFlags &= ~GTF_GLOB_REF;
             dest->gtFlags |= (destAddr->gtFlags & GTF_GLOB_REF);
         }
-        else if (varTypeIsStruct(asgType))
-        {
-            dest = new (this, GT_BLK) GenTreeBlk(GT_BLK, asgType, destAddr, genTypeSize(asgType));
-        }
         else
         {
             dest = gtNewOperNode(GT_IND, asgType, destAddr);
@@ -14527,9 +14508,8 @@ void Compiler::impImportBlockCode(BasicBlock* block)
                         assert(!"Unexpected fieldAccessor");
                 }
 
-                // Create the member assignment, unless we have a struct.
-                // TODO-1stClassStructs: This could be limited to TYP_STRUCT, to avoid extra copies.
-                bool deferStructAssign = varTypeIsStruct(lclTyp);
+                // Create the member assignment, unless we have a TYP_STRUCT.
+                bool deferStructAssign = (lclTyp == TYP_STRUCT);
 
                 if (!deferStructAssign)
                 {
@@ -16220,10 +16200,6 @@ GenTree* Compiler::impAssignSmallStructTypeToVar(GenTree* op, CORINFO_CLASS_HAND
     unsigned tmpNum = lvaGrabTemp(true DEBUGARG("Return value temp for small struct return."));
     impAssignTempGen(tmpNum, op, hClass, (unsigned)CHECK_SPILL_ALL);
     GenTree* ret = gtNewLclvNode(tmpNum, lvaTable[tmpNum].lvType);
-
-    // TODO-1stClassStructs: Handle constant propagation and CSE-ing of small struct returns.
-    ret->gtFlags |= GTF_DONT_CSE;
-
     return ret;
 }
 
index 722e18f..0f6cdf2 100644 (file)
@@ -2175,6 +2175,9 @@ void Compiler::StructPromotionHelper::PromoteStructVar(unsigned lclNum)
             // Set size to zero so that lvaSetStruct will appropriately set the SIMD-relevant fields.
             fieldVarDsc->lvExactSize = 0;
             compiler->lvaSetStruct(varNum, pFieldInfo->fldTypeHnd, false, true);
+            // We will not recursively promote this, so mark it as 'lvRegStruct' (note that we wouldn't
+            // be promoting this if we didn't think it could be enregistered.
+            fieldVarDsc->lvRegStruct = true;
         }
 #endif // FEATURE_SIMD
 
@@ -7047,8 +7050,6 @@ void Compiler::lvaDumpEntry(unsigned lclNum, FrameLayoutState curState, size_t r
             printf(" V%02u.%s(offs=0x%02x)", varDsc->lvParentLcl, eeGetFieldName(fldHnd), varDsc->lvFldOffset);
 
             lvaPromotionType promotionType = lvaGetPromotionType(parentvarDsc);
-            // We should never have lvIsStructField set if it is a reg-sized non-field-addressed struct.
-            assert(!varDsc->lvRegStruct);
             switch (promotionType)
             {
                 case PROMOTION_TYPE_NONE:
index 2fb225e..99f4db7 100644 (file)
@@ -2160,14 +2160,14 @@ bool Compiler::fgRemoveDeadStore(GenTree**        pTree,
         addrNode = tree;
     }
 
-    // Next, find the assignment.
+    // Next, find the assignment (i.e. if we didn't have a LocalStore)
     if (asgNode == nullptr)
     {
         if (addrNode == nullptr)
         {
             asgNode = nextNode;
         }
-        else if (asgNode == nullptr)
+        else
         {
             // This may be followed by GT_IND/assign or GT_STOREIND.
             if (nextNode == nullptr)
@@ -2180,19 +2180,22 @@ bool Compiler::fgRemoveDeadStore(GenTree**        pTree,
                 assert(nextNode->OperGet() != GT_NULLCHECK);
                 if (nextNode->OperIsStore())
                 {
+                    // This is a store, which takes a location and a value to be stored.
+                    // It's 'rhsNode' is the value to be stored.
                     asgNode = nextNode;
                     if (asgNode->OperIsBlk())
                     {
                         rhsNode = asgNode->AsBlk()->Data();
                     }
-                    // TODO-1stClassStructs: There should be an else clause here to handle
-                    // the non-block forms of store ops (GT_STORE_LCL_VAR, etc.) for which
-                    // rhsNode is op1. (This isn't really a 1stClassStructs item, but the
-                    // above was added to catch what used to be dead block ops, and that
-                    // made this omission apparent.)
+                    else
+                    {
+                        // This is a non-block store.
+                        rhsNode = asgNode->gtGetOp2();
+                    }
                 }
                 else
                 {
+                    // This is a non-store indirection, and the assignment will com after it.
                     asgNode = nextNode->gtNext;
                 }
             }
index fc507c4..01f181f 100644 (file)
@@ -1765,11 +1765,7 @@ void Lowering::CheckVSQuirkStackPaddingNeeded(GenTreeCall* call)
                     if (op1->OperGet() == GT_LCL_VAR_ADDR)
                     {
                         unsigned lclNum = op1->AsLclVarCommon()->GetLclNum();
-                        // TODO-1stClassStructs: This is here to duplicate previous behavior,
-                        // but is not needed because the scenario being quirked did not involve
-                        // a SIMD or enregisterable struct.
-                        // if(comp->lvaTable[lclNum].TypeGet() == TYP_STRUCT)
-                        if (varTypeIsStruct(comp->lvaTable[lclNum].TypeGet()))
+                        if (comp->lvaGetDesc(lclNum)->TypeGet() == TYP_STRUCT)
                         {
                             // First arg is addr of a struct local.
                             paddingNeeded = true;
index fb6b3f4..78f43eb 100644 (file)
@@ -2153,16 +2153,10 @@ GenTree* Compiler::fgMakeTmpArgNode(fgArgTabEntry* curArgTabEntry)
 
                 // Create an Obj of the temp to use it as a call argument.
                 arg = gtNewObjNode(lvaGetStruct(tmpVarNum), arg);
-
-                // TODO-1stClassStructs: We should not need to set the GTF_DONT_CSE flag here;
-                // this is only to preserve former behavior (though some CSE'ing of struct
-                // values can be pessimizing, so enabling this may require some additional tuning).
-                arg->gtFlags |= GTF_DONT_CSE;
             }
 #else
             // Always create an Obj of the temp to use it as a call argument.
             arg = gtNewObjNode(lvaGetStruct(tmpVarNum), arg);
-            arg->gtFlags |= GTF_DONT_CSE;
 #endif // !_TARGET_ARM64_
 #endif // FEATURE_MULTIREG_ARGS
         }
@@ -6615,14 +6609,6 @@ GenTree* Compiler::fgMorphField(GenTree* tree, MorphAddrContext* mac)
 
     GenTree* res = fgMorphSmpOp(tree);
 
-    // If we have a struct type, this node would previously have been under a GT_ADDR,
-    // and therefore would have been marked GTF_DONT_CSE.
-    // TODO-1stClassStructs: revisit this.
-    if ((res->TypeGet() == TYP_STRUCT) && !objIsLocal)
-    {
-        res->gtFlags |= GTF_DONT_CSE;
-    }
-
     if (fldOffset == 0 && res->OperGet() == GT_IND)
     {
         GenTree* addr = res->gtOp.gtOp1;
@@ -9000,24 +8986,8 @@ GenTree* Compiler::fgMorphOneAsgBlockOp(GenTree* tree)
     bool       isCopyBlock    = asg->OperIsCopyBlkOp();
     bool       isInitBlock    = !isCopyBlock;
 
-    unsigned             size;
+    unsigned             size   = 0;
     CORINFO_CLASS_HANDLE clsHnd = NO_CLASS_HANDLE;
-#ifdef FEATURE_SIMD
-    // importer introduces cpblk nodes with src = GT_ADDR(GT_SIMD/GT_HWIntrinsic)
-    // The SIMD type in question could be Vector2f which is 8-bytes in size.
-    // The below check is to make sure that we don't turn that copyblk
-    // into a assignment, since rationalizer logic will transform the
-    // copyblk appropriately. Otherwise, the transformation made in this
-    // routine will prevent rationalizer logic and we might end up with
-    // GT_ADDR(GT_SIMD/GT_HWIntrinsic) node post rationalization, leading to a noway assert
-    // in codegen.
-    // TODO-1stClassStructs: This is here to preserve old behavior.
-    // It should be eliminated.
-    if (src->OperIsSimdOrHWintrinsic())
-    {
-        return nullptr;
-    }
-#endif
 
     if (dest->gtEffectiveVal()->OperIsBlk())
     {
@@ -9037,24 +9007,43 @@ GenTree* Compiler::fgMorphOneAsgBlockOp(GenTree* tree)
     {
         // Is this an enregisterable struct that is already a simple assignment?
         // This can happen if we are re-morphing.
-        if ((dest->OperGet() == GT_IND) && (dest->TypeGet() != TYP_STRUCT) && isCopyBlock)
+        // Note that we won't do this straightaway if this is a SIMD type, since it
+        // may be a promoted lclVar (sometimes we promote the individual float fields of
+        // fixed-size SIMD).
+        if (dest->OperGet() == GT_IND)
         {
-            return tree;
+            noway_assert(asgType != TYP_STRUCT);
+            if (varTypeIsStruct(asgType))
+            {
+                destLclVarTree = fgIsIndirOfAddrOfLocal(dest);
+            }
+            if (isCopyBlock && destLclVarTree == nullptr && !src->OperIs(GT_LCL_VAR))
+            {
+                fgMorphBlockOperand(src, asgType, genTypeSize(asgType), false /*isDest*/);
+                return tree;
+            }
         }
-        noway_assert(dest->OperIsLocal());
-        destLclVarTree = dest;
-        destVarNum     = destLclVarTree->AsLclVarCommon()->gtLclNum;
-        destVarDsc     = &(lvaTable[destVarNum]);
-        if (isCopyBlock)
+        else
         {
-            clsHnd = destVarDsc->lvVerTypeInfo.GetClassHandle();
-            size   = info.compCompHnd->getClassSize(clsHnd);
+            noway_assert(dest->OperIsLocal());
+            destLclVarTree = dest;
         }
-        else
+        if (destLclVarTree != nullptr)
+        {
+            destVarNum = destLclVarTree->AsLclVarCommon()->gtLclNum;
+            destVarDsc = &(lvaTable[destVarNum]);
+            if (asgType == TYP_STRUCT)
+            {
+                clsHnd = destVarDsc->lvVerTypeInfo.GetClassHandle();
+                size   = destVarDsc->lvExactSize;
+            }
+        }
+        if (asgType != TYP_STRUCT)
         {
-            size = destVarDsc->lvExactSize;
+            size = genTypeSize(asgType);
         }
     }
+    noway_assert((size != 0) || dest->OperIs(GT_DYN_BLK));
 
     //
     //  See if we can do a simple transformation:
@@ -9161,7 +9150,7 @@ GenTree* Compiler::fgMorphOneAsgBlockOp(GenTree* tree)
             // holes, whose contents could be meaningful in unsafe code.  If we decide that's a valid
             // concern, then we could compromise, and say that address-exposed + fields do not completely cover the
             // memory of the struct prevent field-wise assignments.  Same situation exists for the "src" decision.
-            if (varTypeIsStruct(destLclVarTree) && (destVarDsc->lvPromoted || destVarDsc->lvIsSIMDType()))
+            if (varTypeIsStruct(destLclVarTree) && destVarDsc->lvPromoted)
             {
                 // Let fgMorphInitBlock handle it.  (Since we'll need to do field-var-wise assignments.)
                 return nullptr;
@@ -9256,11 +9245,9 @@ GenTree* Compiler::fgMorphOneAsgBlockOp(GenTree* tree)
                 else
                 {
                     // The source argument of the copyblk can potentially be accessed only through indir(addr(lclVar))
-                    // or indir(lclVarAddr) in rational form and liveness won't account for these uses. That said,
-                    // we have to mark this local as address exposed so we don't delete it as a dead store later on.
-                    unsigned lclVarNum                = srcLclVarTree->gtLclVarCommon.gtLclNum;
-                    lvaTable[lclVarNum].lvAddrExposed = true;
-                    lvaSetVarDoNotEnregister(lclVarNum DEBUGARG(DNER_AddrExposed));
+                    // or indir(lclVarAddr) so it must be on the stack.
+                    unsigned lclVarNum = srcLclVarTree->gtLclVarCommon.gtLclNum;
+                    lvaSetVarDoNotEnregister(lclVarNum DEBUGARG(DNER_BlockOp));
                     GenTree* srcAddr;
                     if (src == srcLclVarTree)
                     {
@@ -9289,9 +9276,8 @@ GenTree* Compiler::fgMorphOneAsgBlockOp(GenTree* tree)
                 src->SetIndirExceptionFlags(this);
             }
         }
-        else
+        else // InitBlk
         {
-// InitBlk
 #if FEATURE_SIMD
             if (varTypeIsSIMD(asgType))
             {
@@ -10022,11 +10008,17 @@ GenTree* Compiler::fgMorphBlockOperand(GenTree* tree, var_types asgType, unsigne
         {
             if (indirTree != nullptr)
             {
-                // We should never find a struct indirection on the lhs of an assignment.
-                assert(!isDest || indirTree->OperIsBlk());
-                if (!isDest && indirTree->OperIsBlk())
+                if (indirTree->OperIsBlk())
+                {
+                    if (!isDest || (asgType != TYP_STRUCT))
+                    {
+                        (void)fgMorphBlkToInd(effectiveVal->AsBlk(), asgType);
+                    }
+                }
+                else
                 {
-                    (void)fgMorphBlkToInd(effectiveVal->AsBlk(), asgType);
+                    // We should never find a TYP_STRUCT GT_IND on the lhs of an assignment.
+                    assert(!isDest || (asgType != TYP_STRUCT));
                 }
             }
             else
@@ -10047,14 +10039,6 @@ GenTree* Compiler::fgMorphBlockOperand(GenTree* tree, var_types asgType, unsigne
                         {
                             gtSetObjGcInfo(newTree->AsObj());
                         }
-                        if (effectiveVal->IsLocal() && ((effectiveVal->gtFlags & GTF_GLOB_EFFECT) == 0))
-                        {
-                            // This is not necessarily a global reference, though gtNewObjNode always assumes it is.
-                            // TODO-1stClassStructs: This check should be done in the GenTreeObj constructor,
-                            // where it currently sets GTF_GLOB_EFFECT unconditionally, but it is handled
-                            // separately now to avoid excess diffs.
-                            newTree->gtFlags &= ~(GTF_GLOB_EFFECT);
-                        }
                     }
                 }
                 else
@@ -10530,11 +10514,11 @@ GenTree* Compiler::fgMorphCopyBlock(GenTree* tree)
             }
         }
 
+        var_types asgType = dest->TypeGet();
         if (requiresCopyBlock)
         {
-            var_types asgType = dest->TypeGet();
-            dest              = fgMorphBlockOperand(dest, asgType, blockWidth, true /*isDest*/);
-            asg->gtOp.gtOp1   = dest;
+            dest            = fgMorphBlockOperand(dest, asgType, blockWidth, true /*isDest*/);
+            asg->gtOp.gtOp1 = dest;
             asg->gtFlags |= (dest->gtFlags & GTF_ALL_EFFECT);
 
             // Note that the unrolling of CopyBlk is only implemented on some platforms.
@@ -10557,21 +10541,6 @@ GenTree* Compiler::fgMorphCopyBlock(GenTree* tree)
             rhs             = fgMorphBlockOperand(rhs, asgType, blockWidth, false /*!isDest*/);
             asg->gtOp.gtOp2 = rhs;
 
-            // Formerly, liveness did not consider copyblk arguments of simple types as being
-            // a use or def, so these variables were marked as address-exposed.
-            // TODO-1stClassStructs: This should no longer be needed.
-            if (srcLclNum != BAD_VAR_NUM && !varTypeIsStruct(srcLclVar))
-            {
-                JITDUMP("Non-struct copyBlk src V%02d is addr exposed\n", srcLclNum);
-                lvaTable[srcLclNum].lvAddrExposed = true;
-            }
-
-            if (destLclNum != BAD_VAR_NUM && !varTypeIsStruct(destLclVar))
-            {
-                JITDUMP("Non-struct copyBlk dest V%02d is addr exposed\n", destLclNum);
-                lvaTable[destLclNum].lvAddrExposed = true;
-            }
-
             goto _Done;
         }
 
@@ -10601,7 +10570,7 @@ GenTree* Compiler::fgMorphCopyBlock(GenTree* tree)
         else if (destDoFldAsg)
         {
             fieldCnt = destLclVar->lvFieldCnt;
-            rhs      = fgMorphBlockOperand(rhs, TYP_STRUCT, blockWidth, false /*isDest*/);
+            rhs      = fgMorphBlockOperand(rhs, asgType, blockWidth, false /*isDest*/);
             if (srcAddr == nullptr)
             {
                 srcAddr = fgMorphGetStructAddr(&rhs, destLclVar->lvVerTypeInfo.GetClassHandle(), true /* rValue */);
@@ -10611,7 +10580,7 @@ GenTree* Compiler::fgMorphCopyBlock(GenTree* tree)
         {
             assert(srcDoFldAsg);
             fieldCnt = srcLclVar->lvFieldCnt;
-            dest     = fgMorphBlockOperand(dest, TYP_STRUCT, blockWidth, true /*isDest*/);
+            dest     = fgMorphBlockOperand(dest, asgType, blockWidth, true /*isDest*/);
             if (dest->OperIsBlk())
             {
                 (void)fgMorphBlkToInd(dest->AsBlk(), TYP_STRUCT);
@@ -10692,6 +10661,9 @@ GenTree* Compiler::fgMorphCopyBlock(GenTree* tree)
 
         if (addrSpill != nullptr)
         {
+            // Simplify the address if possible, and mark as DONT_CSE as needed..
+            addrSpill = fgMorphTree(addrSpill);
+
             // Spill the (complex) address to a BYREF temp.
             // Note, at most one address may need to be spilled.
             addrSpillTemp = lvaGrabTemp(true DEBUGARG("BlockOp address local"));
@@ -10816,9 +10788,6 @@ GenTree* Compiler::fgMorphCopyBlock(GenTree* tree)
 
                 noway_assert(srcLclVarTree != nullptr);
                 src->gtFlags |= srcLclVarTree->gtFlags & ~GTF_NODE_MASK;
-                // TODO-1stClassStructs: These should not need to be marked GTF_DONT_CSE,
-                // but they are when they are under a GT_ADDR.
-                src->gtFlags |= GTF_DONT_CSE;
             }
             else
             {
@@ -18936,12 +18905,8 @@ bool Compiler::fgMorphCombineSIMDFieldAssignments(BasicBlock* block, GenTree* st
     }
 #endif
 
-    // TODO-1stClassStructs: we should be able to simply use a GT_IND here.
-    GenTree* blkNode = gtNewBlockVal(copyBlkDst, simdSize);
-    blkNode->gtType  = simdType;
-    tree             = gtNewBlkOpNode(blkNode, simdStructNode, simdSize,
-                          false, // not volatile
-                          true); // copyBlock
+    GenTree* dstNode = gtNewOperNode(GT_IND, simdType, copyBlkDst);
+    tree             = gtNewAssignNode(dstNode, simdStructNode);
 
     stmt->gtStmt.gtStmtExpr = tree;
 
index 8ef5ae7..fd20c77 100644 (file)
@@ -1415,8 +1415,8 @@ public:
 
             if (!varTypeIsFloating(varTyp))
             {
-                // TODO-1stClassStructs: Remove this; it is here to duplicate previous behavior.
-                // Note that this makes genTypeStSz return 1.
+                // TODO-1stClassStructs: Revisit this; it is here to duplicate previous behavior.
+                // Note that this makes genTypeStSz return 1, but undoing it pessimizes some code.
                 if (varTypeIsStruct(varTyp))
                 {
                     varTyp = TYP_STRUCT;
@@ -1754,6 +1754,22 @@ public:
         // Each CSE Def will contain two Refs and each CSE Use will have one Ref of this new LclVar
         unsigned cseRefCnt = (candidate->DefCount() * 2) + candidate->UseCount();
 
+        bool      canEnregister = true;
+        unsigned  slotCount     = 1;
+        var_types cseLclVarTyp  = genActualType(candidate->Expr()->TypeGet());
+        if (candidate->Expr()->TypeGet() == TYP_STRUCT)
+        {
+            // This is a non-enregisterable struct.
+            canEnregister                  = false;
+            GenTree*             value     = candidate->Expr();
+            CORINFO_CLASS_HANDLE structHnd = m_pCompiler->gtGetStructHandleIfPresent(candidate->Expr());
+            assert((structHnd != NO_CLASS_HANDLE) || (cseLclVarTyp != TYP_STRUCT));
+            unsigned size = m_pCompiler->info.compCompHnd->getClassSize(structHnd);
+            // Note that the slotCount is used to estimate the reference cost, but it may overestimate this
+            // because it doesn't take into account that we might use a vector register for struct copies.
+            slotCount = (size + TARGET_POINTER_SIZE - 1) / TARGET_POINTER_SIZE;
+        }
+
         if (CodeOptKind() == Compiler::SMALL_CODE)
         {
             if (cseRefCnt >= aggressiveRefCnt)
@@ -1764,10 +1780,10 @@ public:
                     printf("Aggressive CSE Promotion (%u >= %u)\n", cseRefCnt, aggressiveRefCnt);
                 }
 #endif
-                cse_def_cost = 1;
-                cse_use_cost = 1;
+                cse_def_cost = slotCount;
+                cse_use_cost = slotCount;
 
-                if (candidate->LiveAcrossCall() != 0)
+                if (candidate->LiveAcrossCall() || !canEnregister)
                 {
                     if (largeFrame)
                     {
@@ -1796,13 +1812,13 @@ public:
 #else                             // _TARGET_ARM_
                 if (hugeFrame)
                 {
-                    cse_def_cost = 12; // movw/movt r10 and str reg,[sp+r10]
-                    cse_use_cost = 12;
+                    cse_def_cost = 10 + (2 * slotCount); // movw/movt r10 and str reg,[sp+r10]
+                    cse_use_cost = 10 + (2 * slotCount);
                 }
                 else
                 {
-                    cse_def_cost = 8; // movw r10 and str reg,[sp+r10]
-                    cse_use_cost = 8;
+                    cse_def_cost = 6 + (2 * slotCount); // movw r10 and str reg,[sp+r10]
+                    cse_use_cost = 6 + (2 * slotCount);
                 }
 #endif
             }
@@ -1816,17 +1832,17 @@ public:
 #endif
 #ifdef _TARGET_XARCH_
                 /* The following formula is good choice when optimizing CSE for SMALL_CODE */
-                cse_def_cost = 3; // mov [EBP-1C],reg
-                cse_use_cost = 2; //     [EBP-1C]
-#else                             // _TARGET_ARM_
-                cse_def_cost = 2; // str reg,[sp+0x9c]
-                cse_use_cost = 2; // ldr reg,[sp+0x9c]
+                cse_def_cost = 3 * slotCount; // mov [EBP-1C],reg
+                cse_use_cost = 2 * slotCount; //     [EBP-1C]
+#else                                         // _TARGET_ARM_
+                cse_def_cost = 2 * slotCount; // str reg,[sp+0x9c]
+                cse_use_cost = 2 * slotCount; // ldr reg,[sp+0x9c]
 #endif
             }
         }
         else // not SMALL_CODE ...
         {
-            if (cseRefCnt >= aggressiveRefCnt)
+            if ((cseRefCnt >= aggressiveRefCnt) && canEnregister)
             {
 #ifdef DEBUG
                 if (m_pCompiler->verbose)
@@ -1834,13 +1850,13 @@ public:
                     printf("Aggressive CSE Promotion (%u >= %u)\n", cseRefCnt, aggressiveRefCnt);
                 }
 #endif
-                cse_def_cost = 1;
-                cse_use_cost = 1;
+                cse_def_cost = slotCount;
+                cse_use_cost = slotCount;
             }
             else if (cseRefCnt >= moderateRefCnt)
             {
 
-                if (candidate->LiveAcrossCall() == 0)
+                if (!candidate->LiveAcrossCall() && canEnregister)
                 {
 #ifdef DEBUG
                     if (m_pCompiler->verbose)
@@ -1852,7 +1868,7 @@ public:
                     cse_def_cost = 2;
                     cse_use_cost = 1;
                 }
-                else // candidate is live across call
+                else // candidate is live across call or not enregisterable.
                 {
 #ifdef DEBUG
                     if (m_pCompiler->verbose)
@@ -1860,15 +1876,15 @@ public:
                         printf("Moderate CSE Promotion (%u >= %u)\n", cseRefCnt, moderateRefCnt);
                     }
 #endif
-                    cse_def_cost   = 2;
-                    cse_use_cost   = 2;
+                    cse_def_cost   = 2 * slotCount;
+                    cse_use_cost   = 2 * slotCount;
                     extra_yes_cost = BB_UNITY_WEIGHT * 2; // Extra cost in case we have to spill/restore a caller
                                                           // saved register
                 }
             }
             else // Conservative CSE promotion
             {
-                if (candidate->LiveAcrossCall() == 0)
+                if (!candidate->LiveAcrossCall() && canEnregister)
                 {
 #ifdef DEBUG
                     if (m_pCompiler->verbose)
@@ -1888,8 +1904,8 @@ public:
                         printf("Conservative CSE Promotion (%u < %u)\n", cseRefCnt, moderateRefCnt);
                     }
 #endif
-                    cse_def_cost   = 3;
-                    cse_use_cost   = 3;
+                    cse_def_cost   = 3 * slotCount;
+                    cse_use_cost   = 3 * slotCount;
                     extra_yes_cost = BB_UNITY_WEIGHT * 4; // Extra cost in case we have to spill/restore a caller
                                                           // saved register
                 }
@@ -1897,8 +1913,8 @@ public:
                 // If we have maxed out lvaTrackedCount then this CSE may end up as an untracked variable
                 if (m_pCompiler->lvaTrackedCount == lclMAX_TRACKED)
                 {
-                    cse_def_cost++;
-                    cse_use_cost++;
+                    cse_def_cost += slotCount;
+                    cse_use_cost += slotCount;
                 }
             }
 
@@ -2031,7 +2047,20 @@ public:
         var_types cseLclVarTyp = genActualType(successfulCandidate->Expr()->TypeGet());
         if (varTypeIsStruct(cseLclVarTyp))
         {
-            m_pCompiler->lvaSetStruct(cseLclVarNum, m_pCompiler->gtGetStructHandle(successfulCandidate->Expr()), false);
+            // After call args have been morphed, we don't need a handle for SIMD types.
+            // They are only required where the size is not implicit in the type and/or there are GC refs.
+            CORINFO_CLASS_HANDLE structHnd = m_pCompiler->gtGetStructHandleIfPresent(successfulCandidate->Expr());
+            assert((structHnd != NO_CLASS_HANDLE) || (cseLclVarTyp != TYP_STRUCT));
+            if (structHnd != NO_CLASS_HANDLE)
+            {
+                m_pCompiler->lvaSetStruct(cseLclVarNum, structHnd, false);
+            }
+#ifdef FEATURE_SIMD
+            else if (varTypeIsSIMD(cseLclVarTyp))
+            {
+                m_pCompiler->lvaGetDesc(cseLclVarNum)->lvSIMDType = true;
+            }
+#endif // FEATURE_SIMD
         }
         m_pCompiler->lvaTable[cseLclVarNum].lvType  = cseLclVarTyp;
         m_pCompiler->lvaTable[cseLclVarNum].lvIsCSE = true;
@@ -2305,14 +2334,26 @@ public:
                 GenTree* val = exp;
 
                 /* Create an assignment of the value to the temp */
-                GenTree* asg = m_pCompiler->gtNewTempAssign(cseLclVarNum, val);
+                GenTree* asg     = m_pCompiler->gtNewTempAssign(cseLclVarNum, val);
+                GenTree* origAsg = asg;
+
+                if (!asg->OperIs(GT_ASG))
+                {
+                    // This can only be the case for a struct in which the 'val' was a COMMA, so
+                    // the assignment is sunk below it.
+                    asg = asg->gtEffectiveVal(true);
+                    noway_assert(origAsg->OperIs(GT_COMMA) && (origAsg == val));
+                }
+                else
+                {
+                    noway_assert(asg->gtOp.gtOp2 == val);
+                }
 
                 // assign the proper Value Numbers
                 asg->gtVNPair.SetBoth(ValueNumStore::VNForVoid()); // The GT_ASG node itself is $VN.Void
                 asg->gtOp.gtOp1->gtVNPair = val->gtVNPair;         // The dest op is the same as 'val'
 
                 noway_assert(asg->gtOp.gtOp1->gtOper == GT_LCL_VAR);
-                noway_assert(asg->gtOp.gtOp2 == val);
 
                 /* Create a reference to the CSE temp */
                 GenTree* ref  = m_pCompiler->gtNewLclvNode(cseLclVarNum, cseLclVarTyp);
@@ -2325,7 +2366,7 @@ public:
                 }
 
                 /* Create a comma node for the CSE assignment */
-                cse           = m_pCompiler->gtNewOperNode(GT_COMMA, expTyp, asg, ref);
+                cse           = m_pCompiler->gtNewOperNode(GT_COMMA, expTyp, origAsg, ref);
                 cse->gtVNPair = ref->gtVNPair; // The comma's value is the same as 'val'
                                                // as the assignment to the CSE LclVar
                                                // cannot add any new exceptions
@@ -2536,16 +2577,17 @@ bool Compiler::optIsCSEcandidate(GenTree* tree)
         return false;
     }
 
-    /* The only reason a TYP_STRUCT tree might occur is as an argument to
-       GT_ADDR. It will never be actually materialized. So ignore them.
-       Also TYP_VOIDs */
-
     var_types  type = tree->TypeGet();
     genTreeOps oper = tree->OperGet();
 
-    // TODO-1stClassStructs: Enable CSE for struct types (depends on either transforming
-    // to use regular assignments, or handling copyObj.
-    if (varTypeIsStruct(type) || type == TYP_VOID)
+    if (type == TYP_VOID)
+    {
+        return false;
+    }
+
+    // If this is a struct type, we can only consider it for CSE-ing if we can get at
+    // its handle, so that we can create a temp.
+    if ((type == TYP_STRUCT) && (gtGetStructHandleIfPresent(tree) == NO_CLASS_HANDLE))
     {
         return false;
     }
index 1cdb54f..d51634b 100644 (file)
@@ -552,7 +552,7 @@ void Rationalizer::RewriteAddress(LIR::Use& use)
     JITDUMP("\n");
 }
 
-Compiler::fgWalkResult Rationalizer::RewriteNode(GenTree** useEdge, ArrayStack<GenTree*>& parentStack)
+Compiler::fgWalkResult Rationalizer::RewriteNode(GenTree** useEdge, Compiler::GenTreeStack& parentStack)
 {
     assert(useEdge != nullptr);
 
@@ -755,39 +755,21 @@ Compiler::fgWalkResult Rationalizer::RewriteNode(GenTree** useEdge, ArrayStack<G
             assert(comp->IsTargetIntrinsic(node->gtIntrinsic.gtIntrinsicId));
             break;
 
-#ifdef FEATURE_SIMD
         case GT_BLK:
+            // We should only see GT_BLK for TYP_STRUCT.
+            assert(node->TypeGet() == TYP_STRUCT);
+            break;
+
         case GT_OBJ:
-        {
-            // TODO-1stClassStructs: These should have been transformed to GT_INDs, but in order
-            // to preserve existing behavior, we will keep this as a block node if this is the
-            // lhs of a block assignment, and either:
-            // - It is a "generic" TYP_STRUCT assignment, OR
-            // - It is an initblk, OR
-            // - Neither the lhs or rhs are known to be of SIMD type.
-
-            GenTree* parent  = use.User();
-            bool     keepBlk = false;
-            if ((parent->OperGet() == GT_ASG) && (node == parent->gtGetOp1()))
+            assert(node->TypeGet() == TYP_STRUCT || !use.User()->OperIsInitBlkOp());
+            if (varTypeIsSIMD(node))
             {
-                if ((node->TypeGet() == TYP_STRUCT) || parent->OperIsInitBlkOp())
-                {
-                    keepBlk = true;
-                }
-                else if (!comp->isAddrOfSIMDType(node->AsBlk()->Addr()))
-                {
-                    GenTree* dataSrc = parent->gtGetOp2();
-                    if (!dataSrc->IsLocal() && (dataSrc->OperGet() != GT_SIMD) && (!dataSrc->OperIsHWIntrinsic()))
-                    {
-                        noway_assert(dataSrc->OperIsIndir());
-                        keepBlk = !comp->isAddrOfSIMDType(dataSrc->AsIndir()->Addr());
-                    }
-                }
+                // Rewrite these as GT_IND.
+                RewriteSIMDOperand(use, false);
             }
-            RewriteSIMDOperand(use, keepBlk);
-        }
-        break;
+            break;
 
+#ifdef FEATURE_SIMD
         case GT_SIMD:
         {
             noway_assert(comp->featureSIMD);
index 41404a2..297db36 100644 (file)
@@ -47,14 +47,14 @@ private:
 #endif
                            GenTreeArgList* args);
 
-    void RewriteIntrinsicAsUserCall(GenTree** use, ArrayStack<GenTree*>& parents);
+    void RewriteIntrinsicAsUserCall(GenTree** use, Compiler::GenTreeStack& parents);
 
     // Other transformations
     void RewriteAssignment(LIR::Use& use);
     void RewriteAddress(LIR::Use& use);
 
     // Root visitor
-    Compiler::fgWalkResult RewriteNode(GenTree** useEdge, ArrayStack<GenTree*>& parents);
+    Compiler::fgWalkResult RewriteNode(GenTree** useEdge, Compiler::GenTreeStack& parents);
 };
 
 inline Rationalizer::Rationalizer(Compiler* _comp) : Phase(_comp, "IR Rationalize", PHASE_RATIONALIZE)
index 1281184..ea15a9d 100644 (file)
@@ -2967,6 +2967,10 @@ GenTree* Compiler::impSIMDIntrinsic(OPCODE                opcode,
                     op2 = gtCloneExpr(index);
                 }
 
+                // For the non-constant case, we don't want to CSE the SIMD value, as we will just need to store
+                // it to the stack to do the indexing anyway.
+                op1->gtFlags |= GTF_DONT_CSE;
+
                 GenTree*          lengthNode = new (this, GT_CNS_INT) GenTreeIntCon(TYP_INT, vectorLength);
                 GenTreeBoundsChk* simdChk =
                     new (this, GT_SIMD_CHK) GenTreeBoundsChk(GT_SIMD_CHK, TYP_VOID, index, lengthNode, SCK_RNGCHK_FAIL);
diff --git a/tests/src/JIT/Regression/JitBlue/GitHub_19438/GitHub_19438.cs b/tests/src/JIT/Regression/JitBlue/GitHub_19438/GitHub_19438.cs
new file mode 100644 (file)
index 0000000..b57cae9
--- /dev/null
@@ -0,0 +1,126 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+using System.Numerics;
+using System.Runtime.CompilerServices;
+using System.Collections.Generic;
+using System.Diagnostics;
+
+// This test executes the same computation on a wrapped Vector4 ('float4') and a
+// (not wrapped) Vector4. The code should be similar.
+// This was a perf regression issue, so this test is mostly useful for running
+// asm diffs.
+
+namespace GitHub_19438
+{
+    class Program
+    {
+        struct float4
+        {
+            public Vector4 v;
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public float4(float _x, float _y, float _z, float _w)
+            {
+                v = new Vector4(_x, _y, _z, _w);
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public float4(Vector4 _v)
+            {
+                v = _v;
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static float4 operator +(float4 a, float4 b)
+            {
+                return new float4(a.v + b.v);
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static float4 operator -(float4 a, float4 b)
+            {
+                return new float4(a.v - b.v);
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static float4 operator *(float4 a, float4 b)
+            {
+                return new float4(a.v * b.v);
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public static float4 operator /(float4 a, float4 b)
+            {
+                return new float4(a.v / b.v);
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            public override string ToString()
+            {
+                return v.ToString();
+            }
+        }
+
+        static int Main(string[] args)
+        {
+            const int iterationCount = 10;
+            const int itemCount = 1000000;
+
+            long totalTaskTime = 0;
+
+            // First, use a wrapped Vector4.
+            List<float4> items = new List<float4>(itemCount);
+            for (int iteration = 0; iteration < iterationCount; ++iteration)
+            {
+                var taskTimer = Stopwatch.StartNew();
+
+                for (int item = 0; item < itemCount; ++item)
+                {
+                    float4 v0 = new float4(1.0f, 2.0f, 3.0f, 4.0f);
+                    float4 v1 = new float4(5.0f, 6.0f, 7.0f, 8.0f);
+                    float4 v2 = (v0 * v1) - (v1 / v0 + v1);
+                    float4 v3 = (v2 * v0) - (v2 / v0 + v1);
+
+                    items.Add(v2 * v3);
+                }
+
+                taskTimer.Stop();
+                totalTaskTime += taskTimer.ElapsedMilliseconds;
+
+                items.Clear();
+                GC.Collect();
+            }
+            Console.WriteLine("Wrapped Average Time: " + totalTaskTime / iterationCount + "ms");
+
+            // Now, a plain Vector4
+            totalTaskTime = 0;
+            List<Vector4> items2 = new List<Vector4>(itemCount);
+            for (int iteration = 0; iteration < iterationCount; ++iteration)
+            {
+                var taskTimer = Stopwatch.StartNew();
+
+                for (int item = 0; item < itemCount; ++item)
+                {
+                    Vector4 v0 = new Vector4(1.0f, 2.0f, 3.0f, 4.0f);
+                    Vector4 v1 = new Vector4(5.0f, 6.0f, 7.0f, 8.0f);
+                    Vector4 v2 = (v0 * v1) - (v1 / v0 + v1);
+                    Vector4 v3 = (v2 * v0) - (v2 / v0 + v1);
+
+                    items2.Add(v2 * v3);
+                }
+
+                taskTimer.Stop();
+                totalTaskTime += taskTimer.ElapsedMilliseconds;
+
+                items2.Clear();
+                GC.Collect();
+            }
+            Console.WriteLine("Vector4 Average Time: " + totalTaskTime / iterationCount + "ms");
+
+            return 100;
+        }
+    }
+}
diff --git a/tests/src/JIT/Regression/JitBlue/GitHub_19438/GitHub_19438.csproj b/tests/src/JIT/Regression/JitBlue/GitHub_19438/GitHub_19438.csproj
new file mode 100644 (file)
index 0000000..d86ed9f
--- /dev/null
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.props))\dir.props" />
+  <PropertyGroup>
+    <Configuration Condition=" '$(Configuration)' == '' ">Release</Configuration>
+    <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+    <AssemblyName>$(MSBuildProjectName)</AssemblyName>
+    <OutputType>Exe</OutputType>
+    <DebugType></DebugType>
+    <Optimize>True</Optimize>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="$(MSBuildProjectName).cs" />
+  </ItemGroup>
+  <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.targets))\dir.targets" />
+  <PropertyGroup Condition=" '$(MsBuildProjectDirOverride)' != '' "></PropertyGroup>
+</Project>
\ No newline at end of file
diff --git a/tests/src/JIT/Regression/JitBlue/GitHub_19910/GitHub_19910.cs b/tests/src/JIT/Regression/JitBlue/GitHub_19910/GitHub_19910.cs
new file mode 100644 (file)
index 0000000..bb2d536
--- /dev/null
@@ -0,0 +1,40 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using System.Runtime.CompilerServices;
+
+namespace GitHub_19910
+{
+    class Program
+    {
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        static void SwapNonGeneric(ref Vector128<uint> a, ref Vector128<uint> b)
+        {
+            Vector128<uint> tmp = a; a = b; b = tmp;
+        }
+
+        [MethodImpl(MethodImplOptions.NoInlining)]
+        static int Main()
+        {
+            if (Sse2.IsSupported)
+            {
+                Vector128<uint> a = Sse2.ConvertScalarToVector128UInt32(0xA);
+                Vector128<uint> b = Sse2.ConvertScalarToVector128UInt32(0xB);
+
+                Vector128<uint> tmp = a; a = b; b = tmp;    // in-place version
+                SwapNonGeneric(ref a, ref b);               // inlined version
+
+                if ((Sse2.ConvertToUInt32(a) != 0xA) || (Sse2.ConvertToUInt32(b) != 0xB))
+                {
+                    Console.WriteLine("A={0}, B={1}", Sse2.ConvertToUInt32(a), Sse2.ConvertToUInt32(b));
+                    return -1;
+                }
+            }
+            return 100;
+        }
+    }
+}
diff --git a/tests/src/JIT/Regression/JitBlue/GitHub_19910/GitHub_19910.csproj b/tests/src/JIT/Regression/JitBlue/GitHub_19910/GitHub_19910.csproj
new file mode 100644 (file)
index 0000000..d86ed9f
--- /dev/null
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.props))\dir.props" />
+  <PropertyGroup>
+    <Configuration Condition=" '$(Configuration)' == '' ">Release</Configuration>
+    <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+    <AssemblyName>$(MSBuildProjectName)</AssemblyName>
+    <OutputType>Exe</OutputType>
+    <DebugType></DebugType>
+    <Optimize>True</Optimize>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="$(MSBuildProjectName).cs" />
+  </ItemGroup>
+  <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.targets))\dir.targets" />
+  <PropertyGroup Condition=" '$(MsBuildProjectDirOverride)' != '' "></PropertyGroup>
+</Project>
\ No newline at end of file
diff --git a/tests/src/JIT/Regression/JitBlue/GitHub_3539/GitHub_3539.cs b/tests/src/JIT/Regression/JitBlue/GitHub_3539/GitHub_3539.cs
new file mode 100644 (file)
index 0000000..8fc6c68
--- /dev/null
@@ -0,0 +1,62 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+using System.Runtime.CompilerServices;
+
+namespace GitHub_19910
+
+{
+    class Program
+    {
+        public struct Bgr { public byte B; public byte G; public byte R; }
+
+        public class BasicReadWriteBenchmark<T>
+            where T : struct
+        {
+
+            // NOTE: This includes cost of stack alloc
+            [MethodImpl(MethodImplOptions.NoInlining)]
+            static public void ReadFromStack()
+            {
+                unsafe
+                {
+                    void* stackPtr = stackalloc byte[Unsafe.SizeOf<Bgr>()];
+
+                    var value = Unsafe.Read<T>(stackPtr);
+                }
+            }
+
+            // NOTE: This includes cost of stack alloc
+            [MethodImpl(MethodImplOptions.NoInlining)]
+            static public void WriteToStack()
+            {
+                unsafe
+                {
+                    void* stackPtr = stackalloc byte[Unsafe.SizeOf<Bgr>()];
+
+                    T value = default(T);
+                    Unsafe.Write<T>(stackPtr, value);
+                }
+            }
+        }
+
+        public class BasicReadWriteBenchmarkBgr : BasicReadWriteBenchmark<Bgr> { }
+
+        static int Main()
+        {
+            try
+            {
+                BasicReadWriteBenchmarkBgr.ReadFromStack();
+                BasicReadWriteBenchmarkBgr.WriteToStack();
+            }
+            catch (Exception e)
+            {
+                Console.WriteLine("Failed with Exception: " + e.Message);
+                return -1;
+            }
+            return 100;
+        }
+    }
+}
diff --git a/tests/src/JIT/Regression/JitBlue/GitHub_3539/GitHub_3539.csproj b/tests/src/JIT/Regression/JitBlue/GitHub_3539/GitHub_3539.csproj
new file mode 100644 (file)
index 0000000..fb6ae36
--- /dev/null
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.props))\dir.props" />
+  <PropertyGroup>
+    <Configuration Condition=" '$(Configuration)' == '' ">Release</Configuration>
+    <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+    <AssemblyName>$(MSBuildProjectName)</AssemblyName>
+    <OutputType>Exe</OutputType>
+    <DebugType></DebugType>
+    <Optimize>True</Optimize>
+    <AllowUnsafeBlocks>True</AllowUnsafeBlocks>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="$(MSBuildProjectName).cs" />
+  </ItemGroup>
+  <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.targets))\dir.targets" />
+  <PropertyGroup Condition=" '$(MsBuildProjectDirOverride)' != '' "></PropertyGroup>
+</Project>