Fix block store local address containment on ARM (dotnet/coreclr#27338)
authormikedn <onemihaid@hotmail.com>
Fri, 25 Oct 2019 17:56:34 +0000 (20:56 +0300)
committerSergey Andreenko <seandree@microsoft.com>
Fri, 25 Oct 2019 17:56:34 +0000 (10:56 -0700)
When the source/destination address was local, genCodeForCpBlkUnroll was folding the local offset into the address mode of the generated load/store instructions as if the local address was contained. But lowering didn't actually contain the address so useless ADD instructions were still being generated.

Commit migrated from https://github.com/dotnet/coreclr/commit/4d6fae3c42789f139e5a7c05207554bd76983780

src/coreclr/src/jit/codegenarmarch.cpp
src/coreclr/src/jit/lowerarmarch.cpp
src/coreclr/src/jit/lsraarmarch.cpp

index 7659ab3..4acf300 100644 (file)
@@ -2019,19 +2019,18 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* node)
     if (!dstAddr->isContained())
     {
         dstAddrBaseReg = genConsumeReg(dstAddr);
-
-        // TODO-Cleanup: This matches the old code behavior in order to get 0 diffs. It's otherwise
-        // messed up - lowering does not mark a local address node contained even if it's possible
-        // and then the old code consumed the register but ignored it and generated code that accesses
-        // the local variable directly.
-        if (dstAddr->OperIsLocalAddr())
-        {
-            dstLclNum = dstAddr->AsLclVarCommon()->GetLclNum();
-            dstOffset = dstAddr->OperIs(GT_LCL_FLD_ADDR) ? dstAddr->AsLclFld()->gtLclOffs : 0;
-        }
     }
     else
     {
+        // TODO-ARM-CQ: If the local frame offset is too large to be encoded, the emitter automatically
+        // loads the offset into a reserved register (see CodeGen::rsGetRsvdReg()). If we generate
+        // multiple store instructions we'll also generate multiple offset loading instructions.
+        // We could try to detect such cases, compute the base destination address in this reserved
+        // and use it in all store instructions we generate. This would effectively undo the effect
+        // of local address containment done by lowering.
+        //
+        // The same issue also occurs in source address case below and in genCodeForInitBlkUnroll.
+
         assert(dstAddr->OperIsLocalAddr());
         dstLclNum = dstAddr->AsLclVarCommon()->GetLclNum();
         dstOffset = dstAddr->OperIs(GT_LCL_FLD_ADDR) ? dstAddr->AsLclFld()->gtLclOffs : 0;
@@ -2057,16 +2056,6 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* node)
         if (!srcAddr->isContained())
         {
             srcAddrBaseReg = genConsumeReg(srcAddr);
-
-            // TODO-Cleanup: This matches the old code behavior in order to get 0 diffs. It's otherwise
-            // messed up - lowering does not mark a local address node contained even if it's possible
-            // and then the old code consumed the register but ignored it and generated code that accesses
-            // the local variable directly.
-            if (srcAddr->OperIsLocalAddr())
-            {
-                srcLclNum = srcAddr->AsLclVarCommon()->GetLclNum();
-                srcOffset = srcAddr->OperIs(GT_LCL_FLD_ADDR) ? srcAddr->AsLclFld()->gtLclOffs : 0;
-            }
         }
         else
         {
index 90ed20a..4b31eda 100644 (file)
@@ -326,6 +326,20 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
             if (!blkNode->OperIs(GT_STORE_DYN_BLK) && (size <= CPBLK_UNROLL_LIMIT))
             {
                 blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
+
+                if (src->OperIs(GT_IND))
+                {
+                    GenTree* srcAddr = src->AsIndir()->Addr();
+                    if (srcAddr->OperIsLocalAddr())
+                    {
+                        srcAddr->SetContained();
+                    }
+                }
+
+                if (dstAddr->OperIsLocalAddr())
+                {
+                    dstAddr->SetContained();
+                }
             }
             else
             {
index e5df4f2..5cc6157 100644 (file)
@@ -605,7 +605,6 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
         {
             assert(src->isContained());
             srcAddrOrFill = src->AsIndir()->Addr();
-            assert(!srcAddrOrFill->isContained());
         }
 
         if (blkNode->OperIs(GT_STORE_OBJ))
@@ -632,6 +631,7 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
             // which is killed by a StoreObj (and thus needn't be reserved).
             if (srcAddrOrFill != nullptr)
             {
+                assert(!srcAddrOrFill->isContained());
                 srcRegMask = RBM_WRITE_BARRIER_SRC_BYREF;
             }
         }
@@ -655,6 +655,7 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode)
                     dstAddrRegMask = RBM_ARG_0;
                     if (srcAddrOrFill != nullptr)
                     {
+                        assert(!srcAddrOrFill->isContained());
                         srcRegMask = RBM_ARG_1;
                     }
                     sizeRegMask = RBM_ARG_2;