[Arm64] Use ldp/stp in genCodeForCpObj()
authorSteve MacLean, Qualcomm Datacenter Technologies, Inc <sdmaclea@qti.qualcomm.com>
Tue, 25 Apr 2017 23:03:36 +0000 (23:03 +0000)
committerSteve MacLean, Qualcomm Datacenter Technologies, Inc <sdmaclea@qti.qualcomm.com>
Wed, 26 Apr 2017 04:00:20 +0000 (04:00 +0000)
Commit migrated from https://github.com/dotnet/coreclr/commit/13118c0b7b0dcd527d687edfddd72606c891a433

src/coreclr/src/jit/codegenarm64.cpp
src/coreclr/src/jit/lsraarmarch.cpp

index 7de19f9..1095d52 100644 (file)
@@ -3461,11 +3461,14 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode)
     gcInfo.gcMarkRegPtrVal(REG_WRITE_BARRIER_SRC_BYREF, srcAddrType);
     gcInfo.gcMarkRegPtrVal(REG_WRITE_BARRIER_DST_BYREF, dstAddr->TypeGet());
 
+    unsigned slots = cpObjNode->gtSlots;
+
     // Temp register used to perform the sequence of loads and stores.
-    regNumber tmpReg = cpObjNode->GetSingleTempReg();
-    assert(genIsValidIntReg(tmpReg));
+    regNumber tmpReg = cpObjNode->ExtractTempReg();
+    regNumber tmpReg2 = (slots > 1) ? cpObjNode->GetSingleTempReg() : REG_NA;
+    assert(genIsValidIntReg(tmpReg) && (tmpReg != REG_WRITE_BARRIER_SRC_BYREF) && (tmpReg != REG_WRITE_BARRIER_DST_BYREF));
+    assert((slots == 1) || (genIsValidIntReg(tmpReg2) && (tmpReg2 != tmpReg) && (tmpReg2 != REG_WRITE_BARRIER_SRC_BYREF) && (tmpReg2 != REG_WRITE_BARRIER_DST_BYREF)));
 
-    unsigned slots = cpObjNode->gtSlots;
     emitter* emit  = getEmitter();
 
     BYTE* gcPtrs = cpObjNode->gtGcPtrs;
@@ -3473,7 +3476,6 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode)
     // If we can prove it's on the stack we don't need to use the write barrier.
     if (dstOnStack)
     {
-        // TODO-ARM64-CQ: Consider using LDP/STP to save codesize.
         for (unsigned i = 0; i < slots; ++i)
         {
             emitAttr attr = EA_8BYTE;
@@ -3482,10 +3484,21 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode)
             else if (gcPtrs[i] == GCT_BYREF)
                 attr = EA_BYREF;
 
-            emit->emitIns_R_R_I(INS_ldr, attr, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE,
-                                INS_OPTS_POST_INDEX);
-            emit->emitIns_R_R_I(INS_str, attr, tmpReg, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE,
-                                INS_OPTS_POST_INDEX);
+            if((i + 1 < slots) && (gcPtrs[i] == gcPtrs[i + 1]))
+            {
+                emit->emitIns_R_R_R_I(INS_ldp, attr, tmpReg, tmpReg2, REG_WRITE_BARRIER_SRC_BYREF, 2*TARGET_POINTER_SIZE,
+                                      INS_OPTS_POST_INDEX);
+                emit->emitIns_R_R_R_I(INS_stp, attr, tmpReg, tmpReg2, REG_WRITE_BARRIER_DST_BYREF, 2*TARGET_POINTER_SIZE,
+                                      INS_OPTS_POST_INDEX);
+                ++i;
+            }
+            else
+            {
+                emit->emitIns_R_R_I(INS_ldr, attr, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE,
+                                    INS_OPTS_POST_INDEX);
+                emit->emitIns_R_R_I(INS_str, attr, tmpReg, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE,
+                                    INS_OPTS_POST_INDEX);
+            }
         }
     }
     else
@@ -3498,11 +3511,21 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode)
             switch (gcPtrs[i])
             {
                 case TYPE_GC_NONE:
-                    // TODO-ARM64-CQ: Consider using LDP/STP to save codesize in case of contigous NON-GC slots.
-                    emit->emitIns_R_R_I(INS_ldr, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE,
-                                        INS_OPTS_POST_INDEX);
-                    emit->emitIns_R_R_I(INS_str, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE,
-                                        INS_OPTS_POST_INDEX);
+                    if((i + 1 < slots) && (gcPtrs[i] == gcPtrs[i + 1]))
+                    {
+                        emit->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, tmpReg, tmpReg2, REG_WRITE_BARRIER_SRC_BYREF, 2*TARGET_POINTER_SIZE,
+                                              INS_OPTS_POST_INDEX);
+                        emit->emitIns_R_R_R_I(INS_stp, EA_8BYTE, tmpReg, tmpReg2, REG_WRITE_BARRIER_DST_BYREF, 2*TARGET_POINTER_SIZE,
+                                              INS_OPTS_POST_INDEX);
+                        ++i;
+                    }
+                    else
+                    {
+                        emit->emitIns_R_R_I(INS_ldr, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE,
+                                            INS_OPTS_POST_INDEX);
+                        emit->emitIns_R_R_I(INS_str, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE,
+                                            INS_OPTS_POST_INDEX);
+                    }
                     break;
 
                 default:
index 7d999d8..107c831 100644 (file)
@@ -792,6 +792,14 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
             // a temporary register to perform the sequence of loads and stores.
             blkNode->gtLsraInfo.internalIntCount = 1;
 
+            if (size >= 2 * REGSIZE_BYTES)
+            {
+                // Use ldp/stp to reduce code size and improve performance
+                blkNode->gtLsraInfo.internalIntCount++;
+            }
+
+            blkNode->gtLsraInfo.setInternalCandidates(l, RBM_ALLINT & ~(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF));
+
             dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_WRITE_BARRIER_DST_BYREF);
             // If we have a source address we want it in REG_WRITE_BARRIER_SRC_BYREF.
             // Otherwise, if it is a local, codegen will put its address in REG_WRITE_BARRIER_SRC_BYREF,