From 88196ea7557f5b789af1698e24553db55205acaf Mon Sep 17 00:00:00 2001 From: "Steve MacLean, Qualcomm Datacenter Technologies, Inc" Date: Tue, 25 Apr 2017 23:03:36 +0000 Subject: [PATCH] [Arm64] Use ldp/stp in genCodeForCpObj() Commit migrated from https://github.com/dotnet/coreclr/commit/13118c0b7b0dcd527d687edfddd72606c891a433 --- src/coreclr/src/jit/codegenarm64.cpp | 49 ++++++++++++++++++++++++++---------- src/coreclr/src/jit/lsraarmarch.cpp | 8 ++++++ 2 files changed, 44 insertions(+), 13 deletions(-) diff --git a/src/coreclr/src/jit/codegenarm64.cpp b/src/coreclr/src/jit/codegenarm64.cpp index 7de19f9..1095d52 100644 --- a/src/coreclr/src/jit/codegenarm64.cpp +++ b/src/coreclr/src/jit/codegenarm64.cpp @@ -3461,11 +3461,14 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode) gcInfo.gcMarkRegPtrVal(REG_WRITE_BARRIER_SRC_BYREF, srcAddrType); gcInfo.gcMarkRegPtrVal(REG_WRITE_BARRIER_DST_BYREF, dstAddr->TypeGet()); + unsigned slots = cpObjNode->gtSlots; + // Temp register used to perform the sequence of loads and stores. - regNumber tmpReg = cpObjNode->GetSingleTempReg(); - assert(genIsValidIntReg(tmpReg)); + regNumber tmpReg = cpObjNode->ExtractTempReg(); + regNumber tmpReg2 = (slots > 1) ? cpObjNode->GetSingleTempReg() : REG_NA; + assert(genIsValidIntReg(tmpReg) && (tmpReg != REG_WRITE_BARRIER_SRC_BYREF) && (tmpReg != REG_WRITE_BARRIER_DST_BYREF)); + assert((slots == 1) || (genIsValidIntReg(tmpReg2) && (tmpReg2 != tmpReg) && (tmpReg2 != REG_WRITE_BARRIER_SRC_BYREF) && (tmpReg2 != REG_WRITE_BARRIER_DST_BYREF))); - unsigned slots = cpObjNode->gtSlots; emitter* emit = getEmitter(); BYTE* gcPtrs = cpObjNode->gtGcPtrs; @@ -3473,7 +3476,6 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode) // If we can prove it's on the stack we don't need to use the write barrier. if (dstOnStack) { - // TODO-ARM64-CQ: Consider using LDP/STP to save codesize. for (unsigned i = 0; i < slots; ++i) { emitAttr attr = EA_8BYTE; @@ -3482,10 +3484,21 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode) else if (gcPtrs[i] == GCT_BYREF) attr = EA_BYREF; - emit->emitIns_R_R_I(INS_ldr, attr, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE, - INS_OPTS_POST_INDEX); - emit->emitIns_R_R_I(INS_str, attr, tmpReg, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE, - INS_OPTS_POST_INDEX); + if((i + 1 < slots) && (gcPtrs[i] == gcPtrs[i + 1])) + { + emit->emitIns_R_R_R_I(INS_ldp, attr, tmpReg, tmpReg2, REG_WRITE_BARRIER_SRC_BYREF, 2*TARGET_POINTER_SIZE, + INS_OPTS_POST_INDEX); + emit->emitIns_R_R_R_I(INS_stp, attr, tmpReg, tmpReg2, REG_WRITE_BARRIER_DST_BYREF, 2*TARGET_POINTER_SIZE, + INS_OPTS_POST_INDEX); + ++i; + } + else + { + emit->emitIns_R_R_I(INS_ldr, attr, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE, + INS_OPTS_POST_INDEX); + emit->emitIns_R_R_I(INS_str, attr, tmpReg, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE, + INS_OPTS_POST_INDEX); + } } } else @@ -3498,11 +3511,21 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode) switch (gcPtrs[i]) { case TYPE_GC_NONE: - // TODO-ARM64-CQ: Consider using LDP/STP to save codesize in case of contigous NON-GC slots. - emit->emitIns_R_R_I(INS_ldr, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE, - INS_OPTS_POST_INDEX); - emit->emitIns_R_R_I(INS_str, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE, - INS_OPTS_POST_INDEX); + if((i + 1 < slots) && (gcPtrs[i] == gcPtrs[i + 1])) + { + emit->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, tmpReg, tmpReg2, REG_WRITE_BARRIER_SRC_BYREF, 2*TARGET_POINTER_SIZE, + INS_OPTS_POST_INDEX); + emit->emitIns_R_R_R_I(INS_stp, EA_8BYTE, tmpReg, tmpReg2, REG_WRITE_BARRIER_DST_BYREF, 2*TARGET_POINTER_SIZE, + INS_OPTS_POST_INDEX); + ++i; + } + else + { + emit->emitIns_R_R_I(INS_ldr, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE, + INS_OPTS_POST_INDEX); + emit->emitIns_R_R_I(INS_str, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE, + INS_OPTS_POST_INDEX); + } break; default: diff --git a/src/coreclr/src/jit/lsraarmarch.cpp b/src/coreclr/src/jit/lsraarmarch.cpp index 7d999d8..107c831 100644 --- a/src/coreclr/src/jit/lsraarmarch.cpp +++ b/src/coreclr/src/jit/lsraarmarch.cpp @@ -792,6 +792,14 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode) // a temporary register to perform the sequence of loads and stores. blkNode->gtLsraInfo.internalIntCount = 1; + if (size >= 2 * REGSIZE_BYTES) + { + // Use ldp/stp to reduce code size and improve performance + blkNode->gtLsraInfo.internalIntCount++; + } + + blkNode->gtLsraInfo.setInternalCandidates(l, RBM_ALLINT & ~(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF)); + dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_WRITE_BARRIER_DST_BYREF); // If we have a source address we want it in REG_WRITE_BARRIER_SRC_BYREF. // Otherwise, if it is a local, codegen will put its address in REG_WRITE_BARRIER_SRC_BYREF, -- 2.7.4