From c6824d52873d0f6382b4f733c50f510491d37b12 Mon Sep 17 00:00:00 2001 From: mikedn Date: Thu, 30 May 2019 03:53:11 +0300 Subject: [PATCH] Sort out ARM load/store instruction size issues (#20126) * Avoid using ins_Load/ins_Store with constant type * Use ldr to load array lengths/lower bounds genCodeForArrIndex and genCodeForArrOffset emit a ldrsw on ARM64 but that's not necessary. Array lengths are always positive. Lower bounds are signed but then the subsequent subtract is anyway EA_4BYTE so sign extension isn't actually needed. Just use ldr on both ARM32 and ARM64. * Use ldr to load array length (part 2) genCodeForIndexAddr's range check generation is messed up: - It uses ldrsw to load array length on ARM64. Not needed, the length is positive. - It uses sxtw to sign extend the array length if the index is native int. But it was already extended by the load. - It creates IND and LEA nodes out of thin air. Just call the appropriate emitter functions. - It always generates a TYP_I_IMPL cmp, even when the index is TYP_INT. Well, that's just bogus. * Stop using the instruction size for immediate scaling on ARM32 The scaling of immediate operands is a property of the instruction and its encoding. It doesn't make sense to throw the instruction size (emitAttr) into the mix, that's a codegen/emitter specific concept. On ARM32 it's also almost meaningless, at least in the case of integer types - all instructions are really EA_4BYTE, even ldrb, ldrh etc. The ARM64 emitter already extracts the scaling factor from the instruction. It can't use the instruction size as on ARM64 the size is used to select between 32 bit and 64 bit instructions so it's never EA_1BYTE/EA_2BYTE. * Stop using ldrsw for TYP_INT loads ARM64's ins_Load returns INS_ldrsw for TYP_INT but there's nothing in the JIT type system that requires sign extending TYP_INT values on load. 
The fact that an indir node has TYP_INT doesn't even imply that the value to load is signed, it may be unsigned and indir nodes will never have type TYP_UINT nor have the GTF_UNSIGNED flag set. XARCH uses a mov (rather than movsxd, the equivalent of ldrsw) so it zero extends. There's no reason for ARM64 to behave differently and doing so makes it more difficult to share codegen logic between XARCH and ARM64 Other ARM64 compilers also use ldr rather than ldrsw. This requires patching up emitInsAdjustLoadStoreAttr so EA_4BYTE loads don't end up using EA_8BYTE, which ldrsw requires. * Cleanup genCodeForIndir/genCodeForStoreInd In particular, cleanup selection of acquire/release instructions. The existing code starts by selecting a "normal" instruction, only to throw it away and redo the type/size logic in the volatile case. And get it wrong in the process, it required that "targetType" be TYP_UINT or TYP_LONG to use ldar. But there are no TYP_UINT indirs. Also rename "targetType" to "type", using "target" is misleading. The real target type would be genActualType(tree->TypeGet()). * Remove ARM32/64 load/store instruction size inconsistencies - Require EA_4BYTE for byte/halfword instructions on ARM32. - Remove emitInsAdjustLoadStoreAttr on ARM64. Getting the correct instruction size simply requires using emitActualTypeSize, that will provide the correct size on both ARM32 and ARM64. - Replace emitTypeSize with emitActualTypeSize as needed. 
* Remove unnecessary insUnscaleImm parameter --- src/jit/codegenarm.cpp | 15 ++--- src/jit/codegenarm64.cpp | 93 ++++++++++++------------------- src/jit/codegenarmarch.cpp | 133 ++++++++++++++++----------------------------- src/jit/codegencommon.cpp | 19 +++---- src/jit/codegenlinear.cpp | 5 +- src/jit/emitarm.cpp | 86 ++++++++++++++--------------- src/jit/emitarm.h | 2 +- src/jit/emitarm64.cpp | 47 +++------------- src/jit/emitarm64.h | 1 - src/jit/instr.cpp | 11 +--- 10 files changed, 148 insertions(+), 264 deletions(-) diff --git a/src/jit/codegenarm.cpp b/src/jit/codegenarm.cpp index 99ea6ba..93eb16a 100644 --- a/src/jit/codegenarm.cpp +++ b/src/jit/codegenarm.cpp @@ -1287,12 +1287,11 @@ void CodeGen::genCodeForReturnTrap(GenTreeOp* tree) // void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree) { - GenTree* data = tree->Data(); - GenTree* addr = tree->Addr(); - var_types targetType = tree->TypeGet(); - emitter* emit = getEmitter(); + GenTree* data = tree->Data(); + GenTree* addr = tree->Addr(); + var_types type = tree->TypeGet(); - assert(!varTypeIsFloating(targetType) || (targetType == data->TypeGet())); + assert(!varTypeIsFloating(type) || (type == data->TypeGet())); GCInfo::WriteBarrierForm writeBarrierForm = gcInfo.gcIsWriteBarrierCandidate(tree, data); if (writeBarrierForm != GCInfo::WBF_NoBarrier) @@ -1323,8 +1322,6 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree) } else // A normal store, not a WriteBarrier store { - bool dataIsUnary = false; - // We must consume the operands in the proper execution order, // so that liveness is updated appropriately. 
genConsumeAddress(addr); @@ -1334,13 +1331,13 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree) genConsumeRegs(data); } - if (tree->gtFlags & GTF_IND_VOLATILE) + if ((tree->gtFlags & GTF_IND_VOLATILE) != 0) { // issue a full memory barrier a before volatile StInd instGen_MemoryBarrier(); } - emit->emitInsLoadStoreOp(ins_Store(targetType), emitTypeSize(tree), data->gtRegNum, tree); + getEmitter()->emitInsLoadStoreOp(ins_Store(type), emitActualTypeSize(type), data->gtRegNum, tree); } } diff --git a/src/jit/codegenarm64.cpp b/src/jit/codegenarm64.cpp index 8681aa8..9f892e6 100644 --- a/src/jit/codegenarm64.cpp +++ b/src/jit/codegenarm64.cpp @@ -1180,13 +1180,13 @@ void CodeGen::genFuncletProlog(BasicBlock* block) // Load the CallerSP of the main function (stored in the PSP of the dynamically containing funclet or // function) - genInstrWithConstant(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_R1, REG_R1, - genFuncletInfo.fiCallerSP_to_PSP_slot_delta, REG_R2, false); + genInstrWithConstant(INS_ldr, EA_PTRSIZE, REG_R1, REG_R1, genFuncletInfo.fiCallerSP_to_PSP_slot_delta, + REG_R2, false); regSet.verifyRegUsed(REG_R1); // Store the PSP value (aka CallerSP) - genInstrWithConstant(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_R1, REG_SPBASE, - genFuncletInfo.fiSP_to_PSP_slot_delta, REG_R2, false); + genInstrWithConstant(INS_str, EA_PTRSIZE, REG_R1, REG_SPBASE, genFuncletInfo.fiSP_to_PSP_slot_delta, REG_R2, + false); // re-establish the frame pointer genInstrWithConstant(INS_add, EA_PTRSIZE, REG_FPBASE, REG_R1, @@ -1201,8 +1201,8 @@ void CodeGen::genFuncletProlog(BasicBlock* block) -genFuncletInfo.fiFunction_CallerSP_to_FP_delta, REG_R2, false); regSet.verifyRegUsed(REG_R3); - genInstrWithConstant(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_R3, REG_SPBASE, - genFuncletInfo.fiSP_to_PSP_slot_delta, REG_R2, false); + genInstrWithConstant(INS_str, EA_PTRSIZE, REG_R3, REG_SPBASE, genFuncletInfo.fiSP_to_PSP_slot_delta, REG_R2, + false); } } } @@ -1509,7 +1509,7 @@ BasicBlock* 
CodeGen::genCallFinally(BasicBlock* block) if (compiler->lvaPSPSym != BAD_VAR_NUM) { - getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_R0, compiler->lvaPSPSym, 0); + getEmitter()->emitIns_R_S(INS_ldr, EA_PTRSIZE, REG_R0, compiler->lvaPSPSym, 0); } else { @@ -1845,9 +1845,7 @@ void CodeGen::genCodeForLclVar(GenTreeLclVar* tree) assert(targetType != TYP_STRUCT); instruction ins = ins_Load(targetType); - emitAttr attr = emitTypeSize(targetType); - - attr = varTypeIsFloating(targetType) ? attr : emit->emitInsAdjustLoadStoreAttr(ins, attr); + emitAttr attr = emitActualTypeSize(targetType); emit->emitIns_R_S(ins, attr, tree->gtRegNum, varNum, 0); genProduceReg(tree); @@ -1907,9 +1905,7 @@ void CodeGen::genCodeForStoreLclFld(GenTreeLclFld* tree) instruction ins = ins_Store(targetType); - emitAttr attr = emitTypeSize(targetType); - - attr = varTypeIsFloating(targetType) ? attr : emit->emitInsAdjustLoadStoreAttr(ins, attr); + emitAttr attr = emitActualTypeSize(targetType); emit->emitIns_S_R(ins, attr, dataReg, varNum, offset); @@ -1986,9 +1982,7 @@ void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* tree) inst_set_SV_var(tree); instruction ins = ins_Store(targetType); - emitAttr attr = emitTypeSize(targetType); - - attr = varTypeIsFloating(targetType) ? 
attr : emit->emitInsAdjustLoadStoreAttr(ins, attr); + emitAttr attr = emitActualTypeSize(targetType); emit->emitIns_S_R(ins, attr, dataReg, varNum, /* offset */ 0); @@ -3208,13 +3202,6 @@ void CodeGen::genCodeForReturnTrap(GenTreeOp* tree) // void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree) { - GenTree* data = tree->Data(); - GenTree* addr = tree->Addr(); - var_types targetType = tree->TypeGet(); - emitter* emit = getEmitter(); - emitAttr attr = emitTypeSize(tree); - instruction ins = ins_Store(targetType); - #ifdef FEATURE_SIMD // Storing Vector3 of size 12 bytes through indirection if (tree->TypeGet() == TYP_SIMD12) @@ -3224,6 +3211,9 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree) } #endif // FEATURE_SIMD + GenTree* data = tree->Data(); + GenTree* addr = tree->Addr(); + GCInfo::WriteBarrierForm writeBarrierForm = gcInfo.gcIsWriteBarrierCandidate(tree, data); if (writeBarrierForm != GCInfo::WBF_NoBarrier) { @@ -3247,8 +3237,6 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree) } else // A normal store, not a WriteBarrier store { - bool dataIsUnary = false; - GenTree* nonRMWsrc = nullptr; // We must consume the operands in the proper execution order, // so that liveness is updated appropriately. 
genConsumeAddress(addr); @@ -3258,7 +3246,7 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree) genConsumeRegs(data); } - regNumber dataReg = REG_NA; + regNumber dataReg; if (data->isContainedIntOrIImmed()) { assert(data->IsIntegralConst(0)); @@ -3270,33 +3258,25 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree) dataReg = data->gtRegNum; } - assert((attr != EA_1BYTE) || !(tree->gtFlags & GTF_IND_UNALIGNED)); + var_types type = tree->TypeGet(); + instruction ins = ins_Store(type); - if (tree->gtFlags & GTF_IND_VOLATILE) + if ((tree->gtFlags & GTF_IND_VOLATILE) != 0) { - bool useStoreRelease = - genIsValidIntReg(dataReg) && !addr->isContained() && !(tree->gtFlags & GTF_IND_UNALIGNED); + bool addrIsInReg = addr->isUsedFromReg(); + bool addrIsAligned = ((tree->gtFlags & GTF_IND_UNALIGNED) == 0); - if (useStoreRelease) + if ((ins == INS_strb) && addrIsInReg) { - switch (EA_SIZE(attr)) - { - case EA_1BYTE: - assert(ins == INS_strb); - ins = INS_stlrb; - break; - case EA_2BYTE: - assert(ins == INS_strh); - ins = INS_stlrh; - break; - case EA_4BYTE: - case EA_8BYTE: - assert(ins == INS_str); - ins = INS_stlr; - break; - default: - assert(false); // We should not get here - } + ins = INS_stlrb; + } + else if ((ins == INS_strh) && addrIsInReg && addrIsAligned) + { + ins = INS_stlrh; + } + else if ((ins == INS_str) && genIsValidIntReg(dataReg) && addrIsInReg && addrIsAligned) + { + ins = INS_stlr; } else { @@ -3305,7 +3285,7 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree) } } - emit->emitInsLoadStoreOp(ins, attr, dataReg, tree); + getEmitter()->emitInsLoadStoreOp(ins, emitActualTypeSize(type), dataReg, tree); } } @@ -4731,9 +4711,6 @@ void CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode) { int offset = (int)index * genTypeSize(baseType); instruction ins = ins_Load(baseType); - baseTypeSize = varTypeIsFloating(baseType) - ? 
baseTypeSize - : getEmitter()->emitInsAdjustLoadStoreAttr(ins, baseTypeSize); assert(!op1->isUsedFromReg()); @@ -4741,7 +4718,7 @@ void CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode) { unsigned varNum = op1->gtLclVarCommon.gtLclNum; - getEmitter()->emitIns_R_S(ins, baseTypeSize, targetReg, varNum, offset); + getEmitter()->emitIns_R_S(ins, emitActualTypeSize(baseType), targetReg, varNum, offset); } else { @@ -4752,7 +4729,7 @@ void CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode) regNumber baseReg = addr->gtRegNum; // ldr targetReg, [baseReg, #offset] - getEmitter()->emitIns_R_R_I(ins, baseTypeSize, targetReg, baseReg, offset); + getEmitter()->emitIns_R_R_I(ins, emitActualTypeSize(baseType), targetReg, baseReg, offset); } } else @@ -5047,7 +5024,7 @@ void CodeGen::genStoreIndTypeSIMD12(GenTree* treeNode) assert(tmpReg != addr->gtRegNum); // 8-byte write - getEmitter()->emitIns_R_R(ins_Store(TYP_DOUBLE), EA_8BYTE, data->gtRegNum, addr->gtRegNum); + getEmitter()->emitIns_R_R(INS_str, EA_8BYTE, data->gtRegNum, addr->gtRegNum); // Extract upper 4-bytes from data getEmitter()->emitIns_R_R_I(INS_mov, EA_4BYTE, tmpReg, data->gtRegNum, 2); @@ -5083,7 +5060,7 @@ void CodeGen::genLoadIndTypeSIMD12(GenTree* treeNode) regNumber tmpReg = treeNode->GetSingleTempReg(); // 8-byte read - getEmitter()->emitIns_R_R(ins_Load(TYP_DOUBLE), EA_8BYTE, targetReg, addr->gtRegNum); + getEmitter()->emitIns_R_R(INS_ldr, EA_8BYTE, targetReg, addr->gtRegNum); // 4-byte read getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, tmpReg, addr->gtRegNum, 8); @@ -5126,7 +5103,7 @@ void CodeGen::genStoreLclTypeSIMD12(GenTree* treeNode) regNumber tmpReg = treeNode->GetSingleTempReg(); // store lower 8 bytes - getEmitter()->emitIns_S_R(ins_Store(TYP_DOUBLE), EA_8BYTE, operandReg, varNum, offs); + getEmitter()->emitIns_S_R(INS_str, EA_8BYTE, operandReg, varNum, offs); // Extract upper 4-bytes from data getEmitter()->emitIns_R_R_I(INS_mov, EA_4BYTE, tmpReg, operandReg, 2); diff --git 
a/src/jit/codegenarmarch.cpp b/src/jit/codegenarmarch.cpp index 2721bd0..c933f45 100644 --- a/src/jit/codegenarmarch.cpp +++ b/src/jit/codegenarmarch.cpp @@ -1585,11 +1585,11 @@ void CodeGen::genCodeForArrIndex(GenTreeArrIndex* arrIndex) unsigned offset; offset = genOffsetOfMDArrayLowerBound(elemType, rank, dim); - emit->emitIns_R_R_I(ins_Load(TYP_INT), EA_PTRSIZE, tmpReg, arrReg, offset); // a 4 BYTE sign extending load + emit->emitIns_R_R_I(INS_ldr, EA_4BYTE, tmpReg, arrReg, offset); emit->emitIns_R_R_R(INS_sub, EA_4BYTE, tgtReg, indexReg, tmpReg); offset = genOffsetOfMDArrayDimensionSize(elemType, rank, dim); - emit->emitIns_R_R_I(ins_Load(TYP_INT), EA_PTRSIZE, tmpReg, arrReg, offset); // a 4 BYTE sign extending load + emit->emitIns_R_R_I(INS_ldr, EA_4BYTE, tmpReg, arrReg, offset); emit->emitIns_R_R(INS_cmp, EA_4BYTE, tgtReg, tmpReg); genJumpToThrowHlpBlk(EJ_hs, SCK_RNGCHK_FAIL); @@ -1640,7 +1640,7 @@ void CodeGen::genCodeForArrOffset(GenTreeArrOffs* arrOffset) // Load tmpReg with the dimension size and evaluate // tgtReg = offsetReg*tmpReg + indexReg. 
- emit->emitIns_R_R_I(ins_Load(TYP_INT), EA_PTRSIZE, tmpReg, arrReg, offset); + emit->emitIns_R_R_I(INS_ldr, EA_4BYTE, tmpReg, arrReg, offset); emit->emitIns_R_R_R_R(INS_MULADD, EA_PTRSIZE, tgtReg, tmpReg, offsetReg, indexReg); } else @@ -1736,17 +1736,7 @@ void CodeGen::genCodeForLclFld(GenTreeLclFld* tree) unsigned varNum = tree->gtLclNum; assert(varNum < compiler->lvaCount); - if (varTypeIsFloating(targetType) || varTypeIsSIMD(targetType)) - { - emit->emitIns_R_S(ins_Load(targetType), size, targetReg, varNum, offs); - } - else - { -#ifdef _TARGET_ARM64_ - size = EA_SET_SIZE(size, EA_8BYTE); -#endif // _TARGET_ARM64_ - emit->emitIns_R_S(ins_Move_Extend(targetType, false), size, targetReg, varNum, offs); - } + emit->emitIns_R_S(ins_Load(targetType), emitActualTypeSize(targetType), targetReg, varNum, offs); genProduceReg(tree); } @@ -1773,34 +1763,16 @@ void CodeGen::genCodeForIndexAddr(GenTreeIndexAddr* node) gcInfo.gcMarkRegPtrVal(base->gtRegNum, base->TypeGet()); assert(!varTypeIsGC(index->TypeGet())); + // The index is never contained, even if it is a constant. + assert(index->isUsedFromReg()); + const regNumber tmpReg = node->GetSingleTempReg(); // Generate the bounds check if necessary. if ((node->gtFlags & GTF_INX_RNGCHK) != 0) { - // Create a GT_IND(GT_LEA)) tree for the array length access and load the length into a register. - GenTreeAddrMode arrLenAddr(base->TypeGet(), base, nullptr, 0, static_cast(node->gtLenOffset)); - arrLenAddr.gtRegNum = REG_NA; - arrLenAddr.SetContained(); - - GenTreeIndir arrLen = indirForm(TYP_INT, &arrLenAddr); - arrLen.gtRegNum = tmpReg; - arrLen.ClearContained(); - - getEmitter()->emitInsLoadStoreOp(ins_Load(TYP_INT), emitTypeSize(TYP_INT), arrLen.gtRegNum, &arrLen); - -#ifdef _TARGET_64BIT_ - // The CLI Spec allows an array to be indexed by either an int32 or a native int. In the case that the index - // is a native int on a 64-bit platform, we will need to widen the array length and the compare. 
- if (index->TypeGet() == TYP_I_IMPL) - { - // Extend the array length as needed. - getEmitter()->emitIns_R_R(ins_Move_Extend(TYP_INT, true), EA_8BYTE, arrLen.gtRegNum, arrLen.gtRegNum); - } -#endif - - // Generate the range check. - getEmitter()->emitInsBinary(INS_cmp, emitActualTypeSize(TYP_I_IMPL), index, &arrLen); + getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, tmpReg, base->gtRegNum, node->gtLenOffset); + getEmitter()->emitIns_R_R(INS_cmp, emitActualTypeSize(index->TypeGet()), index->gtRegNum, tmpReg); genJumpToThrowHlpBlk(EJ_hs, SCK_RNGCHK_FAIL, node->gtIndRngFailBB); } @@ -1842,12 +1814,6 @@ void CodeGen::genCodeForIndir(GenTreeIndir* tree) { assert(tree->OperIs(GT_IND)); - var_types targetType = tree->TypeGet(); - regNumber targetReg = tree->gtRegNum; - emitter* emit = getEmitter(); - emitAttr attr = emitTypeSize(tree); - instruction ins = ins_Load(targetType); - #ifdef FEATURE_SIMD // Handling of Vector3 type values loaded through indirection. if (tree->TypeGet() == TYP_SIMD12) @@ -1857,55 +1823,48 @@ void CodeGen::genCodeForIndir(GenTreeIndir* tree) } #endif // FEATURE_SIMD + var_types type = tree->TypeGet(); + instruction ins = ins_Load(type); + regNumber targetReg = tree->gtRegNum; + genConsumeAddress(tree->Addr()); - if ((tree->gtFlags & GTF_IND_VOLATILE) != 0) - { - bool isAligned = ((tree->gtFlags & GTF_IND_UNALIGNED) == 0); - assert((attr != EA_1BYTE) || isAligned); + bool emitBarrier = false; + if ((tree->gtFlags & GTF_IND_VOLATILE) != 0) + { #ifdef _TARGET_ARM64_ - GenTree* addr = tree->Addr(); - bool useLoadAcquire = genIsValidIntReg(targetReg) && !addr->isContained() && - (varTypeIsUnsigned(targetType) || varTypeIsI(targetType)) && - !(tree->gtFlags & GTF_IND_UNALIGNED); + bool addrIsInReg = tree->Addr()->isUsedFromReg(); + bool addrIsAligned = ((tree->gtFlags & GTF_IND_UNALIGNED) == 0); - if (useLoadAcquire) + if ((ins == INS_ldrb) && addrIsInReg) { - switch (EA_SIZE(attr)) - { - case EA_1BYTE: - assert(ins == INS_ldrb); - ins = INS_ldarb; - 
break; - case EA_2BYTE: - assert(ins == INS_ldrh); - ins = INS_ldarh; - break; - case EA_4BYTE: - case EA_8BYTE: - assert(ins == INS_ldr); - ins = INS_ldar; - break; - default: - assert(false); // We should not get here - } + ins = INS_ldarb; + } + else if ((ins == INS_ldrh) && addrIsInReg && addrIsAligned) + { + ins = INS_ldarh; + } + else if ((ins == INS_ldr) && addrIsInReg && addrIsAligned && genIsValidIntReg(targetReg)) + { + ins = INS_ldar; } + else +#endif // _TARGET_ARM64_ + { + emitBarrier = true; + } + } - emit->emitInsLoadStoreOp(ins, attr, targetReg, tree); + getEmitter()->emitInsLoadStoreOp(ins, emitActualTypeSize(type), targetReg, tree); - if (!useLoadAcquire) // issue a INS_BARRIER_OSHLD after a volatile LdInd operation - instGen_MemoryBarrier(INS_BARRIER_OSHLD); + if (emitBarrier) + { +#ifdef _TARGET_ARM64_ + instGen_MemoryBarrier(INS_BARRIER_OSHLD); #else - emit->emitInsLoadStoreOp(ins, attr, targetReg, tree); - - // issue a full memory barrier after a volatile LdInd operation instGen_MemoryBarrier(); -#endif // _TARGET_ARM64_ - } - else - { - emit->emitInsLoadStoreOp(ins, attr, targetReg, tree); +#endif } genProduceReg(tree); @@ -2074,14 +2033,14 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode) { if ((size & 2) != 0) { - genCodeForLoadOffset(INS_ldrh, EA_2BYTE, tmpReg, srcAddr, offset); - genCodeForStoreOffset(INS_strh, EA_2BYTE, tmpReg, dstAddr, offset); + genCodeForLoadOffset(INS_ldrh, EA_4BYTE, tmpReg, srcAddr, offset); + genCodeForStoreOffset(INS_strh, EA_4BYTE, tmpReg, dstAddr, offset); offset += 2; } if ((size & 1) != 0) { - genCodeForLoadOffset(INS_ldrb, EA_1BYTE, tmpReg, srcAddr, offset); - genCodeForStoreOffset(INS_strb, EA_1BYTE, tmpReg, dstAddr, offset); + genCodeForLoadOffset(INS_ldrb, EA_4BYTE, tmpReg, srcAddr, offset); + genCodeForStoreOffset(INS_strb, EA_4BYTE, tmpReg, dstAddr, offset); } } #endif // !_TARGET_ARM64_ @@ -2729,12 +2688,12 @@ void CodeGen::genJmpMethod(GenTree* jmp) if (varDsc->TypeGet() == TYP_LONG) { // 
long - at least the low half must be enregistered - getEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, varDsc->lvRegNum, varNum, 0); + getEmitter()->emitIns_S_R(INS_str, EA_4BYTE, varDsc->lvRegNum, varNum, 0); // Is the upper half also enregistered? if (varDsc->lvOtherReg != REG_STK) { - getEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, varDsc->lvOtherReg, varNum, sizeof(int)); + getEmitter()->emitIns_S_R(INS_str, EA_4BYTE, varDsc->lvOtherReg, varNum, sizeof(int)); } } else diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp index 6d0a7eb..4edcda9 100644 --- a/src/jit/codegencommon.cpp +++ b/src/jit/codegencommon.cpp @@ -1689,10 +1689,10 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg) { // Ngen case - GS cookie constant needs to be accessed through an indirection. instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSConst, (ssize_t)compiler->gsGlobalSecurityCookieAddr); - getEmitter()->emitIns_R_R_I(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regGSConst, regGSConst, 0); + getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, regGSConst, regGSConst, 0); } // Load this method's GS value from the stack frame - getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regGSValue, compiler->lvaGSSecurityCookie, 0); + getEmitter()->emitIns_R_S(INS_ldr, EA_PTRSIZE, regGSValue, compiler->lvaGSSecurityCookie, 0); // Compare with the GC cookie constant getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, regGSConst, regGSValue); @@ -6527,7 +6527,7 @@ void CodeGen::genSetGSSecurityCookie(regNumber initReg, bool* pInitRegZeroed) #if CPU_LOAD_STORE_ARCH instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, reg, (ssize_t)compiler->gsGlobalSecurityCookieAddr); - getEmitter()->emitIns_R_R_I(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg, reg, 0); + getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, reg, reg, 0); regSet.verifyRegUsed(reg); #else // mov reg, dword ptr [compiler->gsGlobalSecurityCookieAddr] @@ -9206,11 +9206,9 @@ void CodeGen::genFuncletProlog(BasicBlock* block) { // This is the 
first block of a filter - getEmitter()->emitIns_R_R_I(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_R1, REG_R1, - genFuncletInfo.fiPSP_slot_CallerSP_offset); + getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, REG_R1, REG_R1, genFuncletInfo.fiPSP_slot_CallerSP_offset); regSet.verifyRegUsed(REG_R1); - getEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_R1, REG_SPBASE, - genFuncletInfo.fiPSP_slot_SP_offset); + getEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, REG_R1, REG_SPBASE, genFuncletInfo.fiPSP_slot_SP_offset); getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_FPBASE, REG_R1, genFuncletInfo.fiFunctionCallerSPtoFPdelta); } @@ -9220,8 +9218,7 @@ void CodeGen::genFuncletProlog(BasicBlock* block) getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_R3, REG_FPBASE, genFuncletInfo.fiFunctionCallerSPtoFPdelta); regSet.verifyRegUsed(REG_R3); - getEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_R3, REG_SPBASE, - genFuncletInfo.fiPSP_slot_SP_offset); + getEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, REG_R3, REG_SPBASE, genFuncletInfo.fiPSP_slot_SP_offset); } } @@ -9882,7 +9879,7 @@ void CodeGen::genSetPSPSym(regNumber initReg, bool* pInitRegZeroed) *pInitRegZeroed = false; getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, regTmp, regBase, callerSPOffs); - getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, regTmp, compiler->lvaPSPSym, 0); + getEmitter()->emitIns_S_R(INS_str, EA_PTRSIZE, regTmp, compiler->lvaPSPSym, 0); #elif defined(_TARGET_ARM64_) @@ -9894,7 +9891,7 @@ void CodeGen::genSetPSPSym(regNumber initReg, bool* pInitRegZeroed) *pInitRegZeroed = false; getEmitter()->emitIns_R_R_Imm(INS_add, EA_PTRSIZE, regTmp, REG_SPBASE, SPtoCallerSPdelta); - getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, regTmp, compiler->lvaPSPSym, 0); + getEmitter()->emitIns_S_R(INS_str, EA_PTRSIZE, regTmp, compiler->lvaPSPSym, 0); #elif defined(_TARGET_AMD64_) diff --git a/src/jit/codegenlinear.cpp b/src/jit/codegenlinear.cpp index 
5842700..88d3ac3 100644 --- a/src/jit/codegenlinear.cpp +++ b/src/jit/codegenlinear.cpp @@ -965,12 +965,9 @@ void CodeGen::genUnspillRegIfNeeded(GenTree* tree) targetType = genActualType(varDsc->lvType); } instruction ins = ins_Load(targetType, compiler->isSIMDTypeLocalAligned(lcl->gtLclNum)); - emitAttr attr = emitTypeSize(targetType); + emitAttr attr = emitActualTypeSize(targetType); emitter* emit = getEmitter(); - // Fixes Issue #3326 - attr = varTypeIsFloating(targetType) ? attr : emit->emitInsAdjustLoadStoreAttr(ins, attr); - // Load local variable from its home location. inst_RV_TT(ins, dstReg, unspillTree, 0, attr); #elif defined(_TARGET_ARM_) diff --git a/src/jit/emitarm.cpp b/src/jit/emitarm.cpp index ad6bca4..949e66b 100644 --- a/src/jit/emitarm.cpp +++ b/src/jit/emitarm.cpp @@ -188,18 +188,7 @@ void emitter::emitInsSanityCheck(instrDesc* id) case IF_T1_C: // T1_C .....iiiiinnnddd R1 R2 imm5 assert(isLowRegister(id->idReg1())); assert(isLowRegister(id->idReg2())); - if (emitInsIsLoadOrStore(id->idIns())) - { - emitAttr size = id->idOpSize(); - int imm = emitGetInsSC(id); - - imm = insUnscaleImm(imm, size); - assert(imm < 0x20); - } - else - { - assert(id->idSmallCns() < 0x20); - } + assert(insUnscaleImm(id->idIns(), emitGetInsSC(id)) < 0x20); break; case IF_T1_D0: // T1_D0 ........Dmmmmddd R1* R2* @@ -2287,14 +2276,14 @@ void emitter::emitIns_R_R( break; case INS_tbb: - assert(size == EA_1BYTE); + assert(size == EA_4BYTE); assert(insDoesNotSetFlags(flags)); fmt = IF_T2_C9; sf = INS_FLAGS_NOT_SET; break; case INS_tbh: - assert(size == EA_2BYTE); + assert(size == EA_4BYTE); assert(insDoesNotSetFlags(flags)); fmt = IF_T2_C9; sf = INS_FLAGS_NOT_SET; @@ -2310,7 +2299,7 @@ void emitter::emitIns_R_R( case INS_ldrexb: case INS_strexb: - assert(size == EA_1BYTE); + assert(size == EA_4BYTE); assert(insDoesNotSetFlags(flags)); fmt = IF_T2_E1; sf = INS_FLAGS_NOT_SET; @@ -2318,7 +2307,7 @@ void emitter::emitIns_R_R( case INS_ldrexh: case INS_strexh: - assert(size == 
EA_2BYTE); + assert(size == EA_4BYTE); assert(insDoesNotSetFlags(flags)); fmt = IF_T2_E1; sf = INS_FLAGS_NOT_SET; @@ -2763,7 +2752,7 @@ void emitter::emitIns_R_R_I(instruction ins, case INS_ldrb: case INS_strb: - assert(size == EA_1BYTE); + assert(size == EA_4BYTE); assert(insDoesNotSetFlags(flags)); if (isLowRegister(reg1) && isLowRegister(reg2) && insOptsNone(opt) && ((imm & 0x001f) == imm)) @@ -2775,12 +2764,12 @@ void emitter::emitIns_R_R_I(instruction ins, goto COMMON_THUMB2_LDST; case INS_ldrsb: - assert(size == EA_1BYTE); + assert(size == EA_4BYTE); goto COMMON_THUMB2_LDST; case INS_ldrh: case INS_strh: - assert(size == EA_2BYTE); + assert(size == EA_4BYTE); assert(insDoesNotSetFlags(flags)); if (isLowRegister(reg1) && isLowRegister(reg2) && insOptsNone(opt) && ((imm & 0x003e) == imm)) @@ -2792,7 +2781,7 @@ void emitter::emitIns_R_R_I(instruction ins, goto COMMON_THUMB2_LDST; case INS_ldrsh: - assert(size == EA_2BYTE); + assert(size == EA_4BYTE); goto COMMON_THUMB2_LDST; case INS_vldr: @@ -3100,13 +3089,13 @@ void emitter::emitIns_R_R_R(instruction ins, case INS_ldrb: case INS_strb: case INS_ldrsb: - assert(size == EA_1BYTE); + assert(size == EA_4BYTE); goto COMMON_THUMB1_LDST; case INS_ldrsh: case INS_ldrh: case INS_strh: - assert(size == EA_2BYTE); + assert(size == EA_4BYTE); goto COMMON_THUMB1_LDST; case INS_ldr: @@ -3367,13 +3356,13 @@ void emitter::emitIns_R_R_R_I(instruction ins, case INS_ldrb: case INS_ldrsb: case INS_strb: - assert(size == EA_1BYTE); + assert(size == EA_4BYTE); goto COMMON_THUMB2_LDST; case INS_ldrh: case INS_ldrsh: case INS_strh: - assert(size == EA_2BYTE); + assert(size == EA_4BYTE); goto COMMON_THUMB2_LDST; case INS_ldr: @@ -5018,31 +5007,39 @@ inline unsigned insEncodeImmT2_Mov(int imm) return result; } -/***************************************************************************** - * - * Unscales the immediate based on the operand size in 'size' - */ -/*static*/ int emitter::insUnscaleImm(int imm, emitAttr size) 
+//------------------------------------------------------------------------ +// insUnscaleImm: Unscales the immediate operand of a given IF_T1_C instruction. +// +// Arguments: +// ins - the instruction +// imm - the immediate value to unscale +// +// Return Value: +// The unscaled immediate value +// +/*static*/ int emitter::insUnscaleImm(instruction ins, int imm) { - switch (size) + switch (ins) { - case EA_8BYTE: - case EA_4BYTE: + case INS_ldr: + case INS_str: assert((imm & 0x0003) == 0); imm >>= 2; break; - - case EA_2BYTE: + case INS_ldrh: + case INS_strh: assert((imm & 0x0001) == 0); imm >>= 1; break; - - case EA_1BYTE: + case INS_ldrb: + case INS_strb: + case INS_lsl: + case INS_lsr: + case INS_asr: // Do nothing break; - default: - assert(!"Invalid value in size"); + assert(!"Invalid IF_T1_C instruction"); break; } return imm; @@ -5628,10 +5625,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) code = emitInsCode(ins, fmt); code |= insEncodeRegT1_D3(id->idReg1()); code |= insEncodeRegT1_N3(id->idReg2()); - if (emitInsIsLoadOrStore(ins)) - { - imm = insUnscaleImm(imm, size); - } + imm = insUnscaleImm(ins, imm); assert((imm & 0x001f) == imm); code |= (imm << 6); dst += emitOutput_Thumb1Instr(dst, code); @@ -5657,7 +5651,9 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) sz = emitGetInstrDescSize(id); imm = emitGetInsSC(id); code = emitInsCode(ins, fmt); - imm = insUnscaleImm(imm, size); + assert((ins == INS_add) || (ins == INS_sub)); + assert((imm & 0x0003) == 0); + imm >>= 2; assert((imm & 0x007F) == imm); code |= imm; dst += emitOutput_Thumb1Instr(dst, code); @@ -5699,7 +5695,9 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) code |= insEncodeRegT1_DI(id->idReg1()); if (fmt == IF_T1_J2) { - imm = insUnscaleImm(imm, size); + assert((ins == INS_add) || (ins == INS_ldr) || (ins == INS_str)); + assert((imm & 0x0003) == 0); + imm >>= 2; } assert((imm & 0x00ff) == imm); code |= imm; 
diff --git a/src/jit/emitarm.h b/src/jit/emitarm.h index 1e79af0..2938559 100644 --- a/src/jit/emitarm.h +++ b/src/jit/emitarm.h @@ -96,7 +96,7 @@ void emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataReg, GenTr static bool isModImmConst(int imm); static int encodeModImmConst(int imm); -static int insUnscaleImm(int imm, emitAttr size); +static int insUnscaleImm(instruction ins, int imm); /************************************************************************/ /* Public inline informational methods */ diff --git a/src/jit/emitarm64.cpp b/src/jit/emitarm64.cpp index 7f0c0df..510bb2e 100644 --- a/src/jit/emitarm64.cpp +++ b/src/jit/emitarm64.cpp @@ -960,35 +960,6 @@ bool emitter::emitInsMayWriteMultipleRegs(instrDesc* id) } } -// For the small loads/store instruction we adjust the size 'attr' -// depending upon whether we have a load or a store -// -emitAttr emitter::emitInsAdjustLoadStoreAttr(instruction ins, emitAttr attr) -{ - if (EA_SIZE(attr) <= EA_4BYTE) - { - if (emitInsIsLoad(ins)) - { - // The value of 'ins' encodes the size to load - // we use EA_8BYTE here because it is the size we will write (into dataReg) - // it is also required when ins is INS_ldrsw - // - attr = EA_8BYTE; - } - else - { - assert(emitInsIsStore(ins)); - - // The value of 'ins' encodes the size to store - // we use EA_4BYTE here because it is the size of the register - // that we want to display when storing small values - // - attr = EA_4BYTE; - } - } - return attr; -} - // Takes an instrDesc 'id' and uses the instruction 'ins' to determine the // size of the target register that is written or read by the instruction. // Note that even if EA_4BYTE is returned a load instruction will still @@ -11580,8 +11551,6 @@ void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm) // void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataReg, GenTreeIndir* indir) { - emitAttr ldstAttr = isVectorRegister(dataReg) ? 
attr : emitInsAdjustLoadStoreAttr(ins, attr); - GenTree* addr = indir->Addr(); if (addr->isContained()) @@ -11630,7 +11599,7 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR noway_assert(emitInsIsLoad(ins) || (tmpReg != dataReg)); // Then load/store dataReg from/to [tmpReg + offset] - emitIns_R_R_I(ins, ldstAttr, dataReg, tmpReg, offset); + emitIns_R_R_I(ins, attr, dataReg, tmpReg, offset); } else // large offset { @@ -11644,7 +11613,7 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR noway_assert(tmpReg != index->gtRegNum); // Then load/store dataReg from/to [tmpReg + index*scale] - emitIns_R_R_R_I(ins, ldstAttr, dataReg, tmpReg, index->gtRegNum, lsl, INS_OPTS_LSL); + emitIns_R_R_R_I(ins, attr, dataReg, tmpReg, index->gtRegNum, lsl, INS_OPTS_LSL); } } else // (offset == 0) @@ -11652,21 +11621,21 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR if (lsl > 0) { // Then load/store dataReg from/to [memBase + index*scale] - emitIns_R_R_R_I(ins, ldstAttr, dataReg, memBase->gtRegNum, index->gtRegNum, lsl, INS_OPTS_LSL); + emitIns_R_R_R_I(ins, attr, dataReg, memBase->gtRegNum, index->gtRegNum, lsl, INS_OPTS_LSL); } else // no scale { // Then load/store dataReg from/to [memBase + index] - emitIns_R_R_R(ins, ldstAttr, dataReg, memBase->gtRegNum, index->gtRegNum); + emitIns_R_R_R(ins, attr, dataReg, memBase->gtRegNum, index->gtRegNum); } } } else // no Index register { - if (emitIns_valid_imm_for_ldst_offset(offset, EA_SIZE(attr))) + if (emitIns_valid_imm_for_ldst_offset(offset, emitTypeSize(indir->TypeGet()))) { // Then load/store dataReg from/to [memBase + offset] - emitIns_R_R_I(ins, ldstAttr, dataReg, memBase->gtRegNum, offset); + emitIns_R_R_I(ins, attr, dataReg, memBase->gtRegNum, offset); } else { @@ -11677,14 +11646,14 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset); // 
Then load/store dataReg from/to [memBase + tmpReg] - emitIns_R_R_R(ins, ldstAttr, dataReg, memBase->gtRegNum, tmpReg); + emitIns_R_R_R(ins, attr, dataReg, memBase->gtRegNum, tmpReg); } } } else // addr is not contained, so we evaluate it into a register { // Then load/store dataReg from/to [addrReg] - emitIns_R_R(ins, ldstAttr, dataReg, addr->gtRegNum); + emitIns_R_R(ins, attr, dataReg, addr->gtRegNum); } } diff --git a/src/jit/emitarm64.h b/src/jit/emitarm64.h index 2a24b16..ad0f0c6 100644 --- a/src/jit/emitarm64.h +++ b/src/jit/emitarm64.h @@ -84,7 +84,6 @@ bool emitInsIsCompare(instruction ins); bool emitInsIsLoad(instruction ins); bool emitInsIsStore(instruction ins); bool emitInsIsLoadOrStore(instruction ins); -emitAttr emitInsAdjustLoadStoreAttr(instruction ins, emitAttr attr); emitAttr emitInsTargetRegSize(instrDesc* id); emitAttr emitInsLoadStoreSize(instrDesc* id); diff --git a/src/jit/instr.cpp b/src/jit/instr.cpp index ed25454..020a783 100644 --- a/src/jit/instr.cpp +++ b/src/jit/instr.cpp @@ -1713,16 +1713,7 @@ instruction CodeGenInterface::ins_Load(var_types srcType, bool aligned /*=false* #elif defined(_TARGET_ARMARCH_) if (!varTypeIsSmall(srcType)) { -#if defined(_TARGET_ARM64_) - if (!varTypeIsI(srcType) && !varTypeIsUnsigned(srcType)) - { - ins = INS_ldrsw; - } - else -#endif // defined(_TARGET_ARM64_) - { - ins = INS_ldr; - } + ins = INS_ldr; } else if (varTypeIsByte(srcType)) { -- 2.7.4