assert(Valid());
}
+//------------------------------------------------------------------------
+// SetLocation: Set this emitter location to a given insGroup and code position.
+//
+// Arguments:
+//    _ig      - the insGroup of the location
+//    _codePos - the packed code position (instruction number + offset) within the IG
+//
+void emitLocation::SetLocation(insGroup* _ig, unsigned _codePos)
+{
+ ig = _ig;
+ codePos = _codePos;
+
+ assert(Valid());
+}
+
+//------------------------------------------------------------------------
+// SetLocation: Copy another emitLocation into this one.
+//
+// Arguments:
+//    newLocation - the location to copy
+//
+// NOTE(review): `newLocation` is passed by value; the type is small (a pointer
+// plus an unsigned), but `const emitLocation&` would avoid the copy. Changing
+// it requires updating the declaration as well, so it is left as-is here.
+//
+void emitLocation::SetLocation(emitLocation newLocation)
+{
+ ig = newLocation.ig;
+ codePos = newLocation.codePos;
+
+ assert(Valid());
+}
+
bool emitLocation::IsCurrentLocation(emitter* emit) const
{
assert(Valid());
return emitGetInsNumFromCodePos(codePos);
}
+//------------------------------------------------------------------------
+// GetInsOffset: Get the code offset within the insGroup, as decoded from
+// the stored packed code position.
+//
+int emitLocation::GetInsOffset() const
+{
+ return emitGetInsOfsFromCodePos(codePos);
+}
+
// Get the instruction offset in the current instruction group, which must be a funclet prolog group.
// This is used to find an instruction offset used in unwind data.
// TODO-AMD64-Bug?: We only support a single main function prolog group, but allow for multiple funclet prolog
assert((ig->igFlags & IGF_PLACEHOLDER) == 0);
ig->igData = id;
+ INDEBUG(ig->igDataSize = gs;)
memcpy(id, emitCurIGfreeBase, sz);
{
of = ig->igSize;
}
+#ifdef TARGET_ARM64
+ else if ((ig->igFlags & IGF_HAS_REMOVED_INSTR) != 0 && no == ig->igInsCnt + 1U)
+ {
+ // This can happen if an instruction was replaced, but the replacement couldn't fit into
+ // the same IG and instead was placed in a new IG.
+ return ig->igNext->igOffs + emitFindOffset(ig->igNext, 1);
+ }
+#endif
else if (ig->igFlags & IGF_UPD_ISZ)
{
/*
// printf("[IG=%02u;ID=%03u;OF=%04X] <= %08X\n", ig->igNum, emitGetInsNumFromCodePos(codePos), of, codePos);
/* Make sure the offset estimate is accurate */
-
assert(of == emitFindOffset(ig, emitGetInsNumFromCodePos(codePos)));
}
#endif
}
+//------------------------------------------------------------------------
+// emitRemoveLastInstruction: Remove the last instruction emitted; it has been optimized away by the
+// next instruction we are generating. `emitLastIns` must be non-null, meaning there is a
+// previous instruction. The previous instruction might have already been saved, or it might
+// be in the currently accumulating insGroup buffer.
+//
+// The `emitLastIns` is set to nullptr after this function. It is expected that a new instruction
+// will be immediately generated after this, which will set it again.
+//
+// Removing an instruction can invalidate any captured emitter location
+// (using emitLocation::CaptureLocation()) after the instruction was generated. This is because the
+// emitLocation stores the current IG instruction number and code size. If the instruction is
+// removed and not replaced (e.g., it is at the end of the IG, and any replacement creates a new
+// EXTEND IG), then the saved instruction number is incorrect. The IGF_HAS_REMOVED_INSTR flag is
+// used to check for this later.
+//
+// NOTE: It is expected that the GC effect of the removed instruction will be handled by the newly
+// generated replacement(s).
+//
+#ifdef TARGET_ARM64
+void emitter::emitRemoveLastInstruction()
+{
+ assert(emitLastIns != nullptr);
+ assert(emitLastInsIG != nullptr);
+
+ JITDUMP("Removing saved instruction in %s:\n> ", emitLabelString(emitLastInsIG));
+ JITDUMPEXEC(dispIns(emitLastIns))
+
+ // We should assert it's not a jmp, as that would require updating the jump lists, e.g. emitCurIGjmpList.
+
+ // The instrDesc in the buffer is preceded by `m_debugInfoSize` bytes of debug info;
+ // the true start of what was written for this instruction is before `emitLastIns`.
+ BYTE* lastInsActualStartAddr = (BYTE*)emitLastIns - m_debugInfoSize;
+ unsigned short lastCodeSize = (unsigned short)emitLastIns->idCodeSize();
+
+ // Check that a new buffer hasn't been created since the last instruction was emitted.
+ assert((emitCurIGfreeBase <= lastInsActualStartAddr) && (lastInsActualStartAddr < emitCurIGfreeEndp));
+
+ // Ensure the current IG is non-empty.
+ assert(emitCurIGnonEmpty());
+ assert(lastInsActualStartAddr < emitCurIGfreeNext);
+ assert(emitCurIGinsCnt >= 1);
+ assert(emitCurIGsize >= emitLastIns->idCodeSize());
+
+ // Total buffer bytes consumed by the last instruction (debug info + instrDesc).
+ size_t insSize = emitCurIGfreeNext - lastInsActualStartAddr;
+
+ // Roll back the buffer pointer and the instruction/size counters.
+ emitCurIGfreeNext = lastInsActualStartAddr;
+ emitCurIGinsCnt -= 1;
+ emitInsCount -= 1;
+ emitCurIGsize -= lastCodeSize;
+
+ // We're going to overwrite the memory; zero it.
+ memset(emitCurIGfreeNext, 0, insSize);
+
+ // Remember this happened, so later offset queries know captured locations
+ // into this IG may be stale.
+ emitCurIG->igFlags |= IGF_HAS_REMOVED_INSTR;
+
+ emitLastIns = nullptr;
+ emitLastInsIG = nullptr;
+}
+#endif
+
/*****************************************************************************
*
* emitGetInsSC: Get the instruction's constant value.
{
}
+ // Construct a location from an explicit insGroup and packed code position.
+ emitLocation(insGroup* _ig, unsigned _codePos)
+ {
+ SetLocation(_ig, _codePos);
+ }
+
+ // Construct a location capturing the emitter's current position.
+ // NOTE(review): this single-argument constructor is implicit; consider
+ // marking it `explicit` to avoid accidental conversions from emitter*.
+ emitLocation(emitter* emit)
+ {
+ CaptureLocation(emit);
+ }
+
emitLocation(void* emitCookie) : ig((insGroup*)emitCookie), codePos(0)
{
}
}
void CaptureLocation(emitter* emit);
+ void SetLocation(insGroup* _ig, unsigned _codePos);
+ void SetLocation(emitLocation newLocation);
bool IsCurrentLocation(emitter* emit) const;
}
int GetInsNum() const;
+ int GetInsOffset() const;
bool operator!=(const emitLocation& other) const
{
#ifdef DEBUG
BasicBlock* lastGeneratedBlock; // The last block that generated code into this insGroup.
jitstd::list<BasicBlock*> igBlocks; // All the blocks that generated code into this insGroup.
+ size_t igDataSize; // size of instrDesc data pointed to by 'igData'
#endif
UNATIVE_OFFSET igNum; // for ordering (and display) purposes
#define IGF_REMOVED_ALIGN 0x0800 // IG was marked as having an alignment instruction(s), but was later unmarked
// without updating the IG's size/offsets.
#define IGF_HAS_REMOVABLE_JMP 0x1000 // this group ends with an unconditional jump which is a candidate for removal
+#ifdef TARGET_ARM64
+#define IGF_HAS_REMOVED_INSTR 0x2000 // this group has an instruction that was removed.
+#endif
// Mask of IGF_* flags that should be propagated to new blocks when they are created.
// This allows prologs and epilogs to be any number of IGs, but still be
insGroup* emitSavIG(bool emitAdd = false);
void emitNxtIG(bool extend = false);
+#ifdef TARGET_ARM64
+ void emitRemoveLastInstruction();
+#endif
+
bool emitCurIGnonEmpty()
{
return (emitCurIG && emitCurIGfreeNext > emitCurIGfreeBase);
inline unsigned emitter::emitCurOffset()
{
- unsigned codePos = emitCurIGinsCnt + (emitCurIGsize << 16);
+ return emitSpecifiedOffset(emitCurIGinsCnt, emitCurIGsize);
+}
- assert(emitGetInsOfsFromCodePos(codePos) == emitCurIGsize);
- assert(emitGetInsNumFromCodePos(codePos) == emitCurIGinsCnt);
+inline unsigned emitter::emitSpecifiedOffset(unsigned insCount, unsigned igSize)
+{
+ unsigned codePos = insCount + (igSize << 16);
- // printf("[IG=%02u;ID=%03u;OF=%04X] => %08X\n", emitCurIG->igNum, emitCurIGinsCnt, emitCurIGsize, codePos);
+ assert(emitGetInsOfsFromCodePos(codePos) == igSize);
+ assert(emitGetInsNumFromCodePos(codePos) == insCount);
return codePos;
}
emitAttr elemsize = EA_UNKNOWN;
insFormat fmt = IF_NONE;
bool isLdSt = false;
+ bool isLdrStr = false;
bool isSIMD = false;
bool isAddSub = false;
bool setFlags = false;
unscaledOp = false;
scale = NaturalScale_helper(size);
isLdSt = true;
+ isLdrStr = true;
break;
case INS_ldur:
}
}
- // Is the ldr/str even necessary?
- // For volatile load/store, there will be memory barrier instruction before/after the load/store
- // and in such case, IsRedundantLdStr() returns false, because the method just checks for load/store
- // pair next to each other.
- if (emitComp->opts.OptimizationEnabled() && IsRedundantLdStr(ins, reg1, reg2, imm, size, fmt))
+ // Try to optimize a load/store with an alternative instruction.
+ if (isLdrStr && emitComp->opts.OptimizationEnabled() && OptimizeLdrStr(ins, attr, reg1, reg2, imm, size, fmt))
{
return;
}
scale = (size == EA_8BYTE) ? 3 : 2;
}
isLdSt = true;
+ fmt = IF_LS_3C;
break;
case INS_ld1:
assert(!"Instruction cannot be encoded: Add/Sub IF_DR_3A");
}
}
+
assert(fmt != IF_NONE);
instrDesc* id = emitNewInstrCns(attr, imm);
*/
void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs)
{
- emitAttr size = EA_SIZE(attr);
- insFormat fmt = IF_NONE;
- int disp = 0;
- unsigned scale = 0;
+ emitAttr size = EA_SIZE(attr);
+ insFormat fmt = IF_NONE;
+ int disp = 0;
+ unsigned scale = 0;
+ bool isLdrStr = false;
assert(offs >= 0);
case INS_str:
case INS_ldr:
assert(isValidGeneralDatasize(size) || isValidVectorDatasize(size));
- scale = genLog2(EA_SIZE_IN_BYTES(size));
+ scale = genLog2(EA_SIZE_IN_BYTES(size));
+ isLdrStr = true;
break;
case INS_lea:
{
bool useRegForImm = false;
ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate
-
- imm = disp;
+ imm = disp;
if (imm == 0)
{
fmt = IF_LS_2A;
}
}
- // Is the ldr/str even necessary?
- if (emitComp->opts.OptimizationEnabled() && IsRedundantLdStr(ins, reg1, reg2, imm, size, fmt))
+ assert(fmt != IF_NONE);
+
+ // Try to optimize a load/store with an alternative instruction.
+ if (isLdrStr && emitComp->opts.OptimizationEnabled() &&
+ OptimizeLdrStr(ins, attr, reg1, reg2, imm, size, fmt, true, varx, offs))
{
return;
}
- assert(fmt != IF_NONE);
-
instrDesc* id = emitNewInstrCns(attr, imm);
id->idIns(ins);
int disp = 0;
unsigned scale = 0;
bool isVectorStore = false;
+ bool isStr = false;
// TODO-ARM64-CQ: use unscaled loads?
/* Figure out the encoding format of the instruction */
scale = NaturalScale_helper(size);
isVectorStore = true;
}
+ isStr = true;
break;
default:
fmt = IF_LS_3A;
}
- // Is the ldr/str even necessary?
- if (emitComp->opts.OptimizationEnabled() && IsRedundantLdStr(ins, reg1, reg2, imm, size, fmt))
+ assert(fmt != IF_NONE);
+
+ // Try to optimize a store with an alternative instruction.
+ if (isStr && emitComp->opts.OptimizationEnabled() &&
+ OptimizeLdrStr(ins, attr, reg1, reg2, imm, size, fmt, true, varx, offs))
{
return;
}
- assert(fmt != IF_NONE);
-
instrDesc* id = emitNewInstrCns(attr, imm);
id->idIns(ins);
// Backward branches using instruction count must be within the same instruction group.
assert(insNum + 1 >= (unsigned)(-instrCount));
}
+
dstOffs = ig->igOffs + emitFindOffset(ig, (insNum + 1 + instrCount));
dstAddr = emitOffsetToPtr(dstOffs);
}
//
// str x1, [x2, #56]
// ldr x1, [x2, #56] <-- redundant
-
+//
// Arguments:
// ins - The current instruction
// dst - The current destination
// imm - Immediate offset
// size - Operand size
// fmt - Format of instruction
+//
// Return Value:
// true if previous instruction already has desired value in register/memory location.
-
+//
+// Notes:
+// For volatile load/store, there will be memory barrier instruction before/after the load/store
+// and in such case, this method returns false, because the method just checks for load/store
+// pair next to each other.
+//
bool emitter::IsRedundantLdStr(
instruction ins, regNumber reg1, regNumber reg2, ssize_t imm, emitAttr size, insFormat fmt)
{
- if (((ins != INS_ldr) && (ins != INS_str)) || !emitCanPeepholeLastIns())
+ if ((ins != INS_ldr) && (ins != INS_str))
{
return false;
}
regNumber prevReg2 = emitLastIns->idReg2();
insFormat lastInsfmt = emitLastIns->idInsFmt();
emitAttr prevSize = emitLastIns->idOpSize();
- ssize_t prevImm = emitLastIns->idIsLargeCns() ? ((instrDescCns*)emitLastIns)->idcCnsVal : emitLastIns->idSmallCns();
+ ssize_t prevImm = emitGetInsSC(emitLastIns);
// Only optimize if:
// 1. "base" or "base plus immediate offset" addressing modes.
return false;
}
+
+//-----------------------------------------------------------------------------------
+// ReplaceLdrStrWithPairInstr: Potentially, overwrite a previously-emitted "ldr" or "str"
+// instruction with an "ldp" or "stp" instruction.
+//
+// Arguments:
+//     ins      - The instruction code
+//     reg1Attr - The emit attribute for register 1
+//     reg1     - Register 1
+//     reg2     - Encoded register 2
+//     imm      - Immediate offset, prior to scaling by operand size
+//     size     - Operand size
+//     fmt      - Instruction format
+//
+// Return Value:
+//     "true" if the previous instruction has been overwritten.
+//
+bool emitter::ReplaceLdrStrWithPairInstr(
+ instruction ins, emitAttr reg1Attr, regNumber reg1, regNumber reg2, ssize_t imm, emitAttr size, insFormat fmt)
+{
+ // Register 2 needs conversion to unencoded value.
+ reg2 = encodingZRtoSP(reg2);
+
+ RegisterOrder optimizationOrder = IsOptimizableLdrStrWithPair(ins, reg1, reg2, imm, size, fmt);
+
+ if (optimizationOrder != eRO_none)
+ {
+ // Capture the operands of the previous instruction before removing it.
+ regNumber oldReg1 = emitLastIns->idReg1();
+
+ ssize_t oldImm = emitGetInsSC(emitLastIns);
+ instruction optIns = (ins == INS_ldr) ? INS_ldp : INS_stp;
+
+ // Preserve the GC-ness of the previous instruction's destination register
+ // so GC info stays correct on the combined instruction.
+ emitAttr oldReg1Attr;
+ switch (emitLastIns->idGCref())
+ {
+ case GCT_GCREF:
+ oldReg1Attr = EA_GCREF;
+ break;
+ case GCT_BYREF:
+ oldReg1Attr = EA_BYREF;
+ break;
+ default:
+ oldReg1Attr = emitLastIns->idOpSize();
+ break;
+ }
+
+ // Remove the last instruction written.
+ emitRemoveLastInstruction();
+
+ // Emit the new instruction. Make sure to scale the immediate value by the operand size.
+ if (optimizationOrder == eRO_ascending)
+ {
+ // The FIRST register is at the lower offset
+ emitIns_R_R_R_I(optIns, oldReg1Attr, oldReg1, reg1, reg2, oldImm * size, INS_OPTS_NONE, reg1Attr);
+ }
+ else
+ {
+ // The SECOND register is at the lower offset
+ emitIns_R_R_R_I(optIns, reg1Attr, reg1, oldReg1, reg2, imm * size, INS_OPTS_NONE, oldReg1Attr);
+ }
+
+ return true;
+ }
+
+ return false;
+}
+
+//-----------------------------------------------------------------------------------
+// IsOptimizableLdrStrWithPair: Check if it is possible to optimize two "ldr" or "str"
+// instructions into a single "ldp" or "stp" instruction.
+//
+// Examples:            ldr     w1, [x20, #0x10]
+//                      ldr     w2, [x20, #0x14]    =>  ldp     w1, w2, [x20, #0x10]
+//
+//                      ldr     w1, [x20, #0x14]
+//                      ldr     w2, [x20, #0x10]    =>  ldp     w2, w1, [x20, #0x10]
+//
+// Arguments:
+//     ins  - The instruction code
+//     reg1 - Register 1 number
+//     reg2 - Register 2 number
+//     imm  - Immediate offset, prior to scaling by operand size
+//     size - Operand size
+//     fmt  - Instruction format
+//
+// Return Value:
+//     eRO_none       - No optimization of consecutive instructions is possible
+//     eRO_ascending  - Registers can be loaded/ stored into ascending store locations
+//     eRO_descending - Registers can be loaded/ stored into descending store locations.
+//
+emitter::RegisterOrder emitter::IsOptimizableLdrStrWithPair(
+ instruction ins, regNumber reg1, regNumber reg2, ssize_t imm, emitAttr size, insFormat fmt)
+{
+ RegisterOrder optimisationOrder = eRO_none;
+
+ if ((ins != INS_ldr) && (ins != INS_str))
+ {
+ return eRO_none;
+ }
+
+ if (ins != emitLastIns->idIns())
+ {
+ // Not successive ldr or str instructions
+ return eRO_none;
+ }
+
+ regNumber prevReg1 = emitLastIns->idReg1();
+ regNumber prevReg2 = emitLastIns->idReg2();
+ insFormat lastInsFmt = emitLastIns->idInsFmt();
+ emitAttr prevSize = emitLastIns->idOpSize();
+ ssize_t prevImm = emitGetInsSC(emitLastIns);
+
+ // Signed, *raw* immediate value fits in 7 bits, so for LDP/ STP the raw value is from -64 to +63.
+ // For LDR/ STR, there are 9 bits, so we need to limit the range explicitly in software.
+ if ((imm < -64) || (imm > 63) || (prevImm < -64) || (prevImm > 63))
+ {
+ // Then one or more of the immediate values is out of range, so we cannot optimise.
+ return eRO_none;
+ }
+
+ if ((!isGeneralRegisterOrZR(reg1)) || (!isGeneralRegisterOrZR(prevReg1)))
+ {
+ // Either register 1 is not a general register or previous register 1 is not a general register
+ // or the zero register, so we cannot optimise.
+ return eRO_none;
+ }
+
+ if (lastInsFmt != fmt)
+ {
+ // The formats of the two instructions differ.
+ return eRO_none;
+ }
+
+ if ((emitInsIsLoad(ins)) && (prevReg1 == prevReg2))
+ {
+ // Then the previous load overwrote the register that we are indexing against.
+ return eRO_none;
+ }
+
+ if ((emitInsIsLoad(ins)) && (reg1 == prevReg1))
+ {
+ // Cannot load to the same register twice.
+ return eRO_none;
+ }
+
+ if (prevSize != size)
+ {
+ // Operand sizes differ.
+ return eRO_none;
+ }
+
+ // There are two possible orders for consecutive registers.
+ // These may be stored to or loaded from increasing or
+ // decreasing store locations.
+ if (imm == (prevImm + 1))
+ {
+ // Previous Register 1 is at a higher offset than This Register 1
+ optimisationOrder = eRO_ascending;
+ }
+ else if (imm == (prevImm - 1))
+ {
+ // Previous Register 1 is at a lower offset than This Register 1
+ optimisationOrder = eRO_descending;
+ }
+ else
+ {
+ // Not consecutive immediate values.
+ return eRO_none;
+ }
+
+ if ((reg2 != prevReg2) || !isGeneralRegisterOrSP(reg2))
+ {
+ // The "register 2" should be same as previous instruction and should either be a general
+ // register or stack pointer.
+ return eRO_none;
+ }
+
+ // Don't remove instructions whilst in prologs or epilogs, as these contain "unwindable"
+ // parts, where we need to report unwind codes to the OS.
+ if (emitIGisInProlog(emitCurIG) || emitIGisInEpilog(emitCurIG))
+ {
+ return eRO_none;
+ }
+#ifdef FEATURE_EH_FUNCLETS
+ if (emitIGisInFuncletProlog(emitCurIG) || emitIGisInFuncletEpilog(emitCurIG))
+ {
+ return eRO_none;
+ }
+#endif
+
+ return optimisationOrder;
+}
+
#endif // defined(TARGET_ARM64)
emitAttr retSize,
emitAttr secondRetSize);
+/************************************************************************/
+/*  enum to allow instruction optimisation to specify register order    */
+/************************************************************************/
+
+// Result of IsOptimizableLdrStrWithPair: whether two consecutive ldr/str
+// instructions can be paired, and in which register/offset order.
+enum RegisterOrder
+{
+ eRO_none = 0, // no pairing is possible
+ eRO_ascending, // first instruction's register is at the lower offset
+ eRO_descending // second instruction's register is at the lower offset
+};
+
/************************************************************************/
/* Private helpers for instruction output */
/************************************************************************/
// If yes, the caller of this method can choose to omit current mov instruction.
static bool IsMovInstruction(instruction ins);
bool IsRedundantMov(instruction ins, emitAttr size, regNumber dst, regNumber src, bool canSkip);
+
+// Methods to optimize a Ldr or Str with an alternative instruction.
bool IsRedundantLdStr(instruction ins, regNumber reg1, regNumber reg2, ssize_t imm, emitAttr size, insFormat fmt);
+RegisterOrder IsOptimizableLdrStrWithPair(
+ instruction ins, regNumber reg1, regNumber reg2, ssize_t imm, emitAttr size, insFormat fmt);
+bool ReplaceLdrStrWithPairInstr(
+ instruction ins, emitAttr reg1Attr, regNumber reg1, regNumber reg2, ssize_t imm, emitAttr size, insFormat fmt);
+
+//-----------------------------------------------------------------------------------
+// OptimizeLdrStr: Try to optimize a Ldr or Str with an alternative instruction.
+//
+// Arguments:
+//     ins      - The instruction code (must be INS_ldr or INS_str)
+//     reg1Attr - The emit attribute for register 1
+//     reg1     - Register 1
+//     reg2     - Register 2
+//     imm      - Immediate offset, prior to scaling by operand size
+//     size     - Operand size
+//     fmt      - Instruction format
+//     localVar - true if this load/store references a local variable
+//     varx     - The local variable number, if localVar is true
+//     offs     - The local variable offset, if localVar is true
+//
+// Return Value:
+//     "true" if the instruction was either elided as redundant or folded into
+//     the previous instruction as a pair; the caller should emit nothing.
+//
+inline bool OptimizeLdrStr(instruction ins,
+ emitAttr reg1Attr,
+ regNumber reg1,
+ regNumber reg2,
+ ssize_t imm,
+ emitAttr size,
+ insFormat fmt,
+ bool localVar = false,
+ int varx = 0,
+ int offs = 0)
+{
+ assert(ins == INS_ldr || ins == INS_str);
+
+ // Bail out if there is no previous instruction we are allowed to peephole.
+ if (!emitCanPeepholeLastIns())
+ {
+ return false;
+ }
+
+ // Is the ldr/str even necessary?
+ if (IsRedundantLdStr(ins, reg1, reg2, imm, size, fmt))
+ {
+ return true;
+ }
+
+ // If the previous instruction was a matching load/store, then try to replace it instead of emitting.
+ // Don't do this if either instruction had a local variable.
+ if ((emitLastIns->idIns() == ins) && !localVar && !emitLastIns->idIsLclVar() &&
+ ReplaceLdrStrWithPairInstr(ins, reg1Attr, reg1, reg2, imm, size, fmt))
+ {
+ return true;
+ }
+
+ return false;
+}
/************************************************************************
*