From f030fee1051681361156ac54bec74988cb609f08 Mon Sep 17 00:00:00 2001 From: Grzegorz Tomasz Czarnecki Date: Sun, 17 Dec 2023 00:12:00 +0100 Subject: [PATCH] [RISC-V] Add branch label names to disasm (#96057) * Implemented emitDispIns for riscv * Modified emitDispIns name * Fixed missed case * Added assert * Fixed todo * Added int to jitprintf * Added prototype of the emit disp ins * Fixes in emit dis ins name * Reinforced types * Removed useless ifdef statement from emit * Fixed bug in emit disp ins * Added release mode emit disp * Formatted riscv64 * [RISC-V] Added todo comment * [RISC-V] Applied format patch * [RISC-V] Undo the emit.cpp dispIns changes * [RISC-V] Fixed formatting * Removed dead code * Added emitDispInsDebugOnlyInfo * Added preliminary emitOutputInstrJump * Preliminary emitOutputInstrJump impl * TmpSave * [RISC-V] Changes after review * Fixes after merge * Fixed comment in emmit.h * Added label printing * Removed dead code * Improved emitOutputInstrJumpSize * Fixed bugs * Fixed bug in emitOutputInstrJumpSize * Added prelimary barch offset printing and reinforced some emitter methods * Further reinforced disp functions * Splitted emitOutputInstrJumpSize * Formatted code * Last fixes before pr --------- Co-authored-by: Grzegorz Czarnecki --- src/coreclr/jit/emit.cpp | 12 +-- src/coreclr/jit/emit.h | 16 +-- src/coreclr/jit/emitriscv64.cpp | 227 +++++++++++++++++++++++++++++----------- src/coreclr/jit/emitriscv64.h | 21 +++- 4 files changed, 200 insertions(+), 76 deletions(-) diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index 13b8b91..bbf5b95 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -2960,7 +2960,7 @@ void* emitter::emitAddInlineLabel() // emitPrintLabel: Print the assembly label for an insGroup. We could use emitter::emitLabelString() // to be consistent, but that seems silly. // -void emitter::emitPrintLabel(insGroup* ig) +void emitter::emitPrintLabel(const insGroup* ig) const { printf("G_M%03u_IG%02u", emitComp->compMethodID, ig->igNum); } @@ -2973,7 +2973,7 @@ void emitter::emitPrintLabel(insGroup* ig) // Returns: // String with insGroup label // -const char* emitter::emitLabelString(insGroup* ig) +const char* emitter::emitLabelString(const insGroup* ig) const { const int TEMP_BUFFER_LEN = 40; static unsigned curBuf = 0; @@ -4337,7 +4337,7 @@ void emitter::emitDispJumpList() // id - the pointer to the current instrDesc // idSize - the size of the current instrDesc // -void emitter::emitAdvanceInstrDesc(instrDesc** id, size_t idSize) +void emitter::emitAdvanceInstrDesc(instrDesc** id, size_t idSize) const { assert(idSize == emitSizeOfInsDsc(*id)); char* idData = reinterpret_cast(*id); @@ -4355,7 +4355,7 @@ void emitter::emitAdvanceInstrDesc(instrDesc** id, size_t idSize) // Returns: // A pointer to the first instrDesc. // -emitter::instrDesc* emitter::emitFirstInstrDesc(BYTE* idData) +emitter::instrDesc* emitter::emitFirstInstrDesc(BYTE* idData) const { return reinterpret_cast(idData + m_debugInfoSize); } @@ -7676,7 +7676,7 @@ void emitter::emitGenGCInfoIfFuncletRetTarget(insGroup* ig, BYTE* cp) * instruction number for this instruction */ -unsigned emitter::emitFindInsNum(insGroup* ig, instrDesc* idMatch) +unsigned emitter::emitFindInsNum(const insGroup* ig, const instrDesc* idMatch) const { instrDesc* id = emitFirstInstrDesc(ig->igData); @@ -7712,7 +7712,7 @@ unsigned emitter::emitFindInsNum(insGroup* ig, instrDesc* idMatch) * to find the true offset by looking for the instruction within the group. */ -UNATIVE_OFFSET emitter::emitFindOffset(insGroup* ig, unsigned insNum) +UNATIVE_OFFSET emitter::emitFindOffset(const insGroup* ig, unsigned insNum) const { instrDesc* id = emitFirstInstrDesc(ig->igData); UNATIVE_OFFSET of = 0; diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index 9312a14..20763a1 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -2185,8 +2185,8 @@ public: /* Methods to record a code position and later convert to offset */ /************************************************************************/ - unsigned emitFindInsNum(insGroup* ig, instrDesc* id); - UNATIVE_OFFSET emitFindOffset(insGroup* ig, unsigned insNum); + unsigned emitFindInsNum(const insGroup* ig, const instrDesc* id) const; + UNATIVE_OFFSET emitFindOffset(const insGroup* ig, unsigned insNum) const; /************************************************************************/ /* Members and methods used to issue (encode) instructions. */ @@ -2205,7 +2205,7 @@ public: UNATIVE_OFFSET emitTotalHotCodeSize; UNATIVE_OFFSET emitTotalColdCodeSize; - UNATIVE_OFFSET emitCurCodeOffs(const BYTE* dst) + UNATIVE_OFFSET emitCurCodeOffs(const BYTE* dst) const { size_t distance; if ((dst >= emitCodeBlock) && (dst <= (emitCodeBlock + emitTotalHotCodeSize))) @@ -2224,7 +2224,7 @@ public: return (UNATIVE_OFFSET)distance; } - BYTE* emitOffsetToPtr(UNATIVE_OFFSET offset) + BYTE* emitOffsetToPtr(UNATIVE_OFFSET offset) const { if (offset < emitTotalHotCodeSize) { @@ -2280,8 +2280,8 @@ public: unsigned int emitCounts_INS_OPTS_J; #endif // TARGET_LOONGARCH64 || TARGET_RISCV64 - instrDesc* emitFirstInstrDesc(BYTE* idData); - void emitAdvanceInstrDesc(instrDesc** id, size_t idSize); + instrDesc* emitFirstInstrDesc(BYTE* idData) const; + void emitAdvanceInstrDesc(instrDesc** id, size_t idSize) const; size_t emitIssue1Instr(insGroup* ig, instrDesc* id, BYTE** dp); size_t emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp); @@ -2697,8 +2697,8 @@ private: // continues to track GC info as if there was no label. void* emitAddInlineLabel(); - void emitPrintLabel(insGroup* ig); - const char* emitLabelString(insGroup* ig); + void emitPrintLabel(const insGroup* ig) const; + const char* emitLabelString(const insGroup* ig) const; #if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) diff --git a/src/coreclr/jit/emitriscv64.cpp b/src/coreclr/jit/emitriscv64.cpp index 411a653..e291f27 100644 --- a/src/coreclr/jit/emitriscv64.cpp +++ b/src/coreclr/jit/emitriscv64.cpp @@ -1936,11 +1936,8 @@ AGAIN: assert(jmpDist >= 0); // Forward jump assert(!(jmpDist & 0x3)); - if (isLinkingEnd & 0x2) - { - jmp->idAddr()->iiaSetJmpOffset(jmpDist); - } - else if ((extra > 0) && (jmp->idInsOpt() == INS_OPTS_J || jmp->idInsOpt() == INS_OPTS_J_cond)) + if (!(isLinkingEnd & 0x2) && (extra > 0) && + (jmp->idInsOpt() == INS_OPTS_J || jmp->idInsOpt() == INS_OPTS_J_cond)) { // transform forward INS_OPTS_J/INS_OPTS_J_cond jump when jmpDist exceed the maximum short distance instruction ins = jmp->idIns(); @@ -2013,11 +2010,8 @@ AGAIN: assert(jmpDist >= 0); // Backward jump assert(!(jmpDist & 0x3)); - if (isLinkingEnd & 0x2) - { - jmp->idAddr()->iiaSetJmpOffset(-jmpDist); // Backward jump is negative! - } - else if ((extra > 0) && (jmp->idInsOpt() == INS_OPTS_J || jmp->idInsOpt() == INS_OPTS_J_cond)) + if (!(isLinkingEnd & 0x2) && (extra > 0) && + (jmp->idInsOpt() == INS_OPTS_J || jmp->idInsOpt() == INS_OPTS_J_cond)) { // transform backward INS_OPTS_J/INS_OPTS_J_cond jump when jmpDist exceed the maximum short distance instruction ins = jmp->idIns(); @@ -2118,17 +2112,75 @@ AGAIN: * Emit a 32-bit RISCV64 instruction */ -/*static*/ unsigned emitter::emitOutput_Instr(BYTE* dst, code_t code) +unsigned emitter::emitOutput_Instr(BYTE* dst, code_t code) { assert(sizeof(code_t) == 4); - BYTE* dstRW = dst + writeableOffset; - *((code_t*)dstRW) = code; - + memcpy(dst + writeableOffset, &code, sizeof(code_t)); return sizeof(code_t); } +void emitter::emitOutputInstrJumpDistanceHelper(const insGroup* ig, + instrDescJmp* jmp, + UNATIVE_OFFSET& dstOffs, + const BYTE*& dstAddr) const +{ + // TODO-RISCV64-BUG: iiaEncodedInstrCount is not set by the riscv impl making distinguishing the jumps to label and + // an instruction-count based jumps impossible + if (jmp->idAddr()->iiaHasInstrCount()) + { + assert(ig != nullptr); + int instrCount = jmp->idAddr()->iiaGetInstrCount(); + unsigned insNum = emitFindInsNum(ig, jmp); + if (instrCount < 0) + { + // Backward branches using instruction count must be within the same instruction group. + assert(insNum + 1 >= static_cast(-instrCount)); + } + dstOffs = ig->igOffs + emitFindOffset(ig, (insNum + 1 + instrCount)); + dstAddr = emitOffsetToPtr(dstOffs); + return; + } + dstOffs = jmp->idAddr()->iiaIGlabel->igOffs; + dstAddr = emitOffsetToPtr(dstOffs); +} + +ssize_t emitter::emitOutputInstrJumpDistance(const BYTE* dst, const BYTE* src, const insGroup* ig, instrDescJmp* jmp) +{ + UNATIVE_OFFSET srcOffs = emitCurCodeOffs(src); + const BYTE* srcAddr = emitOffsetToPtr(srcOffs); + + assert(!jmp->idAddr()->iiaIsJitDataOffset()); // not used by riscv64 impl + + UNATIVE_OFFSET dstOffs{}; + const BYTE* dstAddr = nullptr; + emitOutputInstrJumpDistanceHelper(ig, jmp, dstOffs, dstAddr); + + ssize_t distVal = static_cast(dstAddr - srcAddr); + + if (dstOffs > srcOffs) + { + // This is a forward jump + + emitFwdJumps = true; + + // The target offset will be closer by at least 'emitOffsAdj', but only if this + // jump doesn't cross the hot-cold boundary. + if (!emitJumpCrossHotColdBoundary(srcOffs, dstOffs)) + { + distVal -= emitOffsAdj; + dstOffs -= emitOffsAdj; + } + jmp->idjOffs = dstOffs; + if (jmp->idjOffs != dstOffs) + { + IMPL_LIMITATION("Method is too large"); + } + } + return distVal; +} + /***************************************************************************** -* + * * Append the machine code corresponding to the given instruction descriptor * to the code block at '*dp'; the base of the code block is 'bp', and 'ig' * is the instruction group that contains the instruction. Updates '*dp' to @@ -2138,6 +2190,7 @@ AGAIN: size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) { + BYTE* const dst = *dp; BYTE* dstRW = *dp + writeableOffset; BYTE* dstRW2 = dstRW + 4; // addr for updating gc info if needed. const BYTE* const odstRW = dstRW; @@ -2519,7 +2572,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) regNumber reg1 = id->idReg1(); { - ssize_t imm = (ssize_t)id->idAddr()->iiaGetJmpOffset(); + ssize_t imm = emitOutputInstrJumpDistance(dstRW, dst, ig, jmp); imm -= 4; assert((imm & 0x3) == 0); @@ -2688,7 +2741,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) break; case INS_OPTS_J_cond: { - ssize_t imm = (ssize_t)id->idAddr()->iiaGetJmpOffset(); // get jmp's offset relative delay-slot. + ssize_t imm = emitOutputInstrJumpDistance(dstRW, dst, ig, static_cast(id)); assert(isValidSimm13(imm)); assert(!(imm & 1)); @@ -2709,7 +2762,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) case INS_OPTS_J: // jal/j/jalr/bnez/beqz/beq/bne/blt/bge/bltu/bgeu dstRW-relative. { - ssize_t imm = (ssize_t)id->idAddr()->iiaGetJmpOffset(); // get jmp's offset relative delay-slot. + ssize_t imm = emitOutputInstrJumpDistance(dstRW, dst, ig, static_cast(id)); assert((imm & 3) == 0); ins = id->idIns(); @@ -2906,6 +2959,91 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) return sz; } +bool emitter::emitDispBranchInstrType(unsigned opcode2) const +{ + switch (opcode2) + { + case 0: + printf("beq "); + break; + case 1: + printf("bne "); + break; + case 4: + printf("blt "); + break; + case 5: + printf("bge "); + break; + case 6: + printf("bltu"); + break; + case 7: + printf("bgeu"); + break; + default: + return false; + } + return true; +} + +void emitter::emitDispBranchOffset(const instrDesc* id, const insGroup* ig) const +{ + static const auto signFn = [](int offset) { return offset >= 0 ? "+" : ""; }; + + int instrCount = id->idAddr()->iiaGetInstrCount(); + if (ig == nullptr) + { + printf("pc%s%d instructions", signFn(instrCount), instrCount); + return; + } + unsigned insNum = emitFindInsNum(ig, id); + UNATIVE_OFFSET srcOffs = ig->igOffs + emitFindOffset(ig, insNum + 1); + UNATIVE_OFFSET dstOffs = ig->igOffs + emitFindOffset(ig, insNum + 1 + instrCount); + ssize_t relOffs = static_cast(emitOffsetToPtr(dstOffs) - emitOffsetToPtr(dstOffs)); + printf("pc%s%d (%d instructions)", signFn(relOffs), static_cast(relOffs), instrCount); +} + +void emitter::emitDispBranchLabel(const instrDesc* id) const +{ + if (id->idIsBound()) + { + return emitPrintLabel(id->idAddr()->iiaIGlabel); + } + printf("L_M%03u_", FMT_BB, emitComp->compMethodID, id->idAddr()->iiaBBlabel->bbNum); +} + +bool emitter::emitDispBranch(unsigned opcode2, + const char* register1Name, + const char* register2Name, + const instrDesc* id, + const insGroup* ig) const +{ + if (!emitDispBranchInstrType(opcode2)) + { + return false; + } + printf(" %s, %s, ", register1Name, register2Name); + assert(id != nullptr); + if (id->idAddr()->iiaHasInstrCount()) + { + // Branch is jumping to some non-labeled offset + emitDispBranchOffset(id, ig); + } + else + { + // Branch is jumping to the labeled offset + emitDispBranchLabel(id); + } + printf("\n"); + return true; +} + +void emitter::emitDispIllegalInstruction(code_t instructionCode) +{ + printf("RISCV64 illegal instruction: 0x%08X\n", instructionCode); +} + /*****************************************************************************/ /*****************************************************************************/ @@ -2929,12 +3067,14 @@ static const char* const RegNames[] = // doffs - Flag informing whether the instruction's offset should be displayed. // insOffset - The instruction's offset. // id - The instrDesc of the code if needed. +// ig - The insGroup of the code if needed // // Note: // The length of the instruction's name include aligned space is 15. // -void emitter::emitDispInsName(code_t code, const BYTE* addr, bool doffs, unsigned insOffset, instrDesc* id) +void emitter::emitDispInsName( + code_t code, const BYTE* addr, bool doffs, unsigned insOffset, const instrDesc* id, const insGroup* ig) { const BYTE* insAdr = addr - writeableOffset; @@ -3249,47 +3389,16 @@ void emitter::emitDispInsName(code_t code, const BYTE* addr, bool doffs, unsigne unsigned int opcode2 = (code >> 12) & 0x7; const char* rs1 = RegNames[(code >> 15) & 0x1f]; const char* rs2 = RegNames[(code >> 20) & 0x1f]; - int offset = (((code >> 31) & 0x1) << 12) | (((code >> 7) & 0x1) << 11) | (((code >> 25) & 0x3f) << 5) | - (((code >> 8) & 0xf) << 1); - if (offset & 0x800) + // int offset = (((code >> 31) & 0x1) << 12) | (((code >> 7) & 0x1) << 11) | (((code >> 25) & 0x3f) << 5) | + // (((code >> 8) & 0xf) << 1); + // if (offset & 0x800) + // { + // offset |= 0xfffff000; + // } + if (!emitDispBranch(opcode2, rs1, rs2, id, ig)) { - offset |= 0xfffff000; + emitDispIllegalInstruction(code); } - switch (opcode2) - { - case 0: - printf("beq "); - break; - case 1: - printf("bne "); - break; - case 4: - printf("blt "); - break; - case 5: - printf("bge "); - break; - case 6: - printf("bltu"); - break; - case 7: - printf("bgeu"); - break; - default: - printf("RISCV64 illegal instruction: 0x%08X\n", code); - return; - } - static const int MAX_LEN = 32; - - int len = printf(" %s, %s, %d", rs1, rs2, offset); - if (len <= 0 || len > MAX_LEN) - { - printf("\n"); - return; - } - if (!emitComp->opts.disDiffable) - printf("%*s;; offset=0x%04X", MAX_LEN - len, "", emitCurCodeOffs(insAdr) + offset); - printf("\n"); return; } case 0x03: @@ -3931,7 +4040,7 @@ void emitter::emitDispIns( instrSize = sizeof(code_t); code_t instruction; memcpy(&instruction, instr, instrSize); - emitDispInsName(instruction, instr, doffs, offset, id); + emitDispInsName(instruction, instr, doffs, offset, id, ig); } } diff --git a/src/coreclr/jit/emitriscv64.h b/src/coreclr/jit/emitriscv64.h index 986dbb9..d3b00b2 100644 --- a/src/coreclr/jit/emitriscv64.h +++ b/src/coreclr/jit/emitriscv64.h @@ -60,18 +60,33 @@ bool emitInsIsLoad(instruction ins); bool emitInsIsStore(instruction ins); bool emitInsIsLoadOrStore(instruction ins); -void emitDispInsName(code_t code, const BYTE* addr, bool doffs, unsigned insOffset, instrDesc* id); - +void emitDispInsName( + code_t code, const BYTE* addr, bool doffs, unsigned insOffset, const instrDesc* id, const insGroup* ig); void emitDispInsInstrNum(const instrDesc* id) const; +bool emitDispBranch(unsigned opcode2, + const char* register1Name, + const char* register2Name, + const instrDesc* id, + const insGroup* ig) const; +void emitDispBranchOffset(const instrDesc* id, const insGroup* ig) const; +void emitDispBranchLabel(const instrDesc* id) const; +bool emitDispBranchInstrType(unsigned opcode2) const; +void emitDispIllegalInstruction(code_t instructionCode); emitter::code_t emitInsCode(instruction ins /*, insFormat fmt*/); // Generate code for a load or store operation and handle the case of contained GT_LEA op1 with [base + offset] void emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataReg, GenTreeIndir* indir); -// Emit the 32-bit RISCV64 instruction 'code' into the 'dst' buffer +// Emit the 32-bit RISCV64 instruction 'code' into the 'dst' buffer unsigned emitOutput_Instr(BYTE* dst, code_t code); +ssize_t emitOutputInstrJumpDistance(const BYTE* dst, const BYTE* src, const insGroup* ig, instrDescJmp* jmp); +void emitOutputInstrJumpDistanceHelper(const insGroup* ig, + instrDescJmp* jmp, + UNATIVE_OFFSET& dstOffs, + const BYTE*& dstAddr) const; + // Method to do check if mov is redundant with respect to the last instruction. // If yes, the caller of this method can choose to omit current mov instruction. static bool IsMovInstruction(instruction ins); -- 2.7.4