Sync may31 release/8.0-tizen (#510)

author Timur Mustafin/Advanced System SW Lab /SRR/Staff Engineer/Samsung Electronics <t.mustafin@partner.samsung.com>

Thu, 6 Jun 2024 22:12:52 +0000 (01:12 +0300)

committer GitHub Enterprise <noreply-CODE@samsung.com>

Thu, 6 Jun 2024 22:12:52 +0000 (07:12 +0900)
author Timur Mustafin/Advanced System SW Lab /SRR/Staff Engineer/Samsung Electronics <t.mustafin@partner.samsung.com>
Thu, 6 Jun 2024 22:12:52 +0000 (01:12 +0300)
committer GitHub Enterprise <noreply-CODE@samsung.com>
Thu, 6 Jun 2024 22:12:52 +0000 (07:12 +0900)
diff --git a/src/coreclr/clrdefinitions.cmake b/src/coreclr/clrdefinitions.cmake

index 832bff1..fa0f7bc 100644 (file)
--- a/src/coreclr/clrdefinitions.cmake
+++ b/src/coreclr/clrdefinitions.cmake
@@ -16,6 +16,7 @@ elseif (CLR_CMAKE_TARGET_ARCH_ARM)
    add_definitions(-DFEATURE_EMULATE_SINGLESTEP)
  elseif (CLR_CMAKE_TARGET_ARCH_RISCV64)
    add_definitions(-DFEATURE_EMULATE_SINGLESTEP)
+  add_compile_definitions($<$<NOT:$<BOOL:$<TARGET_PROPERTY:IGNORE_DEFAULT_TARGET_ARCH>>>:FEATURE_MULTIREG_RETURN>)
  endif (CLR_CMAKE_TARGET_ARCH_ARM64)
  
  if (CLR_CMAKE_TARGET_UNIX)
@@ -188,9 +189,9 @@ endif(FEATURE_ENABLE_NO_ADDRESS_SPACE_RANDOMIZATION)
  add_definitions(-DFEATURE_SVR_GC)
  add_definitions(-DFEATURE_SYMDIFF)
  add_compile_definitions(FEATURE_TIERED_COMPILATION)
-if (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64)
+if (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64 OR CLR_CMAKE_TARGET_ARCH_RISCV64)
     add_compile_definitions(FEATURE_ON_STACK_REPLACEMENT)
-endif (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64)
+endif (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64 OR CLR_CMAKE_TARGET_ARCH_RISCV64)
  add_compile_definitions(FEATURE_PGO)
  if (CLR_CMAKE_TARGET_WIN32)
      add_definitions(-DFEATURE_TYPEEQUIVALENCE)
diff --git a/src/coreclr/debug/ee/controller.cpp b/src/coreclr/debug/ee/controller.cpp

index 439a2ce..935130b 100644 (file)
--- a/src/coreclr/debug/ee/controller.cpp
+++ b/src/coreclr/debug/ee/controller.cpp
@@ -6119,6 +6119,8 @@ bool DebuggerStepper::TrapStep(ControllerStackInfo *info, bool in)
          }
      }
      LOG((LF_CORDB,LL_INFO1000,"Ending TrapStep\n"));
+
+    return false;
  }
  
  bool DebuggerStepper::IsAddrWithinFrame(DebuggerJitInfo *dji,
diff --git a/src/coreclr/gcinfo/CMakeLists.txt b/src/coreclr/gcinfo/CMakeLists.txt

index c66a334..8398fa7 100644 (file)
--- a/src/coreclr/gcinfo/CMakeLists.txt
+++ b/src/coreclr/gcinfo/CMakeLists.txt
@@ -83,8 +83,10 @@ if (CLR_CMAKE_TARGET_ARCH_RISCV64)
    create_gcinfo_lib(TARGET gcinfo_unix_riscv64 OS unix ARCH riscv64)
  endif (CLR_CMAKE_TARGET_ARCH_RISCV64)
  
-create_gcinfo_lib(TARGET gcinfo_universal_arm OS universal ARCH arm)
-create_gcinfo_lib(TARGET gcinfo_win_x86 OS win ARCH x86)
+if (NOT CLR_CMAKE_TARGET_ARCH_RISCV64)
+  create_gcinfo_lib(TARGET gcinfo_universal_arm OS universal ARCH arm)
+  create_gcinfo_lib(TARGET gcinfo_win_x86 OS win ARCH x86)
+endif (NOT CLR_CMAKE_TARGET_ARCH_RISCV64)
  
  if (CLR_CMAKE_TARGET_ARCH_I386 AND CLR_CMAKE_TARGET_UNIX)
    create_gcinfo_lib(TARGET gcinfo_unix_x86 OS unix ARCH x86)
diff --git a/src/coreclr/inc/clrconfigvalues.h b/src/coreclr/inc/clrconfigvalues.h

index d897d24..80b1c4e 100644 (file)
--- a/src/coreclr/inc/clrconfigvalues.h
+++ b/src/coreclr/inc/clrconfigvalues.h
@@ -303,12 +303,7 @@ CONFIG_DWORD_INFO(INTERNAL_JitDebuggable, W("JitDebuggable"), 0, "")
  #endif
  RETAIL_CONFIG_DWORD_INFO(INTERNAL_JitEnableNoWayAssert, W("JitEnableNoWayAssert"), INTERNAL_JitEnableNoWayAssert_Default, "")
  
-#if defined(TARGET_RISCV64)
-// TODO-RISCV64-CQ: In RISCV64, currently jitc always generates JitFramed codes.
-RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_JitFramed, W("JitFramed"), 1, "Forces EBP frames")
-#else
  RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_JitFramed, W("JitFramed"), 0, "Forces EBP frames")
-#endif // TARGET_RISCV64
  CONFIG_DWORD_INFO(INTERNAL_JitThrowOnAssertionFailure, W("JitThrowOnAssertionFailure"), 0, "Throw managed exception on assertion failures during JIT instead of failfast")
  CONFIG_DWORD_INFO(INTERNAL_JitGCStress, W("JitGCStress"), 0, "GC stress mode for jit")
  CONFIG_DWORD_INFO(INTERNAL_JitHeartbeat, W("JitHeartbeat"), 0, "")
@@ -568,11 +563,11 @@ RETAIL_CONFIG_DWORD_INFO(INTERNAL_HillClimbing_GainExponent,
  #endif // _DEBUG
  RETAIL_CONFIG_DWORD_INFO(EXTERNAL_TieredCompilation, W("TieredCompilation"), 1, "Enables tiered compilation")
  RETAIL_CONFIG_DWORD_INFO(EXTERNAL_TC_QuickJit, W("TC_QuickJit"), 1, "For methods that would be jitted, enable using quick JIT when appropriate.")
-#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)
+#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
  RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_TC_QuickJitForLoops, W("TC_QuickJitForLoops"), 1, "When quick JIT is enabled, quick JIT may also be used for methods that contain loops.")
-#else // !(defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64))
+#else // !(defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64))
  RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_TC_QuickJitForLoops, W("TC_QuickJitForLoops"), 1, "When quick JIT is enabled, quick JIT may also be used for methods that contain loops.")
-#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)
+#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
  RETAIL_CONFIG_DWORD_INFO(EXTERNAL_TC_AggressiveTiering, W("TC_AggressiveTiering"), 0, "Transition through tiers aggressively.")
  RETAIL_CONFIG_DWORD_INFO(EXTERNAL_TC_CallCountThreshold, W("TC_CallCountThreshold"), TC_CallCountThreshold, "Number of times a method must be called in tier 0 after which it is promoted to the next tier.")
  RETAIL_CONFIG_DWORD_INFO(EXTERNAL_TC_CallCountingDelayMs, W("TC_CallCountingDelayMs"), TC_CallCountingDelayMs, "A perpetual delay in milliseconds that is applied to call counting in tier 0 and jitting at higher tiers, while there is startup-like activity.")
diff --git a/src/coreclr/inc/stdmacros.h b/src/coreclr/inc/stdmacros.h

index 7e4ae79..79f9225 100644 (file)
--- a/src/coreclr/inc/stdmacros.h
+++ b/src/coreclr/inc/stdmacros.h
@@ -159,9 +159,9 @@
      #define DBG_ADDR(ptr)      (DWORD)((UINT_PTR)(ptr))
  #endif // HOST_64BIT
  
-#ifdef TARGET_ARM
+#if defined(HOST_ARM) || defined(HOST_RISCV64)
      #define ALIGN_ACCESS        ((1<<LOG2_PTRSIZE)-1)
-#endif
+#endif // HOST_ARM || HOST_RISCV64
  
  
  #ifndef ALLOC_ALIGN_CONSTANT
diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h

index 5d449cd..ab4b5a2 100644 (file)
--- a/src/coreclr/jit/codegen.h
+++ b/src/coreclr/jit/codegen.h
@@ -257,7 +257,7 @@ protected:
      void genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbered, RegState* regState);
  #endif
      void genEnregisterIncomingStackArgs();
-#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)
+#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
      void genEnregisterOSRArgsAndLocals(regNumber initReg, bool* pInitRegZeroed);
  #else
      void genEnregisterOSRArgsAndLocals();
@@ -337,6 +337,10 @@ protected:
      void genOSRSaveRemainingCalleeSavedRegisters();
  #endif // TARGET_AMD64
  
+#if defined(TARGET_RISCV64)
+    void genStackProbe(ssize_t frameSize, regNumber rOffset, regNumber rLimit, regNumber rPageSize);
+#endif
+
      void genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn);
  
      void genPoisonFrame(regMaskTP bbRegLiveIn);
@@ -442,11 +446,11 @@ protected:
          regMaskTP fiSaveRegs;                // Set of callee-saved registers saved in the funclet prolog (includes RA)
          int fiFunction_CallerSP_to_FP_delta; // Delta between caller SP and the frame pointer in the parent function
                                               // (negative)
-        int fiSP_to_FPRA_save_delta;         // FP/RA register save offset from SP (positive)
+        int fiSP_to_CalleeSaved_delta;       // CalleeSaved register save offset from SP (positive)
+        int fiCalleeSavedPadding;            // CalleeSaved offset padding (positive)
          int fiSP_to_PSP_slot_delta;          // PSP slot offset from SP (positive)
          int fiCallerSP_to_PSP_slot_delta;    // PSP slot offset from Caller SP (negative)
-        int fiFrameType;                     // Funclet frame types are numbered. See genFuncletProlog() for details.
-        int fiSpDelta1;                      // Stack pointer delta 1 (negative)
+        int fiSpDelta;                       // Stack pointer delta (negative)
      };
  
      FuncletFrameInfoDsc genFuncletInfo;
diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp

index 586e651..a9340af 100644 (file)
--- a/src/coreclr/jit/codegencommon.cpp
+++ b/src/coreclr/jit/codegencommon.cpp
@@ -4670,7 +4670,7 @@ void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg,
  //    initReg -- scratch register to use if needed
  //    pInitRegZeroed -- [IN,OUT] if init reg is zero (on entry/exit)
  //
-#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)
+#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
  void CodeGen::genEnregisterOSRArgsAndLocals(regNumber initReg, bool* pInitRegZeroed)
  #else
  void CodeGen::genEnregisterOSRArgsAndLocals()
@@ -4811,7 +4811,7 @@ void CodeGen::genEnregisterOSRArgsAndLocals()
  
          GetEmitter()->emitIns_R_AR(ins_Load(lclTyp), size, varDsc->GetRegNum(), genFramePointerReg(), offset);
  
-#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)
+#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
  
          // Patchpoint offset is from top of Tier0 frame
          //
@@ -4843,7 +4843,7 @@ void CodeGen::genEnregisterOSRArgsAndLocals()
  
          genInstrWithConstant(ins_Load(lclTyp), size, varDsc->GetRegNum(), genFramePointerReg(), offset, initReg);
          *pInitRegZeroed = false;
-#endif
+#endif // TARGET_ARM64 || TARGET_LOONGARCH64 || TARGET_RISCV64
      }
  }
  
@@ -5451,7 +5451,7 @@ void CodeGen::genFnProlog()
          psiBegProlog();
      }
  
-#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)
+#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
      // For arm64 OSR, emit a "phantom prolog" to account for the actions taken
      // in the tier0 frame that impact FP and SP on entry to the OSR method.
      //
@@ -5466,7 +5466,7 @@ void CodeGen::genFnProlog()
          // SP is tier0 method's SP.
          compiler->unwindAllocStack(tier0FrameSize);
      }
-#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)
+#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
  
  #ifdef DEBUG
  
@@ -5789,13 +5789,25 @@ void CodeGen::genFnProlog()
      {
          initReg = REG_IP1;
      }
+#elif defined(TARGET_RISCV64)
+    // For RISC-V64 OSR root frames, we may need a scratch register for large
+    // offset addresses. Use a register that won't be allocated.
+    if (isRoot && compiler->opts.IsOSR())
+    {
+        initReg = REG_SCRATCH; // REG_T0
+    }
  #endif
  
-#ifndef TARGET_LOONGARCH64
+#if !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64)
      // For LoongArch64's OSR root frames, we may need a scratch register for large
      // offset addresses. But this does not conflict with the REG_PINVOKE_FRAME.
+    //
+    // RISC-V64's OSR root frames are similar to LoongArch64's. In this case
+    // REG_SCRATCH also shouldn't conflict with REG_PINVOKE_FRAME, even if
+    // technically they are the same register - REG_T0.
+    //
      noway_assert(!compiler->compMethodRequiresPInvokeFrame() || (initReg != REG_PINVOKE_FRAME));
-#endif
+#endif // !TARGET_LOONGARCH64 && !TARGET_RISCV64
  
  #if defined(TARGET_AMD64)
      // If we are a varargs call, in order to set up the arguments correctly this
@@ -6106,7 +6118,7 @@ void CodeGen::genFnProlog()
          // Otherwise we'll do some of these fetches twice.
          //
          CLANG_FORMAT_COMMENT_ANCHOR;
-#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)
+#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
          genEnregisterOSRArgsAndLocals(initReg, &initRegZeroed);
  #else
          genEnregisterOSRArgsAndLocals();
@@ -6158,7 +6170,7 @@ void CodeGen::genFnProlog()
          assignIncomingRegisterArgs(&floatRegState);
  #else
          assignIncomingRegisterArgs(&intRegState);
-#endif
+#endif // TARGET_ARM64 || TARGET_LOONGARCH64 || TARGET_RISCV64
  
  #endif // TARGET_LOONGARCH64 || TARGET_RISCV64
  
diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp

index b0048c9..2c3014a 100644 (file)
--- a/src/coreclr/jit/codegenlinear.cpp
+++ b/src/coreclr/jit/codegenlinear.cpp
@@ -1256,12 +1256,6 @@ void CodeGen::genUnspillRegIfNeeded(GenTree* tree)
                  }
              }
  
-#if defined(TARGET_LOONGARCH64)
-            if (varTypeIsFloating(spillType) && emitter::isGeneralRegister(tree->GetRegNum()))
-            {
-                unspillType = unspillType == TYP_FLOAT ? TYP_INT : TYP_LONG;
-            }
-#endif
  #elif defined(TARGET_ARM)
  // No normalizing for ARM
  #else
diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp

index e613511..e4f7cc8 100644 (file)
--- a/src/coreclr/jit/codegenriscv64.cpp
+++ b/src/coreclr/jit/codegenriscv64.cpp
@@ -20,7 +20,34 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
  #include "lower.h"
  #include "gcinfo.h"
  #include "gcinfoencoder.h"
+#include "patchpointinfo.h"
  
+//------------------------------------------------------------------------
+// genInstrWithConstant:   we will typically generate one instruction
+//
+//    ins  reg1, reg2, imm
+//
+// However the imm might not fit as a directly encodable immediate,
+// when it doesn't fit we generate extra instruction(s) that sets up
+// the 'regTmp' with the proper immediate value.
+//
+//     mov  regTmp, imm
+//     ins  reg1, reg2, regTmp
+//
+// Arguments:
+//    ins                 - instruction
+//    attr                - operation size and GC attribute
+//    reg1, reg2          - first and second register operands
+//    imm                 - immediate value (third operand when it fits)
+//    tmpReg              - temp register to use when the 'imm' doesn't fit. Can be REG_NA
+//                          if caller knows for certain the constant will fit.
+//    inUnwindRegion      - true if we are in a prolog/epilog region with unwind codes.
+//                          Default: false.
+//
+// Return Value:
+//    returns true if the immediate was small enough to be encoded inside instruction. If not,
+//    returns false meaning the immediate was too large and tmpReg was used and modified.
+//
  bool CodeGen::genInstrWithConstant(instruction ins,
                                     emitAttr    attr,
                                     regNumber   reg1,
@@ -53,6 +80,9 @@ bool CodeGen::genInstrWithConstant(instruction ins,
          case INS_flw:
          case INS_ld:
          case INS_fld:
+        case INS_lbu:
+        case INS_lhu:
+        case INS_lwu:
              break;
  
          default:
@@ -99,10 +129,25 @@ bool CodeGen::genInstrWithConstant(instruction ins,
      return immFitsInIns;
  }
  
+//------------------------------------------------------------------------
+// genStackPointerAdjustment: add a specified constant value to the stack pointer in either the prolog
+// or the epilog. The unwind codes for the generated instructions are produced. An available temporary
+// register is required to be specified, in case the constant is too large to encode in an "add"
+// instruction, such that we need to load the constant
+// into a register first, before using it.
+//
+// Arguments:
+//    spDelta                 - the value to add to SP (can be negative)
+//    tmpReg                  - an available temporary register
+//    pTmpRegIsZero           - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'.
+//                              Otherwise, we don't touch it.
+//    reportUnwindData        - If true, report the change in unwind data. Otherwise, do not report it.
+//
+// Return Value:
+//    None.
  void CodeGen::genStackPointerAdjustment(ssize_t spDelta, regNumber tmpReg, bool* pTmpRegIsZero, bool reportUnwindData)
  {
-    // Even though INS_addi is specified here, the encoder will choose either
-    // an INS_add_d or an INS_addi_d and encode the immediate as a positive value
+    // Even though INS_addi is specified here, the encoder will replace it with INS_add
      //
      bool wasTempRegisterUsedForImm =
          !genInstrWithConstant(INS_addi, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, spDelta, tmpReg, true);
@@ -126,6 +171,26 @@ void CodeGen::genStackPointerAdjustment(ssize_t spDelta, regNumber tmpReg, bool*
      }
  }
  
+//------------------------------------------------------------------------
+// genPrologSaveRegPair: Save a pair of general-purpose or floating-point/SIMD registers in a function or funclet
+// prolog. If possible, we use pre-indexed addressing to adjust SP and store the registers with a single instruction.
+// The caller must ensure that we can use the SD instruction, and that spOffset will be in the legal range for that
+// instruction.
+//
+// Arguments:
+//    reg1                     - First register of pair to save.
+//    reg2                     - Second register of pair to save.
+//    spOffset                 - The offset from SP to store reg1 (must be positive or zero).
+//    spDelta                  - If non-zero, the amount to add to SP before the register saves (must be negative or
+//                               zero).
+//    useSaveNextPair          - True if the last prolog instruction was to save the previous register pair. This
+//                               allows us to emit the "save_next" unwind code.
+//    tmpReg                   - An available temporary register. Needed for the case of large frames.
+//    pTmpRegIsZero            - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'.
+//                               Otherwise, we don't touch it.
+//
+// Return Value:
+//    None.
  void CodeGen::genPrologSaveRegPair(regNumber reg1,
                                     regNumber reg2,
                                     int       spOffset,
@@ -148,7 +213,7 @@ void CodeGen::genPrologSaveRegPair(regNumber reg1,
  
      if (spDelta != 0)
      {
-        // generate addi.d SP,SP,-imm
+        // generate addi SP,SP,-imm
          genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero, /* reportUnwindData */ true);
  
          assert((spDelta + spOffset + 16) <= 0);
@@ -156,13 +221,36 @@ void CodeGen::genPrologSaveRegPair(regNumber reg1,
          assert(spOffset <= 2031); // 2047-16
      }
  
-    GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset);
+    emitter* emit = GetEmitter();
+
+    // sd reg1, #spOffset(sp)
+    emit->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset);
      compiler->unwindSaveReg(reg1, spOffset);
  
-    GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg2, REG_SPBASE, spOffset + 8);
+    // sd reg2, #(spOffset + 8)(sp)
+    emit->emitIns_R_R_I(ins, EA_PTRSIZE, reg2, REG_SPBASE, spOffset + 8);
      compiler->unwindSaveReg(reg2, spOffset + 8);
  }
  
+//------------------------------------------------------------------------
+// genPrologSaveReg: Like genPrologSaveRegPair, but for a single register. Save a single general-purpose or
+// floating-point/SIMD register in a function or funclet prolog. Note that if we wish to change SP (i.e., spDelta != 0),
+// then spOffset must be 8. This is because otherwise we would create an alignment hole above the saved register, not
+// below it, which we currently don't support. This restriction could be loosened if the callers change to handle it
+// (and this function changes to support using pre-indexed SD addressing). The caller must ensure that we can use the
+// SD instruction, and that spOffset will be in the legal range for that instruction.
+//
+// Arguments:
+//    reg1                     - Register to save.
+//    spOffset                 - The offset from SP to store reg1 (must be positive or zero).
+//    spDelta                  - If non-zero, the amount to add to SP before the register saves (must be negative or
+//                               zero).
+//    tmpReg                   - An available temporary register. Needed for the case of large frames.
+//    pTmpRegIsZero            - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'.
+//                               Otherwise, we don't touch it.
+//
+// Return Value:
+//    None.
  void CodeGen::genPrologSaveReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero)
  {
      assert(spOffset >= 0);
@@ -177,14 +265,37 @@ void CodeGen::genPrologSaveReg(regNumber reg1, int spOffset, int spDelta, regNum
  
      if (spDelta != 0)
      {
-        // generate daddiu SP,SP,-imm
+        // generate addi SP,SP,-imm
          genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero, /* reportUnwindData */ true);
      }
  
-    GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset);
+    emitter* emit = GetEmitter();
+
+    // sd reg1, #spOffset(sp)
+    emit->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset);
      compiler->unwindSaveReg(reg1, spOffset);
  }
  
+//------------------------------------------------------------------------
+// genEpilogRestoreRegPair: This is the opposite of genPrologSaveRegPair(), run in the epilog instead of the prolog.
+// The stack pointer adjustment, if requested, is done after the register restore, using post-index addressing.
+// The caller must ensure that we can use the LD instruction, and that spOffset will be in the legal range for that
+// instruction.
+//
+// Arguments:
+//    reg1                     - First register of pair to restore.
+//    reg2                     - Second register of pair to restore.
+//    spOffset                 - The offset from SP to load reg1 (must be positive or zero).
+//    spDelta                  - If non-zero, the amount to add to SP after the register restores (must be positive or
+//                               zero).
+//    useSaveNextPair          - True if the last prolog instruction was to save the previous register pair. This
+//                               allows us to emit the "save_next" unwind code.
+//    tmpReg                   - An available temporary register. Needed for the case of large frames.
+//    pTmpRegIsZero            - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'.
+//                               Otherwise, we don't touch it.
+//
+// Return Value:
+//    None.
  void CodeGen::genEpilogRestoreRegPair(regNumber reg1,
                                        regNumber reg2,
                                        int       spOffset,
@@ -205,29 +316,49 @@ void CodeGen::genEpilogRestoreRegPair(regNumber reg1,
          ins = INS_fld;
      }
  
+    emitter* emit = GetEmitter();
+
      if (spDelta != 0)
      {
          assert(!useSaveNextPair);
  
-        GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg2, REG_SPBASE, spOffset + 8);
+        // ld reg2, #(spOffset + 8)(SP)
+        emit->emitIns_R_R_I(ins, EA_PTRSIZE, reg2, REG_SPBASE, spOffset + 8);
          compiler->unwindSaveReg(reg2, spOffset + 8);
  
-        GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset);
+        // ld reg1, #spOffset(SP)
+        emit->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset);
          compiler->unwindSaveReg(reg1, spOffset);
  
-        // generate daddiu SP,SP,imm
+        // generate addi SP,SP,imm
          genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero, /* reportUnwindData */ true);
      }
      else
      {
-        GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg2, REG_SPBASE, spOffset + 8);
+        // ld reg2, #(spOffset + 8)(SP)
+        emit->emitIns_R_R_I(ins, EA_PTRSIZE, reg2, REG_SPBASE, spOffset + 8);
          compiler->unwindSaveReg(reg2, spOffset + 8);
  
-        GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset);
+        // ld reg1, #spOffset(SP)
+        emit->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset);
          compiler->unwindSaveReg(reg1, spOffset);
      }
  }
  
+//------------------------------------------------------------------------
+// genEpilogRestoreReg: The opposite of genPrologSaveReg(), run in the epilog instead of the prolog.
+//
+// Arguments:
+//    reg1                     - Register to restore.
+//    spOffset                 - The offset from SP to restore reg1 (must be positive or zero).
+//    spDelta                  - If non-zero, the amount to add to SP after the register restores (must be positive or
+//                               zero).
+//    tmpReg                   - An available temporary register. Needed for the case of large frames.
+//    pTmpRegIsZero            - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'.
+//                               Otherwise, we don't touch it.
+//
+// Return Value:
+//    None.
  void CodeGen::genEpilogRestoreReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero)
  {
      assert(spOffset >= 0);
@@ -240,22 +371,38 @@ void CodeGen::genEpilogRestoreReg(regNumber reg1, int spOffset, int spDelta, reg
          ins = INS_fld;
      }
  
+    emitter* emit = GetEmitter();
+
      if (spDelta != 0)
      {
-        // ld reg1, offset(SP)
-        GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset);
+        // ld reg1, #spOffset(SP)
+        emit->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset);
          compiler->unwindSaveReg(reg1, spOffset);
  
-        // generate add SP,SP,imm
+        // generate addi SP,SP,imm
          genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero, /* reportUnwindData */ true);
      }
      else
      {
-        GetEmitter()->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset);
+        // ld reg1, #spOffset(SP)
+        emit->emitIns_R_R_I(ins, EA_PTRSIZE, reg1, REG_SPBASE, spOffset);
          compiler->unwindSaveReg(reg1, spOffset);
      }
  }
  
+//------------------------------------------------------------------------
+// genBuildRegPairsStack: Build a stack of register pairs for prolog/epilog save/restore for the given mask.
+// The first register pair will contain the lowest register. Register pairs will combine neighbor
+// registers in pairs. If it can't be done (for example if we have a hole or this is the last reg in a mask with
+// odd number of regs) then the second element of that RegPair will be REG_NA.
+//
+// Arguments:
+//   regsMask - a mask of registers for prolog/epilog generation;
+//   regStack - a regStack instance to build the stack in, used to save temp copyings.
+//
+// Return value:
+//   no return value; the regStack argument is modified.
+//
  // static
  void CodeGen::genBuildRegPairsStack(regMaskTP regsMask, ArrayStack<RegPair>* regStack)
  {
@@ -310,6 +457,19 @@ void CodeGen::genBuildRegPairsStack(regMaskTP regsMask, ArrayStack<RegPair>* reg
      genSetUseSaveNextPairs(regStack);
  }
  
+//------------------------------------------------------------------------
+// genSetUseSaveNextPairs: Set useSaveNextPair for each RegPair on the stack whose unwind info can be encoded as
+// save_next code.
+//
+// Arguments:
+//   regStack - a regStack instance to set useSaveNextPair.
+//
+// Notes:
+// We can use save_next for RegPair(N, N+1) only when we have sequence like (N-2, N-1), (N, N+1).
+// In this case in the prolog save_next for (N, N+1) refers to save_pair(N-2, N-1);
+// in the epilog the unwinder will search for the first save_pair (N-2, N-1)
+// and then go back to the first save_next (N, N+1) to restore it first.
+//
  // static
  void CodeGen::genSetUseSaveNextPairs(ArrayStack<RegPair>* regStack)
  {
@@ -338,6 +498,18 @@ void CodeGen::genSetUseSaveNextPairs(ArrayStack<RegPair>* regStack)
      }
  }
  
+//------------------------------------------------------------------------
+// genGetSlotSizeForRegsInMask: Get the stack slot size appropriate for the register type from the mask.
+//
+// Arguments:
+//   regsMask - a mask of registers for prolog/epilog generation.
+//
+// Return value:
+//   stack slot size in bytes.
+//
+// Note: Because int and float register type sizes match we can call this function with a mask that includes both.
+//
+// static
  int CodeGen::genGetSlotSizeForRegsInMask(regMaskTP regsMask)
  {
      assert((regsMask & (RBM_CALLEE_SAVED | RBM_FP | RBM_RA)) == regsMask); // Do not expect anything else.
@@ -346,6 +518,14 @@ int CodeGen::genGetSlotSizeForRegsInMask(regMaskTP regsMask)
      return REGSIZE_BYTES;
  }
  
+//------------------------------------------------------------------------
+// genSaveCalleeSavedRegisterGroup: Saves the group of registers described by the mask.
+//
+// Arguments:
+//   regsMask             - a mask of registers for prolog generation;
+//   spDelta              - if non-zero, the amount to add to SP before the first register save (or together with it);
+//   spOffset             - the offset from SP that is the beginning of the callee-saved register area;
+//
  void CodeGen::genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset)
  {
      const int slotSize = genGetSlotSizeForRegsInMask(regsMask);
@@ -353,21 +533,23 @@ void CodeGen::genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, i
      ArrayStack<RegPair> regStack(compiler->getAllocator(CMK_Codegen));
      genBuildRegPairsStack(regsMask, &regStack);
  
+    regNumber tempReg = rsGetRsvdReg();
+
      for (int i = 0; i < regStack.Height(); ++i)
      {
          RegPair regPair = regStack.Bottom(i);
          if (regPair.reg2 != REG_NA)
          {
              // We can use two SD instructions.
-            genPrologSaveRegPair(regPair.reg1, regPair.reg2, spOffset, spDelta, regPair.useSaveNextPair, rsGetRsvdReg(),
+            genPrologSaveRegPair(regPair.reg1, regPair.reg2, spOffset, spDelta, regPair.useSaveNextPair, tempReg,
                                   nullptr);
  
-            spOffset += 2 * slotSize;
+            spOffset += slotSize << 1;
          }
          else
          {
              // No register pair; we use a SD instruction.
-            genPrologSaveReg(regPair.reg1, spOffset, spDelta, rsGetRsvdReg(), nullptr);
+            genPrologSaveReg(regPair.reg1, spOffset, spDelta, tempReg, nullptr);
              spOffset += slotSize;
          }
  
@@ -375,6 +557,37 @@ void CodeGen::genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, i
      }
  }
  
+//------------------------------------------------------------------------
+// genSaveCalleeSavedRegistersHelp: Save the callee-saved registers in 'regsToSaveMask' to the stack frame
+// in the function or funclet prolog. Registers are saved in register number order from low addresses
+// to high addresses. This means that integer registers are saved at lower addresses than floating-point/SIMD
+// registers.
+//
+// If establishing frame pointer chaining, it must be done after saving the callee-saved registers.
+//
+// We can only use the instructions that are allowed by the unwind codes. The caller ensures that
+// there is enough space on the frame to store these registers, and that the store instructions
+// we need to use (SD) are encodable with the stack-pointer immediate offsets we need to use.
+//
+// The caller can tell us to fold in a stack pointer adjustment, which we will do with the first instruction.
+// Note that the stack pointer adjustment must be by a multiple of 16 to preserve the invariant that the
+// stack pointer is always 16 byte aligned. If we are saving an odd number of callee-saved
+// registers, though, we will have an empty alignment slot somewhere. It turns out we will put
+// it below (at a lower address) the callee-saved registers, as that is currently how we
+// do frame layout. This means that the first stack offset will be 8 and the stack pointer
+// adjustment must be done by an ADDI (or ADD), and not folded in to a pre-indexed store.
+//
+// Arguments:
+//    regsToSaveMask          - The mask of callee-saved registers to save. If empty, this function does nothing.
+//    lowestCalleeSavedOffset - The offset from SP that is the beginning of the callee-saved register area. Note that
+//                              if non-zero spDelta, then this is the offset of the first save *after* that
+//                              SP adjustment.
+//    spDelta                 - If non-zero, the amount to add to SP before the register saves (must be negative or
+//                              zero).
+//
+// Notes:
+//    The save set can not contain FP/RA in which case FP/RA is saved along with the other callee-saved registers.
+//
  void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowestCalleeSavedOffset, int spDelta)
  {
      assert(spDelta <= 0);
@@ -386,17 +599,17 @@ void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowe
          {
              // Currently this is the case for varargs only
              // whose size is MAX_REG_ARG * REGSIZE_BYTES = 64 bytes.
+            // addi sp, sp, #spDelta
              genStackPointerAdjustment(spDelta, rsGetRsvdReg(), nullptr, /* reportUnwindData */ true);
          }
          return;
      }
  
-    assert((spDelta % 16) == 0);
+    assert((spDelta % STACK_ALIGN) == 0);
  
      assert(regsToSaveCount <= genCountBits(RBM_CALLEE_SAVED));
  
      // Save integer registers at higher addresses than floating-point registers.
-
      regMaskTP maskSaveRegsFloat = regsToSaveMask & RBM_ALLFLOAT;
      regMaskTP maskSaveRegsInt   = regsToSaveMask & ~maskSaveRegsFloat;
  
@@ -414,6 +627,14 @@ void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowe
      }
  }
  
+//------------------------------------------------------------------------
+// genRestoreCalleeSavedRegisterGroup: Restores the group of registers described by the mask.
+//
+// Arguments:
+//   regsMask             - a mask of registers for epilog generation;
+//   spDelta              - if non-zero, the amount to add to SP after the last register restore (or together with it);
+//   spOffset             - the offset from SP that is the beginning of the callee-saved register area;
+//
  void CodeGen::genRestoreCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset)
  {
      const int slotSize = genGetSlotSizeForRegsInMask(regsMask);
@@ -421,6 +642,8 @@ void CodeGen::genRestoreCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta
      ArrayStack<RegPair> regStack(compiler->getAllocator(CMK_Codegen));
      genBuildRegPairsStack(regsMask, &regStack);
  
+    regNumber tempReg = rsGetRsvdReg();
+
      int stackDelta = 0;
      for (int i = 0; i < regStack.Height(); ++i)
      {
@@ -436,19 +659,47 @@ void CodeGen::genRestoreCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta
          RegPair regPair = regStack.Top(i);
          if (regPair.reg2 != REG_NA)
          {
-            spOffset -= 2 * slotSize;
+            spOffset -= slotSize << 1;
  
-            genEpilogRestoreRegPair(regPair.reg1, regPair.reg2, spOffset, stackDelta, regPair.useSaveNextPair,
-                                    rsGetRsvdReg(), nullptr);
+            genEpilogRestoreRegPair(regPair.reg1, regPair.reg2, spOffset, stackDelta, regPair.useSaveNextPair, tempReg,
+                                    nullptr);
          }
          else
          {
              spOffset -= slotSize;
-            genEpilogRestoreReg(regPair.reg1, spOffset, stackDelta, rsGetRsvdReg(), nullptr);
+            genEpilogRestoreReg(regPair.reg1, spOffset, stackDelta, tempReg, nullptr);
          }
      }
  }
  
+//------------------------------------------------------------------------
+// genRestoreCalleeSavedRegistersHelp: Restore the callee-saved registers in 'regsToRestoreMask' from the stack frame
+// in the function or funclet epilog. This exactly reverses the actions of genSaveCalleeSavedRegistersHelp().
+//
+// Arguments:
+//    regsToRestoreMask       - The mask of callee-saved registers to restore. If empty, this function does nothing.
+//    lowestCalleeSavedOffset - The offset from SP that is the beginning of the callee-saved register area.
+//    spDelta                 - If non-zero, the amount to add to SP after the register restores (must be positive or
+//                              zero).
+//
+// Here's an example restore sequence:
+//      ld    s11, #xxx(sp)
+//      ld    s10, #xxx(sp)
+//      ld    s9, #xxx(sp)
+//      ld    s8, #xxx(sp)
+//      ld    s7, #xxx(sp)
+//      ld    s6, #xxx(sp)
+//      ld    s5, #xxx(sp)
+//      ld    s4, #xxx(sp)
+//      ld    s3, #xxx(sp)
+//      ld    s2, #xxx(sp)
+//      ld    s1, #xxx(sp)
+//
+// Note you call the unwind functions specifying the prolog operation that is being un-done. So, for example, when
+// generating a post-indexed load, you call the unwind function for specifying the corresponding preindexed store.
+//
+// Return Value:
+//    None.
  void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, int lowestCalleeSavedOffset, int spDelta)
  {
      assert(spDelta >= 0);
@@ -464,7 +715,7 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in
          return;
      }
  
-    assert((spDelta % 16) == 0);
+    assert((spDelta % STACK_ALIGN) == 0);
  
      // We also can restore FP and RA, even though they are not in RBM_CALLEE_SAVED.
      assert(regsToRestoreCount <= genCountBits(RBM_CALLEE_SAVED | RBM_FP | RBM_RA));
@@ -495,14 +746,102 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in
      }
  }
  
+// clang-format off
+/*****************************************************************************
+ *
+ *  Generates code for an EH funclet prolog.
+ *
+ *  Funclets have the following incoming arguments:
+ *
+ *      catch:          a0 = the exception object that was caught (see GT_CATCH_ARG)
+ *      filter:         a0 = the exception object to filter (see GT_CATCH_ARG), a1 = CallerSP of the containing function
+ *      finally/fault:  none
+ *
+ *  Funclets set the following registers on exit:
+ *
+ *     catch:          a0 = the address at which execution should resume (see BBJ_EHCATCHRET)
+ *     filter:         a0 = non-zero if the handler should handle the exception, zero otherwise (see GT_RETFILT)
+ *     finally/fault:  none
+ *
+ *  The RISC-V64 funclet prolog is the following (Note: #framesz is total funclet frame size,
+ *  including everything; #outsz is outgoing argument space. #framesz must be a multiple of 16):
+ *
+ *  Frame type liking:
+ *     addi sp, sp, -#framesz    ; establish the frame
+ *     sd s1, #outsz(sp)         ; save callee-saved registers, as necessary
+ *     sd s2, #(outsz+8)(sp)
+ *     sd ra, #(outsz+?)(sp)     ; save RA (8 bytes)
+ *     sd fp, #(outsz+?+8)(sp)   ; save FP (8 bytes)
+ *
+ *  The funclet frame layout:
+ *
+ *      |                       |
+ *      |-----------------------|
+ *      |  incoming arguments   |
+ *      +=======================+ <---- Caller's SP
+ *      |     Arguments  Or     | // if needed
+ *      |  Varargs regs space   | // Only for varargs functions; NYI on RV64
+ *      |-----------------------|
+ *      |    MonitorAcquired    | // 8 bytes; for synchronized methods
+ *      |-----------------------|
+ *      |        PSP slot       | // 8 bytes (omitted in NativeAOT ABI)
+ *      |-----------------------|
+ *      ~  alignment padding    ~ // To make the whole frame 16 byte aligned
+ *      |-----------------------|
+ *      |      Saved FP         | // 8 bytes
+ *      |-----------------------|
+ *      |      Saved RA         | // 8 bytes
+ *      |-----------------------|
+ *      |Callee saved registers | // multiple of 8 bytes, not including RA/FP
+ *      |-----------------------|
+ *      |   Outgoing arg space  | // multiple of 8 bytes; if required (i.e., #outsz != 0)
+ *      |-----------------------| <---- Ambient SP
+ *      |       |               |
+ *      ~       | Stack grows   ~
+ *      |       | downward      |
+ *              V
+ *
+ * Note that SP only changes once. That means there will be a maximum of one alignment slot needed.
+ * Also remember, the stack pointer needs to be 16 byte aligned at all times.
+ * The size of the PSP slot plus callee-saved registers space is a maximum of 280 bytes:
+ *
+ *     RA,FP registers
+ *     11 int callee-saved registers s1-s11
+ *     12 float callee-saved registers f8-f9, f18-f27
+ *     8 saved integer argument registers a0-a7, if varargs function support.
+ *     1 PSP slot
+ *     1 alignment slot or monitor acquired slot
+ *     == 35 slots * 8 bytes = 280 bytes.
+ *
+ * The outgoing argument size, however, can be very large, if we call a function that takes a large number of
+ * arguments (note that we currently use the same outgoing argument space size in the funclet as for the main
+ * function, even if the funclet doesn't have any calls, or has a much smaller, or larger, maximum number of
+ * outgoing arguments for any call). In that case, we need to 16-byte align the initial change to SP, before
+ * saving off the callee-saved registers and establishing the PSPsym, so we can use the limited immediate offset
+ * encodings we have available, before doing another 16-byte aligned SP adjustment to create the outgoing argument
+ * space. Both changes to SP might need to add alignment padding.
+ *
+ *  An example epilog sequence:
+ *     addi sp, sp, #outsz       ; if any outgoing argument space
+ *     ld s1, #(xxx-8)(sp)       ; restore callee-saved registers
+ *     ld s2, #xxx(sp)
+ *     ld ra, #(xxx+?-8)(sp)     ; restore RA
+ *     ld fp, #(xxx+?)(sp)       ; restore FP
+ *     addi sp, sp, #framesz
+ *     jalr zero, ra
+ */
  // clang-format on
  
  void CodeGen::genFuncletProlog(BasicBlock* block)
  {
  #ifdef DEBUG
      if (verbose)
+    {
          printf("*************** In genFuncletProlog()\n");
+    }
  #endif
+    // TODO-RISCV64: Implement varargs (NYI_RISCV64)
+    // TODO-RISCV64-CQ: We can use C extension for optimization
  
      assert(block != NULL);
      assert(block->bbFlags & BBF_FUNCLET_BEG);
@@ -513,15 +852,10 @@ void CodeGen::genFuncletProlog(BasicBlock* block)
  
      compiler->unwindBegProlog();
  
-    regMaskTP maskSaveRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT;
-    regMaskTP maskSaveRegsInt   = genFuncletInfo.fiSaveRegs & ~maskSaveRegsFloat;
-
-    // Funclets must always save RA and FP, since when we have funclets we must have an FP frame.
-    assert((maskSaveRegsInt & RBM_RA) != 0);
-    assert((maskSaveRegsInt & RBM_FP) != 0);
+    const bool isFilter  = (block->bbCatchTyp == BBCT_FILTER);
+    const int  frameSize = genFuncletInfo.fiSpDelta;
  
-    bool isFilter  = (block->bbCatchTyp == BBCT_FILTER);
-    int  frameSize = genFuncletInfo.fiSpDelta1;
+    assert(frameSize < 0);
  
      regMaskTP maskArgRegsLiveIn;
      if (isFilter)
@@ -537,62 +871,53 @@ void CodeGen::genFuncletProlog(BasicBlock* block)
          maskArgRegsLiveIn = RBM_A0;
      }
  
-#ifdef DEBUG
-    if (compiler->opts.disAsm)
-    {
-        printf("DEBUG: CodeGen::genFuncletProlog, frameType:%d\n\n", genFuncletInfo.fiFrameType);
-    }
-#endif
+    regMaskTP maskSaveRegs  = genFuncletInfo.fiSaveRegs & RBM_CALLEE_SAVED;
+    int       regsSavedSize = (compiler->compCalleeRegsPushed - 2) << 3;
  
-    int offset = 0;
-    if (genFuncletInfo.fiFrameType == 1)
-    {
-        // fiFrameType constraints:
-        assert(frameSize < 0);
-        assert(frameSize >= -2048);
+    int calleeSavedDelta = genFuncletInfo.fiSP_to_CalleeSaved_delta;
  
-        assert(genFuncletInfo.fiSP_to_FPRA_save_delta < 2040);
-        genStackPointerAdjustment(frameSize, rsGetRsvdReg(), nullptr, /* reportUnwindData */ true);
+    emitter* emit = GetEmitter();
+
+    if (calleeSavedDelta + regsSavedSize + genFuncletInfo.fiCalleeSavedPadding <= 2040)
+    {
+        calleeSavedDelta += genFuncletInfo.fiCalleeSavedPadding;
  
-        GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_FP, REG_SPBASE, genFuncletInfo.fiSP_to_FPRA_save_delta);
-        compiler->unwindSaveReg(REG_FP, genFuncletInfo.fiSP_to_FPRA_save_delta);
+        // addi sp, sp, #frameSize
+        genStackPointerAdjustment(frameSize, REG_SCRATCH, nullptr, /* reportUnwindData */ true);
  
-        GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_RA, REG_SPBASE, genFuncletInfo.fiSP_to_FPRA_save_delta + 8);
-        compiler->unwindSaveReg(REG_RA, genFuncletInfo.fiSP_to_FPRA_save_delta + 8);
+        genSaveCalleeSavedRegistersHelp(maskSaveRegs, calleeSavedDelta, 0);
+        calleeSavedDelta += regsSavedSize;
  
-        maskSaveRegsInt &= ~(RBM_RA | RBM_FP); // We've saved these now
+        // sd ra, #calleeSavedDelta(sp)
+        emit->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_RA, REG_SPBASE, calleeSavedDelta);
+        compiler->unwindSaveReg(REG_RA, calleeSavedDelta);
  
-        genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, genFuncletInfo.fiSP_to_PSP_slot_delta + 8,
-                                        0);
+        // sd fp, #(calleeSavedDelta+8)(sp)
+        emit->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_FP, REG_SPBASE, calleeSavedDelta + 8);
+        compiler->unwindSaveReg(REG_FP, calleeSavedDelta + 8);
      }
-    else if (genFuncletInfo.fiFrameType == 2)
+    else
      {
-        // fiFrameType constraints:
-        assert(frameSize < -2048);
-
-        offset      = -frameSize - genFuncletInfo.fiSP_to_FPRA_save_delta;
-        int spDelta = roundUp((UINT)offset, STACK_ALIGN);
-        offset      = spDelta - offset;
+        assert(frameSize < -2040);
  
-        genStackPointerAdjustment(-spDelta, rsGetRsvdReg(), nullptr, /* reportUnwindData */ true);
+        int spDelta = frameSize + calleeSavedDelta;
  
-        GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_FP, REG_SPBASE, offset);
-        compiler->unwindSaveReg(REG_FP, offset);
+        // addi sp, sp, #spDelta
+        genStackPointerAdjustment(spDelta, REG_SCRATCH, nullptr, /* reportUnwindData */ true);
  
-        GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_RA, REG_SPBASE, offset + 8);
-        compiler->unwindSaveReg(REG_RA, offset + 8);
+        genSaveCalleeSavedRegistersHelp(maskSaveRegs, genFuncletInfo.fiCalleeSavedPadding, 0);
+        regsSavedSize += genFuncletInfo.fiCalleeSavedPadding;
  
-        maskSaveRegsInt &= ~(RBM_RA | RBM_FP); // We've saved these now
+        // sd ra, #regsSavedSize(sp)
+        emit->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_RA, REG_SPBASE, regsSavedSize);
+        compiler->unwindSaveReg(REG_RA, regsSavedSize);
  
-        offset = frameSize + spDelta + genFuncletInfo.fiSP_to_PSP_slot_delta + 8;
-        genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, offset, 0);
+        // sd fp, #(regsSavedSize+8)(sp)
+        emit->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_FP, REG_SPBASE, regsSavedSize + 8);
+        compiler->unwindSaveReg(REG_FP, regsSavedSize + 8);
  
-        genStackPointerAdjustment(frameSize + spDelta, rsGetRsvdReg(), nullptr,
-                                  /* reportUnwindData */ true);
-    }
-    else
-    {
-        unreached();
+        // addi sp, sp, -#calleeSavedDelta
+        genStackPointerAdjustment(-calleeSavedDelta, REG_SCRATCH, nullptr, /* reportUnwindData */ true);
      }
  
      // This is the end of the OS-reported prolog for purposes of unwinding
@@ -638,6 +963,12 @@ void CodeGen::genFuncletProlog(BasicBlock* block)
      }
  }
  
+/*****************************************************************************
+ *
+ *  Generates code for an EH funclet epilog.
+ *
+ *  See the description of frame shapes at genFuncletProlog().
+ */
  void CodeGen::genFuncletEpilog()
  {
  #ifdef DEBUG
@@ -646,93 +977,80 @@ void CodeGen::genFuncletEpilog()
          printf("*************** In genFuncletEpilog()\n");
      }
  #endif
+    // TODO-RISCV64: Implement varargs (NYI_RISCV64)
+    // TODO-RISCV64-CQ: We can use C extension for optimization
  
      ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
  
-    bool unwindStarted = false;
-    int  frameSize     = genFuncletInfo.fiSpDelta1;
+    compiler->unwindBegEpilog();
  
-    if (!unwindStarted)
-    {
-        // We can delay this until we know we'll generate an unwindable instruction, if necessary.
-        compiler->unwindBegEpilog();
-        unwindStarted = true;
-    }
+    const int frameSize = genFuncletInfo.fiSpDelta;
  
-    regMaskTP maskRestoreRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT;
-    regMaskTP maskRestoreRegsInt   = genFuncletInfo.fiSaveRegs & ~maskRestoreRegsFloat;
+    assert(frameSize < 0);
  
-    // Funclets must always save RA and FP, since when we have funclets we must have an FP frame.
-    assert((maskRestoreRegsInt & RBM_RA) != 0);
-    assert((maskRestoreRegsInt & RBM_FP) != 0);
+    regMaskTP maskRestoreRegs = genFuncletInfo.fiSaveRegs & RBM_CALLEE_SAVED;
+    int       regsRestoreSize = (compiler->compCalleeRegsPushed - 2) << 3;
  
-#ifdef DEBUG
-    if (compiler->opts.disAsm)
-    {
-        printf("DEBUG: CodeGen::genFuncletEpilog, frameType:%d\n\n", genFuncletInfo.fiFrameType);
-    }
-#endif
+    int calleeSavedDelta = genFuncletInfo.fiSP_to_CalleeSaved_delta;
  
-    regMaskTP regsToRestoreMask = maskRestoreRegsInt | maskRestoreRegsFloat;
+    emitter*  emit    = GetEmitter();
+    regNumber tempReg = rsGetRsvdReg();
  
-    assert(frameSize < 0);
-    if (genFuncletInfo.fiFrameType == 1)
+    if (calleeSavedDelta + regsRestoreSize + genFuncletInfo.fiCalleeSavedPadding <= 2040)
      {
-        // fiFrameType constraints:
-        assert(frameSize >= -2048);
-        assert(genFuncletInfo.fiSP_to_FPRA_save_delta < 2040);
-
-        regsToRestoreMask &= ~(RBM_RA | RBM_FP); // We restore FP/RA at the end
+        calleeSavedDelta += genFuncletInfo.fiCalleeSavedPadding;
+        genRestoreCalleeSavedRegistersHelp(maskRestoreRegs, calleeSavedDelta, 0);
+        calleeSavedDelta += regsRestoreSize;
  
-        genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, genFuncletInfo.fiSP_to_PSP_slot_delta + 8, 0);
+        // ld ra, #calleeSavedDelta(sp)
+        emit->emitIns_R_R_I(INS_ld, EA_PTRSIZE, REG_RA, REG_SPBASE, calleeSavedDelta);
+        compiler->unwindSaveReg(REG_RA, calleeSavedDelta);
  
-        GetEmitter()->emitIns_R_R_I(INS_ld, EA_PTRSIZE, REG_RA, REG_SPBASE, genFuncletInfo.fiSP_to_FPRA_save_delta + 8);
-        compiler->unwindSaveReg(REG_RA, genFuncletInfo.fiSP_to_FPRA_save_delta + 8);
+        // ld fp, #(calleeSavedDelta+8)(sp)
+        emit->emitIns_R_R_I(INS_ld, EA_PTRSIZE, REG_FP, REG_SPBASE, calleeSavedDelta + 8);
+        compiler->unwindSaveReg(REG_FP, calleeSavedDelta + 8);
  
-        GetEmitter()->emitIns_R_R_I(INS_ld, EA_PTRSIZE, REG_FP, REG_SPBASE, genFuncletInfo.fiSP_to_FPRA_save_delta);
-        compiler->unwindSaveReg(REG_FP, genFuncletInfo.fiSP_to_FPRA_save_delta);
-
-        // generate daddiu SP,SP,imm
-        genStackPointerAdjustment(-frameSize, rsGetRsvdReg(), nullptr, /* reportUnwindData */ true);
+        // addi sp, sp, -#frameSize
+        genStackPointerAdjustment(-frameSize, tempReg, nullptr, /* reportUnwindData */ true);
      }
-    else if (genFuncletInfo.fiFrameType == 2)
+    else
      {
-        // fiFrameType constraints:
-        assert(frameSize < -2048);
-
-        int offset  = -frameSize - genFuncletInfo.fiSP_to_FPRA_save_delta;
-        int spDelta = roundUp((UINT)offset, STACK_ALIGN);
-        offset      = spDelta - offset;
+        assert(frameSize < -2040);
  
-        // first, generate daddiu SP,SP,imm
-        genStackPointerAdjustment(-frameSize - spDelta, rsGetRsvdReg(), nullptr,
-                                  /* reportUnwindData */ true);
+        // addi sp, sp, #calleeSavedDelta
+        genStackPointerAdjustment(calleeSavedDelta, tempReg, nullptr, /* reportUnwindData */ true);
  
-        int offset2 = frameSize + spDelta + genFuncletInfo.fiSP_to_PSP_slot_delta + 8;
-        assert(offset2 < 2040); // can amend.
+        genRestoreCalleeSavedRegistersHelp(maskRestoreRegs, genFuncletInfo.fiCalleeSavedPadding, 0);
+        regsRestoreSize += genFuncletInfo.fiCalleeSavedPadding;
  
-        regsToRestoreMask &= ~(RBM_RA | RBM_FP); // We restore FP/RA at the end
-        genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, offset2, 0);
+        // ld ra, #regsRestoreSize(sp)
+        emit->emitIns_R_R_I(INS_ld, EA_PTRSIZE, REG_RA, REG_SPBASE, regsRestoreSize);
+        compiler->unwindSaveReg(REG_RA, regsRestoreSize);
  
-        GetEmitter()->emitIns_R_R_I(INS_ld, EA_PTRSIZE, REG_RA, REG_SPBASE, offset + 8);
-        compiler->unwindSaveReg(REG_RA, offset + 8);
+        // ld fp, #(regsRestoreSize+8)(sp)
+        emit->emitIns_R_R_I(INS_ld, EA_PTRSIZE, REG_FP, REG_SPBASE, regsRestoreSize + 8);
+        compiler->unwindSaveReg(REG_FP, regsRestoreSize + 8);
  
-        GetEmitter()->emitIns_R_R_I(INS_ld, EA_PTRSIZE, REG_FP, REG_SPBASE, offset);
-        compiler->unwindSaveReg(REG_FP, offset);
-
-        // second, generate daddiu SP,SP,imm for remaine space.
-        genStackPointerAdjustment(spDelta, rsGetRsvdReg(), nullptr, /* reportUnwindData */ true);
-    }
-    else
-    {
-        unreached();
+        // addi sp, sp, -#(frameSize + calleeSavedDelta)
+        genStackPointerAdjustment(-(frameSize + calleeSavedDelta), tempReg, nullptr, /* reportUnwindData */ true);
      }
-    GetEmitter()->emitIns_R_R_I(INS_jalr, emitActualTypeSize(TYP_I_IMPL), REG_R0, REG_RA, 0);
+
+    // jalr zero, ra
+    emit->emitIns_R_R_I(INS_jalr, emitActualTypeSize(TYP_I_IMPL), REG_R0, REG_RA, 0);
      compiler->unwindReturn(REG_RA);
  
      compiler->unwindEndEpilog();
  }
  
+/*****************************************************************************
+ *
+ *  Capture the information used to generate the funclet prologs and epilogs.
+ *  Note that all funclet prologs are identical, and all funclet epilogs are
+ *  identical (per type: filters are identical, and non-filters are identical).
+ *  Thus, we compute the data used for these just once.
+ *
+ *  See genFuncletProlog() for more information about the prolog/epilog sequences.
+ */
  void CodeGen::genCaptureFuncletPrologEpilogInfo()
  {
      if (!compiler->ehAnyFunclets())
@@ -745,87 +1063,87 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo()
      // The frame size and offsets must be finalized
      assert(compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT);
  
-    genFuncletInfo.fiFunction_CallerSP_to_FP_delta = genCallerSPtoFPdelta();
-
      regMaskTP rsMaskSaveRegs = regSet.rsMaskCalleeSaved;
      assert((rsMaskSaveRegs & RBM_RA) != 0);
      assert((rsMaskSaveRegs & RBM_FP) != 0);
  
      unsigned pspSize = (compiler->lvaPSPSym != BAD_VAR_NUM) ? 8 : 0;
  
-    unsigned saveRegsCount = genCountBits(rsMaskSaveRegs);
-    assert((saveRegsCount == compiler->compCalleeRegsPushed) || (saveRegsCount == compiler->compCalleeRegsPushed - 1));
+    // If there is a PSP slot, we have to pad the funclet frame size for OSR.
+    // For more details see CodeGen::genFuncletProlog
+    //
+    unsigned osrPad = 0;
+    if (compiler->opts.IsOSR() && (pspSize != 0))
+    {
+        osrPad = compiler->info.compPatchpointInfo->TotalFrameSize();
  
-    unsigned saveRegsPlusPSPSize =
-        roundUp((UINT)genTotalFrameSize(), STACK_ALIGN) - compiler->compLclFrameSize + pspSize;
+        // osrPad must be a multiple of STACK_ALIGN
+        assert(osrPad % STACK_ALIGN == 0);
+    }
  
-    unsigned saveRegsPlusPSPSizeAligned = roundUp(saveRegsPlusPSPSize, STACK_ALIGN);
+    genFuncletInfo.fiCalleeSavedPadding            = 0;
+    genFuncletInfo.fiFunction_CallerSP_to_FP_delta = genCallerSPtoFPdelta() - osrPad;
  
-    assert(compiler->lvaOutgoingArgSpaceSize % REGSIZE_BYTES == 0);
-    unsigned outgoingArgSpaceAligned = roundUp(compiler->lvaOutgoingArgSpaceSize, STACK_ALIGN);
+    unsigned savedRegsSize = genCountBits(rsMaskSaveRegs);
+    assert(savedRegsSize == compiler->compCalleeRegsPushed);
+    savedRegsSize <<= 3;
  
-    unsigned maxFuncletFrameSizeAligned = saveRegsPlusPSPSizeAligned + outgoingArgSpaceAligned;
-    assert((maxFuncletFrameSizeAligned % STACK_ALIGN) == 0);
+    unsigned saveRegsPlusPSPSize = savedRegsSize + pspSize;
  
-    int spToFpraSaveDelta = compiler->lvaOutgoingArgSpaceSize;
+    assert(compiler->lvaOutgoingArgSpaceSize % REGSIZE_BYTES == 0);
+    unsigned outgoingArgSpaceAligned = roundUp(compiler->lvaOutgoingArgSpaceSize, STACK_ALIGN);
  
-    unsigned funcletFrameSize        = saveRegsPlusPSPSize + compiler->lvaOutgoingArgSpaceSize;
+    unsigned funcletFrameSize        = osrPad + saveRegsPlusPSPSize + compiler->lvaOutgoingArgSpaceSize;
      unsigned funcletFrameSizeAligned = roundUp(funcletFrameSize, STACK_ALIGN);
-    assert(funcletFrameSizeAligned <= maxFuncletFrameSizeAligned);
-
-    unsigned funcletFrameAlignmentPad = funcletFrameSizeAligned - funcletFrameSize;
-    assert((funcletFrameAlignmentPad == 0) || (funcletFrameAlignmentPad == REGSIZE_BYTES));
  
-    if (maxFuncletFrameSizeAligned <= (2048 - 8))
+    int SP_to_CalleeSaved_delta = compiler->lvaOutgoingArgSpaceSize;
+    if ((SP_to_CalleeSaved_delta + savedRegsSize) >= 2040)
      {
-        genFuncletInfo.fiFrameType = 1;
-        saveRegsPlusPSPSize -= 2 * 8; // FP/RA
-    }
-    else
-    {
-        unsigned saveRegsPlusPSPAlignmentPad = saveRegsPlusPSPSizeAligned - saveRegsPlusPSPSize;
-        assert((saveRegsPlusPSPAlignmentPad == 0) || (saveRegsPlusPSPAlignmentPad == REGSIZE_BYTES));
+        int offset              = funcletFrameSizeAligned - SP_to_CalleeSaved_delta;
+        SP_to_CalleeSaved_delta = AlignUp((UINT)offset, STACK_ALIGN);
  
-        genFuncletInfo.fiFrameType = 2;
-        saveRegsPlusPSPSize -= 2 * 8; // FP/RA
+        genFuncletInfo.fiCalleeSavedPadding = SP_to_CalleeSaved_delta - offset;
      }
  
-    int callerSpToPspSlotDelta = -(int)saveRegsPlusPSPSize;
-    genFuncletInfo.fiSpDelta1  = -(int)funcletFrameSizeAligned;
-    int spToPspSlotDelta       = funcletFrameSizeAligned - saveRegsPlusPSPSize;
+    if (compiler->lvaMonAcquired != BAD_VAR_NUM && !compiler->opts.IsOSR())
+    {
+        // We furthermore allocate the "monitor acquired" bool between PSP and
+        // the saved registers because this is part of the EnC header.
+        // Note that OSR methods reuse the monitor bool created by tier 0.
+        osrPad += compiler->lvaLclSize(compiler->lvaMonAcquired);
+    }
  
      /* Now save it for future use */
-    genFuncletInfo.fiSaveRegs              = rsMaskSaveRegs;
-    genFuncletInfo.fiSP_to_FPRA_save_delta = spToFpraSaveDelta;
-
-    genFuncletInfo.fiSP_to_PSP_slot_delta       = spToPspSlotDelta;
-    genFuncletInfo.fiCallerSP_to_PSP_slot_delta = callerSpToPspSlotDelta;
+    genFuncletInfo.fiSpDelta                    = -(int)funcletFrameSizeAligned;
+    genFuncletInfo.fiSaveRegs                   = rsMaskSaveRegs;
+    genFuncletInfo.fiSP_to_CalleeSaved_delta    = SP_to_CalleeSaved_delta;
+    genFuncletInfo.fiSP_to_PSP_slot_delta       = funcletFrameSizeAligned - osrPad - 8;
+    genFuncletInfo.fiCallerSP_to_PSP_slot_delta = -(int)osrPad - 8;
  
  #ifdef DEBUG
      if (verbose)
      {
          printf("\n");
          printf("Funclet prolog / epilog info\n");
-        printf("                        Save regs: ");
+        printf("                 Save regs: ");
          dspRegMask(genFuncletInfo.fiSaveRegs);
          printf("\n");
-        printf("    Function CallerSP-to-FP delta: %d\n", genFuncletInfo.fiFunction_CallerSP_to_FP_delta);
-        printf("  SP to FP/RA save location delta: %d\n", genFuncletInfo.fiSP_to_FPRA_save_delta);
-        printf("                       Frame type: %d\n", genFuncletInfo.fiFrameType);
-        printf("                       SP delta 1: %d\n", genFuncletInfo.fiSpDelta1);
-
-        if (compiler->lvaPSPSym != BAD_VAR_NUM)
+        if (compiler->opts.IsOSR())
          {
-            if (callerSpToPspSlotDelta != compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)) // for
-                                                                                                       // debugging
-            {
-                printf("lvaGetCallerSPRelativeOffset(lvaPSPSym): %d\n",
-                       compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym));
-            }
+            printf("                           OSR Pad: %d\n", osrPad);
          }
+        printf("     Function CallerSP-to-FP delta: %d\n", genFuncletInfo.fiFunction_CallerSP_to_FP_delta);
+        printf("  SP to CalleeSaved location delta: %d\n", genFuncletInfo.fiSP_to_CalleeSaved_delta);
+        printf("                          SP delta: %d\n", genFuncletInfo.fiSpDelta);
      }
+    assert(genFuncletInfo.fiSP_to_CalleeSaved_delta >= 0);
  
-    assert(genFuncletInfo.fiSP_to_FPRA_save_delta >= 0);
+    if (compiler->lvaPSPSym != BAD_VAR_NUM)
+    {
+        assert(genFuncletInfo.fiCallerSP_to_PSP_slot_delta ==
+               compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)); // same offset used in main function and
+                                                                             // funclet!
+    }
  #endif // DEBUG
  }
  
@@ -1008,14 +1326,19 @@ void CodeGen::genSetPSPSym(regNumber initReg, bool* pInitRegZeroed)
  
      noway_assert(isFramePointerUsed()); // We need an explicit frame pointer
  
-    int spToCallerSpDelta = -genCallerSPtoInitialSPdelta();
+    int SPtoCallerSPdelta = -genCallerSPtoInitialSPdelta();
+
+    if (compiler->opts.IsOSR())
+    {
+        SPtoCallerSPdelta += compiler->info.compPatchpointInfo->TotalFrameSize();
+    }
  
      // We will just use the initReg since it is an available register
      // and we are probably done using it anyway...
      regNumber regTmp = initReg;
      *pInitRegZeroed  = false;
  
-    genInstrWithConstant(INS_addi, EA_PTRSIZE, regTmp, REG_SPBASE, spToCallerSpDelta, rsGetRsvdReg(), false);
+    genInstrWithConstant(INS_addi, EA_PTRSIZE, regTmp, REG_SPBASE, SPtoCallerSPdelta, regTmp, false);
      GetEmitter()->emitIns_S_R(INS_sd, EA_PTRSIZE, regTmp, compiler->lvaPSPSym, 0);
  }
  
@@ -1273,7 +1596,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre
              emitAttr size       = emitActualTypeSize(tree);
              double   constValue = tree->AsDblCon()->DconValue();
  
-            // Make sure we use "daddiu reg, zero, 0x00"  only for positive zero (0.0)
+            // Make sure we use "fmv.w.x reg, zero" only for positive zero (0.0)
              // and not for negative zero (-0.0)
              if (FloatingPointUtils::isPositiveZero(constValue))
              {
@@ -1291,12 +1614,10 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre
                  CORINFO_FIELD_HANDLE hnd = emit->emitFltOrDblConst(constValue, size);
  
                  // Load the FP constant.
-                assert(targetReg >= REG_F0);
-
-                instruction ins = size == EA_4BYTE ? INS_flw : INS_fld;
+                assert(emit->isFloatReg(targetReg));
  
                  // Compute the address of the FP constant and load the data.
-                emit->emitIns_R_C(ins, size, targetReg, REG_NA, hnd, 0);
+                emit->emitIns_R_C(size == EA_4BYTE ? INS_flw : INS_fld, size, targetReg, REG_NA, hnd, 0);
              }
          }
          break;
@@ -1686,12 +2007,18 @@ void CodeGen::genLclHeap(GenTree* tree)
      BasicBlock*          endLabel                 = nullptr; // can optimize for riscv64.
      unsigned             stackAdjustment          = 0;
      const target_ssize_t ILLEGAL_LAST_TOUCH_DELTA = (target_ssize_t)-1;
-    target_ssize_t       lastTouchDelta =
-        ILLEGAL_LAST_TOUCH_DELTA; // The number of bytes from SP to the last stack address probed.
+
+    // The number of bytes from SP to the last stack address probed.
+    target_ssize_t lastTouchDelta = ILLEGAL_LAST_TOUCH_DELTA;
  
      noway_assert(isFramePointerUsed()); // localloc requires Frame Pointer to be established since SP changes
      noway_assert(genStackLevel == 0);   // Can't have anything on the stack
  
+    const target_size_t pageSize = compiler->eeGetPageSize();
+
+    // According to RISC-V Privileged ISA page size is 4KiB
+    noway_assert(pageSize == 0x1000);
+
      // compute the amount of memory to allocate to properly STACK_ALIGN.
      size_t amount = 0;
      if (size->IsCnsIntOrI())
@@ -1793,7 +2120,7 @@ void CodeGen::genLclHeap(GenTree* tree)
                  goto ALLOC_DONE;
              }
          }
-        else if (amount < compiler->eeGetPageSize()) // must be < not <=
+        else if (amount < pageSize) // must be < not <=
          {
              // Since the size is less than a page, simply adjust the SP value.
              // The SP might already be in the guard page, so we must touch it BEFORE
@@ -1883,8 +2210,6 @@ void CodeGen::genLclHeap(GenTree* tree)
          //       addi     regCnt, REG_R0, 0
          //
          //  Skip:
-        //       sub      regCnt, SP, regCnt
-        //
          //       lui      regTmp, eeGetPageSize()>>12
          //  Loop:
          //       lw       r0, 0(SP)               // tickle the page - read from the page
@@ -1900,20 +2225,19 @@ void CodeGen::genLclHeap(GenTree* tree)
          if (tempReg == REG_NA)
              tempReg = tree->ExtractTempReg();
  
-        regNumber regTmp = tree->GetSingleTempReg();
+        regNumber rPageSize = tree->GetSingleTempReg();
  
          assert(regCnt != tempReg);
          emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, tempReg, REG_SPBASE, regCnt);
  
-        //// subu  regCnt, SP, regCnt      // regCnt now holds ultimate SP
+        // sub  regCnt, SP, regCnt      // regCnt now holds ultimate SP
          emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, regCnt, REG_SPBASE, regCnt);
  
          // Overflow, set regCnt to lowest possible value
          emit->emitIns_R_R_I(INS_beq, EA_PTRSIZE, tempReg, REG_R0, 2 << 2);
          emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, regCnt, REG_R0, 0);
  
-        assert(compiler->eeGetPageSize() == ((compiler->eeGetPageSize() >> 12) << 12));
-        emit->emitIns_R_I(INS_lui, EA_PTRSIZE, regTmp, compiler->eeGetPageSize() >> 12);
+        emit->emitIns_R_I(INS_lui, EA_PTRSIZE, rPageSize, pageSize >> 12);
  
          // genDefineTempLabel(loop);
  
@@ -1921,14 +2245,14 @@ void CodeGen::genLclHeap(GenTree* tree)
          emit->emitIns_R_R_I(INS_lw, EA_4BYTE, REG_R0, REG_SPBASE, 0);
  
          // decrement SP by eeGetPageSize()
-        emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, tempReg, REG_SPBASE, regTmp);
+        emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, tempReg, REG_SPBASE, rPageSize);
  
-        assert(regTmp != tempReg);
+        assert(rPageSize != tempReg);
  
          ssize_t imm = 3 << 2; // goto done.
          emit->emitIns_R_R_I(INS_bltu, EA_PTRSIZE, tempReg, regCnt, imm);
  
-        emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, regTmp);
+        emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, rPageSize);
  
          imm = -4 << 2;
          // Jump to loop and tickle new stack address
@@ -1952,8 +2276,7 @@ ALLOC_DONE:
          assert((lastTouchDelta == ILLEGAL_LAST_TOUCH_DELTA) || (lastTouchDelta >= 0));
  
          if ((lastTouchDelta == ILLEGAL_LAST_TOUCH_DELTA) ||
-            (stackAdjustment + (unsigned)lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES >
-             compiler->eeGetPageSize()))
+            (stackAdjustment + (unsigned)lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES > pageSize))
          {
              genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)stackAdjustment, tempReg);
          }
@@ -1970,7 +2293,7 @@ ALLOC_DONE:
      else // stackAdjustment == 0
      {
          // Move the final value of SP to targetReg
-        GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, targetReg, REG_SPBASE, 0);
+        emit->emitIns_R_R_I(INS_ori, EA_PTRSIZE, targetReg, REG_SPBASE, 0);
      }
  
  BAILOUT:
@@ -2163,18 +2486,6 @@ void CodeGen::genCodeForDivMod(GenTreeOp* tree)
          }
          else // if (tree->OperIs(GT_UDIV, GT_UMOD))
          {
-            // Only one possible exception
-            //     (AnyVal /  0) => DivideByZeroException
-            //
-            // Note that division by the constant 0 was already checked for above by the
-            // op2->IsIntegralConst(0) check
-
-            if (!divisorOp->IsCnsIntOrI())
-            {
-                // divisorOp is not a constant, so it could be zero
-                genJumpToThrowHlpBlk_la(SCK_DIV_BY_ZERO, INS_beq, divisorReg);
-            }
-
              if (tree->OperIs(GT_UDIV))
              {
                  ins = is4 ? INS_divuw : INS_divu;
@@ -2192,7 +2503,7 @@ void CodeGen::genCodeForDivMod(GenTreeOp* tree)
  // Generate code for InitBlk by performing a loop unroll
  // Preconditions:
  //   a) Both the size and fill byte value are integer constants.
-//   b) The size of the struct to initialize is smaller than INITBLK_UNROLL_LIMIT bytes.
+//   b) The size of the struct to initialize is smaller than getUnrollThreshold() bytes.
  void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* node)
  {
      assert(node->OperIs(GT_STORE_BLK));
@@ -2307,6 +2618,27 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* node)
      }
  }
  
+// Generate code for CpObj nodes which copy structs that have interleaved
+// GC pointers.
+// For this case we'll generate a sequence of loads/stores in the case of struct
+// slots that don't contain GC pointers.  The generated code will look like:
+// ld tempReg, 8(a5)
+// sd tempReg, 8(a6)
+//
+// In the case of a GC-Pointer we'll call the ByRef write barrier helper
+// which happens to use the same registers as the previous call to maintain
+// the same register requirements and register killsets:
+// call CORINFO_HELP_ASSIGN_BYREF
+//
+// So finally an example would look like this:
+// ld tempReg, 8(a5)
+// sd tempReg, 8(a6)
+// call CORINFO_HELP_ASSIGN_BYREF
+// ld tempReg, 8(a5)
+// sd tempReg, 8(a6)
+// call CORINFO_HELP_ASSIGN_BYREF
+// ld tempReg, 8(a5)
+// sd tempReg, 8(a6)
  void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
  {
      GenTree*  dstAddr       = cpObjNode->Addr();
@@ -3502,29 +3834,6 @@ void CodeGen::genCodeForCompare(GenTreeOp* tree)
      }
      else
      {
-        if (op1->isContainedIntOrIImmed())
-        {
-            op1 = tree->gtOp2;
-            op2 = tree->gtOp1;
-            switch (tree->OperGet())
-            {
-                case GT_LT:
-                    tree->SetOper(GT_GT);
-                    break;
-                case GT_LE:
-                    tree->SetOper(GT_GE);
-                    break;
-                case GT_GT:
-                    tree->SetOper(GT_LT);
-                    break;
-                case GT_GE:
-                    tree->SetOper(GT_LE);
-                    break;
-                default:
-                    break;
-            }
-        }
-
          bool      isUnsigned = (tree->gtFlags & GTF_UNSIGNED) != 0;
          regNumber regOp1     = op1->GetRegNum();
  
@@ -3789,7 +4098,7 @@ void CodeGen::genCodeForJumpCompare(GenTreeOpCC* tree)
                      else
                      {
                          imm = static_cast<int32_t>(imm);
-                        emit->emitIns_R_R_I(INS_addiw, EA_8BYTE, tmpRegOp1, regOp1, 0);
+                        emit->emitIns_R_R(INS_sext_w, EA_8BYTE, tmpRegOp1, regOp1);
                      }
                      regOp1 = tmpRegOp1;
                      break;
@@ -3816,7 +4125,7 @@ void CodeGen::genCodeForJumpCompare(GenTreeOpCC* tree)
                  }
                  else
                  {
-                    emit->emitIns_R_R_I(INS_addiw, EA_8BYTE, tmpRegOp1, regOp1, 0);
+                    emit->emitIns_R_R(INS_sext_w, EA_8BYTE, tmpRegOp1, regOp1);
                  }
                  regOp1 = tmpRegOp1;
              }
@@ -3932,8 +4241,9 @@ void CodeGen::genCodeForJumpCompare(GenTreeOpCC* tree)
  int CodeGenInterface::genSPtoFPdelta() const
  {
      assert(isFramePointerUsed());
+    assert(compiler->compCalleeRegsPushed >= 2);
  
-    int delta = compiler->lvaOutgoingArgSpaceSize;
+    int delta = compiler->lvaOutgoingArgSpaceSize + (compiler->compCalleeRegsPushed << 3) - 8;
  
      assert(delta >= 0);
      return delta;
@@ -3987,9 +4297,7 @@ int CodeGenInterface::genCallerSPtoFPdelta() const
  
  int CodeGenInterface::genCallerSPtoInitialSPdelta() const
  {
-    int callerSPtoSPdelta = 0;
-
-    callerSPtoSPdelta -= genTotalFrameSize();
+    int callerSPtoSPdelta = -genTotalFrameSize();
  
      assert(callerSPtoSPdelta <= 0);
      return callerSPtoSPdelta;
@@ -3999,8 +4307,8 @@ int CodeGenInterface::genCallerSPtoInitialSPdelta() const
  // at the end
  static void emitLoadConstAtAddr(emitter* emit, regNumber dstRegister, ssize_t imm)
  {
-    ssize_t high = (imm >> 32) & 0xffffffff;
-    emit->emitIns_R_I(INS_lui, EA_PTRSIZE, dstRegister, (((high + 0x800) >> 12) & 0xfffff));
+    ssize_t high = imm >> 32;
+    emit->emitIns_R_I(INS_lui, EA_PTRSIZE, dstRegister, (high + 0x800) >> 12);
      emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, dstRegister, dstRegister, (high & 0xfff));
  
      ssize_t low = imm & 0xffffffff;
@@ -4827,27 +5135,34 @@ void CodeGen::genSetGSSecurityCookie(regNumber initReg, bool* pInitRegZeroed)
          return;
      }
  
+    if (compiler->opts.IsOSR() && compiler->info.compPatchpointInfo->HasSecurityCookie())
+    {
+        // Security cookie is on original frame and was initialized there.
+        return;
+    }
+
+    emitter* emit = GetEmitter();
+
      if (compiler->gsGlobalSecurityCookieAddr == nullptr)
      {
          noway_assert(compiler->gsGlobalSecurityCookieVal != 0);
          instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, compiler->gsGlobalSecurityCookieVal);
  
-        GetEmitter()->emitIns_S_R(INS_sd, EA_PTRSIZE, initReg, compiler->lvaGSSecurityCookie, 0);
+        emit->emitIns_S_R(INS_sd, EA_PTRSIZE, initReg, compiler->lvaGSSecurityCookie, 0);
      }
      else
      {
          if (compiler->opts.compReloc)
          {
-            GetEmitter()->emitIns_R_AI(INS_jalr, EA_PTR_DSP_RELOC, initReg,
-                                       (ssize_t)compiler->gsGlobalSecurityCookieAddr);
+            emit->emitIns_R_AI(INS_jal, EA_PTR_DSP_RELOC, initReg, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
          }
          else
          {
-            GetEmitter()->emitLoadImmediate(EA_PTRSIZE, initReg, ((size_t)compiler->gsGlobalSecurityCookieAddr));
-            GetEmitter()->emitIns_R_R_I(INS_ld, EA_PTRSIZE, initReg, initReg, 0);
+            emit->emitLoadImmediate(EA_PTRSIZE, initReg, ((size_t)compiler->gsGlobalSecurityCookieAddr));
+            emit->emitIns_R_R_I(INS_ld, EA_PTRSIZE, initReg, initReg, 0);
          }
          regSet.verifyRegUsed(initReg);
-        GetEmitter()->emitIns_S_R(INS_sd, EA_PTRSIZE, initReg, compiler->lvaGSSecurityCookie, 0);
+        emit->emitIns_S_R(INS_sd, EA_PTRSIZE, initReg, compiler->lvaGSSecurityCookie, 0);
      }
  
      *pInitRegZeroed = false;
@@ -4899,7 +5214,7 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg)
              UINT32 high = ((ssize_t)compiler->gsGlobalSecurityCookieAddr) >> 32;
              if (((high + 0x800) >> 12) != 0)
              {
-                GetEmitter()->emitIns_R_I(INS_lui, EA_PTRSIZE, regGSConst, (((high + 0x800) >> 12) & 0xfffff));
+                GetEmitter()->emitIns_R_I(INS_lui, EA_PTRSIZE, regGSConst, ((int32_t)(high + 0x800)) >> 12);
              }
              if ((high & 0xFFF) != 0)
              {
@@ -5434,13 +5749,13 @@ void CodeGen::genRangeCheck(GenTree* oper)
      if (genActualType(length) == TYP_INT)
      {
          regNumber tempReg = oper->ExtractTempReg();
-        GetEmitter()->emitIns_R_R_I(INS_addiw, EA_4BYTE, tempReg, lengthReg, 0); // sign-extend
+        GetEmitter()->emitIns_R_R(INS_sext_w, EA_4BYTE, tempReg, lengthReg);
          lengthReg = tempReg;
      }
      if (genActualType(index) == TYP_INT)
      {
          regNumber tempReg = oper->GetSingleTempReg();
-        GetEmitter()->emitIns_R_R_I(INS_addiw, EA_4BYTE, tempReg, indexReg, 0); // sign-extend
+        GetEmitter()->emitIns_R_R(INS_sext_w, EA_4BYTE, tempReg, indexReg);
          indexReg = tempReg;
      }
  
@@ -5874,7 +6189,7 @@ void CodeGen::genCodeForCpBlkHelper(GenTreeBlk* cpBlkNode)
  //    None
  //
  // Assumption:
-//  The size argument of the CpBlk node is a constant and <= CPBLK_UNROLL_LIMIT bytes.
+//  The size argument of the CpBlk node is a constant and <= getUnrollThreshold() bytes.
  //
  void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode)
  {
@@ -7100,11 +7415,172 @@ void CodeGen::genEstablishFramePointer(int delta, bool reportUnwindData)
  }
  
  //------------------------------------------------------------------------
-// genAllocLclFrame: Probe the stack and allocate the local stack frame: subtract from SP.
+// genStackProbe: Probe the stack without changing it
+//
+// Notes:
+//      This function emits a loop that probes (reads from) each memory page below SP; SP itself is not modified.
+//
+// Arguments:
+//    frameSize - total frame size
+//    rOffset - scratch register holding the address being probed (genAllocLclFrame passes initReg)
+//    rLimit - scratch register holding the lowest address to probe (SP - frameSize)
+//    rPageSize - scratch register holding the page size
+//
+void CodeGen::genStackProbe(ssize_t frameSize, regNumber rOffset, regNumber rLimit, regNumber rPageSize)
+{
+    // make sure frameSize safely fits within 4 bytes (i.e. it round-trips through 'int')
+    noway_assert((ssize_t)(int)frameSize == (ssize_t)frameSize);
+
+    const target_size_t pageSize = compiler->eeGetPageSize();
+
+    // According to the RISC-V Privileged ISA the page size should be equal to 4KiB
+    noway_assert(pageSize == 0x1000);
+
+    emitter* emit = GetEmitter();
+
+    emit->emitLoadImmediate(EA_PTRSIZE, rLimit, -frameSize);
+    regSet.verifyRegUsed(rLimit);
  
+    emit->emitIns_R_R_R(INS_add, EA_PTRSIZE, rLimit, rLimit, REG_SPBASE); // rLimit = SP - frameSize
+
+    emit->emitIns_R_I(INS_lui, EA_PTRSIZE, rPageSize, pageSize >> 12); // rPageSize = pageSize
+    regSet.verifyRegUsed(rPageSize);
+
+    emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, rOffset, REG_SPBASE, rPageSize); // first probe is one page below SP
+
+    // Loop:
+    // tickle the page - read from the address in rOffset (SP is left untouched) - this triggers a page fault when on
+    // the guard page
+    emit->emitIns_R_R_I(INS_lw, EA_4BYTE, REG_R0, rOffset, 0);
+    emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, rOffset, rOffset, rPageSize);
+
+    // each instr is 4 bytes, so the branch offset (-2 << 2) goes back two instructions to the load at Loop
+    // if (rOffset >= rLimit) goto Loop;
+    emit->emitIns_R_R_I(INS_bge, EA_PTRSIZE, rOffset, rLimit, -2 << 2);
+}
+
+//------------------------------------------------------------------------
+// genAllocLclFrame: Probe the stack.
+//
+// Notes:
+//      This only does the probing; allocating the frame is done when callee-saved registers are saved.
+//      This is done before anything has been pushed. The previous frame might have a large outgoing argument
+//      space that has been allocated, but the lowest addresses have not been touched. Our frame setup might
+//      not touch up to the first 504 bytes. This means we could miss a guard page. On Windows, however,
+//      there are always three guard pages, so we will not miss them all. On Linux, there is only one guard
+//      page by default, so we need to be more careful. We do an extra probe if we might not have probed
+//      recently enough. That is, if a call and prolog establishment might lead to missing a page. We do this
+//      on Windows as well just to be consistent, even though it should not be necessary.
+//
+// Arguments:
+//      frameSize         - the size of the stack frame being allocated.
+//      initReg           - register to use as a scratch register.
+//      pInitRegZeroed    - OUT parameter. *pInitRegZeroed is set to 'false' if and only if
+//                          this call sets 'initReg' to a non-zero value. Otherwise, it is unchanged.
+//      maskArgRegsLiveIn - incoming argument registers that are currently live.
+//
+// Return value:
+//      None
+//
  void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn)
  {
-    NYI_RISCV64("genAllocLclFrame-----unimplemented/unused on RISCV64 yet----");
+    assert(compiler->compGeneratingProlog);
+
+    if (frameSize == 0)
+    {
+        return;
+    }
+
+    // The page size is the probing granularity; according to the RISC-V Privileged ISA it should be equal to 4KiB
+    const target_size_t pageSize = compiler->eeGetPageSize();
+
+    assert(!compiler->info.compPublishStubParam || (REG_SECRET_STUB_PARAM != initReg));
+
+    target_size_t lastTouchDelta = 0;
+
+    emitter* emit = GetEmitter();
+
+    // Emit the following sequence to 'tickle' the pages.
+    // Note it is important that stack pointer not change until this is complete since the tickles
+    // could cause a stack overflow, and we need to be able to crawl the stack afterward
+    // (which means the stack pointer needs to be known).
+
+    if (frameSize < pageSize)
+    {
+        // less than one page: no per-page probe needed (the extra probe at the bottom may still be emitted)
+        lastTouchDelta = frameSize;
+    }
+    else if (frameSize < 3 * pageSize)
+    {
+        // between 1 and 3 pages we will probe each page without a loop,
+        // because it is faster that way and doesn't cost us much
+        lastTouchDelta = frameSize;
+
+        for (target_size_t probeOffset = pageSize; probeOffset <= frameSize; probeOffset += pageSize)
+        {
+            emit->emitIns_R_I(INS_lui, EA_PTRSIZE, initReg, probeOffset >> 12);
+            regSet.verifyRegUsed(initReg);
+
+            emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, initReg, REG_SPBASE, initReg);
+            emit->emitIns_R_R_I(INS_lw, EA_4BYTE, REG_R0, initReg, 0);
+
+            lastTouchDelta -= pageSize;
+        }
+
+        assert(pInitRegZeroed != nullptr);
+        *pInitRegZeroed = false; // The initReg does not contain zero
+
+        assert(lastTouchDelta == frameSize % pageSize);
+        compiler->unwindPadding();
+    }
+    else
+    {
+        // 3 pages or more: probe each page of the large stack frame in a loop (see genStackProbe)
+        assert(frameSize >= 3 * pageSize);
+
+        regMaskTP availMask = RBM_ALLINT & (regSet.rsGetModifiedRegsMask() | ~RBM_INT_CALLEE_SAVED);
+        availMask &= ~maskArgRegsLiveIn;   // Remove all of the incoming argument registers
+                                           // as they are currently live
+        availMask &= ~genRegMask(initReg); // Remove the pre-calculated initReg
+
+        noway_assert(availMask != RBM_NONE);
+
+        regMaskTP regMask = genFindLowestBit(availMask);
+        regNumber rLimit  = genRegNumFromMask(regMask);
+
+        availMask &= ~regMask; // Remove rLimit register
+
+        noway_assert(availMask != RBM_NONE);
+
+        regMask             = genFindLowestBit(availMask);
+        regNumber rPageSize = genRegNumFromMask(regMask);
+
+        genStackProbe((ssize_t)frameSize, initReg, rLimit, rPageSize);
+
+        assert(pInitRegZeroed != nullptr);
+        *pInitRegZeroed = false; // The initReg does not contain zero
+
+        lastTouchDelta = frameSize % pageSize;
+        compiler->unwindPadding();
+    }
+
+#if STACK_PROBE_BOUNDARY_THRESHOLD_BYTES != 0
+    // if the last page was too far, we will make an extra probe at the bottom (at SP - frameSize)
+    if (lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES > pageSize)
+    {
+        assert(lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES < pageSize << 1);
+
+        emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, initReg, REG_R0, frameSize); // NOTE(review): confirm frameSize fits addi's immediate
+        regSet.verifyRegUsed(initReg);
+
+        emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, initReg, REG_SPBASE, initReg);
+        emit->emitIns_R_R_I(INS_lw, EA_4BYTE, REG_R0, initReg, 0);
+
+        assert(pInitRegZeroed != nullptr);
+        *pInitRegZeroed = false; // The initReg does not contain zero
+
+        compiler->unwindPadding();
+    }
+#endif
  }
  
  void CodeGen::genJumpToThrowHlpBlk_la(
@@ -7261,12 +7737,82 @@ void CodeGen::instGen_MemoryBarrier(BarrierKind barrierKind)
  
  /*-----------------------------------------------------------------------------
   *
- *  Push/Pop any callee-saved registers we have used
+ * Push/Pop any callee-saved registers we have used,
+ * For most frames, the generated prolog looks like:
+ *      addi sp, sp, -#framesz      ; establish the frame
+ *
+ *      ; save float regs
+ *      fsd f8, #offset(sp)
+ *      fsd f9, #(offset+8)(sp)
+ *      fsd f18, #(offset+16)(sp)
+ *      ; ...
+ *      fsd f27, #(offset+8*11)(sp)
+ *
+ *      ; save int regs
+ *      sd s1, #offset2(sp)
+ *      sd s2, #(offset2+8)(sp)
+ *      ; ...
+ *      sd s11, #(offset2+8*10)(sp)
+ *
+ *      ; save ra, fp
+ *      sd ra, #offset3(sp)         ; save RA (8 bytes)
+ *      sd fp, #(offset3+8)(sp)     ; save FP (8 bytes)
+ *
+ * Notes:
+ * 1. FP and RA are always saved; they are stored last (RA, then FP), above the other callee-saved registers.
+ * 2. General-purpose registers are 8 bytes, floating-point registers are 8 bytes.
+ * 3. For frames with varargs, not implemented completely and not tested !
+ * 4. We allocate the frame here; no further changes to SP are allowed (except in the body, for localloc).
+ *
+ * For functions with GS and localloc, we change the frame so the frame pointer and RA are saved at the top
+ * of the frame, just under the varargs registers (if any). Note that the funclet frames must follow the same
+ * rule, and both main frame and funclet frames (if any) must put PSPSym in the same offset from Caller-SP.
+ * Since this frame type is relatively rare, we force using it via stress modes, for additional coverage.
+ *
+ * The frames look like the following (simplified to only include components that matter for establishing the
+ * frames). See also Compiler::lvaAssignFrameOffsets().
+ *
+ * The RISC-V frame layout looks like:
+ *
+ *      |                       |
+ *      |-----------------------|
+ *      |  incoming arguments   |
+ *      +=======================+ <---- Caller's SP
+ *      |     Arguments  Or     | // if needed
+ *      |  Varargs regs space   | // Only for varargs functions; NYI on RV64
+ *      |-----------------------|
+ *      |    MonitorAcquired    | // 8 bytes; for synchronized methods
+ *      |-----------------------|
+ *      |        PSP slot       | // 8 bytes (omitted in NativeAOT ABI)
+ *      |-----------------------|
+ *      | locals, temps, etc.   |
+ *      |-----------------------|
+ *      |  possible GS cookie   |
+ *      |-----------------------|
+ *      |      Saved FP         | // 8 bytes
+ *      |-----------------------|
+ *      |      Saved RA         | // 8 bytes
+ *      |-----------------------|
+ *      |Callee saved registers | // not including FP/RA; multiple of 8 bytes
+ *      |-----------------------|
+ *      |   Outgoing arg space  | // multiple of 8 bytes; if required (i.e., #outsz != 0)
+ *      |-----------------------| <---- Ambient SP
+ *      |       |               |
+ *      ~       | Stack grows   ~
+ *      |       | downward      |
+ *              V
+ *
   */
  void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed)
  {
      assert(compiler->compGeneratingProlog);
  
+    // The 'initReg' could have been calculated as one of the callee-saved registers (let's say T0, T1 and T2 are in
+    // use, so the next possible register is S1, which should be callee-save register). This is fine, as long as we
+    // save callee-saved registers before using 'initReg' for the first time. Instead, we can use REG_SCRATCH
+    // beforehand. We don't care if REG_SCRATCH will be overwritten, so we'll skip 'RegZeroed check'.
+    //
+    // Unlike on x86/x64, we can also push float registers to stack
      regMaskTP rsPushRegs = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;
  
  #if ETW_EBP_FRAMED
@@ -7276,11 +7822,8 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe
      }
  #endif
  
-    // On RISCV64 we push the FP (frame-pointer) here along with all other callee saved registers
-    if (isFramePointerUsed())
-    {
-        rsPushRegs |= RBM_FPBASE;
-    }
+    // On RV64 we always use the FP (frame-pointer)
+    assert(isFramePointerUsed());
  
      //
      // It may be possible to skip pushing/popping ra for leaf methods. However, such optimization would require
@@ -7302,29 +7845,25 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe
      // is not worth it.
      //
  
-    rsPushRegs |= RBM_RA; // We must save the return address (in the RA register).
-    regSet.rsMaskCalleeSaved    = rsPushRegs;
-    regMaskTP maskSaveRegsFloat = rsPushRegs & RBM_ALLFLOAT;
-    regMaskTP maskSaveRegsInt   = rsPushRegs & ~maskSaveRegsFloat;
+    // we will push callee-saved registers along with fp and ra registers to stack
+    regMaskTP rsPushRegsMask = rsPushRegs | RBM_FP | RBM_RA;
+    regSet.rsMaskCalleeSaved = rsPushRegsMask;
  
  #ifdef DEBUG
-    if (compiler->compCalleeRegsPushed != genCountBits(rsPushRegs))
+    if (compiler->compCalleeRegsPushed != genCountBits(rsPushRegsMask))
      {
          printf("Error: unexpected number of callee-saved registers to push. Expected: %d. Got: %d ",
-               compiler->compCalleeRegsPushed, genCountBits(rsPushRegs));
-        dspRegMask(rsPushRegs);
+               compiler->compCalleeRegsPushed, genCountBits(rsPushRegsMask));
+        dspRegMask(rsPushRegsMask);
          printf("\n");
-        assert(compiler->compCalleeRegsPushed == genCountBits(rsPushRegs));
+        assert(compiler->compCalleeRegsPushed == genCountBits(rsPushRegsMask));
      }
-#endif // DEBUG
-
-    int totalFrameSize = genTotalFrameSize();
  
-    int offset; // This will be the starting place for saving the callee-saved registers, in increasing order.
-
-#ifdef DEBUG
      if (verbose)
      {
+        regMaskTP maskSaveRegsFloat = rsPushRegs & RBM_FLT_CALLEE_SAVED;
+        regMaskTP maskSaveRegsInt   = rsPushRegs & RBM_INT_CALLEE_SAVED;
+
          printf("Save float regs: ");
          dspRegMask(maskSaveRegsFloat);
          printf("\n");
@@ -7342,96 +7881,72 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe
      // first save instruction as a "predecrement" amount, if possible.
      int calleeSaveSPDelta = 0;
  
-    // By default, we'll establish the frame pointer chain. (Note that currently frames without FP are NYI.)
-    bool establishFramePointer = true;
-
-    // If we do establish the frame pointer, what is the amount we add to SP to do so?
-    unsigned offsetSpToSavedFp = 0;
-
-    if (isFramePointerUsed())
-    {
-        // We need to save both FP and RA.
-
-        assert((maskSaveRegsInt & RBM_FP) != 0);
-        assert((maskSaveRegsInt & RBM_RA) != 0);
-
-        // If we need to generate a GS cookie, we need to make sure the saved frame pointer and return address
-        // (FP and RA) are protected from buffer overrun by the GS cookie. If FP/RA are at the lowest addresses,
-        // then they are safe, since they are lower than any unsafe buffers. And the GS cookie we add will
-        // protect our caller's frame. If we have a localloc, however, that is dynamically placed lower than our
-        // saved FP/RA. In that case, we save FP/RA along with the rest of the callee-saved registers, above
-        // the GS cookie.
-        //
-        // After the frame is allocated, the frame pointer is established, pointing at the saved frame pointer to
-        // create a frame pointer chain.
-        //
+    // If we need to generate a GS cookie, we need to make sure the saved frame pointer and return address
+    // (FP and RA) are protected from buffer overrun by the GS cookie. If FP/RA are at the lowest addresses,
+    // then they are safe, since they are lower than any unsafe buffers. And the GS cookie we add will
+    // protect our caller's frame. If we have a localloc, however, that is dynamically placed lower than our
+    // saved FP/RA. In that case, we save FP/RA along with the rest of the callee-saved registers, above
+    // the GS cookie.
+    //
+    // After the frame is allocated, the frame pointer is established, pointing at the saved frame pointer to
+    // create a frame pointer chain.
+    //
  
-        if (totalFrameSize < 2048)
-        {
-            GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, -totalFrameSize);
-            compiler->unwindAllocStack(totalFrameSize);
+    // This will be the starting place for saving the callee-saved registers, in increasing order.
+    int offset = compiler->lvaOutgoingArgSpaceSize;
  
-            // Case #1.
-            //
-            // Generate:
-            //      addi sp, sp, -framesz
-            //      sd fp, outsz(sp)
-            //      sd ra, outsz+8(sp)
-            //
-            // The (totalFrameSize <= 2047) condition ensures the offsets of sd/ld.
-            //
-            // After saving callee-saved registers, we establish the frame pointer with:
-            //      daddiu fp, sp, offset-fp
-            // We do this *after* saving callee-saved registers, so the prolog/epilog unwind codes mostly match.
+    int totalFrameSize = genTotalFrameSize();
  
-            JITDUMP("Frame type 1. #outsz=%d; #framesz=%d; LclFrameSize=%d\n",
-                    unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compLclFrameSize);
+    emitter* emit = GetEmitter();
  
-            frameType = 1;
+    // ensure offset of sd/ld
+    if (totalFrameSize <= 2040)
+    {
+        frameType = 1;
  
-            offsetSpToSavedFp = compiler->lvaOutgoingArgSpaceSize;
+        emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, -totalFrameSize);
+        compiler->unwindAllocStack(totalFrameSize);
  
-            GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_FP, REG_SPBASE, offsetSpToSavedFp);
-            compiler->unwindSaveReg(REG_FP, offsetSpToSavedFp);
+        JITDUMP("Frame type 1. #outsz=%d; #framesz=%d; LclFrameSize=%d\n", unsigned(compiler->lvaOutgoingArgSpaceSize),
+                totalFrameSize, compiler->compLclFrameSize);
+    }
+    else
+    {
+        frameType = 2;
+        // we have to adjust stack pointer; probably using add instead of addi
  
-            GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_RA, REG_SPBASE, offsetSpToSavedFp + 8);
-            compiler->unwindSaveReg(REG_RA, offsetSpToSavedFp + 8);
+        JITDUMP("Frame type 2. #outsz=%d; #framesz=%d; LclFrameSize=%d\n", unsigned(compiler->lvaOutgoingArgSpaceSize),
+                totalFrameSize, compiler->compLclFrameSize);
  
-            maskSaveRegsInt &= ~(RBM_FP | RBM_RA); // We've already saved FP/RA
+        if ((offset + (compiler->compCalleeRegsPushed << 3)) >= 2040)
+        {
+            offset            = totalFrameSize - compiler->lvaOutgoingArgSpaceSize;
+            calleeSaveSPDelta = AlignUp((UINT)offset, STACK_ALIGN);
+            offset            = calleeSaveSPDelta - offset;
  
-            offset = compiler->compLclFrameSize + 2 * REGSIZE_BYTES; // FP/RA
+            genStackPointerAdjustment(-calleeSaveSPDelta, REG_SCRATCH, nullptr, /* reportUnwindData */ true);
          }
          else
          {
-            JITDUMP("Frame type 2. #outsz=%d; #framesz=%d; LclFrameSize=%d\n",
-                    unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compLclFrameSize);
-
-            frameType = 2;
-
-            maskSaveRegsInt &= ~(RBM_FP | RBM_RA); // We've already saved FP/RA
-
-            offset            = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES;
-            calleeSaveSPDelta = AlignUp((UINT)offset, STACK_ALIGN);
-            offset            = calleeSaveSPDelta - offset;
+            genStackPointerAdjustment(-totalFrameSize, REG_SCRATCH, nullptr, /* reportUnwindData */ true);
          }
      }
-    else
-    {
-        // No frame pointer (no chaining).
-        assert((maskSaveRegsInt & RBM_FP) == 0);
-        assert((maskSaveRegsInt & RBM_RA) != 0);
  
-        // Note that there is no pre-indexed save_lrpair unwind code variant, so we can't allocate the frame using
-        // 'sd' if we only have one callee-saved register plus RA to save.
+    JITDUMP("    offset=%d, calleeSaveSPDelta=%d\n", offset, calleeSaveSPDelta);
  
-        NYI_RISCV64("Frame without frame pointer");
-        offset = 0;
-    }
+    genSaveCalleeSavedRegistersHelp(rsPushRegs, offset, 0);
+    offset += (int)(genCountBits(rsPushRegs) << 3); // each reg has 8 bytes
  
-    assert(frameType != 0);
+    // From now on, we can safely use initReg.
  
-    JITDUMP("    offset=%d, calleeSaveSPDelta=%d\n", offset, calleeSaveSPDelta);
-    genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, offset, -calleeSaveSPDelta);
+    emit->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_RA, REG_SPBASE, offset);
+    compiler->unwindSaveReg(REG_RA, offset);
+
+    emit->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_FP, REG_SPBASE, offset + 8);
+    compiler->unwindSaveReg(REG_FP, offset + 8);
+
+    JITDUMP("    offsetSpToSavedFp=%d\n", offset + 8);
+    genEstablishFramePointer(offset + 8, /* reportUnwindData */ true);
  
      // For varargs, home the incoming arg registers last. Note that there is nothing to unwind here,
      // so we just report "NOP" unwind codes. If there's no more frame setup after this, we don't
@@ -7448,60 +7963,12 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe
          printf("DEBUG: RISCV64, frameType:%d\n\n", frameType);
      }
  #endif
-    if (frameType == 1)
-    {
-        // offsetSpToSavedFp = genSPtoFPdelta();
-    }
-    else if (frameType == 2)
-    {
-        if (compiler->lvaOutgoingArgSpaceSize >= 2040)
-        {
-            offset            = totalFrameSize - calleeSaveSPDelta - compiler->lvaOutgoingArgSpaceSize;
-            calleeSaveSPDelta = AlignUp((UINT)offset, STACK_ALIGN);
-            offset            = calleeSaveSPDelta - offset;
-
-            genStackPointerAdjustment(-calleeSaveSPDelta, initReg, pInitRegZeroed, /* reportUnwindData */ true);
-
-            offsetSpToSavedFp = offset;
-
-            GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_FP, REG_SPBASE, offset);
-            compiler->unwindSaveReg(REG_FP, offset);
-
-            GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_RA, REG_SPBASE, offset + 8);
-            compiler->unwindSaveReg(REG_RA, offset + 8);
-
-            genEstablishFramePointer(offset, /* reportUnwindData */ true);
  
-            calleeSaveSPDelta = compiler->lvaOutgoingArgSpaceSize & ~0xf;
-            genStackPointerAdjustment(-calleeSaveSPDelta, initReg, pInitRegZeroed, /* reportUnwindData */ true);
-        }
-        else
-        {
-            calleeSaveSPDelta = totalFrameSize - calleeSaveSPDelta;
-            genStackPointerAdjustment(-calleeSaveSPDelta, initReg, pInitRegZeroed, /* reportUnwindData */ true);
-
-            offset = compiler->lvaOutgoingArgSpaceSize;
-
-            GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_FP, REG_SPBASE, offset);
-            compiler->unwindSaveReg(REG_FP, offset);
-
-            GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_RA, REG_SPBASE, offset + 8);
-            compiler->unwindSaveReg(REG_RA, offset + 8);
-
-            genEstablishFramePointer(offset, /* reportUnwindData */ true);
-        }
-
-        establishFramePointer = false;
-    }
-    else
-    {
-        unreached();
-    }
-
-    if (establishFramePointer)
+    if (calleeSaveSPDelta != 0)
      {
-        JITDUMP("    offsetSpToSavedFp=%d\n", offsetSpToSavedFp);
-        genEstablishFramePointer(offsetSpToSavedFp, /* reportUnwindData */ true);
+        assert(frameType == 2);
+        calleeSaveSPDelta = totalFrameSize - calleeSaveSPDelta;
+        genStackPointerAdjustment(-calleeSaveSPDelta, initReg, pInitRegZeroed, /* reportUnwindData */ true);
      }
  }
  
@@ -7509,149 +7976,112 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
  {
      assert(compiler->compGeneratingEpilog);
  
-    regMaskTP rsRestoreRegs = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;
-
-    if (isFramePointerUsed())
-    {
-        rsRestoreRegs |= RBM_FPBASE;
-    }
+    regMaskTP regsToRestoreMask = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;
  
-    rsRestoreRegs |= RBM_RA; // We must save/restore the return address.
-
-    regMaskTP regsToRestoreMask = rsRestoreRegs;
-
-    int totalFrameSize = genTotalFrameSize();
+    // On RV64 we always use the FP (frame-pointer)
+    assert(isFramePointerUsed());
  
+    int totalFrameSize     = genTotalFrameSize();
+    int remainingSPSize    = totalFrameSize;
+    int callerSPtoFPdelta  = 0;
      int calleeSaveSPOffset = 0; // This will be the starting place for restoring
                                  // the callee-saved registers, in decreasing order.
-    int frameType         = 0;  // An indicator of what type of frame we are popping.
-    int calleeSaveSPDelta = 0;  // Amount to add to SP after callee-saved registers have been restored.
-
-    if (isFramePointerUsed())
-    {
-        if (totalFrameSize <= 2047)
-        {
-            if (compiler->compLocallocUsed)
-            {
-                int spToFpDelta = genSPtoFPdelta();
-                // Restore sp from fp
-                GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -spToFpDelta);
-                compiler->unwindSetFrameReg(REG_FPBASE, spToFpDelta);
-            }
-
-            JITDUMP("Frame type 1(save FP/RA at bottom). #outsz=%d; #framesz=%d; localloc? %s\n",
-                    unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, dspBool(compiler->compLocallocUsed));
  
-            frameType = 1;
+    emitter* emit = GetEmitter();
  
-            regsToRestoreMask &= ~(RBM_FP | RBM_RA); // We'll restore FP/RA at the end.
+    // Keep every frame offset within the 12-bit signed immediate range of sd/ld
+    if (totalFrameSize <= 2040)
+    {
+        JITDUMP("Frame type 1. #outsz=%d; #framesz=%d; localloc? %s\n", unsigned(compiler->lvaOutgoingArgSpaceSize),
+                totalFrameSize, dspBool(compiler->compLocallocUsed));
  
-            calleeSaveSPOffset = compiler->compLclFrameSize + 2 * REGSIZE_BYTES;
-        }
-        else
+        if (compiler->compLocallocUsed)
          {
-            JITDUMP("Frame type 2(save FP/RA at bottom). #outsz=%d; #framesz=%d; #calleeSaveRegsPushed:%d; "
-                    "localloc? %s\n",
-                    unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compCalleeRegsPushed,
-                    dspBool(compiler->compLocallocUsed));
+            callerSPtoFPdelta = (compiler->compCalleeRegsPushed << 3) - 8 + compiler->lvaOutgoingArgSpaceSize;
+        }
+        calleeSaveSPOffset = compiler->lvaOutgoingArgSpaceSize;
+        // remainingSPSize = totalFrameSize;
+    }
+    else
+    {
+        JITDUMP("Frame type 2. #outsz=%d; #framesz=%d; calleeSaveRegsPushed: %d; localloc? %s\n",
+                unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compCalleeRegsPushed,
+                dspBool(compiler->compLocallocUsed));
  
-            frameType = 2;
+        if ((compiler->lvaOutgoingArgSpaceSize + (compiler->compCalleeRegsPushed << 3)) > 2047)
+        {
+            calleeSaveSPOffset = compiler->lvaOutgoingArgSpaceSize & 0xfffffff0;
  
-            int outSzAligned;
-            if (compiler->lvaOutgoingArgSpaceSize >= 2040)
+            if (compiler->compLocallocUsed)
              {
-                int offset         = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES;
-                calleeSaveSPDelta  = AlignUp((UINT)offset, STACK_ALIGN);
-                calleeSaveSPOffset = calleeSaveSPDelta - offset;
-
-                int offset2       = totalFrameSize - calleeSaveSPDelta - compiler->lvaOutgoingArgSpaceSize;
-                calleeSaveSPDelta = AlignUp((UINT)offset2, STACK_ALIGN);
-                offset2           = calleeSaveSPDelta - offset2;
-
-                if (compiler->compLocallocUsed)
-                {
-                    // Restore sp from fp
-                    GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -offset2);
-                    compiler->unwindSetFrameReg(REG_FPBASE, offset2);
-                }
-                else
-                {
-                    outSzAligned = compiler->lvaOutgoingArgSpaceSize & ~0xf;
-                    genStackPointerAdjustment(outSzAligned, rsGetRsvdReg(), nullptr,
-                                              /* reportUnwindData */ true);
-                }
-
-                regsToRestoreMask &= ~(RBM_FP | RBM_RA); // We'll restore FP/RA at the end.
-
-                GetEmitter()->emitIns_R_R_I(INS_ld, EA_PTRSIZE, REG_RA, REG_SPBASE, offset2 + 8);
-                compiler->unwindSaveReg(REG_RA, offset2 + 8);
-
-                GetEmitter()->emitIns_R_R_I(INS_ld, EA_PTRSIZE, REG_FP, REG_SPBASE, offset2);
-                compiler->unwindSaveReg(REG_FP, offset2);
-
-                genStackPointerAdjustment(calleeSaveSPDelta, rsGetRsvdReg(), nullptr,
-                                          /* reportUnwindData */ true);
-
-                calleeSaveSPDelta = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES;
-                calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPDelta, STACK_ALIGN);
+                callerSPtoFPdelta = (compiler->compCalleeRegsPushed << 3) - 8;
              }
              else
              {
-                int offset2 = compiler->lvaOutgoingArgSpaceSize;
-                if (compiler->compLocallocUsed)
-                {
-                    // Restore sp from fp
-                    GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -offset2);
-                    compiler->unwindSetFrameReg(REG_FPBASE, offset2);
-                }
-
-                regsToRestoreMask &= ~(RBM_FP | RBM_RA); // We'll restore FP/RA at the end.
-
-                GetEmitter()->emitIns_R_R_I(INS_ld, EA_PTRSIZE, REG_RA, REG_SPBASE, offset2 + 8);
-                compiler->unwindSaveReg(REG_RA, offset2 + 8);
-
-                GetEmitter()->emitIns_R_R_I(INS_ld, EA_PTRSIZE, REG_FP, REG_SPBASE, offset2);
-                compiler->unwindSaveReg(REG_FP, offset2);
-
-                calleeSaveSPOffset = totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES;
-                calleeSaveSPDelta  = AlignUp((UINT)calleeSaveSPOffset, STACK_ALIGN);
-                calleeSaveSPOffset = calleeSaveSPDelta - calleeSaveSPOffset;
-
-                genStackPointerAdjustment(totalFrameSize - calleeSaveSPDelta, rsGetRsvdReg(), nullptr,
-                                          /* reportUnwindData */ true);
+                genStackPointerAdjustment(calleeSaveSPOffset, REG_RA, nullptr, /* reportUnwindData */ true);
              }
+            remainingSPSize    = totalFrameSize - calleeSaveSPOffset;
+            calleeSaveSPOffset = compiler->lvaOutgoingArgSpaceSize - calleeSaveSPOffset;
+        }
+        else
+        {
+            if (compiler->compLocallocUsed)
+            {
+                callerSPtoFPdelta = (compiler->compCalleeRegsPushed << 3) - 8 + compiler->lvaOutgoingArgSpaceSize;
+            }
+            calleeSaveSPOffset = compiler->lvaOutgoingArgSpaceSize;
+            // remainingSPSize = totalFrameSize;
          }
      }
-    else
+
+    if (compiler->compLocallocUsed)
      {
-        // No frame pointer (no chaining).
-        NYI_RISCV64("Frame without frame pointer");
-        calleeSaveSPOffset = 0;
+        // restore sp from fp: addi sp, fp, -callerSPtoFPdelta
+        emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -callerSPtoFPdelta);
+        compiler->unwindSetFrameReg(REG_FPBASE, callerSPtoFPdelta);
      }
  
-    JITDUMP("    calleeSaveSPOffset=%d, calleeSaveSPDelta=%d\n", calleeSaveSPOffset, calleeSaveSPDelta);
-    genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, calleeSaveSPOffset, calleeSaveSPDelta);
+    JITDUMP("    calleeSaveSPOffset=%d, callerSPtoFPdelta=%d\n", calleeSaveSPOffset, callerSPtoFPdelta);
+    genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, calleeSaveSPOffset, 0);
  
-    if (frameType == 1)
-    {
-        calleeSaveSPOffset = compiler->lvaOutgoingArgSpaceSize;
+    // restore ra/fp regs
+    calleeSaveSPOffset += (compiler->compCalleeRegsPushed - 2) << 3;
  
-        GetEmitter()->emitIns_R_R_I(INS_ld, EA_PTRSIZE, REG_RA, REG_SPBASE, calleeSaveSPOffset + 8);
-        compiler->unwindSaveReg(REG_RA, calleeSaveSPOffset + 8);
+    emit->emitIns_R_R_I(INS_ld, EA_PTRSIZE, REG_RA, REG_SPBASE, calleeSaveSPOffset);
+    compiler->unwindSaveReg(REG_RA, calleeSaveSPOffset);
  
-        GetEmitter()->emitIns_R_R_I(INS_ld, EA_PTRSIZE, REG_FP, REG_SPBASE, calleeSaveSPOffset);
-        compiler->unwindSaveReg(REG_FP, calleeSaveSPOffset);
+    emit->emitIns_R_R_I(INS_ld, EA_PTRSIZE, REG_FP, REG_SPBASE, calleeSaveSPOffset + 8);
+    compiler->unwindSaveReg(REG_FP, calleeSaveSPOffset + 8);
  
-        GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize);
-        compiler->unwindAllocStack(totalFrameSize);
-    }
-    else if (frameType == 2)
+    if (emitter::isValidUimm11(remainingSPSize))
      {
-        // had done.
+        emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, remainingSPSize);
      }
      else
      {
-        unreached();
+        regNumber tempReg = rsGetRsvdReg();
+        emit->emitLoadImmediate(EA_PTRSIZE, tempReg, remainingSPSize);
+        emit->emitIns_R_R_R(INS_add, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, tempReg);
+    }
+    compiler->unwindAllocStack(remainingSPSize);
+
+    // for OSR we have to adjust SP to remove tier0 frame
+    if (compiler->opts.IsOSR())
+    {
+        const int tier0FrameSize = compiler->info.compPatchpointInfo->TotalFrameSize();
+        JITDUMP("Extra SP adjust for OSR to pop off Tier0 frame: %d bytes\n", tier0FrameSize);
+
+        if (emitter::isValidUimm11(tier0FrameSize))
+        {
+            emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, tier0FrameSize);
+        }
+        else
+        {
+            regNumber tempReg = rsGetRsvdReg();
+            emit->emitLoadImmediate(EA_PTRSIZE, tempReg, tier0FrameSize);
+            emit->emitIns_R_R_R(INS_add, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, tempReg);
+        }
+        compiler->unwindAllocStack(tier0FrameSize);
      }
  }
  
diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp

index c45a5d5..8b67c7c 100644 (file)
--- a/src/coreclr/jit/compiler.cpp
+++ b/src/coreclr/jit/compiler.cpp
@@ -5643,7 +5643,7 @@ void Compiler::generatePatchpointInfo()
      //
      const int totalFrameSize = codeGen->genTotalFrameSize() + TARGET_POINTER_SIZE;
      const int offsetAdjust   = 0;
-#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)
+#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
      // SP is not manipulated by calls so no frame size adjustment needed.
      // Local Offsets may need adjusting, if FP is at bottom of frame.
      //
@@ -6893,7 +6893,7 @@ int Compiler::compCompileHelper(CORINFO_MODULE_HANDLE classPtr,
          {
              frameSizeUpdate = 8;
          }
-#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)
+#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
          if ((totalFrameSize & 0xf) != 0)
          {
              frameSizeUpdate = 8;
diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h

index 948a5de..5e457db 100644 (file)
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -9085,6 +9085,7 @@ public:
          // | arm64       |   256  |   128  | ldp/stp (2x128bit)
          // | arm         |    32  |    16  | no SIMD support
          // | loongarch64 |    64  |    32  | no SIMD support
+        // | riscv64     |    64  |    32  | no SIMD support
          //
          // We might want to use a different multiplier for truly hot/cold blocks based on PGO data
          //
diff --git a/src/coreclr/jit/ee_il_dll.cpp b/src/coreclr/jit/ee_il_dll.cpp

index 8c8af62..ea0b361 100644 (file)
--- a/src/coreclr/jit/ee_il_dll.cpp
+++ b/src/coreclr/jit/ee_il_dll.cpp
@@ -1141,9 +1141,10 @@ void Compiler::eeAllocMem(AllocMemArgs* args, const UNATIVE_OFFSET roDataSection
  
  #endif // DEBUG
  
-#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)
+#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
  
-    // For arm64/LoongArch64, we want to allocate JIT data always adjacent to code similar to what native compiler does.
+    // For arm64/LoongArch64/RISCV64, we want to allocate JIT data always adjacent to code similar to what native
+    // compiler does.
      // This way allows us to use a single `ldr` to access such data like float constant/jmp table.
      // For LoongArch64 using `pcaddi + ld` to access such data.
  
@@ -1157,7 +1158,7 @@ void Compiler::eeAllocMem(AllocMemArgs* args, const UNATIVE_OFFSET roDataSection
      args->hotCodeSize                 = roDataOffset + args->roDataSize;
      args->roDataSize                  = 0;
  
-#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)
+#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
  
      info.compCompHnd->allocMem(args);
  
@@ -1174,7 +1175,7 @@ void Compiler::eeAllocMem(AllocMemArgs* args, const UNATIVE_OFFSET roDataSection
  
  #endif // DEBUG
  
-#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)
+#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
  
      // Fix up data section pointers.
      assert(args->roDataBlock == nullptr);
@@ -1182,7 +1183,7 @@ void Compiler::eeAllocMem(AllocMemArgs* args, const UNATIVE_OFFSET roDataSection
      args->roDataBlock   = ((BYTE*)args->hotCodeBlock) + roDataOffset;
      args->roDataBlockRW = ((BYTE*)args->hotCodeBlockRW) + roDataOffset;
  
-#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)
+#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
  }
  
  void Compiler::eeReserveUnwindInfo(bool isFunclet, bool isColdCode, ULONG unwindSize)
diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp

index bbf5b95..d6494f5 100644 (file)
--- a/src/coreclr/jit/emit.cpp
+++ b/src/coreclr/jit/emit.cpp
@@ -703,7 +703,7 @@ void emitLclVarAddr::initLclVarAddr(int varNum, unsigned offset)
  }
  
  // Returns the variable to access. Note that it returns a negative number for compiler spill temps.
-int emitLclVarAddr::lvaVarNum()
+int emitLclVarAddr::lvaVarNum() const
  {
      switch (_lvaTag)
      {
@@ -717,7 +717,7 @@ int emitLclVarAddr::lvaVarNum()
      }
  }
  
-unsigned emitLclVarAddr::lvaOffset() // returns the offset into the variable to access
+unsigned emitLclVarAddr::lvaOffset() const // returns the offset into the variable to access
  {
      switch (_lvaTag)
      {
@@ -9729,7 +9729,7 @@ void emitter::emitRemoveLastInstruction()
   *  emitGetInsSC: Get the instruction's constant value.
   */
  
-cnsval_ssize_t emitter::emitGetInsSC(instrDesc* id)
+cnsval_ssize_t emitter::emitGetInsSC(const instrDesc* id) const
  {
  #ifdef TARGET_ARM // should it be TARGET_ARMARCH? Why do we need this? Note that on ARM64 we store scaled immediates
                    // for some formats
diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h

index 48e6c98..ff4dbdb 100644 (file)
--- a/src/coreclr/jit/emit.h
+++ b/src/coreclr/jit/emit.h
@@ -421,8 +421,9 @@ struct emitLclVarAddr
      // Constructor
      void initLclVarAddr(int varNum, unsigned offset);
  
-    int lvaVarNum(); // Returns the variable to access. Note that it returns a negative number for compiler spill temps.
-    unsigned lvaOffset(); // returns the offset into the variable to access
+    int lvaVarNum() const; // Returns the variable to access. Note that it returns a negative number for compiler spill
+                           // temps.
+    unsigned lvaOffset() const; // returns the offset into the variable to access
  
      // This struct should be 32 bits in size for the release build.
      // We have this constraint because this type is used in a union
@@ -2078,7 +2079,7 @@ protected:
      static const IS_INFO emitGetSchedInfo(insFormat f);
  #endif // TARGET_XARCH
  
-    cnsval_ssize_t emitGetInsSC(instrDesc* id);
+    cnsval_ssize_t emitGetInsSC(const instrDesc* id) const;
      unsigned emitInsCount;
  
      /************************************************************************/
diff --git a/src/coreclr/jit/emitriscv64.cpp b/src/coreclr/jit/emitriscv64.cpp

index 75a16d8..eb60190 100644 (file)
--- a/src/coreclr/jit/emitriscv64.cpp
+++ b/src/coreclr/jit/emitriscv64.cpp
@@ -188,7 +188,7 @@ bool emitter::emitInsIsLoadOrStore(instruction ins)
   *  Returns the specific encoding of the given CPU instruction.
   */
  
-inline emitter::code_t emitter::emitInsCode(instruction ins /*, insFormat fmt*/)
+inline emitter::code_t emitter::emitInsCode(instruction ins /*, insFormat fmt*/) const
  {
      code_t code = BAD_CODE;
  
@@ -479,7 +479,7 @@ void emitter::emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t
          case INS_auipc:
              assert(reg != REG_R0);
              assert(isGeneralRegister(reg));
-            assert((((size_t)imm) >> 20) == 0);
+            assert(isValidSimm20(imm));
  
              code |= reg << 7;
              code |= (imm & 0xfffff) << 12;
@@ -588,7 +588,7 @@ void emitter::emitIns_R_R(
  {
      code_t code = emitInsCode(ins);
  
-    if (INS_mov == ins)
+    if (INS_mov == ins || INS_sext_w == ins)
      {
          assert(isGeneralRegisterOrR0(reg1));
          assert(isGeneralRegisterOrR0(reg2));
@@ -625,7 +625,8 @@ void emitter::emitIns_R_R(
          assert(isGeneralRegisterOrR0(reg2));
          code |= (reg1 & 0x1f) << 7;
          code |= reg2 << 15;
-        code |= 0x7 << 12;
+        if (INS_fcvt_d_w != ins && INS_fcvt_d_wu != ins) // fcvt.d.w[u] always produces an exact result
+            code |= 0x7 << 12;                           // round according to frm status register
      }
      else if (INS_fcvt_s_d == ins || INS_fcvt_d_s == ins)
      {
@@ -633,7 +634,8 @@ void emitter::emitIns_R_R(
          assert(isFloatReg(reg2));
          code |= (reg1 & 0x1f) << 7;
          code |= (reg2 & 0x1f) << 15;
-        code |= 0x7 << 12;
+        if (INS_fcvt_d_s != ins) // fcvt.d.s never rounds
+            code |= 0x7 << 12;   // round according to frm status register
      }
      else
      {
@@ -954,7 +956,9 @@ void emitter::emitIns_R_C(
          id->idCodeSize(8);
      }
      else
-        id->idCodeSize(16);
+    {
+        id->idCodeSize(24);
+    }
  
      if (EA_IS_GCREF(attr))
      {
@@ -1054,11 +1058,11 @@ void emitter::emitIns_R_L(instruction ins, emitAttr attr, BasicBlock* dst, regNu
      //   auipc reg, offset-hi20
      //   addi  reg, reg, offset-lo12
      //
-    // else:  3-ins:
-    //   lui  tmp, dst-hi-20bits
+    // else:  5-ins:
+    //   lui  tmp, dst-lo-20bits
      //   addi tmp, tmp, dst-lo-12bits
-    //   lui  reg, 0xff << 12
-    //   slli reg, reg, 32
+    //   lui reg, dst-hi-15bits
+    //   slli reg, reg, 20
      //   add  reg, tmp, reg
  
      instrDesc* id = emitNewInstr(attr);
@@ -1251,7 +1255,7 @@ void emitter::emitLoadImmediate(emitAttr size, regNumber reg, ssize_t imm)
      // Since ADDIW use sign extension fo immediate
      // we have to adjust higher 19 bit loaded by LUI
      // for case when low part is bigger than 0x800.
-    UINT32 high19 = (high31 + 0x800) >> 12;
+    INT32 high19 = ((int32_t)(high31 + 0x800)) >> 12;
  
      emitIns_R_I(INS_lui, size, reg, high19);
      emitIns_R_R_I(INS_addiw, size, reg, reg, high31 & 0xFFF);
@@ -1423,7 +1427,7 @@ void emitter::emitIns_Call(EmitCallType          callType,
          assert(callType == EC_FUNC_TOKEN);
          assert(addr != NULL);
  
-        addr = (void*)(((size_t)addr) + (isJump ? 0 : 1)); // NOTE: low-bit0 is used for jirl ra/r0,rd,0
+        addr = (void*)(((size_t)addr) + (isJump ? 0 : 1)); // NOTE: low-bit0 is used for jalr ra/r0,rd,0
          id->idAddr()->iiaAddr = (BYTE*)addr;
  
          if (emitComp->opts.compReloc)
@@ -1446,11 +1450,14 @@ void emitter::emitIns_Call(EmitCallType          callType,
                     VarSetOps::ToString(emitComp, ((instrDescCGCA*)id)->idcGCvars));
          }
      }
-
-    id->idDebugOnlyInfo()->idMemCookie = (size_t)methHnd; // method token
-    id->idDebugOnlyInfo()->idCallSig   = sigInfo;
  #endif // DEBUG
  
+    if (m_debugInfoSize > 0)
+    {
+        INDEBUG(id->idDebugOnlyInfo()->idCallSig = sigInfo);
+        id->idDebugOnlyInfo()->idMemCookie = reinterpret_cast<size_t>(methHnd); // method token
+    }
+
  #ifdef LATE_DISASM
      if (addr != nullptr)
      {
@@ -1466,7 +1473,7 @@ void emitter::emitIns_Call(EmitCallType          callType,
   *  Output a call instruction.
   */
  
-unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t code)
+unsigned emitter::emitOutputCall(const insGroup* ig, BYTE* dst, instrDesc* id, code_t code)
  {
      unsigned char callInstrSize = sizeof(code_t); // 4 bytes
      regMaskTP     gcrefRegs;
@@ -1546,7 +1553,7 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t
  #endif
          emitOutput_Instr(dst, 0x00000067 | (REG_DEFAULT_HELPER_CALL_TARGET << 15) | reg2 << 7);
  
-        emitRecordRelocation(dst - 4, (BYTE*)addr, IMAGE_REL_RISCV64_JALR);
+        emitRecordRelocation(dst - 4, (BYTE*)addr, IMAGE_REL_RISCV64_PC);
      }
      else
      {
@@ -1560,7 +1567,7 @@ unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t
          // jalr t2
  
          ssize_t imm = (ssize_t)(id->idAddr()->iiaAddr);
-        assert((imm >> 32) <= 0xff);
+        assert((uint64_t)(imm >> 32) <= 0x7fff); // RISC-V Linux Kernel SV48
  
          int reg2 = (int)(imm & 1);
          imm -= reg2;
@@ -2115,20 +2122,578 @@ AGAIN:
   *  Emit a 32-bit RISCV64 instruction
   */
  
-unsigned emitter::emitOutput_Instr(BYTE* dst, code_t code)
+unsigned emitter::emitOutput_Instr(BYTE* dst, code_t code) const
  {
-    assert(sizeof(code_t) == 4);
-    memcpy(dst + writeableOffset, &code, sizeof(code_t));
+    assert(dst != nullptr);
+    static_assert(sizeof(code_t) == 4, "code_t must be 4 bytes"); // checked at compile time instead of at run time
+    memcpy(dst + writeableOffset, &code, sizeof(code)); // copy the 4-byte encoding to dst displaced by writeableOffset
      return sizeof(code_t);
  }
  
+static inline void assertCodeLength(size_t code, uint8_t size) // asserts that `code` fits in the low `size` bits
+{
+    assert((code >> size) == 0); // any bit set at position `size` or above fails the check
+}
+
+/*****************************************************************************
+ *
+ *  Encode a 32-bit RISCV64 R-Type instruction (returns the instruction word; nothing is written to memory)
+ *
+ *  Note: Instruction types as per RISC-V Spec, Chapter 24 RV32/64G Instruction Set Listings
+ *  R-Type layout:
+ *  31-------25-24---20-19--15-14------12-11-----------7-6------------0
+ *  | funct7   |  rs2  | rs1  |  funct3  |      rd      |   opcode    |
+ *  -------------------------------------------------------------------
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeRTypeInstr(
+    unsigned opcode, unsigned rd, unsigned funct3, unsigned rs1, unsigned rs2, unsigned funct7)
+{
+    assertCodeLength(opcode, 7); // bits [6:0]
+    assertCodeLength(rd, 5);     // bits [11:7]
+    assertCodeLength(funct3, 3); // bits [14:12]
+    assertCodeLength(rs1, 5);    // bits [19:15]
+    assertCodeLength(rs2, 5);    // bits [24:20]
+    assertCodeLength(funct7, 7); // bits [31:25]
+    // Pack each field at its bit position in the R-Type layout.
+    return opcode | (rd << 7) | (funct3 << 12) | (rs1 << 15) | (rs2 << 20) | (funct7 << 25);
+}
+
+/*****************************************************************************
+ *
+ *  Encode a 32-bit RISCV64 I-Type instruction (returns the instruction word; nothing is written to memory)
+ *
+ *  Note: Instruction types as per RISC-V Spec, Chapter 24 RV32/64G Instruction Set Listings
+ *  I-Type layout:
+ *  31------------20-19-----15-14------12-11-----------7-6------------0
+ *  |   imm[11:0]   |   rs1   |  funct3  |      rd      |   opcode    |
+ *  -------------------------------------------------------------------
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeITypeInstr(
+    unsigned opcode, unsigned rd, unsigned funct3, unsigned rs1, unsigned imm12)
+{
+    assertCodeLength(opcode, 7); // bits [6:0]
+    assertCodeLength(rd, 5);     // bits [11:7]
+    assertCodeLength(funct3, 3); // bits [14:12]
+    assertCodeLength(rs1, 5);    // bits [19:15]
+    // Untrimmed negative immediates have high bits set and trip this assert; see the TrimSigned helpers
+    assertCodeLength(imm12, 12); // bits [31:20]
+    // Pack each field at its bit position in the I-Type layout.
+    return opcode | (rd << 7) | (funct3 << 12) | (rs1 << 15) | (imm12 << 20);
+}
+
+/*****************************************************************************
+ *
+ *  Encode a 32-bit RISCV64 S-Type instruction (returns the instruction word; nothing is written to memory)
+ *
+ *  Note: Instruction types as per RISC-V Spec, Chapter 24 RV32/64G Instruction Set Listings
+ *  S-Type layout:
+ *  31-------25-24---20-19--15-14------12-11-----------7-6------------0
+ *  |imm[11:5] |  rs2  | rs1  |  funct3  |   imm[4:0]   |   opcode    |
+ *  -------------------------------------------------------------------
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeSTypeInstr(
+    unsigned opcode, unsigned funct3, unsigned rs1, unsigned rs2, unsigned imm12)
+{
+    static constexpr unsigned kLoMask = 0x1f; // 0b00011111, selects imm[4:0]
+    static constexpr unsigned kHiMask = 0x7f; // 0b01111111, selects imm[11:5] after shifting right by 5
+
+    assertCodeLength(opcode, 7); // bits [6:0]
+    assertCodeLength(funct3, 3); // bits [14:12]
+    assertCodeLength(rs1, 5);    // bits [19:15]
+    assertCodeLength(rs2, 5);    // bits [24:20]
+    // Untrimmed negative immediates have high bits set and trip this assert; see the TrimSigned helpers
+    assertCodeLength(imm12, 12);
+    // The immediate is split in two: the low 5 bits go where rd normally sits, the high 7 bits go on top.
+    unsigned imm12Lo = imm12 & kLoMask;        // imm[4:0]  -> bits [11:7]
+    unsigned imm12Hi = (imm12 >> 5) & kHiMask; // imm[11:5] -> bits [31:25]
+
+    return opcode | (imm12Lo << 7) | (funct3 << 12) | (rs1 << 15) | (rs2 << 20) | (imm12Hi << 25);
+}
+
+/*****************************************************************************
+ *
+ *  Encode a 32-bit RISCV64 U-Type instruction (returns the instruction word; nothing is written to memory)
+ *
+ *  Note: Instruction types as per RISC-V Spec, Chapter 24 RV32/64G Instruction Set Listings
+ *  U-Type layout:
+ *  31---------------------------------12-11-----------7-6------------0
+ *  |             imm[31:12]             |      rd      |   opcode    |
+ *  -------------------------------------------------------------------
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeUTypeInstr(unsigned opcode, unsigned rd, unsigned imm20)
+{
+    assertCodeLength(opcode, 7); // bits [6:0]
+    assertCodeLength(rd, 5);     // bits [11:7]
+    // Untrimmed negative immediates have high bits set and trip this assert; see the TrimSigned helpers
+    assertCodeLength(imm20, 20); // bits [31:12]
+    // Pack each field at its bit position in the U-Type layout.
+    return opcode | (rd << 7) | (imm20 << 12);
+}
+
+/*****************************************************************************
+ *
+ *  Encode a 32-bit RISCV64 B-Type instruction (returns the instruction word; nothing is written to memory)
+ *
+ *  Note: Instruction types as per RISC-V Spec, Chapter 24 RV32/64G Instruction Set Listings
+ *  B-Type layout:
+ *  31-------30-----25-24-20-19-15-14--12-11-------8----7----6--------0
+ *  |imm[12]|imm[10:5]| rs2 | rs1 |funct3|  imm[4:1]|imm[11]| opcode  |
+ *  -------------------------------------------------------------------
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeBTypeInstr(
+    unsigned opcode, unsigned funct3, unsigned rs1, unsigned rs2, unsigned imm13)
+{
+    static constexpr unsigned kLoSectionMask = 0x0f; // 0b00001111, 4-bit section imm[4:1]
+    static constexpr unsigned kHiSectionMask = 0x3f; // 0b00111111, 6-bit section imm[10:5]
+    static constexpr unsigned kBitMask       = 0x01; // single-bit pieces imm[11] and imm[12]
+
+    assertCodeLength(opcode, 7); // bits [6:0]
+    assertCodeLength(funct3, 3); // bits [14:12]
+    assertCodeLength(rs1, 5);    // bits [19:15]
+    assertCodeLength(rs2, 5);    // bits [24:20]
+    // Untrimmed negative immediates have high bits set and trip this assert; see the TrimSigned helpers
+    assertCodeLength(imm13, 13);
+    assert((imm13 & 0x01) == 0); // bit 0 has no slot in the encoding (it is shifted out below), so it must be zero
+    // After the shift, bit k of imm12 holds imm[k + 1] of the original 13-bit immediate.
+    unsigned imm12          = imm13 >> 1;
+    unsigned imm12LoSection = imm12 & kLoSectionMask;        // imm[4:1]  -> bits [11:8]
+    unsigned imm12LoBit     = (imm12 >> 10) & kBitMask;      // imm[11]   -> bit 7
+    unsigned imm12HiSection = (imm12 >> 4) & kHiSectionMask; // imm[10:5] -> bits [30:25]
+    unsigned imm12HiBit     = (imm12 >> 11) & kBitMask;      // imm[12]   -> bit 31
+
+    return opcode | (imm12LoBit << 7) | (imm12LoSection << 8) | (funct3 << 12) | (rs1 << 15) | (rs2 << 20) |
+           (imm12HiSection << 25) | (imm12HiBit << 31);
+}
+
+/*****************************************************************************
+ *
+ *  Encode a 32-bit RISCV64 J-Type instruction (returns the instruction word; nothing is written to memory)
+ *
+ *  Note: Instruction types as per RISC-V Spec, Chapter 24 RV32/64G Instruction Set Listings
+ *  J-Type layout:
+ *  31-------30--------21----20---19----------12-11----7-6------------0
+ *  |imm[20]| imm[10:1]  |imm[11]|  imm[19:12]  |  rd   |   opcode    |
+ *  -------------------------------------------------------------------
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeJTypeInstr(unsigned opcode, unsigned rd, unsigned imm21)
+{
+    static constexpr unsigned kHiSectionMask = 0x3ff; // 0b1111111111, 10-bit section imm[10:1]
+    static constexpr unsigned kLoSectionMask = 0xff;  // 0b11111111, 8-bit section imm[19:12]
+    static constexpr unsigned kBitMask       = 0x01;  // single-bit pieces imm[11] and imm[20]
+
+    assertCodeLength(opcode, 7); // bits [6:0]
+    assertCodeLength(rd, 5);     // bits [11:7]
+    // Untrimmed negative immediates have high bits set and trip this assert; see the TrimSigned helpers
+    assertCodeLength(imm21, 21);
+    assert((imm21 & 0x01) == 0); // bit 0 has no slot in the encoding (it is shifted out below), so it must be zero
+    // After the shift, bit k of imm20 holds imm[k + 1] of the original 21-bit immediate.
+    unsigned imm20          = imm21 >> 1;
+    unsigned imm20HiSection = imm20 & kHiSectionMask;         // imm[10:1]  -> bits [30:21]
+    unsigned imm20HiBit     = (imm20 >> 19) & kBitMask;       // imm[20]    -> bit 31
+    unsigned imm20LoSection = (imm20 >> 11) & kLoSectionMask; // imm[19:12] -> bits [19:12]
+    unsigned imm20LoBit     = (imm20 >> 10) & kBitMask;       // imm[11]    -> bit 20
+
+    return opcode | (rd << 7) | (imm20LoSection << 12) | (imm20LoBit << 20) | (imm20HiSection << 21) |
+           (imm20HiBit << 31);
+}
+
+static constexpr unsigned kInstructionOpcodeMask = 0x7f; // instruction bits [6:0]
+static constexpr unsigned kInstructionFunct3Mask = 0x7000; // instruction bits [14:12]
+static constexpr unsigned kInstructionFunct5Mask = 0xf8000000; // instruction bits [31:27]
+static constexpr unsigned kInstructionFunct7Mask = 0xfe000000; // instruction bits [31:25]
+static constexpr unsigned kInstructionFunct2Mask = 0x06000000; // instruction bits [26:25]
+
+#ifdef DEBUG
+
+/*static*/ void emitter::emitOutput_RTypeInstr_SanityCheck(instruction ins, regNumber rd, regNumber rs1, regNumber rs2)
+{
+    switch (ins) // debug-only operand checks, grouped by the register classes each instruction expects
+    {
+        case INS_add:
+        case INS_sub:
+        case INS_sll:
+        case INS_slt:
+        case INS_sltu:
+        case INS_xor:
+        case INS_srl:
+        case INS_sra:
+        case INS_or:
+        case INS_and:
+        case INS_addw:
+        case INS_subw:
+        case INS_sllw:
+        case INS_srlw:
+        case INS_sraw:
+        case INS_mul:
+        case INS_mulh:
+        case INS_mulhsu:
+        case INS_mulhu:
+        case INS_div:
+        case INS_divu:
+        case INS_rem:
+        case INS_remu:
+        case INS_mulw:
+        case INS_divw:
+        case INS_divuw:
+        case INS_remw:
+        case INS_remuw:
+            assert(isGeneralRegisterOrR0(rd)); // integer arithmetic/logic/shift/mul/div/rem: all three operands are general registers
+            assert(isGeneralRegisterOrR0(rs1));
+            assert(isGeneralRegisterOrR0(rs2));
+            break;
+        case INS_fsgnj_s:
+        case INS_fsgnjn_s:
+        case INS_fsgnjx_s:
+        case INS_fmin_s:
+        case INS_fmax_s:
+        case INS_fsgnj_d:
+        case INS_fsgnjn_d:
+        case INS_fsgnjx_d:
+        case INS_fmin_d:
+        case INS_fmax_d:
+            assert(isFloatReg(rd)); // sign-injection and min/max: all three operands are float registers
+            assert(isFloatReg(rs1));
+            assert(isFloatReg(rs2));
+            break;
+        case INS_feq_s:
+        case INS_feq_d:
+        case INS_flt_d:
+        case INS_flt_s:
+        case INS_fle_s:
+        case INS_fle_d:
+            assert(isGeneralRegisterOrR0(rd)); // float compares: general destination, float sources
+            assert(isFloatReg(rs1));
+            assert(isFloatReg(rs2));
+            break;
+        case INS_fmv_w_x:
+        case INS_fmv_d_x:
+            assert(isFloatReg(rd)); // general -> float move: float destination, general source
+            assert(isGeneralRegisterOrR0(rs1));
+            assert(rs2 == 0); // single-source instruction: the rs2 argument must be passed as 0
+            break;
+        case INS_fmv_x_d:
+        case INS_fmv_x_w:
+        case INS_fclass_s:
+        case INS_fclass_d:
+            assert(isGeneralRegisterOrR0(rd)); // float -> general move and fclass: general destination, float source
+            assert(isFloatReg(rs1));
+            assert(rs2 == 0); // single-source instruction: the rs2 argument must be passed as 0
+            break;
+        default:
+            NO_WAY("Illegal ins within emitOutput_RTypeInstr!"); // any instruction not listed above is rejected
+            break;
+    }
+}
+
+/*static*/ void emitter::emitOutput_ITypeInstr_SanityCheck(
+    instruction ins, regNumber rd, regNumber rs1, unsigned immediate, unsigned opcode)
+{
+    switch (ins) // debug-only operand checks, grouped by what each instruction family expects
+    {
+        case INS_mov:
+        case INS_jalr:
+        case INS_lb:
+        case INS_lh:
+        case INS_lw:
+        case INS_lbu:
+        case INS_lhu:
+        case INS_addi:
+        case INS_slti:
+        case INS_sltiu:
+        case INS_xori:
+        case INS_ori:
+        case INS_andi:
+        case INS_lwu:
+        case INS_ld:
+        case INS_addiw:
+        case INS_csrrw:
+        case INS_csrrs:
+        case INS_csrrc:
+            assert(isGeneralRegisterOrR0(rd)); // integer loads, ALU-immediates, jalr and register CSR ops: general rd and rs1
+            assert(isGeneralRegisterOrR0(rs1));
+            assert((opcode & kInstructionFunct7Mask) == 0); // `opcode` is the full instruction code passed by emitOutput_ITypeInstr; bits [31:25] must be clear
+            break;
+        case INS_flw:
+        case INS_fld:
+            assert(isFloatReg(rd)); // float loads: float destination, general base register
+            assert(isGeneralRegisterOrR0(rs1));
+            assert((opcode & kInstructionFunct7Mask) == 0);
+            break;
+        case INS_slli:
+        case INS_srli:
+        case INS_srai:
+            assert(immediate < 64); // shift amount must fit in six bits
+            assert(isGeneralRegisterOrR0(rd));
+            assert(isGeneralRegisterOrR0(rs1));
+            break;
+        case INS_slliw:
+        case INS_srliw:
+        case INS_sraiw:
+            assert(immediate < 32); // word shifts: shift amount must fit in five bits
+            assert(isGeneralRegisterOrR0(rd));
+            assert(isGeneralRegisterOrR0(rs1));
+            break;
+        case INS_csrrwi:
+        case INS_csrrsi:
+        case INS_csrrci:
+            assert(isGeneralRegisterOrR0(rd));
+            assert(rs1 < 32); // only range-checked, not register-checked; presumably carries a 5-bit immediate here - TODO confirm
+            assert((opcode & kInstructionFunct7Mask) == 0);
+            break;
+        case INS_fence:
+        {
+            assert(rd == REG_ZERO);
+            assert(rs1 == REG_ZERO);
+            ssize_t format = immediate >> 8; // bits of the immediate above bit 7 (presumably the fence mode field - TODO confirm)
+            assert((format == 0) || (format == 0x8));
+            assert((opcode & kInstructionFunct7Mask) == 0);
+        }
+        break;
+        default:
+            NO_WAY("Illegal ins within emitOutput_ITypeInstr!"); // any instruction not listed above is rejected
+            break;
+    }
+}
+
+/*static*/ void emitter::emitOutput_STypeInstr_SanityCheck(instruction ins, regNumber rs1, regNumber rs2)
+{
+    switch (ins) // debug-only operand checks for the store instructions
+    {
+        case INS_sb:
+        case INS_sh:
+        case INS_sw:
+        case INS_sd:
+            assert(isGeneralRegister(rs1)); // base register: checked with isGeneralRegister, not the ...OrR0 variant
+            assert(isGeneralRegisterOrR0(rs2)); // integer stores: the stored value is in a general register
+            break;
+        case INS_fsw:
+        case INS_fsd:
+            assert(isGeneralRegister(rs1)); // base register: checked with isGeneralRegister, not the ...OrR0 variant
+            assert(isFloatReg(rs2)); // float stores: the stored value is in a float register
+            break;
+        default:
+            NO_WAY("Illegal ins within emitOutput_STypeInstr!"); // any instruction not listed above is rejected
+            break;
+    }
+}
+
+/*static*/ void emitter::emitOutput_UTypeInstr_SanityCheck(instruction ins, regNumber rd)
+{
+    switch (ins) // debug-only check: only lui and auipc are accepted
+    {
+        case INS_lui:
+        case INS_auipc:
+            assert(isGeneralRegisterOrR0(rd)); // destination must be a general register
+            break;
+        default:
+            NO_WAY("Illegal ins within emitOutput_UTypeInstr!"); // any instruction not listed above is rejected
+            break;
+    }
+}
+
+/*static*/ void emitter::emitOutput_BTypeInstr_SanityCheck(instruction ins, regNumber rs1, regNumber rs2)
+{
+    switch (ins) // debug-only operand checks for the conditional branches
+    {
+        case INS_beqz:
+        case INS_bnez:
+            assert((rs1 == REG_ZERO) || (rs2 == REG_ZERO)); // compare-with-zero forms: at least one operand must be REG_ZERO
+            FALLTHROUGH; // the general register checks below apply to these two as well
+        case INS_beq:
+        case INS_bne:
+        case INS_blt:
+        case INS_bge:
+        case INS_bltu:
+        case INS_bgeu:
+            assert(isGeneralRegisterOrR0(rs1)); // both compared operands are general registers
+            assert(isGeneralRegisterOrR0(rs2));
+            break;
+        default:
+            NO_WAY("Illegal ins within emitOutput_BTypeInstr!"); // any instruction not listed above is rejected
+            break;
+    }
+}
+
+/*static*/ void emitter::emitOutput_JTypeInstr_SanityCheck(instruction ins, regNumber rd)
+{
+    switch (ins) // debug-only check: only j and jal are accepted
+    {
+        case INS_j:
+            assert(rd == REG_ZERO); // j: the destination must be REG_ZERO
+            break;
+        case INS_jal:
+            assert(isGeneralRegisterOrR0(rd)); // jal: the destination is any general register
+            break;
+        default:
+            NO_WAY("Illegal ins within emitOutput_JTypeInstr!"); // any instruction not listed above is rejected
+            break;
+    }
+}
+
+#endif // DEBUG
+
+/*****************************************************************************
+ *
+ *  Converts an integral or floating point register from its identification
+ *  number to its binary encoding. For the integral registers the encoded
+ *  number is the register id. For the floating point registers the encoded
+ *  number is shifted back by the floating point register base (32). (The
+ *  instruction itself specifies whether a register is floating point or
+ *  integer; in the encoding the two are indistinguishable.)
+ *
+ */
+
+/*static*/ unsigned emitter::castFloatOrIntegralReg(regNumber reg)
+{
+    assert(isGeneralRegisterOrR0(reg) || isFloatReg(reg));
+
+    // A register field is five bits wide, so only the low five bits of the id are kept.
+    static constexpr unsigned kFiveBitField = 0x1f;
+
+    const unsigned encodedReg = reg & kFiveBitField;
+    return encodedReg;
+}
+
+/*****************************************************************************
+ *
+ *  Emit a 32-bit RISCV64 R-Type instruction to the given buffer. Returns a
+ *  length of an encoded instruction opcode
+ *
+ */
+
+unsigned emitter::emitOutput_RTypeInstr(BYTE* dst, instruction ins, regNumber rd, regNumber rs1, regNumber rs2) const
+{
+    unsigned insCode = emitInsCode(ins);
+#ifdef DEBUG
+    emitOutput_RTypeInstr_SanityCheck(ins, rd, rs1, rs2);
+#endif // DEBUG
+    unsigned opcode = insCode & kInstructionOpcodeMask;
+    unsigned funct3 = (insCode & kInstructionFunct3Mask) >> 12;
+    unsigned funct7 = (insCode & kInstructionFunct7Mask) >> 25;
+    return emitOutput_Instr(dst, insEncodeRTypeInstr(opcode, castFloatOrIntegralReg(rd), funct3,
+                                                     castFloatOrIntegralReg(rs1), castFloatOrIntegralReg(rs2), funct7));
+}
+
+/*****************************************************************************
+ *
+ *  Emit a 32-bit RISCV64 I-Type instruction to the given buffer. Returns a
+ *  length of an encoded instruction opcode
+ *
+ */
+
+unsigned emitter::emitOutput_ITypeInstr(BYTE* dst, instruction ins, regNumber rd, regNumber rs1, unsigned imm12) const
+{
+    unsigned insCode = emitInsCode(ins);
+#ifdef DEBUG
+    emitOutput_ITypeInstr_SanityCheck(ins, rd, rs1, imm12, insCode);
+#endif // DEBUG
+    unsigned opcode = insCode & kInstructionOpcodeMask;
+    unsigned funct3 = (insCode & kInstructionFunct3Mask) >> 12;
+    unsigned funct7 = (insCode & kInstructionFunct7Mask) >> 20; // only used by some of the immediate shifts
+    return emitOutput_Instr(dst, insEncodeITypeInstr(opcode, castFloatOrIntegralReg(rd), funct3, rs1, imm12 | funct7));
+}
+
+/*****************************************************************************
+ *
+ *  Emit a 32-bit RISCV64 S-Type instruction to the given buffer. Returns a
+ *  length of an encoded instruction opcode
+ *
+ */
+
+unsigned emitter::emitOutput_STypeInstr(BYTE* dst, instruction ins, regNumber rs1, regNumber rs2, unsigned imm12) const
+{
+    unsigned insCode = emitInsCode(ins);
+#ifdef DEBUG
+    emitOutput_STypeInstr_SanityCheck(ins, rs1, rs2);
+#endif // DEBUG
+    unsigned opcode = insCode & kInstructionOpcodeMask;
+    unsigned funct3 = (insCode & kInstructionFunct3Mask) >> 12;
+    return emitOutput_Instr(dst, insEncodeSTypeInstr(opcode, funct3, rs1, castFloatOrIntegralReg(rs2), imm12));
+}
+
+/*****************************************************************************
+ *
+ *  Emit a 32-bit RISCV64 U-Type instruction to the given buffer. Returns a
+ *  length of an encoded instruction opcode
+ *
+ */
+
+unsigned emitter::emitOutput_UTypeInstr(BYTE* dst, instruction ins, regNumber rd, unsigned imm20) const
+{
+    // The instruction code of `ins` is handed to the encoder unmasked, as its opcode argument.
+    const unsigned encoding = emitInsCode(ins);
+#ifdef DEBUG
+    emitOutput_UTypeInstr_SanityCheck(ins, rd);
+#endif // DEBUG
+
+    return emitOutput_Instr(dst, insEncodeUTypeInstr(encoding, rd, imm20));
+}
+
+/*****************************************************************************
+ *
+ *  Emit a 32-bit RISCV64 B-Type instruction to the given buffer. Returns a
+ *  length of an encoded instruction opcode
+ *
+ */
+
+unsigned emitter::emitOutput_BTypeInstr(BYTE* dst, instruction ins, regNumber rs1, regNumber rs2, unsigned imm13) const
+{
+    unsigned insCode = emitInsCode(ins);
+#ifdef DEBUG
+    emitOutput_BTypeInstr_SanityCheck(ins, rs1, rs2);
+#endif // DEBUG
+    unsigned opcode = insCode & kInstructionOpcodeMask;
+    unsigned funct3 = (insCode & kInstructionFunct3Mask) >> 12;
+    return emitOutput_Instr(dst, insEncodeBTypeInstr(opcode, funct3, rs1, rs2, imm13));
+}
+
+/*****************************************************************************
+ *
+ *  Emit a 32-bit RISCV64 B-Type instruction with the inverted comparison to
+ *  the given buffer. Returns a length of an encoded instruction opcode
+ *
+ *  Note: Replaces:
+ *      - beqz with bnez and vice versa
+ *      - beq with bne and vice versa
+ *      - blt with bge and vice versa
+ *      - bltu with bgeu and vice versa
+ */
+
+unsigned emitter::emitOutput_BTypeInstr_InvertComparation(
+    BYTE* dst, instruction ins, regNumber rs1, regNumber rs2, unsigned imm13) const
+{
+    unsigned insCode = emitInsCode(ins) ^ 0x1000;
+#ifdef DEBUG
+    emitOutput_BTypeInstr_SanityCheck(ins, rs1, rs2);
+#endif // DEBUG
+    unsigned opcode = insCode & kInstructionOpcodeMask;
+    unsigned funct3 = (insCode & kInstructionFunct3Mask) >> 12;
+    return emitOutput_Instr(dst, insEncodeBTypeInstr(opcode, funct3, rs1, rs2, imm13));
+}
+
+/*****************************************************************************
+ *
+ *  Emit a 32-bit RISCV64 J-Type instruction to the given buffer. Returns a
+ *  length of an encoded instruction opcode
+ *
+ */
+
+unsigned emitter::emitOutput_JTypeInstr(BYTE* dst, instruction ins, regNumber rd, unsigned imm21) const
+{
+    // The instruction code of `ins` is handed to the encoder unmasked, as its opcode argument.
+    unsigned insCode = emitInsCode(ins);
+#ifdef DEBUG
+    emitOutput_JTypeInstr_SanityCheck(ins, rd);
+#endif // DEBUG
+    // insEncodeJTypeInstr asserts that imm21 fits in 21 bits and is even, so signed offsets
+    // have to be trimmed by the caller (see the TrimSigned helpers) before they get here.
+    return emitOutput_Instr(dst, insEncodeJTypeInstr(insCode, rd, imm21));
+}
+
  void emitter::emitOutputInstrJumpDistanceHelper(const insGroup* ig,
                                                  instrDescJmp*   jmp,
                                                  UNATIVE_OFFSET& dstOffs,
                                                  const BYTE*&    dstAddr) const
  {
-    // TODO-RISCV64-BUG: Currently the iiaEncodedInstrCount is not set by the riscv impl making distinguishing the jump
-    // to label and an instruction-count based jumps impossible
      if (jmp->idAddr()->iiaHasInstrCount())
      {
          assert(ig != nullptr);
@@ -2147,7 +2712,13 @@ void emitter::emitOutputInstrJumpDistanceHelper(const insGroup* ig,
      dstAddr = emitOffsetToPtr(dstOffs);
  }
  
-ssize_t emitter::emitOutputInstrJumpDistance(const BYTE* dst, const BYTE* src, const insGroup* ig, instrDescJmp* jmp)
+/*****************************************************************************
+ *
+ *  Calculates a current jump instruction distance
+ *
+ */
+
+ssize_t emitter::emitOutputInstrJumpDistance(const BYTE* src, const insGroup* ig, instrDescJmp* jmp)
  {
      UNATIVE_OFFSET srcOffs = emitCurCodeOffs(src);
      const BYTE*    srcAddr = emitOffsetToPtr(srcOffs);
@@ -2182,672 +2753,464 @@ ssize_t emitter::emitOutputInstrJumpDistance(const BYTE* dst, const BYTE* src, c
      return distVal;
  }
  
-/*****************************************************************************
- *
- *  Append the machine code corresponding to the given instruction descriptor
- *  to the code block at '*dp'; the base of the code block is 'bp', and 'ig'
- *  is the instruction group that contains the instruction. Updates '*dp' to
- *  point past the generated code, and returns the size of the instruction
- *  descriptor in bytes.
- */
+static constexpr size_t NBitMask(uint8_t bits)
+{
+    // All-ones shifted left by `bits` has zeros in the low `bits` positions; inverting that
+    // yields a mask with exactly the low `bits` bits set. `bits` must be smaller than the
+    // width of size_t, otherwise the shift is undefined.
+    return ~(~static_cast<size_t>(0) << bits);
+}
  
-size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
+template <uint8_t MaskSize>
+static ssize_t LowerNBitsOfWord(ssize_t word)
  {
-    BYTE* const       dst    = *dp;
-    BYTE*             dstRW  = *dp + writeableOffset;
-    BYTE*             dstRW2 = dstRW + 4; // addr for updating gc info if needed.
-    const BYTE* const odstRW = dstRW;
-    const BYTE* const odst   = *dp;
-    code_t            code   = 0;
-    instruction       ins;
-    size_t            sz; // = emitSizeOfInsDsc(id);
+    static_assert(MaskSize < 32, "Given mask size is bigger than the word itself");
+    static_assert(MaskSize > 0, "Given mask size cannot be zero");
  
-    assert(REG_NA == (int)REG_NA);
+    static constexpr size_t kMask = NBitMask(MaskSize);
  
-    insOpts insOp = id->idInsOpt();
+    return word & kMask;
+}
  
-    switch (insOp)
-    {
-        case INS_OPTS_RELOC:
-        {
-            regNumber reg1 = id->idReg1();
+template <uint8_t MaskSize>
+static ssize_t UpperNBitsOfWord(ssize_t word)
+{
+    static constexpr size_t kShift = 32 - MaskSize;
  
-            *(code_t*)dstRW = 0x00000017 | (code_t)(reg1 << 7);
+    return LowerNBitsOfWord<MaskSize>(word >> kShift);
+}
  
-            dstRW += 4;
+template <uint8_t MaskSize>
+static ssize_t UpperNBitsOfWordSignExtend(ssize_t word)
+{
+    static constexpr unsigned kSignExtend = 1 << (31 - MaskSize);
  
-#ifdef DEBUG
-            code = emitInsCode(INS_auipc);
-            assert(code == 0x00000017);
-            code = emitInsCode(INS_addi);
-            assert(code == 0x00000013);
-            code = emitInsCode(INS_ld);
-            assert(code == 0x00003003);
-#endif
+    return UpperNBitsOfWord<MaskSize>(word + kSignExtend);
+}
  
-            if (id->idIsCnsReloc())
-            {
-                ins             = INS_addi;
-                *(code_t*)dstRW = 0x00000013 | (code_t)(reg1 << 7) | (code_t)(reg1 << 15);
-            }
-            else
-            {
-                assert(id->idIsDspReloc());
-                ins             = INS_ld;
-                *(code_t*)dstRW = 0x00003003 | (code_t)(reg1 << 7) | (code_t)(reg1 << 15);
-            }
+static ssize_t UpperWordOfDoubleWord(ssize_t immediate)
+{
+    // Drop the low 32-bit word; what was in bits [63:32] ends up in the low half
+    // (assumes a 64-bit ssize_t, as on the RISCV64 target).
+    static constexpr unsigned kWordBits = 32;
+
+    return immediate >> kWordBits;
+}
  
-            dstRW += 4;
+static ssize_t LowerWordOfDoubleWord(ssize_t immediate)
+{
+    static constexpr size_t kWordMask = NBitMask(32);
  
-            emitRecordRelocation(dstRW - 8 - writeableOffset, id->idAddr()->iiaAddr, IMAGE_REL_RISCV64_PC);
+    return immediate & kWordMask;
+}
  
-            sz = sizeof(instrDesc);
-        }
-        break;
-        case INS_OPTS_I:
-        {
-            ssize_t   imm  = (ssize_t)(id->idAddr()->iiaAddr);
-            regNumber reg1 = id->idReg1();
+template <uint8_t UpperMaskSize, uint8_t LowerMaskSize>
+static ssize_t DoubleWordSignExtend(ssize_t doubleWord)
+{
+    static constexpr size_t kLowerSignExtend = static_cast<size_t>(1) << (63 - LowerMaskSize);
+    static constexpr size_t kUpperSignExtend = static_cast<size_t>(1) << (63 - UpperMaskSize);
  
-            switch (id->idCodeSize())
-            {
-                case 8:
-                {
-                    if (id->idReg2())
-                    { // special for INT64_MAX or UINT32_MAX;
-                        code = emitInsCode(INS_addi);
-                        code |= (code_t)reg1 << 7;
-                        code |= (code_t)REG_R0 << 15;
-                        code |= 0xfff << 10;
-
-                        *(code_t*)dstRW = code;
-                        dstRW += 4;
-
-                        ssize_t ui6 = (imm == INT64_MAX) ? 1 : 32;
-                        code        = emitInsCode(INS_srli);
-                        code |= ((code_t)(reg1 << 7) | ((code_t)(reg1 << 15)) | (ui6 << 20));
-                        *(code_t*)dstRW = code;
-                    }
-                    else
-                    {
-                        code = emitInsCode(INS_lui);
-                        code |= (code_t)(reg1 << 7);
-                        code |= ((code_t)((imm + 0x800) >> 12) & 0xfffff) << 12;
-
-                        *(code_t*)dstRW = code;
-                        dstRW += 4;
-
-                        code = emitInsCode(INS_addi);
-                        code |= (code_t)reg1 << 7;
-                        code |= (code_t)reg1 << 15;
-                        code |= (code_t)(imm & 0xfff) << 20;
-                        *(code_t*)dstRW = code;
-                    }
-                    break;
-                }
-                case 32:
-                {
-                    ssize_t high = (imm >> 32) & 0xffffffff;
-                    code         = emitInsCode(INS_lui);
-                    code |= (code_t)reg1 << 7;
-                    code |= ((code_t)((high + 0x800) >> 12) & 0xfffff) << 12;
-
-                    *(code_t*)dstRW = code;
-                    dstRW += 4;
-
-                    code = emitInsCode(INS_addi);
-                    code |= (code_t)reg1 << 7;
-                    code |= (code_t)reg1 << 15;
-                    code |= (code_t)(high & 0xfff) << 20;
-                    *(code_t*)dstRW = code;
-                    dstRW += 4;
-
-                    ssize_t low = imm & 0xffffffff;
-
-                    code = emitInsCode(INS_slli);
-                    code |= (code_t)reg1 << 7;
-                    code |= (code_t)reg1 << 15;
-                    code |= (code_t)11 << 20;
-                    *(code_t*)dstRW = code;
-                    dstRW += 4;
-
-                    code = emitInsCode(INS_addi);
-                    code |= (code_t)reg1 << 7;
-                    code |= (code_t)reg1 << 15;
-                    code |= (code_t)((low >> 21) & 0x7ff) << 20;
-                    *(code_t*)dstRW = code;
-                    dstRW += 4;
-
-                    code = emitInsCode(INS_slli);
-                    code |= (code_t)reg1 << 7;
-                    code |= (code_t)reg1 << 15;
-                    code |= (code_t)11 << 20;
-                    *(code_t*)dstRW = code;
-                    dstRW += 4;
-
-                    code = emitInsCode(INS_addi);
-                    code |= (code_t)reg1 << 7;
-                    code |= (code_t)reg1 << 15;
-                    code |= (code_t)((low >> 10) & 0x7ff) << 20;
-                    *(code_t*)dstRW = code;
-                    dstRW += 4;
-
-                    code = emitInsCode(INS_slli);
-                    code |= (code_t)reg1 << 7;
-                    code |= (code_t)reg1 << 15;
-                    code |= (code_t)10 << 20;
-                    *(code_t*)dstRW = code;
-                    dstRW += 4;
-
-                    code = emitInsCode(INS_addi);
-                    code |= (code_t)reg1 << 7;
-                    code |= (code_t)reg1 << 15;
-                    code |= (code_t)((low)&0x3ff) << 20;
-                    *(code_t*)dstRW = code;
-                    break;
-                }
-                default:
-                    unreached();
-                    break;
-            }
+    return doubleWord + (kLowerSignExtend | kUpperSignExtend);
+}
  
-            ins = INS_addi;
-            dstRW += 4;
+template <uint8_t UpperMaskSize>
+static ssize_t UpperWordOfDoubleWordSingleSignExtend(ssize_t doubleWord)
+{
+    static constexpr size_t kUpperSignExtend = static_cast<size_t>(1) << (31 - UpperMaskSize);
  
-            sz = sizeof(instrDesc);
-        }
-        break;
-        case INS_OPTS_RC:
-        {
-            // Reference to JIT data
-            assert(id->idAddr()->iiaIsJitDataOffset());
-            assert(id->idGCref() == GCT_NONE);
+    return UpperWordOfDoubleWord(doubleWord + kUpperSignExtend);
+}
  
-            int doff = id->idAddr()->iiaGetJitDataOffset();
-            assert(doff >= 0);
+template <uint8_t UpperMaskSize, uint8_t LowerMaskSize>
+static ssize_t UpperWordOfDoubleWordDoubleSignExtend(ssize_t doubleWord)
+{
+    // Apply both sign-extension adjustments to the full value first, then keep its upper word.
+    const ssize_t adjusted = DoubleWordSignExtend<UpperMaskSize, LowerMaskSize>(doubleWord);
+
+    return UpperWordOfDoubleWord(adjusted);
+}
  
-            ssize_t imm = emitGetInsSC(id);
-            assert((imm >= 0) && (imm < 0x4000)); // 0x4000 is arbitrary, currently 'imm' is always 0.
+/*static*/ unsigned emitter::TrimSignedToImm12(int imm12)
+{
+    assert(isValidSimm12(imm12));
  
-            unsigned dataOffs = (unsigned)(doff + imm);
+    return static_cast<unsigned>(LowerNBitsOfWord<12>(imm12));
+}
  
-            assert(dataOffs < emitDataSize());
+/*static*/ unsigned emitter::TrimSignedToImm13(int imm13)
+{
+    assert(isValidSimm13(imm13));
  
-            ins            = id->idIns();
-            regNumber reg1 = id->idReg1();
+    return static_cast<unsigned>(LowerNBitsOfWord<13>(imm13));
+}
  
-            if (id->idIsReloc())
-            {
-                // get the addr-offset of the data.
-                imm = (ssize_t)emitConsBlock - (ssize_t)(dstRW - writeableOffset) + dataOffs;
-                assert(imm > 0);
-                assert(!(imm & 3));
+/*static*/ unsigned emitter::TrimSignedToImm20(int imm20)
+{
+    assert(isValidSimm20(imm20));
  
-                doff = (int)(imm & 0xfff);
-                assert(isValidSimm20((imm + 0x800) >> 12));
+    return static_cast<unsigned>(LowerNBitsOfWord<20>(imm20));
+}
  
-#ifdef DEBUG
-                code = emitInsCode(INS_auipc);
-                assert(code == 0x00000017);
-#endif
-                code            = 0x00000017 | (codeGen->rsGetRsvdReg() << 7);
-                *(code_t*)dstRW = code | ((code_t)((imm + 0x800) & 0xfffff000));
-                dstRW += 4;
+/*static*/ unsigned emitter::TrimSignedToImm21(int imm21)
+{
+    assert(isValidSimm21(imm21));
  
-                if (ins == INS_jal)
-                {
-                    assert(isGeneralRegister(reg1));
-                    ins = INS_addi;
-#ifdef DEBUG
-                    code = emitInsCode(INS_addi);
-                    assert(code == 0x00000013);
-#endif
-                    code            = 0x00000013 | (codeGen->rsGetRsvdReg() << 15);
-                    *(code_t*)dstRW = code | ((code_t)reg1 << 7) | (((code_t)doff & 0xfff) << 20);
-                }
-                else
-                {
-                    code = emitInsCode(ins);
-                    code |= (code_t)(reg1 & 0x1f) << 7;
-                    code |= (code_t)codeGen->rsGetRsvdReg() << 15;
-                    code |= (code_t)(doff & 0xfff) << 20;
-                    *(code_t*)dstRW = code;
-                }
-                dstRW += 4;
-            }
-            else
-            {
-                // get the addr of the data.
-                imm = (ssize_t)emitConsBlock + dataOffs;
+    return static_cast<unsigned>(LowerNBitsOfWord<21>(imm21));
+}
  
-                code = emitInsCode(INS_lui);
-                if (ins == INS_jal)
-                {
-                    assert((imm >> 40) == 0);
-
-                    doff = imm & 0x7ff;
-
-                    UINT32 high = imm >> 11;
-
-                    code |= (code_t)codeGen->rsGetRsvdReg() << 7;
-                    code |= (code_t)(((high + 0x800) >> 12) << 12);
-                    *(code_t*)dstRW = code;
-                    dstRW += 4;
-
-                    code = emitInsCode(INS_addi);
-                    code |= (code_t)codeGen->rsGetRsvdReg() << 7;
-                    code |= (code_t)codeGen->rsGetRsvdReg() << 15;
-                    code |= (code_t)(high & 0xFFF) << 20;
-                    *(code_t*)dstRW = code;
-                    dstRW += 4;
-
-                    code = emitInsCode(INS_slli);
-                    code |= (code_t)codeGen->rsGetRsvdReg() << 7;
-                    code |= (code_t)codeGen->rsGetRsvdReg() << 15;
-                    code |= (code_t)11 << 20;
-                    *(code_t*)dstRW = code;
-                    dstRW += 4;
-
-                    ins  = INS_addi;
-                    code = emitInsCode(INS_addi);
-                    code |= (code_t)reg1 << 7;
-                    code |= (code_t)codeGen->rsGetRsvdReg() << 15;
-                    code |= (code_t)doff << 20;
-                    *(code_t*)dstRW = code;
-                    dstRW += 4;
-                }
-                else
-                {
-                    assert((imm >> 40) == 0);
-
-                    doff        = imm & 0x7ff;
-                    UINT32 high = imm >> 11;
-
-                    code |= (code_t)(codeGen->rsGetRsvdReg() << 7);
-                    code |= (code_t)(((high + 0x800) >> 12) << 12);
-                    *(code_t*)dstRW = code;
-                    dstRW += 4;
-
-                    code = emitInsCode(INS_addi);
-                    code |= (code_t)codeGen->rsGetRsvdReg() << 7;
-                    code |= (code_t)codeGen->rsGetRsvdReg() << 15;
-                    code |= (code_t)(high & 0xFFF) << 20;
-                    *(code_t*)dstRW = code;
-                    dstRW += 4;
-
-                    code = emitInsCode(INS_slli);
-                    code |= (code_t)codeGen->rsGetRsvdReg() << 7;
-                    code |= (code_t)codeGen->rsGetRsvdReg() << 15;
-                    code |= (code_t)11 << 20;
-                    *(code_t*)dstRW = code;
-                    dstRW += 4;
-
-                    code = emitInsCode(ins);
-                    code |= (code_t)(reg1 & 0x1f) << 7;
-                    code |= (code_t)codeGen->rsGetRsvdReg() << 15;
-                    code |= (code_t)doff << 20;
-                    *(code_t*)dstRW = code;
-                    dstRW += 4;
-                }
-            }
+BYTE* emitter::emitOutputInstr_OptsReloc(BYTE* dst, const instrDesc* id, instruction* ins)
+{
+    BYTE* const     dstBase = dst;
+    const regNumber reg1    = id->idReg1();
  
-            sz = sizeof(instrDesc);
-        }
-        break;
+    dst += emitOutput_UTypeInstr(dst, INS_auipc, reg1, 0);
  
-        case INS_OPTS_RL:
-        {
-            insGroup* tgtIG          = (insGroup*)emitCodeGetCookie(id->idAddr()->iiaBBlabel);
-            id->idAddr()->iiaIGlabel = tgtIG;
+    if (id->idIsCnsReloc())
+    {
+        *ins = INS_addi;
+    }
+    else
+    {
+        assert(id->idIsDspReloc());
+        *ins = INS_ld;
+    }
  
-            regNumber reg1 = id->idReg1();
-            assert(isGeneralRegister(reg1));
+    dst += emitOutput_ITypeInstr(dst, *ins, reg1, reg1, 0);
  
-            if (id->idIsReloc())
-            {
-                ssize_t imm = (ssize_t)tgtIG->igOffs;
-                imm         = (ssize_t)emitCodeBlock + imm - (ssize_t)(dstRW - writeableOffset);
-                assert((imm & 3) == 0);
+    emitRecordRelocation(dstBase, id->idAddr()->iiaAddr, IMAGE_REL_RISCV64_PC);
  
-                int doff = (int)(imm & 0xfff);
-                assert(isValidSimm20((imm + 0x800) >> 12));
+    return dst;
+}
  
-                code            = 0x00000017;
-                *(code_t*)dstRW = code | (code_t)reg1 << 7 | ((imm + 0x800) & 0xfffff000);
-                dstRW += 4;
-#ifdef DEBUG
-                code = emitInsCode(INS_auipc);
-                assert(code == 0x00000017);
-                code = emitInsCode(INS_addi);
-                assert(code == 0x00000013);
-#endif
-                ins             = INS_addi;
-                *(code_t*)dstRW = 0x00000013 | ((code_t)reg1 << 7) | ((code_t)reg1 << 15) | ((doff & 0xfff) << 20);
-            }
-            else
-            {
-                ssize_t imm = (ssize_t)tgtIG->igOffs + (ssize_t)emitCodeBlock;
-                assert((imm >> (32 + 20)) == 0);
-
-                code = emitInsCode(INS_lui);
-                code |= (code_t)codeGen->rsGetRsvdReg() << 7;
-                code |= ((code_t)((imm + 0x800) >> 12) & 0xfffff) << 12;
-
-                *(code_t*)dstRW = code;
-                dstRW += 4;
-
-                code = emitInsCode(INS_addi);
-                code |= (code_t)codeGen->rsGetRsvdReg() << 7;
-                code |= (code_t)codeGen->rsGetRsvdReg() << 15;
-                code |= (code_t)(imm & 0xfff) << 20;
-                *(code_t*)dstRW = code;
-                dstRW += 4;
-
-                code = emitInsCode(INS_addi);
-                code |= (code_t)reg1 << 7;
-                code |= (((imm + 0x80000800) >> 32) & 0xfff) << 20;
-                *(code_t*)dstRW = code;
-                dstRW += 4;
-
-                code = emitInsCode(INS_slli);
-                code |= (code_t)reg1 << 7;
-                code |= (code_t)reg1 << 15;
-                code |= (code_t)32 << 20;
-                *(code_t*)dstRW = code;
-                dstRW += 4;
-
-                ins  = INS_add;
-                code = emitInsCode(INS_add);
-                code |= (code_t)reg1 << 7;
-                code |= (code_t)reg1 << 15;
-                code |= (code_t)codeGen->rsGetRsvdReg() << 20;
-                *(code_t*)dstRW = code;
-            }
+BYTE* emitter::emitOutputInstr_OptsI(BYTE* dst, const instrDesc* id) // Emits the INS_OPTS_I sequence; the form is chosen by idCodeSize().
+{
+    ssize_t         immediate = reinterpret_cast<ssize_t>(id->idAddr()->iiaAddr); // the immediate is carried in the iiaAddr field
+    const regNumber reg1      = id->idReg1();
  
-            dstRW += 4;
+    switch (id->idCodeSize())
+    {
+        case 8: // two-instruction form
+            return emitOutputInstr_OptsI8(dst, id, immediate, reg1);
+        case 32: // eight-instruction form
+            return emitOutputInstr_OptsI32(dst, immediate, reg1);
+        default:
+            break;
+    }
+    unreached(); // no other code size is handled for this option
+    return nullptr;
+}
  
-            sz = sizeof(instrDesc);
-        }
-        break;
-        case INS_OPTS_JALR:
-        {
-            instrDescJmp* jmp = (instrDescJmp*)id;
+BYTE* emitter::emitOutputInstr_OptsI8(BYTE* dst, const instrDesc* id, ssize_t immediate, regNumber reg1) // 8-byte form: two instructions that build 'immediate' in reg1; returns the advanced dst.
+{
+    if (id->idReg2()) // a non-zero idReg2 selects the special addi + srli form
+    {
+        // special for INT64_MAX or UINT32_MAX
+        dst += emitOutput_ITypeInstr(dst, INS_addi, reg1, REG_R0, NBitMask(12));
+        const ssize_t shiftValue = (immediate == INT64_MAX) ? 1 : 32; // shift by 1 for INT64_MAX, by 32 otherwise
+        dst += emitOutput_ITypeInstr(dst, INS_srli, reg1, reg1, shiftValue);
+    }
+    else
+    {
+        dst += emitOutput_UTypeInstr(dst, INS_lui, reg1, UpperNBitsOfWordSignExtend<20>(immediate)); // lui carries the upper 20 bits
+        dst += emitOutput_ITypeInstr(dst, INS_addi, reg1, reg1, LowerNBitsOfWord<12>(immediate)); // addi adds the low 12 bits
+    }
+    return dst;
+}
  
-            regNumber reg1 = id->idReg1();
-            {
-                ssize_t imm = emitOutputInstrJumpDistance(dstRW, dst, ig, jmp);
-                imm -= 4;
+BYTE* emitter::emitOutputInstr_OptsI32(BYTE* dst, ssize_t immediate, regNumber reg1) // 32-byte form: lui/addi for the upper word, then three shift-and-add steps for the lower word.
+{
+    const ssize_t upperWord = UpperWordOfDoubleWord(immediate);
+    dst += emitOutput_UTypeInstr(dst, INS_lui, reg1, UpperNBitsOfWordSignExtend<20>(upperWord));
+    dst += emitOutput_ITypeInstr(dst, INS_addi, reg1, reg1, LowerNBitsOfWord<12>(upperWord));
+    const ssize_t lowerWord = LowerWordOfDoubleWord(immediate); // appended below in 11 + 11 + 10 bit pieces
+    dst += emitOutput_ITypeInstr(dst, INS_slli, reg1, reg1, 11);
+    dst += emitOutput_ITypeInstr(dst, INS_addi, reg1, reg1, LowerNBitsOfWord<11>(lowerWord >> 21)); // bits 31..21
+    dst += emitOutput_ITypeInstr(dst, INS_slli, reg1, reg1, 11);
+    dst += emitOutput_ITypeInstr(dst, INS_addi, reg1, reg1, LowerNBitsOfWord<11>(lowerWord >> 10)); // bits 20..10
+    dst += emitOutput_ITypeInstr(dst, INS_slli, reg1, reg1, 10);
+    dst += emitOutput_ITypeInstr(dst, INS_addi, reg1, reg1, LowerNBitsOfWord<10>(lowerWord)); // bits 9..0
+    return dst;
+}
  
-                assert((imm & 0x3) == 0);
+BYTE* emitter::emitOutputInstr_OptsRc(BYTE* dst, const instrDesc* id, instruction* ins) // Emits the INS_OPTS_RC sequence whose operand is a JIT data offset; the final instruction is reported via *ins.
+{
+    assert(id->idAddr()->iiaIsJitDataOffset());
+    assert(id->idGCref() == GCT_NONE);
  
-                ins = jmp->idIns();
-                assert(jmp->idCodeSize() > 4); // The original INS_OPTS_JALR: not used by now!!!
-                switch (jmp->idCodeSize())
-                {
-                    case 8:
-                    {
-                        assert((INS_blt <= ins && ins <= INS_bgeu) || (INS_beq == ins) || (INS_bne == ins) ||
-                               (INS_bnez == ins) || (INS_beqz == ins));
-                        assert(isValidSimm21(imm));
-                        assert((emitInsCode(INS_bne) & 0xefff) == emitInsCode(INS_beq));
-                        assert((emitInsCode(INS_bge) & 0xefff) == emitInsCode(INS_blt));
-                        assert((emitInsCode(INS_bgeu) & 0xefff) == emitInsCode(INS_bltu));
-
-                        regNumber reg2 = REG_R0;
-                        if (INS_beqz != ins && INS_bnez != ins)
-                            reg2 = id->idReg2();
-                        code     = emitInsCode(ins) ^ 0x1000;
-                        code |= (code_t)reg1 << 15; /* rj */
-                        code |= (code_t)reg2 << 20; /* rd */
-                        code |= 0x8 << 7;
-                        *(code_t*)dstRW = code;
-                        dstRW += 4;
-
-                        code = emitInsCode(INS_jal);
-                        code |= ((imm >> 12) & 0xff) << 12;
-                        code |= ((imm >> 11) & 0x1) << 20;
-                        code |= ((imm >> 1) & 0x3ff) << 21;
-                        code |= ((imm >> 20) & 0x1) << 31;
-
-                        *(code_t*)dstRW = code;
-                        dstRW += 4;
-                        break;
-                    }
-                    case 24:
-                    {
-                        assert(ins == INS_j || ins == INS_jal);
-                        // Make target address with offset, then jump (JALR) with the target address
-                        imm               = imm - 2 * 4;
-                        regNumber tmpReg1 = REG_RA;
-                        ssize_t   high    = ((imm + 0x80000000) >> 32) & 0xffffffff;
-                        code              = emitInsCode(INS_lui);
-                        code |= (code_t)tmpReg1 << 7;
-                        code |= ((code_t)((high + 0x800) >> 12) & 0xfffff) << 12;
-
-                        *(code_t*)dstRW = code;
-                        dstRW += 4;
-
-                        code = emitInsCode(INS_addi);
-                        code |= (code_t)tmpReg1 << 7;
-                        code |= (code_t)tmpReg1 << 15;
-                        code |= (code_t)(high & 0xfff) << 20;
-                        *(code_t*)dstRW = code;
-                        dstRW += 4;
-
-                        code = emitInsCode(INS_slli);
-                        code |= (code_t)tmpReg1 << 7;
-                        code |= (code_t)tmpReg1 << 15;
-                        code |= (code_t)32 << 20;
-                        *(code_t*)dstRW = code;
-                        dstRW += 4;
-
-                        regNumber tmpReg2 = codeGen->rsGetRsvdReg();
-                        ssize_t   low     = imm & 0xffffffff;
-                        code              = emitInsCode(INS_auipc);
-                        code |= (code_t)tmpReg2 << 7;
-                        code |= ((code_t)((low + 0x800) >> 12) & 0xfffff) << 12;
-
-                        *(code_t*)dstRW = code;
-                        dstRW += 4;
-
-                        code = emitInsCode(INS_add);
-                        code |= (code_t)tmpReg2 << 7;
-                        code |= (code_t)tmpReg1 << 15;
-                        code |= (code_t)tmpReg2 << 20;
-                        *(code_t*)dstRW = code;
-                        dstRW += 4;
-
-                        code = emitInsCode(INS_jalr);
-                        code |= (code_t)REG_RA << 7; // use REG_RA for returning
-                        code |= (code_t)tmpReg2 << 15;
-                        code |= (code_t)(low & 0xfff) << 20;
-                        *(code_t*)dstRW = code;
-                        dstRW += 4;
-                        break;
-                    }
-                    case 28:
-                    {
-                        assert((INS_blt <= ins && ins <= INS_bgeu) || (INS_beq == ins) || (INS_bne == ins) ||
-                               (INS_bnez == ins) || (INS_beqz == ins));
-                        assert((emitInsCode(INS_bne) & 0xefff) == emitInsCode(INS_beq));
-                        assert((emitInsCode(INS_bge) & 0xefff) == emitInsCode(INS_blt));
-                        assert((emitInsCode(INS_bgeu) & 0xefff) == emitInsCode(INS_bltu));
-
-                        regNumber reg2 = REG_R0;
-                        if (INS_beqz != ins && INS_bnez != ins)
-                            reg2 = id->idReg2();
-                        code     = emitInsCode(ins) ^ 0x1000;
-                        code |= (code_t)reg1 << 15; /* rj */
-                        code |= (code_t)reg2 << 20; /* rd */
-                        code |= 28 << 7;
-                        *(code_t*)dstRW = code;
-                        dstRW += 4;
-
-                        // Make target address with offset, then jump (JALR) with the target address
-                        imm               = imm - 2 * 4;
-                        regNumber tmpReg1 = REG_RA;
-                        ssize_t   high    = ((imm + 0x80000000) >> 32) & 0xffffffff;
-                        code              = emitInsCode(INS_lui);
-                        code |= (code_t)tmpReg1 << 7;
-                        code |= ((code_t)((high + 0x800) >> 12) & 0xfffff) << 12;
-
-                        *(code_t*)dstRW = code;
-                        dstRW += 4;
-
-                        code = emitInsCode(INS_addi);
-                        code |= (code_t)tmpReg1 << 7;
-                        code |= (code_t)tmpReg1 << 15;
-                        code |= (code_t)(high & 0xfff) << 20;
-                        *(code_t*)dstRW = code;
-                        dstRW += 4;
-
-                        code = emitInsCode(INS_slli);
-                        code |= (code_t)tmpReg1 << 7;
-                        code |= (code_t)tmpReg1 << 15;
-                        code |= (code_t)32 << 20;
-                        *(code_t*)dstRW = code;
-                        dstRW += 4;
-
-                        regNumber tmpReg2 = codeGen->rsGetRsvdReg();
-                        ssize_t   low     = imm & 0xffffffff;
-                        code              = emitInsCode(INS_auipc);
-                        code |= (code_t)tmpReg2 << 7;
-                        code |= ((code_t)((low + 0x800) >> 12) & 0xfffff) << 12;
-
-                        *(code_t*)dstRW = code;
-                        dstRW += 4;
-
-                        code = emitInsCode(INS_add);
-                        code |= (code_t)tmpReg2 << 7;
-                        code |= (code_t)tmpReg1 << 15;
-                        code |= (code_t)tmpReg2 << 20;
-                        *(code_t*)dstRW = code;
-                        dstRW += 4;
-
-                        code = emitInsCode(INS_jalr);
-                        code |= (code_t)REG_RA << 7; // use REG_RA for returning
-                        code |= (code_t)tmpReg2 << 15;
-                        code |= (code_t)(low & 0xfff) << 20;
-                        *(code_t*)dstRW = code;
-                        dstRW += 4;
+    const int dataOffs = id->idAddr()->iiaGetJitDataOffset();
+    assert(dataOffs >= 0);
  
-                        break;
-                    }
+    const ssize_t immediate = emitGetInsSC(id);
+    assert((immediate >= 0) && (immediate < 0x4000)); // 0x4000 is arbitrary, currently 'imm' is always 0.
  
-                    default:
-                        unreached();
-                        break;
-                }
-            }
-            sz = sizeof(instrDescJmp);
-        }
-        break;
-        case INS_OPTS_J_cond:
-        {
-            ssize_t imm = emitOutputInstrJumpDistance(dstRW, dst, ig, static_cast<instrDescJmp*>(id));
-            assert(isValidSimm13(imm));
-            assert(!(imm & 1));
+    const unsigned offset = static_cast<unsigned>(dataOffs + immediate); // data offset plus the descriptor's small constant
+    assert(offset < emitDataSize());
  
-            ins  = id->idIns();
-            code = emitInsCode(ins);
-            code |= ((code_t)id->idReg1()) << 15;
-            code |= ((code_t)id->idReg2()) << 20;
-            code |= ((imm >> 11) & 0x1) << 7;
-            code |= ((imm >> 1) & 0xf) << 8;
-            code |= ((imm >> 5) & 0x3f) << 25;
-            code |= ((imm >> 12) & 0x1) << 31;
-            *(code_t*)dstRW = code;
-            dstRW += 4;
-
-            sz = sizeof(instrDescJmp);
-        }
-        break;
-        case INS_OPTS_J:
-            // jal/j/jalr/bnez/beqz/beq/bne/blt/bge/bltu/bgeu dstRW-relative.
-            {
-                ssize_t imm = emitOutputInstrJumpDistance(dstRW, dst, ig, static_cast<instrDescJmp*>(id));
-                assert((imm & 3) == 0);
+    *ins                 = id->idIns(); // the helpers below may replace jal with addi
+    const regNumber reg1 = id->idReg1();
  
-                ins  = id->idIns();
-                code = emitInsCode(ins);
-                if (ins == INS_jal)
-                {
-                    assert(isValidSimm21(imm));
-                    code |= ((imm >> 12) & 0xff) << 12;
-                    code |= ((imm >> 11) & 0x1) << 20;
-                    code |= ((imm >> 1) & 0x3ff) << 21;
-                    code |= ((imm >> 20) & 0x1) << 31;
-                    code |= REG_RA << 7;
-                }
-                else if (ins == INS_j)
-                {
-                    assert(isValidSimm21(imm));
-                    code |= ((imm >> 12) & 0xff) << 12;
-                    code |= ((imm >> 11) & 0x1) << 20;
-                    code |= ((imm >> 1) & 0x3ff) << 21;
-                    code |= ((imm >> 20) & 0x1) << 31;
-                }
-                else if (ins == INS_jalr)
-                {
-                    assert(isValidSimm12(imm));
-                    code |= ((code_t)(imm & 0xfff) << 20);
-                    code |= ((code_t)id->idReg1()) << 7;
-                    code |= ((code_t)id->idReg2()) << 15;
-                }
-                else if (ins == INS_bnez || ins == INS_beqz)
-                {
-                    assert(isValidSimm13(imm));
-                    code |= (code_t)id->idReg1() << 15;
-                    code |= ((imm >> 11) & 0x1) << 7;
-                    code |= ((imm >> 1) & 0xf) << 8;
-                    code |= ((imm >> 5) & 0x3f) << 25;
-                    code |= ((imm >> 12) & 0x1) << 31;
-                }
-                else if ((INS_beq <= ins) && (ins <= INS_bgeu))
-                {
-                    assert(isValidSimm13(imm));
-                    code |= ((code_t)id->idReg1()) << 15;
-                    code |= ((code_t)id->idReg2()) << 20;
-                    code |= ((imm >> 11) & 0x1) << 7;
-                    code |= ((imm >> 1) & 0xf) << 8;
-                    code |= ((imm >> 5) & 0x3f) << 25;
-                    code |= ((imm >> 12) & 0x1) << 31;
-                }
-                else
-                {
-                    unreached();
-                }
+    if (id->idIsReloc()) // relocatable descriptors get the PC-relative form, others the absolute-address form
+    {
+        return emitOutputInstr_OptsRcReloc(dst, ins, offset, reg1);
+    }
+    return emitOutputInstr_OptsRcNoReloc(dst, ins, offset, reg1);
+}
  
-                *(code_t*)dstRW = code;
-                dstRW += 4;
+BYTE* emitter::emitOutputInstr_OptsRcReloc(BYTE* dst, instruction* ins, unsigned offset, regNumber reg1) // PC-relative form: auipc into the reserved register, then *ins (jal becomes addi) with the low 12 bits.
+{
+    const ssize_t immediate = (emitConsBlock - dst) + offset; // distance from the current output position to the data item
+    assert((immediate > 0) && ((immediate & 0x03) == 0));
  
-                sz = sizeof(instrDescJmp);
-            }
-            break;
-        case INS_OPTS_C:
-            if (id->idIsLargeCall())
-            {
-                /* Must be a "fat" call descriptor */
-                sz = sizeof(instrDescCGCA);
-            }
-            else
-            {
-                assert(!id->idIsLargeDsp());
-                assert(!id->idIsLargeCns());
-                sz = sizeof(instrDesc);
-            }
-            dstRW += emitOutputCall(ig, *dp, id, 0);
+    const regNumber rsvdReg = codeGen->rsGetRsvdReg();
+    dst += emitOutput_UTypeInstr(dst, INS_auipc, rsvdReg, UpperNBitsOfWordSignExtend<20>(immediate));
+
+    instruction lastIns = *ins;
+
+    if (*ins == INS_jal) // jal is rewritten to addi, and the caller's *ins is updated as well
+    {
+        *ins = lastIns = INS_addi;
+    }
+    dst += emitOutput_ITypeInstr(dst, lastIns, reg1, rsvdReg, LowerNBitsOfWord<12>(immediate));
+    return dst;
+}
+
+BYTE* emitter::emitOutputInstr_OptsRcNoReloc(BYTE* dst, instruction* ins, unsigned offset, regNumber reg1) // Absolute form: builds the data address piecewise in the reserved register, then issues *ins (jal becomes addi).
+{
+    const ssize_t immediate = reinterpret_cast<ssize_t>(emitConsBlock) + offset; // absolute address of the data item
+    assertCodeLength(static_cast<size_t>(immediate), 48); // RISC-V Linux Kernel SV48
+    const regNumber rsvdReg = codeGen->rsGetRsvdReg();
+
+    const instruction lastIns = (*ins == INS_jal) ? (*ins = INS_addi) : *ins; // also updates the caller's *ins when it was jal
+    const ssize_t     high    = immediate >> 16; // bits 16 and above
+
+    dst += emitOutput_UTypeInstr(dst, INS_lui, rsvdReg, UpperNBitsOfWordSignExtend<20>(high));
+    dst += emitOutput_ITypeInstr(dst, INS_addi, rsvdReg, rsvdReg, LowerNBitsOfWord<12>(high));
+    dst += emitOutput_ITypeInstr(dst, INS_slli, rsvdReg, rsvdReg, 5);
+    dst += emitOutput_ITypeInstr(dst, INS_addi, rsvdReg, rsvdReg, LowerNBitsOfWord<5>(immediate >> 11)); // bits 15..11
+    dst += emitOutput_ITypeInstr(dst, INS_slli, rsvdReg, rsvdReg, 11);
+    dst += emitOutput_ITypeInstr(dst, lastIns, reg1, rsvdReg, LowerNBitsOfWord<11>(immediate)); // low 11 bits ride in the final instruction's immediate
+    return dst;
+}
+
+BYTE* emitter::emitOutputInstr_OptsRl(BYTE* dst, instrDesc* id, instruction* ins) // Emits the INS_OPTS_RL sequence that forms emitCodeBlock + target group offset in idReg1; *ins receives the last opcode emitted.
+{
+    insGroup* targetInsGroup = static_cast<insGroup*>(emitCodeGetCookie(id->idAddr()->iiaBBlabel));
+    id->idAddr()->iiaIGlabel = targetInsGroup; // store the resolved instruction group in the descriptor
+
+    const regNumber reg1   = id->idReg1();
+    const ssize_t   igOffs = targetInsGroup->igOffs;
+
+    if (id->idIsReloc())
+    {
+        *ins = INS_addi; // the PC-relative sequence ends with addi
+        return emitOutputInstr_OptsRlReloc(dst, igOffs, reg1);
+    }
+    *ins = INS_add; // the absolute sequence ends with add
+    return emitOutputInstr_OptsRlNoReloc(dst, igOffs, reg1);
+}
+
+BYTE* emitter::emitOutputInstr_OptsRlReloc(BYTE* dst, ssize_t igOffs, regNumber reg1) // PC-relative form: auipc + addi on reg1 using the distance from dst to emitCodeBlock + igOffs.
+{
+    const ssize_t immediate = (emitCodeBlock - dst) + igOffs; // distance from the current output position to the target
+    assert((immediate & 0x03) == 0);
+
+    dst += emitOutput_UTypeInstr(dst, INS_auipc, reg1, UpperNBitsOfWordSignExtend<20>(immediate));
+    dst += emitOutput_ITypeInstr(dst, INS_addi, reg1, reg1, LowerNBitsOfWord<12>(immediate));
+    return dst;
+}
+
+BYTE* emitter::emitOutputInstr_OptsRlNoReloc(BYTE* dst, ssize_t igOffs, regNumber reg1) // Absolute form: low part via the reserved register, upper part via reg1, combined by a final add into reg1.
+{
+    const ssize_t immediate = reinterpret_cast<ssize_t>(emitCodeBlock) + igOffs; // absolute target address
+    assertCodeLength(static_cast<size_t>(immediate), 48); // RISC-V Linux Kernel SV48
+
+    const regNumber rsvdReg      = codeGen->rsGetRsvdReg();
+    const ssize_t   upperSignExt = UpperWordOfDoubleWordDoubleSignExtend<32, 52>(immediate);
+
+    dst += emitOutput_UTypeInstr(dst, INS_lui, rsvdReg, UpperNBitsOfWordSignExtend<20>(immediate));
+    dst += emitOutput_ITypeInstr(dst, INS_addi, rsvdReg, rsvdReg, LowerNBitsOfWord<12>(immediate));
+    dst += emitOutput_UTypeInstr(dst, INS_lui, reg1, LowerNBitsOfWord<16>(upperSignExt)); // lui places its operand at bit 12 ...
+    dst += emitOutput_ITypeInstr(dst, INS_slli, reg1, reg1, 20); // ... and 20 more bits of shift move it to bit 32
+    dst += emitOutput_RTypeInstr(dst, INS_add, reg1, reg1, rsvdReg);
+    return dst;
+}
  
-            dstRW2 = dstRW;
-            ins    = INS_nop;
+BYTE* emitter::emitOutputInstr_OptsJalr(BYTE* dst, instrDescJmp* jmp, const insGroup* ig, instruction* ins) // Emits the multi-instruction INS_OPTS_JALR jump/branch; the layout is chosen by idCodeSize().
+{
+    const ssize_t immediate = emitOutputInstrJumpDistance(dst, ig, jmp) - 4; // NOTE(review): the -4 presumably rebases the distance past the first emitted instruction - confirm
+    assert((immediate & 0x03) == 0);
+
+    *ins = jmp->idIns();
+    switch (jmp->idCodeSize())
+    {
+        case 8: // inverted branch followed by jal
+            return emitOutputInstr_OptsJalr8(dst, jmp, immediate);
+        case 24: // six-instruction far jump, only for jal/j
+            assert(jmp->idInsIs(INS_jal, INS_j));
+            return emitOutputInstr_OptsJalr24(dst, immediate);
+        case 28: // inverted branch followed by the six-instruction far jump
+            return emitOutputInstr_OptsJalr28(dst, jmp, immediate);
+        default:
+            // case 0 - 4: The original INS_OPTS_JALR: not used by now!!!
              break;
+    }
+    unreached();
+    return nullptr;
+}
+
+BYTE* emitter::emitOutputInstr_OptsJalr8(BYTE* dst, const instrDescJmp* jmp, ssize_t immediate) // 8-byte form: inverted conditional branch, then jal (rd = REG_ZERO) with the 21-bit offset.
+{
+    const regNumber reg2 = jmp->idInsIs(INS_beqz, INS_bnez) ? REG_R0 : jmp->idReg2(); // beqz/bnez compare against the zero register
+
+    dst += emitOutput_BTypeInstr_InvertComparation(dst, jmp->idIns(), jmp->idReg1(), reg2, 0x8); // branch offset 0x8 = size of this two-instruction sequence
+    dst += emitOutput_JTypeInstr(dst, INS_jal, REG_ZERO, TrimSignedToImm21(immediate));
+    return dst;
+}
+
+BYTE* emitter::emitOutputInstr_OptsJalr24(BYTE* dst, ssize_t immediate) // 24-byte form: lui/addi/slli on REG_RA, auipc/add on the reserved register, then jalr linking into REG_RA.
+{
+    // Make target address with offset, then jump (JALR) with the target address
+    immediate -= 2 * 4; // NOTE(review): presumably rebases the offset onto the auipc emitted below - confirm
+    const ssize_t high = UpperWordOfDoubleWordSingleSignExtend<0>(immediate);
+
+    dst += emitOutput_UTypeInstr(dst, INS_lui, REG_RA, UpperNBitsOfWordSignExtend<20>(high));
+    dst += emitOutput_ITypeInstr(dst, INS_addi, REG_RA, REG_RA, LowerNBitsOfWord<12>(high));
+    dst += emitOutput_ITypeInstr(dst, INS_slli, REG_RA, REG_RA, 32); // move the upper word into bits 63..32
+
+    const regNumber rsvdReg = codeGen->rsGetRsvdReg();
+    const ssize_t   low     = LowerWordOfDoubleWord(immediate);
+
+    dst += emitOutput_UTypeInstr(dst, INS_auipc, rsvdReg, UpperNBitsOfWordSignExtend<20>(low));
+    dst += emitOutput_RTypeInstr(dst, INS_add, rsvdReg, REG_RA, rsvdReg); // combine the upper word with the pc-relative part
+    dst += emitOutput_ITypeInstr(dst, INS_jalr, REG_RA, rsvdReg, LowerNBitsOfWord<12>(low)); // low 12 bits ride in the jalr immediate
+
+    return dst;
+}
+
+BYTE* emitter::emitOutputInstr_OptsJalr28(BYTE* dst, const instrDescJmp* jmp, ssize_t immediate) // 28-byte form: inverted conditional branch, then the 24-byte far-jump sequence.
+{
+    regNumber reg2 = jmp->idInsIs(INS_beqz, INS_bnez) ? REG_R0 : jmp->idReg2(); // beqz/bnez compare against the zero register
  
-        // case INS_OPTS_NONE:
+    dst += emitOutput_BTypeInstr_InvertComparation(dst, jmp->idIns(), jmp->idReg1(), reg2, 0x1c); // branch offset 0x1c = 28, the size of the whole sequence
+
+    return emitOutputInstr_OptsJalr24(dst, immediate);
+}
+
+BYTE* emitter::emitOutputInstr_OptsJCond(BYTE* dst, instrDesc* id, const insGroup* ig, instruction* ins) // Emits one B-type conditional branch for INS_OPTS_J_cond; *ins receives the opcode.
+{
+    const ssize_t immediate = emitOutputInstrJumpDistance(dst, ig, static_cast<instrDescJmp*>(id)); // the descriptor is treated as an instrDescJmp
+
+    *ins = id->idIns();
+
+    dst += emitOutput_BTypeInstr(dst, *ins, id->idReg1(), id->idReg2(), TrimSignedToImm13(immediate));
+    return dst;
+}
+
+BYTE* emitter::emitOutputInstr_OptsJ(BYTE* dst, instrDesc* id, const insGroup* ig, instruction* ins) // Emits one jump/branch instruction for INS_OPTS_J, encoded according to its opcode; *ins receives the opcode.
+{
+    const ssize_t immediate = emitOutputInstrJumpDistance(dst, ig, static_cast<instrDescJmp*>(id));
+    assert((immediate & 0x03) == 0);
+
+    *ins = id->idIns();
+
+    switch (*ins)
+    {
+        case INS_jal: // J-type with rd = REG_RA
+            dst += emitOutput_JTypeInstr(dst, INS_jal, REG_RA, TrimSignedToImm21(immediate));
+            break;
+        case INS_j: // J-type with rd = REG_ZERO
+            dst += emitOutput_JTypeInstr(dst, INS_j, REG_ZERO, TrimSignedToImm21(immediate));
+            break;
+        case INS_jalr: // I-type, registers taken from the descriptor
+            dst += emitOutput_ITypeInstr(dst, INS_jalr, id->idReg1(), id->idReg2(), TrimSignedToImm12(immediate));
+            break;
+        case INS_bnez:
+        case INS_beqz: // B-type with REG_ZERO as the second operand
+            dst += emitOutput_BTypeInstr(dst, *ins, id->idReg1(), REG_ZERO, TrimSignedToImm13(immediate));
+            break;
+        case INS_beq:
+        case INS_bne:
+        case INS_blt:
+        case INS_bge:
+        case INS_bltu:
+        case INS_bgeu: // B-type comparing idReg1 with idReg2
+            dst += emitOutput_BTypeInstr(dst, *ins, id->idReg1(), id->idReg2(), TrimSignedToImm13(immediate));
+            break;
          default:
-            *(code_t*)dstRW = id->idAddr()->iiaGetInstrEncode();
-            dstRW += 4;
+            unreached();
+            break;
+    }
+    return dst;
+}
+
+BYTE* emitter::emitOutputInstr_OptsC(BYTE* dst, instrDesc* id, const insGroup* ig, size_t* size) // Emits a call for INS_OPTS_C via emitOutputCall and reports the descriptor size through *size.
+{
+    if (id->idIsLargeCall()) // large calls use the instrDescCGCA descriptor type
+    {
+        *size = sizeof(instrDescCGCA);
+    }
+    else
+    {
+        assert(!id->idIsLargeDsp());
+        assert(!id->idIsLargeCns());
+        *size = sizeof(instrDesc);
+    }
+    dst += emitOutputCall(ig, dst, id, 0); // emitOutputCall's return value is used as the number of bytes written
+    return dst;
+}
+
+/*****************************************************************************
+ *
+ *  Append the machine code corresponding to the given instruction descriptor
+ *  'id' to the code block at '*dp'; 'ig' is the instruction group that
+ *  contains the instruction. Updates '*dp' to point past the generated code,
+ *  and returns the size of the instruction descriptor in bytes.
+ */
+
+size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
+{
+    BYTE*             dst  = *dp;
+    BYTE*             dst2 = dst + 4;
+    const BYTE* const odst = *dp;
+    instruction       ins;
+    size_t            sz = 0;
+
+    static_assert(REG_NA == static_cast<int>(REG_NA), "REG_NA must fit in an int");
+
+    insOpts insOp = id->idInsOpt();
+
+    switch (insOp)
+    {
+        case INS_OPTS_RELOC:
+            dst = emitOutputInstr_OptsReloc(dst, id, &ins);
+            sz  = sizeof(instrDesc);
+            break;
+        case INS_OPTS_I:
+            dst = emitOutputInstr_OptsI(dst, id);
+            ins = INS_addi;
+            sz  = sizeof(instrDesc);
+            break;
+        case INS_OPTS_RC:
+            dst = emitOutputInstr_OptsRc(dst, id, &ins);
+            sz  = sizeof(instrDesc);
+            break;
+        case INS_OPTS_RL:
+            dst = emitOutputInstr_OptsRl(dst, id, &ins);
+            sz  = sizeof(instrDesc);
+            break;
+        case INS_OPTS_JALR:
+            dst = emitOutputInstr_OptsJalr(dst, static_cast<instrDescJmp*>(id), ig, &ins);
+            sz  = sizeof(instrDescJmp);
+            break;
+        case INS_OPTS_J_cond:
+            dst = emitOutputInstr_OptsJCond(dst, id, ig, &ins);
+            sz  = sizeof(instrDescJmp);
+            break;
+        case INS_OPTS_J:
+            // jal/j/jalr/bnez/beqz/beq/bne/blt/bge/bltu/bgeu dstRW-relative.
+            dst = emitOutputInstr_OptsJ(dst, id, ig, &ins);
+            sz  = sizeof(instrDescJmp);
+            break;
+        case INS_OPTS_C:
+            dst  = emitOutputInstr_OptsC(dst, id, ig, &sz);
+            dst2 = dst;
+            ins  = INS_nop;
+            break;
+        default: // case INS_OPTS_NONE:
+            dst += emitOutput_Instr(dst, id->idAddr()->iiaGetInstrEncode());
              ins = id->idIns();
-            sz  = emitSizeOfInsDsc(id);
+            sz  = sizeof(instrDesc);
              break;
      }
  
@@ -2860,11 +3223,11 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
          // We assume that "idReg1" is the primary destination register for all instructions
          if (id->idGCref() != GCT_NONE)
          {
-            emitGCregLiveUpd(id->idGCref(), id->idReg1(), dstRW2 - writeableOffset);
+            emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst2);
          }
          else
          {
-            emitGCregDeadUpd(id->idReg1(), dstRW2 - writeableOffset);
+            emitGCregDeadUpd(id->idReg1(), dst2);
          }
      }
  
@@ -2878,7 +3241,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
          int      adr = emitComp->lvaFrameAddress(varNum, &FPbased);
          if (id->idGCref() != GCT_NONE)
          {
-            emitGCvarLiveUpd(adr + ofs, varNum, id->idGCref(), dstRW2 - writeableOffset DEBUG_ARG(varNum));
+            emitGCvarLiveUpd(adr + ofs, varNum, id->idGCref(), dst2 DEBUG_ARG(varNum));
          }
          else
          {
@@ -2895,7 +3258,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
                  vt              = tmpDsc->tdTempType();
              }
              if (vt == TYP_REF || vt == TYP_BYREF)
-                emitGCvarDeadUpd(adr + ofs, dstRW2 - writeableOffset DEBUG_ARG(varNum));
+                emitGCvarDeadUpd(adr + ofs, dst2 DEBUG_ARG(varNum));
          }
          // if (emitInsWritesToLclVarStackLocPair(id))
          //{
@@ -2934,7 +3297,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
  #else  // !DUMP_GC_TABLES
          bool dspOffs = !emitComp->opts.disDiffable;
  #endif // !DUMP_GC_TABLES
-        emitDispIns(id, false, dspOffs, true, emitCurCodeOffs(odst), *dp, (dstRW - odstRW), ig);
+        emitDispIns(id, false, dspOffs, true, emitCurCodeOffs(odst), *dp, (dst - odst), ig);
      }
  
      if (emitComp->compDebugBreak)
@@ -2949,28 +3312,42 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
  #else  // !DEBUG
      if (emitComp->opts.disAsm)
      {
-        emitDispIns(id, false, false, true, emitCurCodeOffs(odst), *dp, (dstRW - odstRW), ig);
+        emitDispIns(id, false, false, true, emitCurCodeOffs(odst), *dp, (dst - odst), ig);
      }
  #endif // !DEBUG
  
      /* All instructions are expected to generate code */
  
-    assert(*dp != (dstRW - writeableOffset));
+    assert(*dp != dst);
  
-    *dp = dstRW - writeableOffset;
+    *dp = dst;
  
      return sz;
  }
  
-bool emitter::emitDispBranchInstrType(unsigned opcode2) const
+/*****************************************************************************/
+/*****************************************************************************/
+
+// clang-format off
+static const char* const RegNames[] = // register display names: one 'sname' string per REGDEF entry in register.h
  {
+    #define REGDEF(name, rnum, mask, sname) sname,
+    #include "register.h"
+};
+// clang-format on
+
+bool emitter::emitDispBranchInstrType(unsigned opcode2, bool is_zero_reg, bool& print_second_reg) const
+{
+    print_second_reg = true;
      switch (opcode2)
      {
          case 0:
-            printf("beq ");
+            printf(is_zero_reg ? "beqz" : "beq ");
+            print_second_reg = !is_zero_reg;
              break;
          case 1:
-            printf("bne ");
+            printf(is_zero_reg ? "bnez" : "bne ");
+            print_second_reg = !is_zero_reg;
              break;
          case 4:
              printf("blt ");
@@ -3022,17 +3399,19 @@ void emitter::emitDispBranchLabel(const instrDesc* id) const
      printf("L_M%03u_", FMT_BB, emitComp->compMethodID, id->idAddr()->iiaBBlabel->bbNum);
  }
  
-bool emitter::emitDispBranch(unsigned         opcode2,
-                             const char*      register1Name,
-                             const char*      register2Name,
-                             const instrDesc* id,
-                             const insGroup*  ig) const
+bool emitter::emitDispBranch(
+    unsigned opcode2, unsigned rs1, unsigned rs2, const instrDesc* id, const insGroup* ig) const
  {
-    if (!emitDispBranchInstrType(opcode2))
+    bool print_second_reg = true;
+    if (!emitDispBranchInstrType(opcode2, rs2 == REG_ZERO, print_second_reg))
      {
          return false;
      }
-    printf("           %s, %s, ", register1Name, register2Name);
+    printf("           %s, ", RegNames[rs1]);
+    if (print_second_reg)
+    {
+        printf("%s, ", RegNames[rs2]);
+    }
      assert(id != nullptr);
      if (id->idAddr()->iiaHasInstrCount())
      {
@@ -3046,23 +3425,12 @@ bool emitter::emitDispBranch(unsigned         opcode2,
      }
      printf("\n");
      return true;
-}
-
-void emitter::emitDispIllegalInstruction(code_t instructionCode)
-{
-    printf("RISCV64 illegal instruction: 0x%08X\n", instructionCode);
-}
-
-/*****************************************************************************/
-/*****************************************************************************/
+}
  
-// clang-format off
-static const char* const RegNames[] =
+void emitter::emitDispIllegalInstruction(code_t instructionCode) // Prints a diagnostic line with the raw instruction encoding as 8 hex digits.
  {
-    #define REGDEF(name, rnum, mask, sname) sname,
-    #include "register.h"
-};
-// clang-format on
+    printf("RISCV64 illegal instruction: 0x%08X\n", instructionCode);
+}
  
  //----------------------------------------------------------------------------------------
  // Disassemble the given instruction.
@@ -3085,6 +3453,8 @@ static const char* const RegNames[] =
  void emitter::emitDispInsName(
      code_t code, const BYTE* addr, bool doffs, unsigned insOffset, const instrDesc* id, const insGroup* ig)
  {
+    static constexpr int kMaxInstructionLength = 14;
+
      const BYTE* insAdr = addr - writeableOffset;
  
      unsigned int opcode = code & 0x7f;
@@ -3121,51 +3491,95 @@ void emitter::emitDispInsName(
          }
          case 0x13:
          {
-            unsigned int opcode2 = (code >> 12) & 0x7;
-            const char*  rd      = RegNames[(code >> 7) & 0x1f];
-            const char*  rs1     = RegNames[(code >> 15) & 0x1f];
-            int          imm12   = (((int)code) >> 20); // & 0xfff;
-            // if (imm12 & 0x800)
-            //{
-            //    imm12 |= 0xfffff000;
-            //}
+            unsigned opcode2      = (code >> 12) & 0x7;
+            unsigned rd           = (code >> 7) & 0x1f;
+            unsigned rs1          = (code >> 15) & 0x1f;
+            int      imm12        = static_cast<int>(code) >> 20;
+            bool     isHex        = false;
+            bool     hasImmediate = true;
+            int      printLength  = 0;
+
              switch (opcode2)
              {
-                case 0x0: // ADDI
-                    printf("addi           %s, %s, %d\n", rd, rs1, imm12);
-                    return;
-                case 0x1:                                                         // SLLI
-                    printf("slli           %s, %s, %d\n", rd, rs1, imm12 & 0x3f); // 6 BITS for SHAMT in RISCV64
-                    return;
+                case 0x0: // ADDI & MV & NOP
+                    if (code == emitInsCode(INS_nop))
+                    {
+                        printf("nop\n");
+                        return;
+                    }
+                    else if (imm12 != 0)
+                    {
+                        printLength = printf("addi");
+                    }
+                    else
+                    {
+                        printLength  = printf("mv");
+                        hasImmediate = false;
+                    }
+                    break;
+                case 0x1: // SLLI
+                {
+                    static constexpr unsigned kSlliFunct6 = 0b000000;
+
+                    unsigned funct6 = (imm12 >> 6) & 0x3f;
+                    // SLLI's instruction code's upper 6 bits have to be equal to zero
+                    if (funct6 != kSlliFunct6)
+                    {
+                        return emitDispIllegalInstruction(code);
+                    }
+                    printLength = printf("slli");
+                    imm12 &= 0x3f; // 6 BITS for SHAMT in RISCV64
+                }
+                break;
                  case 0x2: // SLTI
-                    printf("slti           %s, %s, %d\n", rd, rs1, imm12);
-                    return;
+                    printLength = printf("slti");
+                    break;
                  case 0x3: // SLTIU
-                    printf("sltiu          %s, %s, %d\n", rd, rs1, imm12);
-                    return;
+                    printLength = printf("sltiu");
+                    break;
                  case 0x4: // XORI
-                    printf("xori           %s, %s, 0x%x\n", rd, rs1, imm12);
-                    return;
+                    printLength = printf("xori");
+                    isHex       = true;
+                    break;
                  case 0x5: // SRLI & SRAI
-                    if (((code >> 30) & 0x1) == 0)
-                    {
-                        printf("srli           %s, %s, %d\n", rd, rs1, imm12 & 0x3f); // 6BITS for SHAMT in RISCV64
-                    }
-                    else
+                {
+                    static constexpr unsigned kLogicalShiftFunct6    = 0b000000;
+                    static constexpr unsigned kArithmeticShiftFunct6 = 0b010000;
+
+                    unsigned funct6         = (imm12 >> 6) & 0x3f;
+                    bool     isLogicalShift = funct6 == kLogicalShiftFunct6;
+                    if ((!isLogicalShift) && (funct6 != kArithmeticShiftFunct6))
                      {
-                        printf("srai           %s, %s, %d\n", rd, rs1, imm12 & 0x3f); // 6BITS for SHAMT in RISCV64
+                        return emitDispIllegalInstruction(code);
                      }
-                    return;
+                    printLength = printf(isLogicalShift ? "srli" : "srai");
+                    imm12 &= 0x3f; // 6 BITS for SHAMT in RISCV64
+                }
+                break;
                  case 0x6: // ORI
-                    printf("ori            %s, %s, 0x%x\n", rd, rs1, imm12 & 0xfff);
-                    return;
+                    printLength = printf("ori");
+                    imm12 &= 0xfff;
+                    isHex = true;
+                    break;
                  case 0x7: // ANDI
-                    printf("andi           %s, %s, 0x%x\n", rd, rs1, imm12 & 0xfff);
-                    return;
+                    printLength = printf("andi");
+                    imm12 &= 0xfff;
+                    isHex = true;
+                    break;
                  default:
-                    printf("RISCV64 illegal instruction: 0x%08X\n", code);
-                    return;
+                    return emitDispIllegalInstruction(code);
+            }
+            assert(printLength > 0);
+            int paddingLength = kMaxInstructionLength - printLength;
+
+            printf("%*s %s, %s", paddingLength, "", RegNames[rd], RegNames[rs1]);
+            if (hasImmediate)
+            {
+                printf(isHex ? ", 0x%x" : ", %d", imm12);
              }
+            printf("\n");
+
+            return;
          }
          case 0x1b:
          {
@@ -3179,188 +3593,208 @@ void emitter::emitDispInsName(
              //}
              switch (opcode2)
              {
-                case 0x0: // ADDIW
-                    printf("addiw          %s, %s, %d\n", rd, rs1, imm12);
+                case 0x0: // ADDIW & SEXT.W
+                    if (imm12 == 0)
+                    {
+                        printf("sext.w         %s, %s\n", rd, rs1);
+                    }
+                    else
+                    {
+                        printf("addiw          %s, %s, %d\n", rd, rs1, imm12);
+                    }
                      return;
-                case 0x1:                                                         // SLLIW
-                    printf("slliw          %s, %s, %d\n", rd, rs1, imm12 & 0x3f); // 6 BITS for SHAMT in RISCV64
+                case 0x1: // SLLIW
+                {
+                    static constexpr unsigned kSlliwFunct7 = 0b0000000;
+
+                    unsigned funct7 = (imm12 >> 5) & 0x7f;
+                    // SLLIW's instruction code's upper 7 bits have to be equal to zero
+                    if (funct7 == kSlliwFunct7)
+                    {
+                        printf("slliw          %s, %s, %d\n", rd, rs1, imm12 & 0x1f); // 5 BITS for SHAMT in RISCV64
+                    }
+                    else
+                    {
+                        emitDispIllegalInstruction(code);
+                    }
+                }
                      return;
                  case 0x5: // SRLIW & SRAIW
-                    if (((code >> 30) & 0x1) == 0)
+                {
+                    static constexpr unsigned kLogicalShiftFunct7    = 0b0000000;
+                    static constexpr unsigned kArithmeticShiftFunct7 = 0b0100000;
+
+                    unsigned funct7 = (imm12 >> 5) & 0x7f;
+                    if (funct7 == kLogicalShiftFunct7)
                      {
                          printf("srliw          %s, %s, %d\n", rd, rs1, imm12 & 0x1f); // 5BITS for SHAMT in RISCV64
                      }
-                    else
+                    else if (funct7 == kArithmeticShiftFunct7)
                      {
                          printf("sraiw          %s, %s, %d\n", rd, rs1, imm12 & 0x1f); // 5BITS for SHAMT in RISCV64
                      }
+                    else
+                    {
+                        emitDispIllegalInstruction(code);
+                    }
+                }
                      return;
                  default:
-                    printf("RISCV64 illegal instruction: 0x%08X\n", code);
-                    return;
+                    return emitDispIllegalInstruction(code);
              }
          }
          case 0x33:
          {
-            unsigned int opcode2 = (code >> 25) & 0x3;
+            unsigned int opcode2 = (code >> 25) & 0x7f;
              unsigned int opcode3 = (code >> 12) & 0x7;
              const char*  rd      = RegNames[(code >> 7) & 0x1f];
              const char*  rs1     = RegNames[(code >> 15) & 0x1f];
              const char*  rs2     = RegNames[(code >> 20) & 0x1f];
-            if (opcode2 == 0)
+
+            switch (opcode2)
              {
-                switch (opcode3)
-                {
-                    case 0x0: // ADD & SUB
-                        if (((code >> 30) & 0x1) == 0)
-                        {
+                case 0b0000000:
+                    switch (opcode3)
+                    {
+                        case 0x0: // ADD
                              printf("add            %s, %s, %s\n", rd, rs1, rs2);
-                        }
-                        else
-                        {
-                            printf("sub            %s, %s, %s\n", rd, rs1, rs2);
-                        }
-                        return;
-                    case 0x1: // SLL
-                        printf("sll            %s, %s, %s\n", rd, rs1, rs2);
-                        return;
-                    case 0x2: // SLT
-                        printf("slt            %s, %s, %s\n", rd, rs1, rs2);
-                        return;
-                    case 0x3: // SLTU
-                        printf("sltu           %s, %s, %s\n", rd, rs1, rs2);
-                        return;
-                    case 0x4: // XOR
-                        printf("xor            %s, %s, %s\n", rd, rs1, rs2);
-                        return;
-                    case 0x5: // SRL & SRA
-                        if (((code >> 30) & 0x1) == 0)
-                        {
+                            return;
+                        case 0x1: // SLL
+                            printf("sll            %s, %s, %s\n", rd, rs1, rs2);
+                            return;
+                        case 0x2: // SLT
+                            printf("slt            %s, %s, %s\n", rd, rs1, rs2);
+                            return;
+                        case 0x3: // SLTU
+                            printf("sltu           %s, %s, %s\n", rd, rs1, rs2);
+                            return;
+                        case 0x4: // XOR
+                            printf("xor            %s, %s, %s\n", rd, rs1, rs2);
+                            return;
+                        case 0x5: // SRL
                              printf("srl            %s, %s, %s\n", rd, rs1, rs2);
-                        }
-                        else
-                        {
+                            return;
+                        case 0x6: // OR
+                            printf("or             %s, %s, %s\n", rd, rs1, rs2);
+                            return;
+                        case 0x7: // AND
+                            printf("and            %s, %s, %s\n", rd, rs1, rs2);
+                            return;
+                        default:
+                            return emitDispIllegalInstruction(code);
+                    }
+                    return;
+                case 0b0100000:
+                    switch (opcode3)
+                    {
+                        case 0x0: // SUB
+                            printf("sub            %s, %s, %s\n", rd, rs1, rs2);
+                            return;
+                        case 0x5: // SRA
                              printf("sra            %s, %s, %s\n", rd, rs1, rs2);
-                        }
-                        return;
-                    case 0x6: // OR
-                        printf("or             %s, %s, %s\n", rd, rs1, rs2);
-                        return;
-                    case 0x7: // AND
-                        printf("and            %s, %s, %s\n", rd, rs1, rs2);
-                        return;
-                    default:
-                        printf("RISCV64 illegal instruction: 0x%08X\n", code);
-                        return;
-                }
-            }
-            else if (opcode2 == 0x1)
-            {
-                switch (opcode3)
-                {
-                    case 0x0: // MUL
-                        printf("mul            %s, %s, %s\n", rd, rs1, rs2);
-                        return;
-                    case 0x1: // MULH
-                        printf("mulh           %s, %s, %s\n", rd, rs1, rs2);
-                        return;
-                    case 0x2: // MULHSU
-                        printf("mulhsu         %s, %s, %s\n", rd, rs1, rs2);
-                        return;
-                    case 0x3: // MULHU
-                        printf("mulhu          %s, %s, %s\n", rd, rs1, rs2);
-                        return;
-                    case 0x4: // DIV
-                        printf("div            %s, %s, %s\n", rd, rs1, rs2);
-                        return;
-                    case 0x5: // DIVU
-                        printf("divu           %s, %s, %s\n", rd, rs1, rs2);
-                        return;
-                    case 0x6: // REM
-                        printf("rem            %s, %s, %s\n", rd, rs1, rs2);
-                        return;
-                    case 0x7: // REMU
-                        printf("remu           %s, %s, %s\n", rd, rs1, rs2);
-                        return;
-                    default:
-                        printf("RISCV64 illegal instruction: 0x%08X\n", code);
-                        return;
-                }
-            }
-            else
-            {
-                printf("RISCV64 illegal instruction: 0x%08X\n", code);
-                return;
+                            return;
+                        default:
+                            return emitDispIllegalInstruction(code);
+                    }
+                    return;
+                case 0b0000001:
+                    switch (opcode3)
+                    {
+                        case 0x0: // MUL
+                            printf("mul            %s, %s, %s\n", rd, rs1, rs2);
+                            return;
+                        case 0x1: // MULH
+                            printf("mulh           %s, %s, %s\n", rd, rs1, rs2);
+                            return;
+                        case 0x2: // MULHSU
+                            printf("mulhsu         %s, %s, %s\n", rd, rs1, rs2);
+                            return;
+                        case 0x3: // MULHU
+                            printf("mulhu          %s, %s, %s\n", rd, rs1, rs2);
+                            return;
+                        case 0x4: // DIV
+                            printf("div            %s, %s, %s\n", rd, rs1, rs2);
+                            return;
+                        case 0x5: // DIVU
+                            printf("divu           %s, %s, %s\n", rd, rs1, rs2);
+                            return;
+                        case 0x6: // REM
+                            printf("rem            %s, %s, %s\n", rd, rs1, rs2);
+                            return;
+                        case 0x7: // REMU
+                            printf("remu           %s, %s, %s\n", rd, rs1, rs2);
+                            return;
+                        default:
+                            return emitDispIllegalInstruction(code);
+                    }
+                    return;
+                default:
+                    return emitDispIllegalInstruction(code);
              }
          }
          case 0x3b:
          {
-            unsigned int opcode2 = (code >> 25) & 0x3;
+            unsigned int opcode2 = (code >> 25) & 0x7f;
              unsigned int opcode3 = (code >> 12) & 0x7;
              const char*  rd      = RegNames[(code >> 7) & 0x1f];
              const char*  rs1     = RegNames[(code >> 15) & 0x1f];
              const char*  rs2     = RegNames[(code >> 20) & 0x1f];
  
-            if (opcode2 == 0)
+            switch (opcode2)
              {
-                switch (opcode3)
-                {
-                    case 0x0: // ADDW & SUBW
-                        if (((code >> 30) & 0x1) == 0)
-                        {
+                case 0b0000000:
+                    switch (opcode3)
+                    {
+                        case 0x0: // ADDW
                              printf("addw           %s, %s, %s\n", rd, rs1, rs2);
-                        }
-                        else
-                        {
-                            printf("subw           %s, %s, %s\n", rd, rs1, rs2);
-                        }
-                        return;
-                    case 0x1: // SLLW
-                        printf("sllw           %s, %s, %s\n", rd, rs1, rs2);
-                        return;
-                    case 0x5: // SRLW & SRAW
-                        if (((code >> 30) & 0x1) == 0)
-                        {
+                            return;
+                        case 0x1: // SLLW
+                            printf("sllw           %s, %s, %s\n", rd, rs1, rs2);
+                            return;
+                        case 0x5: // SRLW
                              printf("srlw           %s, %s, %s\n", rd, rs1, rs2);
-                        }
-                        else
-                        {
+                            return;
+                        default:
+                            return emitDispIllegalInstruction(code);
+                    }
+                    return;
+                case 0b0100000:
+                    switch (opcode3)
+                    {
+                        case 0x0: // SUBW
+                            printf("subw           %s, %s, %s\n", rd, rs1, rs2);
+                            return;
+                        case 0x5: // SRAW
                              printf("sraw           %s, %s, %s\n", rd, rs1, rs2);
-                        }
-                        return;
-                    default:
-                        printf("RISCV64 illegal instruction: 0x%08X\n", code);
-                        return;
-                }
-            }
-            else if (opcode2 == 1)
-            {
-                switch (opcode3)
-                {
-                    case 0x0: // MULW
-                        printf("mulw           %s, %s, %s\n", rd, rs1, rs2);
-                        return;
-                    case 0x4: // DIVW
-                        printf("divw           %s, %s, %s\n", rd, rs1, rs2);
-                        return;
-                    case 0x5: // DIVUW
-                        printf("divuw          %s, %s, %s\n", rd, rs1, rs2);
-                        return;
-                    case 0x6: // REMW
-                        printf("remw           %s, %s, %s\n", rd, rs1, rs2);
-                        return;
-                    case 0x7: // REMUW
-                        printf("remuw          %s, %s, %s\n", rd, rs1, rs2);
-                        return;
-                    default:
-                        printf("RISCV64 illegal instruction: 0x%08X\n", code);
-                        return;
-                }
-            }
-            else
-            {
-                printf("RISCV64 illegal instruction: 0x%08X\n", code);
-                return;
+                            return;
+                        default:
+                            return emitDispIllegalInstruction(code);
+                    }
+                    return;
+                case 0b0000001:
+                    switch (opcode3)
+                    {
+                        case 0x0: // MULW
+                            printf("mulw           %s, %s, %s\n", rd, rs1, rs2);
+                            return;
+                        case 0x4: // DIVW
+                            printf("divw           %s, %s, %s\n", rd, rs1, rs2);
+                            return;
+                        case 0x5: // DIVUW
+                            printf("divuw          %s, %s, %s\n", rd, rs1, rs2);
+                            return;
+                        case 0x6: // REMW
+                            printf("remw           %s, %s, %s\n", rd, rs1, rs2);
+                            return;
+                        case 0x7: // REMUW
+                            printf("remuw          %s, %s, %s\n", rd, rs1, rs2);
+                            return;
+                        default:
+                            return emitDispIllegalInstruction(code);
+                    }
+                    return;
+                default:
+                    return emitDispIllegalInstruction(code);
              }
          }
          case 0x23:
@@ -3395,9 +3829,9 @@ void emitter::emitDispInsName(
          }
          case 0x63: // BRANCH
          {
-            unsigned int opcode2 = (code >> 12) & 0x7;
-            const char*  rs1     = RegNames[(code >> 15) & 0x1f];
-            const char*  rs2     = RegNames[(code >> 20) & 0x1f];
+            unsigned opcode2 = (code >> 12) & 0x7;
+            unsigned rs1     = (code >> 15) & 0x1f;
+            unsigned rs2     = (code >> 20) & 0x1f;
              // int offset = (((code >> 31) & 0x1) << 12) | (((code >> 7) & 0x1) << 11) | (((code >> 25) & 0x3f) << 5) |
              //              (((code >> 8) & 0xf) << 1);
              // if (offset & 0x800)
@@ -3474,14 +3908,21 @@ void emitter::emitDispInsName(
          }
          case 0x6f:
          {
-            const char* rd = RegNames[(code >> 7) & 0x1f];
-            int offset = (((code >> 31) & 0x1) << 20) | (((code >> 12) & 0xff) << 12) | (((code >> 20) & 0x1) << 11) |
+            unsigned rd = (code >> 7) & 0x1f;
+            int offset  = (((code >> 31) & 0x1) << 20) | (((code >> 12) & 0xff) << 12) | (((code >> 20) & 0x1) << 11) |
                           (((code >> 21) & 0x3ff) << 1);
              if (offset & 0x80000)
              {
                  offset |= 0xfff00000;
              }
-            printf("jal            %s, %d", rd, offset);
+            if (rd == REG_ZERO)
+            {
+                printf("j              %d", offset);
+            }
+            else
+            {
+                printf("jal            %s, %d", RegNames[rd], offset);
+            }
              CORINFO_METHOD_HANDLE handle = (CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie;
              if (handle != 0)
              {
@@ -4049,6 +4490,12 @@ void emitter::emitDispIns(
          instrSize = sizeof(code_t);
          code_t instruction;
          memcpy(&instruction, instr, instrSize);
+#ifdef DEBUG
+        if (emitComp->verbose && i != 0)
+        {
+            printf("        ");
+        }
+#endif
          emitDispInsName(instruction, instr, doffs, offset, id, ig);
      }
  }
@@ -4599,18 +5046,16 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst,
                      if (dstReg == regOp1)
                      {
                          assert(tempReg != regOp1);
-                        assert(REG_RA != regOp1);
                          saveOperReg1 = tempReg;
-                        saveOperReg2 = regOp2;
-                        emitIns_R_R_I(INS_addi, attr, tempReg, regOp1, 0);
+                        saveOperReg2 = (regOp1 == regOp2) ? tempReg : regOp2;
+                        emitIns_R_R(INS_mov, attr, tempReg, regOp1);
                      }
                      else if (dstReg == regOp2)
                      {
                          assert(tempReg != regOp2);
-                        assert(REG_RA != regOp2);
                          saveOperReg1 = regOp1;
                          saveOperReg2 = tempReg;
-                        emitIns_R_R_I(INS_addi, attr, tempReg, regOp2, 0);
+                        emitIns_R_R(INS_mov, attr, tempReg, regOp2);
                      }
                      else
                      {
@@ -4621,73 +5066,84 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst,
  
                  emitIns_R_R_R(ins, attr, dstReg, regOp1, regOp2);
  
+                /*
+                    The overflow check below is expressed in terms of A = B + C:
+                    ADD : A = B + C
+                    SUB : B = A - C
+                    In case of addition:
+                    dst = src1 + src2
+                    A = dst
+                    B = src1
+                    C = src2
+                    In case of subtraction:
+                    dst = src1 - src2
+                    src1 = dst + src2
+                    A = src1
+                    B = dst
+                    C = src2
+                */
                  if (needCheckOv)
                  {
-                    ssize_t   imm;
-                    regNumber tempReg1;
-                    regNumber tempReg2;
-                    // ADD : A = B + C
-                    // SUB : C = A - B
-                    bool isAdd = (dst->OperGet() == GT_ADD);
+                    regNumber resultReg = REG_NA;
+
+                    if (dst->OperGet() == GT_ADD)
+                    {
+                        resultReg = dstReg;
+                        regOp1    = saveOperReg1;
+                        regOp2    = saveOperReg2;
+                    }
+                    else
+                    {
+                        resultReg = saveOperReg1;
+                        regOp1    = dstReg;
+                        regOp2    = saveOperReg2;
+                    }
+
+                    instruction branchIns  = INS_none;
+                    regNumber   branchReg1 = REG_NA;
+                    regNumber   branchReg2 = REG_NA;
+
                      if ((dst->gtFlags & GTF_UNSIGNED) != 0)
                      {
-                        // if A < B, goto overflow
-                        if (isAdd)
-                        {
-                            tempReg1 = dstReg;
-                            tempReg2 = saveOperReg1;
-                        }
-                        else
-                        {
-                            tempReg1 = saveOperReg1;
-                            tempReg2 = saveOperReg2;
-                        }
-                        codeGen->genJumpToThrowHlpBlk_la(SCK_OVERFLOW, INS_bltu, tempReg1, nullptr, tempReg2);
+                        // if A < B then overflow
+                        branchIns  = INS_bltu;
+                        branchReg1 = resultReg;
+                        branchReg2 = regOp1;
                      }
                      else
                      {
-                        tempReg1 = REG_RA;
-                        tempReg2 = dst->ExtractTempReg();
-                        assert(tempReg1 != tempReg2);
-                        assert(tempReg1 != saveOperReg1);
-                        assert(tempReg2 != saveOperReg2);
+                        regNumber tempReg1 = dst->ExtractTempReg();
  
-                        ssize_t ui6 = (attr == EA_4BYTE) ? 31 : 63;
-                        emitIns_R_R_I(INS_srli, attr, tempReg1, isAdd ? saveOperReg1 : dstReg, ui6);
-                        emitIns_R_R_I(INS_srli, attr, tempReg2, saveOperReg2, ui6);
+                        branchIns = INS_bne;
  
-                        emitIns_R_R_R(INS_xor, attr, tempReg1, tempReg1, tempReg2);
                          if (attr == EA_4BYTE)
                          {
-                            imm = 1;
-                            emitIns_R_R_I(INS_andi, attr, tempReg1, tempReg1, imm);
-                            emitIns_R_R_I(INS_andi, attr, tempReg2, tempReg2, imm);
-                        }
-                        // if (B > 0 && C < 0) || (B < 0  && C > 0), skip overflow
-                        BasicBlock* tmpLabel  = codeGen->genCreateTempLabel();
-                        BasicBlock* tmpLabel2 = codeGen->genCreateTempLabel();
-                        BasicBlock* tmpLabel3 = codeGen->genCreateTempLabel();
-
-                        emitIns_J_cond_la(INS_bne, tmpLabel, tempReg1, REG_R0);
+                            assert(src1->gtType != TYP_LONG);
+                            assert(src2->gtType != TYP_LONG);
  
-                        emitIns_J_cond_la(INS_bne, tmpLabel3, tempReg2, REG_R0);
+                            emitIns_R_R_R(INS_add, attr, tempReg1, regOp1, regOp2);
  
-                        // B > 0 and C > 0, if A < B, goto overflow
-                        emitIns_J_cond_la(INS_bge, tmpLabel, isAdd ? dstReg : saveOperReg1,
-                                          isAdd ? saveOperReg1 : saveOperReg2);
-
-                        codeGen->genDefineTempLabel(tmpLabel2);
-
-                        codeGen->genJumpToThrowHlpBlk(EJ_jmp, SCK_OVERFLOW);
-
-                        codeGen->genDefineTempLabel(tmpLabel3);
+                            // if 64-bit addition is not equal to 32-bit addition for 32-bit operands then overflow
+                            branchReg1 = resultReg;
+                            branchReg2 = tempReg1;
+                        }
+                        else
+                        {
+                            assert(attr == EA_8BYTE);
+                            assert(tempReg != tempReg1);
+                            // tempReg2 reuses tempReg, so tempReg must already be dead by the time tempReg2 is written
+                            regNumber tempReg2 = tempReg;
  
-                        // B < 0 and C < 0, if A > B, goto overflow
-                        emitIns_J_cond_la(INS_blt, tmpLabel2, isAdd ? saveOperReg1 : saveOperReg2,
-                                          isAdd ? dstReg : saveOperReg1);
+                            emitIns_R_R_R(INS_slt, attr, tempReg1, resultReg, regOp1);
+                            emitIns_R_R_I(INS_slti, attr, tempReg2, regOp2, 0);
  
-                        codeGen->genDefineTempLabel(tmpLabel);
+                            // if ((A < B) != (C < 0)) then overflow
+                            branchReg1 = tempReg1;
+                            branchReg2 = tempReg2;
+                        }
                      }
+
+                    codeGen->genJumpToThrowHlpBlk_la(SCK_OVERFLOW, branchIns, branchReg1, nullptr, branchReg2);
                  }
              }
              break;
diff --git a/src/coreclr/jit/emitriscv64.h b/src/coreclr/jit/emitriscv64.h

index d3b00b2..ba85379 100644 (file)
--- a/src/coreclr/jit/emitriscv64.h
+++ b/src/coreclr/jit/emitriscv64.h
@@ -63,25 +63,21 @@ bool emitInsIsLoadOrStore(instruction ins);
  void emitDispInsName(
      code_t code, const BYTE* addr, bool doffs, unsigned insOffset, const instrDesc* id, const insGroup* ig);
  void emitDispInsInstrNum(const instrDesc* id) const;
-bool emitDispBranch(unsigned         opcode2,
-                    const char*      register1Name,
-                    const char*      register2Name,
-                    const instrDesc* id,
-                    const insGroup*  ig) const;
+bool emitDispBranch(unsigned opcode2, unsigned rs1, unsigned rs2, const instrDesc* id, const insGroup* ig) const;
  void emitDispBranchOffset(const instrDesc* id, const insGroup* ig) const;
  void emitDispBranchLabel(const instrDesc* id) const;
-bool emitDispBranchInstrType(unsigned opcode2) const;
+bool emitDispBranchInstrType(unsigned opcode2, bool is_zero_reg, bool& print_second_reg) const;
  void emitDispIllegalInstruction(code_t instructionCode);
  
-emitter::code_t emitInsCode(instruction ins /*, insFormat fmt*/);
+emitter::code_t emitInsCode(instruction ins /*, insFormat fmt*/) const;
  
  // Generate code for a load or store operation and handle the case of contained GT_LEA op1 with [base + offset]
  void emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataReg, GenTreeIndir* indir);
  
  // Emit the 32-bit RISCV64 instruction 'code' into the 'dst'  buffer
-unsigned emitOutput_Instr(BYTE* dst, code_t code);
+unsigned emitOutput_Instr(BYTE* dst, code_t code) const;
  
-ssize_t emitOutputInstrJumpDistance(const BYTE* dst, const BYTE* src, const insGroup* ig, instrDescJmp* jmp);
+ssize_t emitOutputInstrJumpDistance(const BYTE* src, const insGroup* ig, instrDescJmp* jmp);
  void emitOutputInstrJumpDistanceHelper(const insGroup* ig,
                                         instrDescJmp*   jmp,
                                         UNATIVE_OFFSET& dstOffs,
@@ -94,6 +90,58 @@ bool IsRedundantMov(instruction ins, emitAttr size, regNumber dst, regNumber src
  bool IsRedundantLdStr(
      instruction ins, regNumber reg1, regNumber reg2, ssize_t imm, emitAttr size, insFormat fmt); // New functions end.
  
+static code_t insEncodeRTypeInstr(
+    unsigned opcode, unsigned rd, unsigned funct3, unsigned rs1, unsigned rs2, unsigned funct7);
+static code_t insEncodeITypeInstr(unsigned opcode, unsigned rd, unsigned funct3, unsigned rs1, unsigned imm12);
+static code_t insEncodeSTypeInstr(unsigned opcode, unsigned funct3, unsigned rs1, unsigned rs2, unsigned imm12);
+static code_t insEncodeUTypeInstr(unsigned opcode, unsigned rd, unsigned imm20);
+static code_t insEncodeBTypeInstr(unsigned opcode, unsigned funct3, unsigned rs1, unsigned rs2, unsigned imm13);
+static code_t insEncodeJTypeInstr(unsigned opcode, unsigned rd, unsigned imm21);
+
+#ifdef DEBUG
+static void emitOutput_RTypeInstr_SanityCheck(instruction ins, regNumber rd, regNumber rs1, regNumber rs2);
+static void emitOutput_ITypeInstr_SanityCheck(
+    instruction ins, regNumber rd, regNumber rs1, unsigned immediate, unsigned opcode);
+static void emitOutput_STypeInstr_SanityCheck(instruction ins, regNumber rs1, regNumber rs2);
+static void emitOutput_UTypeInstr_SanityCheck(instruction ins, regNumber rd);
+static void emitOutput_BTypeInstr_SanityCheck(instruction ins, regNumber rs1, regNumber rs2);
+static void emitOutput_JTypeInstr_SanityCheck(instruction ins, regNumber rd);
+#endif // DEBUG
+
+static unsigned castFloatOrIntegralReg(regNumber reg);
+
+unsigned emitOutput_RTypeInstr(BYTE* dst, instruction ins, regNumber rd, regNumber rs1, regNumber rs2) const;
+unsigned emitOutput_ITypeInstr(BYTE* dst, instruction ins, regNumber rd, regNumber rs1, unsigned imm12) const;
+unsigned emitOutput_STypeInstr(BYTE* dst, instruction ins, regNumber rs1, regNumber rs2, unsigned imm12) const;
+unsigned emitOutput_UTypeInstr(BYTE* dst, instruction ins, regNumber rd, unsigned imm20) const;
+unsigned emitOutput_BTypeInstr(BYTE* dst, instruction ins, regNumber rs1, regNumber rs2, unsigned imm13) const;
+unsigned emitOutput_BTypeInstr_InvertComparation(
+    BYTE* dst, instruction ins, regNumber rs1, regNumber rs2, unsigned imm13) const;
+unsigned emitOutput_JTypeInstr(BYTE* dst, instruction ins, regNumber rd, unsigned imm21) const;
+
+BYTE* emitOutputInstr_OptsReloc(BYTE* dst, const instrDesc* id, instruction* ins);
+BYTE* emitOutputInstr_OptsI(BYTE* dst, const instrDesc* id);
+BYTE* emitOutputInstr_OptsI8(BYTE* dst, const instrDesc* id, ssize_t immediate, regNumber reg1);
+BYTE* emitOutputInstr_OptsI32(BYTE* dst, ssize_t immediate, regNumber reg1);
+BYTE* emitOutputInstr_OptsRc(BYTE* dst, const instrDesc* id, instruction* ins);
+BYTE* emitOutputInstr_OptsRcReloc(BYTE* dst, instruction* ins, unsigned offset, regNumber reg1);
+BYTE* emitOutputInstr_OptsRcNoReloc(BYTE* dst, instruction* ins, unsigned offset, regNumber reg1);
+BYTE* emitOutputInstr_OptsRl(BYTE* dst, instrDesc* id, instruction* ins);
+BYTE* emitOutputInstr_OptsRlReloc(BYTE* dst, ssize_t igOffs, regNumber reg1);
+BYTE* emitOutputInstr_OptsRlNoReloc(BYTE* dst, ssize_t igOffs, regNumber reg1);
+BYTE* emitOutputInstr_OptsJalr(BYTE* dst, instrDescJmp* jmp, const insGroup* ig, instruction* ins);
+BYTE* emitOutputInstr_OptsJalr8(BYTE* dst, const instrDescJmp* jmp, ssize_t immediate);
+BYTE* emitOutputInstr_OptsJalr24(BYTE* dst, ssize_t immediate);
+BYTE* emitOutputInstr_OptsJalr28(BYTE* dst, const instrDescJmp* jmp, ssize_t immediate);
+BYTE* emitOutputInstr_OptsJCond(BYTE* dst, instrDesc* id, const insGroup* ig, instruction* ins);
+BYTE* emitOutputInstr_OptsJ(BYTE* dst, instrDesc* id, const insGroup* ig, instruction* ins);
+BYTE* emitOutputInstr_OptsC(BYTE* dst, instrDesc* id, const insGroup* ig, size_t* size);
+
+static unsigned TrimSignedToImm12(int imm12);
+static unsigned TrimSignedToImm13(int imm13);
+static unsigned TrimSignedToImm20(int imm20);
+static unsigned TrimSignedToImm21(int imm21);
+
  /************************************************************************/
  /*           Public inline informational methods                        */
  /************************************************************************/
@@ -285,7 +333,7 @@ void emitIns_Call(EmitCallType          callType,
                    ssize_t          disp   = 0,
                    bool             isJump = false);
  
-unsigned emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t code);
+unsigned emitOutputCall(const insGroup* ig, BYTE* dst, instrDesc* id, code_t code);
  
  unsigned get_curTotalCodeSize(); // bytes of code
  
diff --git a/src/coreclr/jit/gcencode.cpp b/src/coreclr/jit/gcencode.cpp

index 96409d4..5a1a084 100644 (file)
--- a/src/coreclr/jit/gcencode.cpp
+++ b/src/coreclr/jit/gcencode.cpp
@@ -3887,7 +3887,7 @@ void GCInfo::gcInfoBlockHdrSave(GcInfoEncoder* gcInfoEncoder, unsigned methodSiz
              //
              const int osrOffset = ppInfo->GenericContextArgOffset() - 2 * REGSIZE_BYTES;
              assert(offset == osrOffset);
-#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)
+#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
              // PP info has virtual offset. This is also the caller SP offset.
              //
              const int osrOffset = ppInfo->GenericContextArgOffset();
@@ -3930,7 +3930,7 @@ void GCInfo::gcInfoBlockHdrSave(GcInfoEncoder* gcInfoEncoder, unsigned methodSiz
              //
              const int osrOffset = ppInfo->KeptAliveThisOffset() - 2 * REGSIZE_BYTES;
              assert(offset == osrOffset);
-#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)
+#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
              // PP info has virtual offset. This is also the caller SP offset.
              //
              const int osrOffset = ppInfo->KeptAliveThisOffset();
diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h

index 109da6a..87eeeeb 100644 (file)
--- a/src/coreclr/jit/gentree.h
+++ b/src/coreclr/jit/gentree.h
@@ -4486,6 +4486,15 @@ public:
  #endif
      }
  
+    bool IsMismatchedArgType() const
+    {
+#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
+        return isValidIntArgReg(GetRegNum()) && varTypeUsesFloatReg(ArgType);
+#else
+        return false;
+#endif // TARGET_LOONGARCH64 || TARGET_RISCV64
+    }
+
      void SetByteSize(unsigned byteSize, unsigned byteAlignment, bool isStruct, bool isFloatHfa);
  
      // Get the number of bytes that this argument is occupying on the stack,
@@ -5480,7 +5489,7 @@ struct GenTreeCall final : public GenTree
              return WellKnownArg::VirtualStubCell;
          }
  
-#if defined(TARGET_ARMARCH)
+#if defined(TARGET_ARMARCH) || defined(TARGET_RISCV64)
          // For ARM architectures, we always use an indirection cell for R2R calls.
          if (IsR2RRelativeIndir() && !IsDelegateInvoke())
          {
diff --git a/src/coreclr/jit/instrsriscv64.h b/src/coreclr/jit/instrsriscv64.h

index f38f375..b1f690b 100644 (file)
--- a/src/coreclr/jit/instrsriscv64.h
+++ b/src/coreclr/jit/instrsriscv64.h
@@ -35,7 +35,8 @@ INST(invalid,       "INVALID",        0,    BAD_CODE)
  INST(nop,           "nop",            0,    0x00000013)
  
  //// R_R
-INST(mov,           "mov",            0,    0x00000013)
+INST(mov,           "mv",             0,    0x00000013)
+INST(sext_w,        "sext.w",         0,    0x0000001b)
  
  ////R_I
  INST(lui,           "lui",            0,    0x00000037)
@@ -205,8 +206,8 @@ INST(fmin_d,        "fmin.d",         0,   0x2a000053)
  INST(fmax_d,        "fmax.d",         0,   0x2a001053)
  
  //// R_R
-INST(fcvt_s_d,      "fcvt.s.d",       0,   0x40101053)
-INST(fcvt_d_s,      "fcvt.d.s",       0,   0x42001053)
+INST(fcvt_s_d,      "fcvt.s.d",       0,   0x40100053)
+INST(fcvt_d_s,      "fcvt.d.s",       0,   0x42000053)
  
  //// R_R_R
  INST(feq_d,         "feq.d",          0,   0xa2002053)
@@ -215,10 +216,10 @@ INST(fle_d,         "fle.d",          0,   0xa2000053)
  
  //// R_R
  INST(fclass_d,      "fclass.d",       0,   0xe2001053)
-INST(fcvt_w_d,      "fcvt.w.d",       0,   0xc2001053)
-INST(fcvt_wu_d,     "fcvt.wu.d",      0,   0xc2101053)
-INST(fcvt_d_w,      "fcvt.d.w",       0,   0xd2001053)
-INST(fcvt_d_wu,     "fcvt.d.wu",      0,   0xd2101053)
+INST(fcvt_w_d,      "fcvt.w.d",       0,   0xc2000053)
+INST(fcvt_wu_d,     "fcvt.wu.d",      0,   0xc2100053)
+INST(fcvt_d_w,      "fcvt.d.w",       0,   0xd2000053)
+INST(fcvt_d_wu,     "fcvt.d.wu",      0,   0xd2100053)
  
  //// R_R_I
  INST(flw,           "flw",            LD,  0x00002007)
diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h

index b338403..5f004e4 100644 (file)
--- a/src/coreclr/jit/jitconfigvalues.h
+++ b/src/coreclr/jit/jitconfigvalues.h
@@ -541,11 +541,11 @@ CONFIG_INTEGER(JitRandomGuardedDevirtualization, W("JitRandomGuardedDevirtualiza
  #endif // DEBUG
  
  // Enable insertion of patchpoints into Tier0 methods, switching to optimized where needed.
-#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)
+#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
  CONFIG_INTEGER(TC_OnStackReplacement, W("TC_OnStackReplacement"), 1)
  #else
  CONFIG_INTEGER(TC_OnStackReplacement, W("TC_OnStackReplacement"), 0)
-#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)
+#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
  // Initial patchpoint counter value used by jitted code
  CONFIG_INTEGER(TC_OnStackReplacement_InitialCounter, W("TC_OnStackReplacement_InitialCounter"), 1000)
  // Enable partial compilation for Tier0 methods
diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp

index e8fe8ad..b0f3348 100644 (file)
--- a/src/coreclr/jit/lclvars.cpp
+++ b/src/coreclr/jit/lclvars.cpp
@@ -5256,7 +5256,7 @@ void Compiler::lvaFixVirtualFrameOffsets()
  
      if (opts.IsOSR())
      {
-#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)
+#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
          // Stack offset includes Tier0 frame.
          //
          JITDUMP("--- delta bump %d for OSR + Tier0 frame\n", info.compPatchpointInfo->TotalFrameSize());
@@ -5361,7 +5361,7 @@ void Compiler::lvaFixVirtualFrameOffsets()
  
  #endif // FEATURE_FIXED_OUT_ARGS
  
-#if defined(TARGET_ARM64) || defined(TARGET_RISCV64)
+#if defined(TARGET_ARM64)
      // We normally add alignment below the locals between them and the outgoing
      // arg space area. When we store fp/lr(ra) at the bottom, however, this will
      // be below the alignment. So we should not apply the alignment adjustment to
@@ -5373,11 +5373,11 @@ void Compiler::lvaFixVirtualFrameOffsets()
      {
          lvaTable[lvaRetAddrVar].SetStackOffset(REGSIZE_BYTES);
      }
-#elif defined(TARGET_LOONGARCH64)
+#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
      assert(codeGen->isFramePointerUsed());
      if (lvaRetAddrVar != BAD_VAR_NUM)
      {
-        // For LoongArch64, the RA is below the fp. see the `genPushCalleeSavedRegisters`
+        // For LoongArch64 and RISCV64, the RA is below the FP; see `genPushCalleeSavedRegisters`.
          lvaTable[lvaRetAddrVar].SetStackOffset(-REGSIZE_BYTES);
      }
  #endif // !TARGET_LOONGARCH64
@@ -6166,17 +6166,11 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals()
          stkOffs -= (compCalleeRegsPushed - 2) * REGSIZE_BYTES;
      }
  
-#elif defined(TARGET_LOONGARCH64)
-
-    assert(compCalleeRegsPushed >= 2);
-
-#elif defined(TARGET_RISCV64)
+#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
  
-    // Subtract off FP and RA.
      assert(compCalleeRegsPushed >= 2);
-    stkOffs -= (compCalleeRegsPushed - 2) * REGSIZE_BYTES;
  
-#else // !TARGET_RISCV64
+#else // !TARGET_LOONGARCH64 && !TARGET_RISCV64
  #ifdef TARGET_ARM
      // On ARM32 LR is part of the pushed registers and is always stored at the
      // top.
@@ -6187,7 +6181,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals()
  #endif
  
      stkOffs -= compCalleeRegsPushed * REGSIZE_BYTES;
-#endif // !TARGET_RISCV64
+#endif // !TARGET_LOONGARCH64 && !TARGET_RISCV64
  
      // (2) Account for the remainder of the frame
      //
@@ -6909,11 +6903,6 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals()
      }
  #endif // TARGET_ARM64
  
-#if defined(TARGET_RISCV64)
-    assert(isFramePointerUsed()); // Note that currently we always have a frame pointer
-    stkOffs -= 2 * REGSIZE_BYTES;
-#endif // TARGET_RISCV64
-
  #if FEATURE_FIXED_OUT_ARGS
      if (lvaOutgoingArgSpaceSize > 0)
      {
@@ -6930,8 +6919,8 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals()
      }
  #endif // FEATURE_FIXED_OUT_ARGS
  
-#ifdef TARGET_LOONGARCH64
-    // For LoongArch64, CalleeSavedRegs are at bottom.
+#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
+    // For LoongArch64 and RISCV64, CalleeSavedRegs are at bottom.
      int pushedCount = 0;
  #else
      // compLclFrameSize equals our negated virtual stack offset minus the pushed registers and return address
@@ -7910,7 +7899,7 @@ int Compiler::lvaToCallerSPRelativeOffset(int offset, bool isFpBased, bool forRo
          offset += codeGen->genCallerSPtoInitialSPdelta();
      }
  
-#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)
+#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
      if (forRootFrame && opts.IsOSR())
      {
          const PatchpointInfo* const ppInfo = info.compPatchpointInfo;
@@ -7929,7 +7918,7 @@ int Compiler::lvaToCallerSPRelativeOffset(int offset, bool isFpBased, bool forRo
          //
          const int adjustment = ppInfo->TotalFrameSize() + REGSIZE_BYTES;
  
-#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)
+#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
  
          const int adjustment = ppInfo->TotalFrameSize();
  #endif
diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp

index 546f95e..6aa04c1 100644 (file)
--- a/src/coreclr/jit/lower.cpp
+++ b/src/coreclr/jit/lower.cpp
@@ -1611,9 +1611,10 @@ void Lowering::LowerArg(GenTreeCall* call, CallArg* callArg, bool late)
      {
  
  #if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
-        if (call->IsVarargs() || comp->opts.compUseSoftFP)
+        if (call->IsVarargs() || comp->opts.compUseSoftFP || callArg->AbiInfo.IsMismatchedArgType())
          {
              // For vararg call or on armel, reg args should be all integer.
+            // On LoongArch64/RISCV64, a float-typed arg assigned to an integer arg register must also be integer.
              // Insert copies as needed to move float value to integer register.
              GenTree* newNode = LowerFloatArg(ppArg, callArg);
              if (newNode != nullptr)
@@ -1644,7 +1645,7 @@ void Lowering::LowerArg(GenTreeCall* call, CallArg* callArg, bool late)
  
  #if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
  //------------------------------------------------------------------------
-// LowerFloatArg: Lower float call arguments on the arm/LoongArch64 platform.
+// LowerFloatArg: Lower float call arguments on the arm/LoongArch64/RiscV64 platform.
  //
  // Arguments:
  //    arg  - The arg node
diff --git a/src/coreclr/jit/lowerriscv64.cpp b/src/coreclr/jit/lowerriscv64.cpp

index 65426e4..da22980 100644 (file)
--- a/src/coreclr/jit/lowerriscv64.cpp
+++ b/src/coreclr/jit/lowerriscv64.cpp
@@ -245,7 +245,8 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
              src = src->AsUnOp()->gtGetOp1();
          }
  
-        if (!blkNode->OperIs(GT_STORE_DYN_BLK) && (size <= INITBLK_UNROLL_LIMIT) && src->OperIs(GT_CNS_INT))
+        if (!blkNode->OperIs(GT_STORE_DYN_BLK) && (size <= comp->getUnrollThreshold(Compiler::UnrollKind::Memset)) &&
+            src->OperIs(GT_CNS_INT))
          {
              blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
  
@@ -291,10 +292,11 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
              comp->lvaSetVarDoNotEnregister(srcLclNum DEBUGARG(DoNotEnregisterReason::BlockOp));
          }
  
-        ClassLayout* layout  = blkNode->GetLayout();
-        bool         doCpObj = !blkNode->OperIs(GT_STORE_DYN_BLK) && layout->HasGCPtr();
+        ClassLayout* layout               = blkNode->GetLayout();
+        bool         doCpObj              = !blkNode->OperIs(GT_STORE_DYN_BLK) && layout->HasGCPtr();
+        unsigned     copyBlockUnrollLimit = comp->getUnrollThreshold(Compiler::UnrollKind::Memcpy);
  
-        if (doCpObj && (size <= CPBLK_UNROLL_LIMIT))
+        if (doCpObj && (size <= copyBlockUnrollLimit))
          {
              // No write barriers are needed on the stack.
              // If the layout contains a byref, then we know it must live on the stack.
@@ -314,7 +316,7 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
              assert((dstAddr->TypeGet() == TYP_BYREF) || (dstAddr->TypeGet() == TYP_I_IMPL));
              blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindCpObjUnroll;
          }
-        else if (blkNode->OperIs(GT_STORE_BLK) && (size <= CPBLK_UNROLL_LIMIT))
+        else if (blkNode->OperIs(GT_STORE_BLK) && (size <= copyBlockUnrollLimit))
          {
              blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
  
diff --git a/src/coreclr/jit/lsrariscv64.cpp b/src/coreclr/jit/lsrariscv64.cpp

index 0136473..6dc2f4b 100644 (file)
--- a/src/coreclr/jit/lsrariscv64.cpp
+++ b/src/coreclr/jit/lsrariscv64.cpp
@@ -319,7 +319,7 @@ int LinearScan::BuildNode(GenTree* tree)
                          needTemp = true;
                  }
  
-                if (!needTemp && (tree->gtOper == GT_DIV || tree->gtOper == GT_MOD))
+                if (!needTemp && tree->OperIs(GT_DIV, GT_MOD))
                  {
                      if ((exceptions & ExceptionSetFlags::ArithmeticException) != ExceptionSetFlags::None)
                          needTemp = true;
diff --git a/src/coreclr/jit/regalloc.cpp b/src/coreclr/jit/regalloc.cpp

index fa2ef10..211452f 100644 (file)
--- a/src/coreclr/jit/regalloc.cpp
+++ b/src/coreclr/jit/regalloc.cpp
@@ -262,6 +262,16 @@ bool Compiler::rpMustCreateEBPFrame(INDEBUG(const char** wbReason))
      }
  #endif // TARGET_LOONGARCH64
  
+#ifdef TARGET_RISCV64
+    // TODO-RISCV64-NYI: This is temporary: force a frame pointer-based frame until genFnProlog
+    // can handle non-frame pointer frames.
+    if (!result)
+    {
+        INDEBUG(reason = "Temporary RISCV64 force frame pointer");
+        result = true;
+    }
+#endif // TARGET_RISCV64
+
  #ifdef DEBUG
      if ((result == true) && (wbReason != nullptr))
      {
diff --git a/src/coreclr/jit/targetriscv64.h b/src/coreclr/jit/targetriscv64.h

index a12bcc0..5ac82fa 100644 (file)
--- a/src/coreclr/jit/targetriscv64.h
+++ b/src/coreclr/jit/targetriscv64.h
@@ -12,8 +12,6 @@
    #define ROUND_FLOAT              0       // Do not round intermed float expression results
    #define CPU_HAS_BYTE_REGS        0
  
-  #define CPBLK_UNROLL_LIMIT       64     // Upper bound to let the code generator to loop unroll CpBlk
-  #define INITBLK_UNROLL_LIMIT     64     // Upper bound to let the code generator to loop unroll InitBlk
  
  #ifdef FEATURE_SIMD
  #pragma error("SIMD Unimplemented yet RISCV64")
@@ -298,6 +296,9 @@
    #define B_DIST_SMALL_MAX_NEG  (-4096)
    #define B_DIST_SMALL_MAX_POS  (+4095)
  
+  // The number of bytes from the end of the last probed page that must also be probed, to allow for some
+  // small SP adjustments without probes. If zero, then the stack pointer can point to the last byte/word
+  // on the stack guard page, and must be touched before any further "SUB SP".
    #define STACK_PROBE_BOUNDARY_THRESHOLD_BYTES 0
  
  // clang-format on
diff --git a/src/coreclr/pal/inc/pal_endian.h b/src/coreclr/pal/inc/pal_endian.h

index 43a8167..ddfdeab 100644 (file)
--- a/src/coreclr/pal/inc/pal_endian.h
+++ b/src/coreclr/pal/inc/pal_endian.h
@@ -102,6 +102,11 @@ inline void SwapGuid(GUID *pGuid)
  #define ALIGN_ACCESS    ((1<<LOG2_PTRSIZE)-1)
  #endif
  
+#ifdef HOST_RISCV64
+#define LOG2_PTRSIZE   3
+#define ALIGN_ACCESS    ((1<<LOG2_PTRSIZE)-1)
+#endif
+
  #if defined(ALIGN_ACCESS) && !defined(_MSC_VER)
  #ifdef __cplusplus
  extern "C++" {
diff --git a/src/coreclr/pal/inc/rt/ntimage.h b/src/coreclr/pal/inc/rt/ntimage.h

index 17680e2..3c7de05 100644 (file)
--- a/src/coreclr/pal/inc/rt/ntimage.h
+++ b/src/coreclr/pal/inc/rt/ntimage.h
@@ -1025,7 +1025,6 @@ typedef IMAGE_RELOCATION UNALIGNED *PIMAGE_RELOCATION;
  // RISCV64 relocation types
  //
  #define IMAGE_REL_RISCV64_PC            0x0003
-#define IMAGE_REL_RISCV64_JALR          0x0004
  
  //
  // CEF relocation types.
diff --git a/src/coreclr/pal/inc/unixasmmacrosriscv64.inc b/src/coreclr/pal/inc/unixasmmacrosriscv64.inc

index cd61556..af7e533 100644 (file)
--- a/src/coreclr/pal/inc/unixasmmacrosriscv64.inc
+++ b/src/coreclr/pal/inc/unixasmmacrosriscv64.inc
@@ -293,8 +293,14 @@ C_FUNC(\Name\()_End):
  .endm
  
  .macro EPILOG_WITH_TRANSITION_BLOCK_RETURN
-// TODO RISCV NYI
-    sw  ra, 0(zero)
+
+    RESTORE_CALLEESAVED_REGISTERS sp, __PWTB_CalleeSavedRegisters
+
+    EPILOG_RESTORE_REG_PAIR       fp, ra, __PWTB_CalleeSavedRegisters
+
+    EPILOG_STACK_FREE             __PWTB_StackAlloc
+
+    ret
  .endm
  
  
diff --git a/src/coreclr/vm/callingconvention.h b/src/coreclr/vm/callingconvention.h

index 1c1f27f..0f503c3 100644 (file)
--- a/src/coreclr/vm/callingconvention.h
+++ b/src/coreclr/vm/callingconvention.h
@@ -1692,17 +1692,7 @@ int ArgIteratorTemplate<ARGITERATOR_BASE>::GetNextOffset()
          }
          else
          {
-            MethodTable* pMethodTable = nullptr;
-
-            if (!thValueType.IsTypeDesc())
-                pMethodTable = thValueType.AsMethodTable();
-            else
-            {
-                _ASSERTE(thValueType.IsNativeValueType());
-                pMethodTable = thValueType.AsNativeValueType();
-            }
-            _ASSERTE(pMethodTable != nullptr);
-            flags = MethodTable::GetLoongArch64PassStructInRegisterFlags((CORINFO_CLASS_HANDLE)pMethodTable);
+            flags = MethodTable::GetLoongArch64PassStructInRegisterFlags(thValueType);
              if (flags & STRUCT_HAS_FLOAT_FIELDS_MASK)
              {
                  cFPRegs = (flags & STRUCT_FLOAT_FIELD_ONLY_TWO) ? 2 : 1;
@@ -1816,17 +1806,7 @@ int ArgIteratorTemplate<ARGITERATOR_BASE>::GetNextOffset()
          }
          else
          {
-            MethodTable* pMethodTable = nullptr;
-
-            if (!thValueType.IsTypeDesc())
-                pMethodTable = thValueType.AsMethodTable();
-            else
-            {
-                _ASSERTE(thValueType.IsNativeValueType());
-                pMethodTable = thValueType.AsNativeValueType();
-            }
-            _ASSERTE(pMethodTable != nullptr);
-            flags = MethodTable::GetRiscV64PassStructInRegisterFlags((CORINFO_CLASS_HANDLE)pMethodTable);
+            flags = MethodTable::GetRiscV64PassStructInRegisterFlags(thValueType);
              if (flags & STRUCT_HAS_FLOAT_FIELDS_MASK)
              {
                  cFPRegs = (flags & STRUCT_FLOAT_FIELD_ONLY_TWO) ? 2 : 1;
@@ -2029,18 +2009,14 @@ void ArgIteratorTemplate<ARGITERATOR_BASE>::ComputeReturnFlags()
              if  (size <= ENREGISTERED_RETURNTYPE_INTEGER_MAXSIZE)
              {
                  assert(!thValueType.IsTypeDesc());
-
-                MethodTable *pMethodTable = thValueType.AsMethodTable();
-                flags = (MethodTable::GetLoongArch64PassStructInRegisterFlags((CORINFO_CLASS_HANDLE)pMethodTable) & 0xff) << RETURN_FP_SIZE_SHIFT;
+                flags = (MethodTable::GetLoongArch64PassStructInRegisterFlags(thValueType) & 0xff) << RETURN_FP_SIZE_SHIFT;
                  break;
              }
  #elif defined(TARGET_RISCV64)
              if  (size <= ENREGISTERED_RETURNTYPE_INTEGER_MAXSIZE)
              {
                  assert(!thValueType.IsTypeDesc());
-
-                MethodTable *pMethodTable = thValueType.AsMethodTable();
-                flags = (MethodTable::GetRiscV64PassStructInRegisterFlags((CORINFO_CLASS_HANDLE)pMethodTable) & 0xff) << RETURN_FP_SIZE_SHIFT;
+                flags = (MethodTable::GetRiscV64PassStructInRegisterFlags(thValueType) & 0xff) << RETURN_FP_SIZE_SHIFT;
                  break;
              }
  #else
diff --git a/src/coreclr/vm/ceemain.cpp b/src/coreclr/vm/ceemain.cpp

index 1fd8986..6a06977 100644 (file)
--- a/src/coreclr/vm/ceemain.cpp
+++ b/src/coreclr/vm/ceemain.cpp
@@ -1243,6 +1243,8 @@ void STDMETHODCALLTYPE EEShutDownHelper(BOOL fIsDllUnloading)
          // This will check a flag and do nothing if not enabled.
          Interpreter::PrintPostMortemData();
  #endif // FEATURE_INTERPRETER
+        VirtualCallStubManager::LogFinalStats();
+        WriteJitHelperCountToSTRESSLOG();
  
  #ifdef PROFILING_SUPPORTED
          // If profiling is enabled, then notify of shutdown first so that the
@@ -1356,9 +1358,6 @@ part2:
                  Interpreter::Terminate();
  #endif // FEATURE_INTERPRETER
  
-                //@TODO: find the right place for this
-                VirtualCallStubManager::UninitStatic();
-
                  // Unregister our vectored exception and continue handlers from the OS.
                  // This will ensure that if any other DLL unload (after ours) has an exception,
                  // we wont attempt to process that exception (which could lead to various
@@ -1376,8 +1375,6 @@ part2:
                  Disassembler::StaticClose();
  #endif // USE_DISASSEMBLER
  
-                WriteJitHelperCountToSTRESSLOG();
-
                  STRESS_LOG0(LF_STARTUP, LL_INFO10, "EEShutdown shutting down logging");
  
  #if 0       // Dont clean up the stress log, so that even at process exit we have a log (after all the process is going away
diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp

index 454540a..3bb3605 100644 (file)
--- a/src/coreclr/vm/jitinterface.cpp
+++ b/src/coreclr/vm/jitinterface.cpp
@@ -9520,7 +9520,7 @@ uint32_t CEEInfo::getLoongArch64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE c
      uint32_t size = STRUCT_NO_FLOAT_FIELD;
  
  #if defined(TARGET_LOONGARCH64)
-    size = (uint32_t)MethodTable::GetLoongArch64PassStructInRegisterFlags(cls);
+    size = (uint32_t)MethodTable::GetLoongArch64PassStructInRegisterFlags(TypeHandle(cls));
  #endif
  
      EE_TO_JIT_TRANSITION_LEAF();
@@ -9541,7 +9541,7 @@ uint32_t CEEInfo::getRISCV64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE cls)
      uint32_t size = STRUCT_NO_FLOAT_FIELD;
  
  #if defined(TARGET_RISCV64)
-    size = (uint32_t)MethodTable::GetRiscV64PassStructInRegisterFlags(cls);
+    size = (uint32_t)MethodTable::GetRiscV64PassStructInRegisterFlags(TypeHandle(cls));
  #endif // TARGET_RISCV64
  
      EE_TO_JIT_TRANSITION_LEAF();
diff --git a/src/coreclr/vm/methodtable.cpp b/src/coreclr/vm/methodtable.cpp

index 308375c..7014bc0 100644 (file)
--- a/src/coreclr/vm/methodtable.cpp
+++ b/src/coreclr/vm/methodtable.cpp
@@ -2192,6 +2192,11 @@ namespace
          }
  
          DWORD numIntroducedFields = pMT->GetNumIntroducedInstanceFields();
+        if (numIntroducedFields != 1)
+        {
+            return false;
+        }
+
          FieldDesc *pFieldStart = pMT->GetApproxFieldDescListRaw();
          CorElementType firstFieldElementType = pFieldStart->GetFieldType();
  
@@ -2202,8 +2207,7 @@ namespace
          // instead of adding additional padding at the end of a one-field structure.
          // We do this check here to save looking up the FixedBufferAttribute when loading the field
          // from metadata.
-        return numIntroducedFields == 1
-                        && ( CorTypeInfo::IsPrimitiveType_NoThrow(firstFieldElementType)
+        return (CorTypeInfo::IsPrimitiveType_NoThrow(firstFieldElementType)
                              || firstFieldElementType == ELEMENT_TYPE_VALUETYPE)
                          && (pFieldStart->GetOffset() == 0)
                          && pMT->HasLayout()
@@ -2916,1231 +2920,191 @@ void  MethodTable::AssignClassifiedEightByteTypes(SystemVStructRegisterPassingHe
  
  #endif // defined(UNIX_AMD64_ABI_ITF)
  
-#if defined(TARGET_LOONGARCH64)
-
-bool MethodTable::IsLoongArch64OnlyOneField(MethodTable * pMT)
+#if defined(TARGET_RISCV64) || defined(TARGET_LOONGARCH64)
+static bool HandleInlineArray(int elementTypeIndex, int nElements, StructFloatFieldInfoFlags types[2], int& typeIndex)
  {
-    TypeHandle th(pMT);
-
-    bool useNativeLayout      = false;
-    bool ret                  = false;
-    MethodTable* pMethodTable = nullptr;
-
-    if (!th.IsTypeDesc())
-    {
-        pMethodTable = th.AsMethodTable();
-        if (pMethodTable->HasLayout())
-        {
-            useNativeLayout = true;
-        }
-        else if (th.GetSize() <= 16 /*MAX_PASS_MULTIREG_BYTES*/)
-        {
-            DWORD numIntroducedFields = pMethodTable->GetNumIntroducedInstanceFields();
+    int nFlattenedFieldsPerElement = typeIndex - elementTypeIndex;
+    if (nFlattenedFieldsPerElement == 0)
+        return true;
  
-            if (numIntroducedFields == 1)
-            {
-                FieldDesc *pFieldStart = pMethodTable->GetApproxFieldDescListRaw();
+    assert(nFlattenedFieldsPerElement == 1 || nFlattenedFieldsPerElement == 2);
  
-                CorElementType fieldType = pFieldStart[0].GetFieldType();
+    if (nElements > 2)
+        return false;
  
-                if (CorTypeInfo::IsPrimitiveType_NoThrow(fieldType))
-                {
-                    ret = true;
-                }
-                else if (fieldType == ELEMENT_TYPE_VALUETYPE)
-                {
-                    pMethodTable  = pFieldStart->GetApproxFieldTypeHandleThrowing().GetMethodTable();
-                    if (pMethodTable->GetNumIntroducedInstanceFields() == 1)
-                    {
-                        ret = IsLoongArch64OnlyOneField(pMethodTable);
-                    }
-                }
-            }
-            goto _End_arg;
-        }
-    }
-    else
+    if (nElements == 2)
      {
-        _ASSERTE(th.IsNativeValueType());
+        if (typeIndex + nFlattenedFieldsPerElement > 2)
+            return false;
  
-        useNativeLayout = true;
-        pMethodTable = th.AsNativeValueType();
+        assert(elementTypeIndex == 0);
+        assert(typeIndex == 1);
+        types[typeIndex++] = types[elementTypeIndex]; // duplicate the array element type and count it, so callers see 2 flattened fields and a later field cannot overwrite types[1]
      }
-    _ASSERTE(pMethodTable != nullptr);
+    return true;
+}
  
-    if (useNativeLayout)
+static bool FlattenFieldTypes(TypeHandle th, StructFloatFieldInfoFlags types[2], int& typeIndex)
+{
+    bool isManaged = !th.IsTypeDesc();
+    MethodTable* pMT = isManaged ? th.AsMethodTable() : th.AsNativeValueType();
+    int nFields = isManaged ? pMT->GetNumIntroducedInstanceFields() : pMT->GetNativeLayoutInfo()->GetNumFields();
+
+    // TODO: templatize isManaged and use if constexpr for differences when we migrate to C++17
+    // because the logic for both branches is nearly the same.
+    if (isManaged)
      {
-        if (th.GetSize() <= 16 /*MAX_PASS_MULTIREG_BYTES*/)
+        FieldDesc* fields = pMT->GetApproxFieldDescListRaw();
+        int elementTypeIndex = typeIndex;
+        for (int i = 0; i < nFields; ++i)
          {
-            DWORD numIntroducedFields = pMethodTable->GetNativeLayoutInfo()->GetNumFields();
-            FieldDesc *pFieldStart = nullptr;
+            if (i > 0 && fields[i-1].GetOffset() + fields[i-1].GetSize() > fields[i].GetOffset())
+                return false; // overlapping fields
  
-            if (numIntroducedFields == 1)
+            CorElementType type = fields[i].GetFieldType();
+            if (type == ELEMENT_TYPE_VALUETYPE)
              {
-                pFieldStart = pMethodTable->GetApproxFieldDescListRaw();
-
-                CorElementType fieldType = pFieldStart->GetFieldType();
-
-                // InlineArray types and fixed buffer types have implied repeated fields.
-                // Checking if a type is an InlineArray type is cheap, so we'll do that first.
-                bool hasImpliedRepeatedFields = HasImpliedRepeatedFields(pMethodTable);
-
-                if (hasImpliedRepeatedFields)
-                {
-                    numIntroducedFields = pMethodTable->GetNumInstanceFieldBytes() / pFieldStart->GetSize();
-                    if (numIntroducedFields != 1)
-                    {
-                        goto _End_arg;
-                    }
-                }
+                MethodTable* nested = fields[i].GetApproxFieldTypeHandleThrowing().GetMethodTable();
+                if (!FlattenFieldTypes(TypeHandle(nested), types, typeIndex))
+                    return false;
+            }
+            else if (fields[i].GetSize() <= TARGET_POINTER_SIZE)
+            {
+                if (typeIndex >= 2)
+                    return false;
  
-                if (CorTypeInfo::IsPrimitiveType_NoThrow(fieldType))
-                {
-                    ret = true;
-                }
-                else if (fieldType == ELEMENT_TYPE_VALUETYPE)
-                {
-                    const NativeFieldDescriptor *pNativeFieldDescs = pMethodTable->GetNativeLayoutInfo()->GetNativeFieldDescriptors();
-                    NativeFieldCategory nfc = pNativeFieldDescs->GetCategory();
-                    if (nfc == NativeFieldCategory::NESTED)
-                    {
-                        pMethodTable = pNativeFieldDescs->GetNestedNativeMethodTable();
-                        ret = IsLoongArch64OnlyOneField(pMethodTable);
-                    }
-                    else if (nfc != NativeFieldCategory::ILLEGAL)
-                    {
-                        ret = true;
-                    }
-                }
+                StructFloatFieldInfoFlags retType = StructFloatFieldInfoFlags(
+                    (CorTypeInfo::IsFloat_NoThrow(type) ? STRUCT_FLOAT_FIELD_FIRST : 0) |
+                    (CorTypeInfo::Size_NoThrow(type) == TARGET_POINTER_SIZE ? STRUCT_FIRST_FIELD_SIZE_IS8 : 0));
+                types[typeIndex++] = retType;
              }
              else
              {
-                ret = false;
+                return false;
              }
          }
-    }
-_End_arg:
-
-    return ret;
-}
-
-int MethodTable::GetLoongArch64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE cls)
-{
-    TypeHandle th(cls);
-
-    bool useNativeLayout           = false;
-    int size = STRUCT_NO_FLOAT_FIELD;
-    MethodTable* pMethodTable      = nullptr;
  
-    if (!th.IsTypeDesc())
-    {
-        pMethodTable = th.AsMethodTable();
-        if (pMethodTable->HasLayout())
+        if (HasImpliedRepeatedFields(pMT)) // inline array or fixed buffer
          {
-            useNativeLayout = true;
+            assert(nFields == 1);
+            int nElements = pMT->GetNumInstanceFieldBytes() / fields[0].GetSize();
+            if (!HandleInlineArray(elementTypeIndex, nElements, types, typeIndex))
+                return false;
          }
-        else if (th.GetSize() <= 16 /*MAX_PASS_MULTIREG_BYTES*/)
+    }
+    else // native layout
+    {
+        const NativeFieldDescriptor* fields = pMT->GetNativeLayoutInfo()->GetNativeFieldDescriptors();
+        for (int i = 0; i < nFields; ++i)
          {
-            DWORD numIntroducedFields = pMethodTable->GetNumIntroducedInstanceFields();
+            if (i > 0 && fields[i-1].GetExternalOffset() + fields[i-1].NativeSize() > fields[i].GetExternalOffset())
+                return false; // overlapping fields
  
-            if (numIntroducedFields == 1)
+            NativeFieldCategory category = fields[i].GetCategory();
+            if (category == NativeFieldCategory::NESTED)
              {
-                FieldDesc *pFieldStart = pMethodTable->GetApproxFieldDescListRaw();
+                int elementTypeIndex = typeIndex;
  
-                CorElementType fieldType = pFieldStart[0].GetFieldType();
+                MethodTable* nested = fields[i].GetNestedNativeMethodTable();
+                if (!FlattenFieldTypes(TypeHandle(nested), types, typeIndex))
+                    return false;
  
-                if (CorTypeInfo::IsPrimitiveType_NoThrow(fieldType))
-                {
-                    if (fieldType == ELEMENT_TYPE_R4)
-                    {
-                        size = STRUCT_FLOAT_FIELD_ONLY_ONE;
-                    }
-                    else if (fieldType == ELEMENT_TYPE_R8)
-                    {
-                        size = STRUCT_FLOAT_FIELD_ONLY_ONE | STRUCT_FIRST_FIELD_SIZE_IS8;
-                    }
-                }
-                else if (fieldType == ELEMENT_TYPE_VALUETYPE)
-                {
-                    pMethodTable  = pFieldStart->GetApproxFieldTypeHandleThrowing().GetMethodTable();
-                    size = GetLoongArch64PassStructInRegisterFlags((CORINFO_CLASS_HANDLE)pMethodTable);
-                }
+                // In native layout fixed arrays are marked as NESTED just like structs
+                int nElements = fields[i].GetNumElements();
+                if (!HandleInlineArray(elementTypeIndex, nElements, types, typeIndex))
+                    return false;
              }
-            else if (numIntroducedFields == 2)
+            else if (fields[i].NativeSize() <= TARGET_POINTER_SIZE)
              {
-                FieldDesc *pFieldSecond;
-                FieldDesc *pFieldFirst = pMethodTable->GetApproxFieldDescListRaw();
-                if (pFieldFirst->GetOffset() == 0)
-                {
-                    pFieldSecond = pFieldFirst + 1;
-                }
-                else
-                {
-                    pFieldSecond = pFieldFirst;
-                    pFieldFirst  = pFieldFirst + 1;
-                }
-                assert(pFieldFirst->GetOffset() == 0);
-
-                if (pFieldFirst->GetSize() > 8)
-                {
-                    goto _End_arg;
-                }
-
-                CorElementType fieldType = pFieldFirst[0].GetFieldType();
-                if (CorTypeInfo::IsPrimitiveType_NoThrow(fieldType))
-                {
-                    if (fieldType == ELEMENT_TYPE_R4)
-                    {
-                        size = STRUCT_FLOAT_FIELD_FIRST;
-                    }
-                    else if (fieldType == ELEMENT_TYPE_R8)
-                    {
-                        size = STRUCT_FIRST_FIELD_DOUBLE;
-                    }
-                    else if (pFieldFirst[0].GetSize() == 8)
-                    {
-                        size = STRUCT_FIRST_FIELD_SIZE_IS8;
-                    }
-
-                }
-                else if (fieldType == ELEMENT_TYPE_VALUETYPE)
-                {
-                    pMethodTable  = pFieldFirst->GetApproxFieldTypeHandleThrowing().GetMethodTable();
-                    if (IsLoongArch64OnlyOneField(pMethodTable))
-                    {
-                        size = GetLoongArch64PassStructInRegisterFlags((CORINFO_CLASS_HANDLE)pMethodTable);
-                        if ((size & STRUCT_FLOAT_FIELD_ONLY_ONE) != 0)
-                        {
-                            size = pFieldFirst[0].GetSize() == 8 ? STRUCT_FIRST_FIELD_DOUBLE : STRUCT_FLOAT_FIELD_FIRST;
-                        }
-                        else if (size == STRUCT_NO_FLOAT_FIELD)
-                        {
-                            size = pFieldFirst[0].GetSize() == 8 ? STRUCT_FIRST_FIELD_SIZE_IS8: 0;
-                        }
-                        else
-                        {
-                            size = STRUCT_NO_FLOAT_FIELD;
-                            goto _End_arg;
-                        }
-                    }
-                    else
-                    {
-                        size = STRUCT_NO_FLOAT_FIELD;
-                        goto _End_arg;
-                    }
-                }
-                else if (pFieldFirst[0].GetSize() == 8)
-                {
-                    size = STRUCT_FIRST_FIELD_SIZE_IS8;
-                }
+                if (typeIndex >= 2)
+                    return false;
  
-                fieldType = pFieldSecond[0].GetFieldType();
-                if (pFieldSecond[0].GetSize() > 8)
-                {
-                    size = STRUCT_NO_FLOAT_FIELD;
-                    goto _End_arg;
-                }
-                else if (CorTypeInfo::IsPrimitiveType_NoThrow(fieldType))
-                {
-                    if (fieldType == ELEMENT_TYPE_R4)
-                    {
-                        size = size & STRUCT_FLOAT_FIELD_FIRST ? (size ^ STRUCT_MERGE_FIRST_SECOND) : (size | STRUCT_FLOAT_FIELD_SECOND);
-                    }
-                    else if (fieldType == ELEMENT_TYPE_R8)
-                    {
-                        size = size & STRUCT_FLOAT_FIELD_FIRST ? (size ^ STRUCT_MERGE_FIRST_SECOND_8) : (size | STRUCT_SECOND_FIELD_DOUBLE);
-                    }
-                    else if ((size & STRUCT_FLOAT_FIELD_FIRST) == 0)
-                    {
-                        size = STRUCT_NO_FLOAT_FIELD;
-                    }
-                    else if (pFieldSecond[0].GetSize() == 8)
-                    {
-                        size |= STRUCT_SECOND_FIELD_SIZE_IS8;
-                    }
-                }
-                else if (fieldType == ELEMENT_TYPE_VALUETYPE)
-                {
-                    pMethodTable  = pFieldSecond[0].GetApproxFieldTypeHandleThrowing().GetMethodTable();
-                    if (IsLoongArch64OnlyOneField(pMethodTable))
-                    {
-                        int size2 = GetLoongArch64PassStructInRegisterFlags((CORINFO_CLASS_HANDLE)pMethodTable);
-                        if ((size2 & STRUCT_FLOAT_FIELD_ONLY_ONE) != 0)
-                        {
-                            if (pFieldSecond[0].GetSize() == 8)
-                            {
-                                size = size & STRUCT_FLOAT_FIELD_FIRST ? (size ^ STRUCT_MERGE_FIRST_SECOND_8) : (size | STRUCT_SECOND_FIELD_DOUBLE);
-                            }
-                            else
-                            {
-                                size = size & STRUCT_FLOAT_FIELD_FIRST ? (size ^ STRUCT_MERGE_FIRST_SECOND) : (size | STRUCT_FLOAT_FIELD_SECOND);
-                            }
-                        }
-                        else if ((size & STRUCT_FLOAT_FIELD_FIRST) == 0)
-                        {
-                            size = STRUCT_NO_FLOAT_FIELD;
-                        }
-                        else if (size2 == STRUCT_NO_FLOAT_FIELD)
-                        {
-                            size |= pFieldSecond[0].GetSize() == 8 ? STRUCT_SECOND_FIELD_SIZE_IS8 : 0;
-                        }
-                        else
-                        {
-                            size = STRUCT_NO_FLOAT_FIELD;
-                        }
-                    }
-                    else
-                    {
-                        size = STRUCT_NO_FLOAT_FIELD;
-                    }
-                }
-                else if ((size & STRUCT_FLOAT_FIELD_FIRST) == 0)
-                {
-                    size = STRUCT_NO_FLOAT_FIELD;
-                }
-                else if (pFieldSecond[0].GetSize() == 8)
-                {
-                    size |= STRUCT_SECOND_FIELD_SIZE_IS8;
-                }
+                StructFloatFieldInfoFlags type = StructFloatFieldInfoFlags(
+                    (category == NativeFieldCategory::FLOAT ? STRUCT_FLOAT_FIELD_FIRST : 0) |
+                    (fields[i].NativeSize() == TARGET_POINTER_SIZE ? STRUCT_FIRST_FIELD_SIZE_IS8 : 0));
+                types[typeIndex++] = type;
+            }
+            else
+            {
+                return false;
              }
-
-            goto _End_arg;
          }
      }
-    else
-    {
-        _ASSERTE(th.IsNativeValueType());
-
-        useNativeLayout = true;
-        pMethodTable = th.AsNativeValueType();
-    }
-    _ASSERTE(pMethodTable != nullptr);
-
-    if (useNativeLayout)
-    {
-        if (th.GetSize() <= 16 /*MAX_PASS_MULTIREG_BYTES*/)
-        {
-            DWORD numIntroducedFields = pMethodTable->GetNativeLayoutInfo()->GetNumFields();
-            FieldDesc *pFieldStart = nullptr;
-
-            if (numIntroducedFields == 1)
-            {
-                pFieldStart = pMethodTable->GetApproxFieldDescListRaw();
-
-                CorElementType fieldType = pFieldStart->GetFieldType();
+    return true;
+}
+#endif
  
-                // InlineArray types and fixed buffer types have implied repeated fields.
-                // Checking if a type is an InlineArray type is cheap, so we'll do that first.
-                bool hasImpliedRepeatedFields = HasImpliedRepeatedFields(pMethodTable);
+#if defined(TARGET_LOONGARCH64)
+int MethodTable::GetLoongArch64PassStructInRegisterFlags(TypeHandle th)
+{
+    if (th.GetSize() > ENREGISTERED_PARAMTYPE_MAXSIZE)
+        return STRUCT_NO_FLOAT_FIELD;
  
-                if (hasImpliedRepeatedFields)
-                {
-                    numIntroducedFields = pMethodTable->GetNumInstanceFieldBytes() / pFieldStart->GetSize();
-                    if (numIntroducedFields > 2)
-                    {
-                        goto _End_arg;
-                    }
+    StructFloatFieldInfoFlags types[2] = {STRUCT_NO_FLOAT_FIELD, STRUCT_NO_FLOAT_FIELD};
+    int nFields = 0;
+    if (!FlattenFieldTypes(th, types, nFields) || nFields == 0)
+        return STRUCT_NO_FLOAT_FIELD;
  
-                    if (fieldType == ELEMENT_TYPE_R4)
-                    {
-                        if (numIntroducedFields == 1)
-                        {
-                            size = STRUCT_FLOAT_FIELD_ONLY_ONE;
-                        }
-                        else if (numIntroducedFields == 2)
-                        {
-                            size = STRUCT_FLOAT_FIELD_ONLY_TWO;
-                        }
-                        goto _End_arg;
-                    }
-                    else if (fieldType == ELEMENT_TYPE_R8)
-                    {
-                        if (numIntroducedFields == 1)
-                        {
-                            size = STRUCT_FLOAT_FIELD_ONLY_ONE | STRUCT_FIRST_FIELD_SIZE_IS8;
-                        }
-                        else if (numIntroducedFields == 2)
-                        {
-                            size = STRUCT_FIELD_TWO_DOUBLES;
-                        }
-                        goto _End_arg;
-                    }
-                }
+    assert(nFields == 1 || nFields == 2);
  
-                if (CorTypeInfo::IsPrimitiveType_NoThrow(fieldType))
-                {
-                    if (fieldType == ELEMENT_TYPE_R4)
-                    {
-                        size = STRUCT_FLOAT_FIELD_ONLY_ONE;
-                    }
-                    else if (fieldType == ELEMENT_TYPE_R8)
-                    {
-                        size = STRUCT_FLOAT_FIELD_ONLY_ONE | STRUCT_FIRST_FIELD_SIZE_IS8;
-                    }
-                }
-                else if (fieldType == ELEMENT_TYPE_VALUETYPE)
-                {
-                    const NativeFieldDescriptor *pNativeFieldDescs = pMethodTable->GetNativeLayoutInfo()->GetNativeFieldDescriptors();
-                    NativeFieldCategory nfc = pNativeFieldDescs->GetCategory();
-                    if (nfc == NativeFieldCategory::NESTED)
-                    {
-                        pMethodTable = pNativeFieldDescs->GetNestedNativeMethodTable();
-                        size = GetLoongArch64PassStructInRegisterFlags((CORINFO_CLASS_HANDLE)pMethodTable);
-                        return size;
-                    }
-                    else if (nfc == NativeFieldCategory::FLOAT)
-                    {
-                        if (pFieldStart->GetSize() == 4)
-                        {
-                            size = STRUCT_FLOAT_FIELD_ONLY_ONE;
-                        }
-                        else if (pFieldStart->GetSize() == 8)
-                        {
-                            size = STRUCT_FLOAT_FIELD_ONLY_ONE | STRUCT_FIRST_FIELD_SIZE_IS8;
-                        }
-                    }
-                }
-            }
-            else if (numIntroducedFields == 2)
-            {
-                pFieldStart = pMethodTable->GetApproxFieldDescListRaw();
+    static_assert((STRUCT_FLOAT_FIELD_SECOND | STRUCT_SECOND_FIELD_SIZE_IS8)
+        == (STRUCT_FLOAT_FIELD_FIRST | STRUCT_FIRST_FIELD_SIZE_IS8) << 1,
+        "SECOND flags need to be FIRST shifted by 1");
+    int flags = types[0] | (types[1] << 1);
  
-                if (pFieldStart->GetSize() > 8)
-                {
-                    goto _End_arg;
-                }
+    static const int bothFloat = STRUCT_FLOAT_FIELD_FIRST | STRUCT_FLOAT_FIELD_SECOND;
+    if ((flags & bothFloat) == 0)
+        return STRUCT_NO_FLOAT_FIELD;
  
-                if (pFieldStart->GetOffset() || !pFieldStart[1].GetOffset() || (pFieldStart[0].GetSize() > pFieldStart[1].GetOffset()))
-                {
-                    goto _End_arg;
-                }
+    if ((flags & bothFloat) == bothFloat)
+    {
+        assert(nFields == 2);
+        flags ^= (bothFloat | STRUCT_FLOAT_FIELD_ONLY_TWO); // replace bothFloat with ONLY_TWO
+    }
+    else if (nFields == 1)
+    {
+        assert((flags & STRUCT_FLOAT_FIELD_FIRST) != 0);
+        flags ^= (STRUCT_FLOAT_FIELD_FIRST | STRUCT_FLOAT_FIELD_ONLY_ONE); // replace FIRST with ONLY_ONE
+    }
  
-                CorElementType fieldType = pFieldStart[0].GetFieldType();
-                if (CorTypeInfo::IsPrimitiveType_NoThrow(fieldType))
-                {
-                    if (fieldType == ELEMENT_TYPE_R4)
-                    {
-                        size = STRUCT_FLOAT_FIELD_FIRST;
-                    }
-                    else if (fieldType == ELEMENT_TYPE_R8)
-                    {
-                        size = STRUCT_FIRST_FIELD_DOUBLE;
-                    }
-                    else if (pFieldStart[0].GetSize() == 8)
-                    {
-                        size = STRUCT_FIRST_FIELD_SIZE_IS8;
-                    }
+    return flags;
+}
+#endif
  
-                    fieldType = pFieldStart[1].GetFieldType();
-                    if (CorTypeInfo::IsPrimitiveType_NoThrow(fieldType))
-                    {
-                        if (fieldType == ELEMENT_TYPE_R4)
-                        {
-                            size = size & STRUCT_FLOAT_FIELD_FIRST ? (size ^ STRUCT_MERGE_FIRST_SECOND) : (size | STRUCT_FLOAT_FIELD_SECOND);
-                        }
-                        else if (fieldType == ELEMENT_TYPE_R8)
-                        {
-                            size = size & STRUCT_FLOAT_FIELD_FIRST ? (size ^ STRUCT_MERGE_FIRST_SECOND_8) : (size | STRUCT_SECOND_FIELD_DOUBLE);
-                        }
-                        else if ((size & STRUCT_FLOAT_FIELD_FIRST) == 0)
-                        {
-                            size = STRUCT_NO_FLOAT_FIELD;
-                        }
-                        else if (pFieldStart[1].GetSize() == 8)
-                        {
-                            size |= STRUCT_SECOND_FIELD_SIZE_IS8;
-                        }
-                        goto _End_arg;
-                    }
-                }
-                else if (fieldType == ELEMENT_TYPE_VALUETYPE)
-                {
-                    const NativeFieldDescriptor *pNativeFieldDescs = pMethodTable->GetNativeLayoutInfo()->GetNativeFieldDescriptors();
+#if defined(TARGET_RISCV64)
+int MethodTable::GetRiscV64PassStructInRegisterFlags(TypeHandle th)
+{
+    if (th.GetSize() > ENREGISTERED_PARAMTYPE_MAXSIZE)
+        return STRUCT_NO_FLOAT_FIELD;
  
-                    NativeFieldCategory nfc = pNativeFieldDescs->GetCategory();
+    StructFloatFieldInfoFlags types[2] = {STRUCT_NO_FLOAT_FIELD, STRUCT_NO_FLOAT_FIELD};
+    int nFields = 0;
+    if (!FlattenFieldTypes(th, types, nFields) || nFields == 0)
+        return STRUCT_NO_FLOAT_FIELD;
  
-                    if (nfc == NativeFieldCategory::NESTED)
-                    {
-                        if (pNativeFieldDescs->GetNumElements() != 1)
-                        {
-                            size = STRUCT_NO_FLOAT_FIELD;
-                            goto _End_arg;
-                        }
+    assert(nFields == 1 || nFields == 2);
  
-                        MethodTable* pMethodTable2 = pNativeFieldDescs->GetNestedNativeMethodTable();
+    static_assert((STRUCT_FLOAT_FIELD_SECOND | STRUCT_SECOND_FIELD_SIZE_IS8)
+        == (STRUCT_FLOAT_FIELD_FIRST | STRUCT_FIRST_FIELD_SIZE_IS8) << 1,
+        "SECOND flags need to be FIRST shifted by 1");
+    int flags = types[0] | (types[1] << 1);
  
-                        if (!IsLoongArch64OnlyOneField(pMethodTable2))
-                        {
-                            size = STRUCT_NO_FLOAT_FIELD;
-                            goto _End_arg;
-                        }
+    static const int bothFloat = STRUCT_FLOAT_FIELD_FIRST | STRUCT_FLOAT_FIELD_SECOND;
+    if ((flags & bothFloat) == 0)
+        return STRUCT_NO_FLOAT_FIELD;
  
-                        size = GetLoongArch64PassStructInRegisterFlags((CORINFO_CLASS_HANDLE)pMethodTable2);
-                        if ((size & STRUCT_FLOAT_FIELD_ONLY_ONE) != 0)
-                        {
-                            if (pFieldStart->GetSize() == 8)
-                            {
-                                size = STRUCT_FIRST_FIELD_DOUBLE;
-                            }
-                            else
-                            {
-                                size = STRUCT_FLOAT_FIELD_FIRST;
-                            }
-                        }
-                        else if (pFieldStart->GetSize() == 8)
-                        {
-                            size = STRUCT_FIRST_FIELD_SIZE_IS8;
-                        }
-                        else
-                        {
-                            size = STRUCT_NO_FLOAT_FIELD;
-                            goto _End_arg;
-                        }
-                    }
-                    else if (nfc == NativeFieldCategory::FLOAT)
-                    {
-                        if (pFieldStart[0].GetSize() == 4)
-                        {
-                            size = STRUCT_FLOAT_FIELD_FIRST;
-                        }
-                        else if (pFieldStart[0].GetSize() == 8)
-                        {
-                            _ASSERTE((pMethodTable->GetNativeSize() == 8) || (pMethodTable->GetNativeSize() == 16));
-                            size = STRUCT_FIRST_FIELD_DOUBLE;
-                        }
-                    }
-                    else if (pFieldStart[0].GetSize() == 8)
-                    {
-                        size = STRUCT_FIRST_FIELD_SIZE_IS8;
-                    }
-                }
-                else if (fieldType == ELEMENT_TYPE_CLASS)
-                {
-                    size = STRUCT_NO_FLOAT_FIELD;
-                    goto _End_arg;
-                }
-                else if (pFieldStart[0].GetSize() == 8)
-                {
-                    size = STRUCT_FIRST_FIELD_SIZE_IS8;
-                }
-
-                fieldType = pFieldStart[1].GetFieldType();
-                if (pFieldStart[1].GetSize() > 8)
-                {
-                    size = STRUCT_NO_FLOAT_FIELD;
-                    goto _End_arg;
-                }
-                else if (CorTypeInfo::IsPrimitiveType_NoThrow(fieldType))
-                {
-                    if (fieldType == ELEMENT_TYPE_R4)
-                    {
-                        size = size & STRUCT_FLOAT_FIELD_FIRST ? (size ^ STRUCT_MERGE_FIRST_SECOND) : (size | STRUCT_FLOAT_FIELD_SECOND);
-                    }
-                    else if (fieldType == ELEMENT_TYPE_R8)
-                    {
-                        size = size & STRUCT_FLOAT_FIELD_FIRST ? (size ^ STRUCT_MERGE_FIRST_SECOND_8) : (size | STRUCT_SECOND_FIELD_DOUBLE);
-                    }
-                    else if ((size & STRUCT_FLOAT_FIELD_FIRST) == 0)
-                    {
-                        size = STRUCT_NO_FLOAT_FIELD;
-                    }
-                    else if (pFieldStart[1].GetSize() == 8)
-                    {
-                        size |= STRUCT_SECOND_FIELD_SIZE_IS8;
-                    }
-                }
-                else if (fieldType == ELEMENT_TYPE_VALUETYPE)
-                {
-                    const NativeFieldDescriptor *pNativeFieldDescs = pMethodTable->GetNativeLayoutInfo()->GetNativeFieldDescriptors();
-                    NativeFieldCategory nfc = pNativeFieldDescs[1].GetCategory();
-
-                    if (nfc == NativeFieldCategory::NESTED)
-                    {
-                        if (pNativeFieldDescs[1].GetNumElements() != 1)
-                        {
-                            size = STRUCT_NO_FLOAT_FIELD;
-                            goto _End_arg;
-                        }
-
-                        MethodTable* pMethodTable2 = pNativeFieldDescs[1].GetNestedNativeMethodTable();
-
-                        if (!IsLoongArch64OnlyOneField(pMethodTable2))
-                        {
-                            size = STRUCT_NO_FLOAT_FIELD;
-                            goto _End_arg;
-                        }
-
-                        if ((GetLoongArch64PassStructInRegisterFlags((CORINFO_CLASS_HANDLE)pMethodTable2) & STRUCT_FLOAT_FIELD_ONLY_ONE) != 0)
-                        {
-                            if (pFieldStart[1].GetSize() == 4)
-                            {
-                                size = size & STRUCT_FLOAT_FIELD_FIRST ? (size ^ STRUCT_MERGE_FIRST_SECOND) : (size | STRUCT_FLOAT_FIELD_SECOND);
-                            }
-                            else if (pFieldStart[1].GetSize() == 8)
-                            {
-                                size = size & STRUCT_FLOAT_FIELD_FIRST ? (size ^ STRUCT_MERGE_FIRST_SECOND_8) : (size | STRUCT_SECOND_FIELD_DOUBLE);
-                            }
-                        }
-                        else if ((size & STRUCT_FLOAT_FIELD_FIRST) == 0)
-                        {
-                            size = STRUCT_NO_FLOAT_FIELD;
-                        }
-                        else if (pFieldStart[1].GetSize() == 8)
-                        {
-                            size |= STRUCT_SECOND_FIELD_SIZE_IS8;
-                        }
-                    }
-                    else if (nfc == NativeFieldCategory::FLOAT)
-                    {
-                        if (pFieldStart[1].GetSize() == 4)
-                        {
-                            size = size & STRUCT_FLOAT_FIELD_FIRST ? (size ^ STRUCT_MERGE_FIRST_SECOND) : (size | STRUCT_FLOAT_FIELD_SECOND);
-                        }
-                        else if (pFieldStart[1].GetSize() == 8)
-                        {
-                            size = size & STRUCT_FLOAT_FIELD_FIRST ? (size ^ STRUCT_MERGE_FIRST_SECOND_8) : (size | STRUCT_SECOND_FIELD_DOUBLE);
-                        }
-                    }
-                    else if ((size & STRUCT_FLOAT_FIELD_FIRST) == 0)
-                    {
-                        size = STRUCT_NO_FLOAT_FIELD;
-                    }
-                    else if (pFieldStart[1].GetSize() == 8)
-                    {
-                        size |= STRUCT_SECOND_FIELD_SIZE_IS8;
-                    }
-                }
-                else if (fieldType == ELEMENT_TYPE_CLASS)
-                {
-                    size = STRUCT_NO_FLOAT_FIELD;
-                    goto _End_arg;
-                }
-                else if ((size & STRUCT_FLOAT_FIELD_FIRST) == 0)
-                {
-                    size = STRUCT_NO_FLOAT_FIELD;
-                }
-                else if (pFieldStart[1].GetSize() == 8)
-                {
-                    size |= STRUCT_SECOND_FIELD_SIZE_IS8;
-                }
-            }
-        }
-    }
-_End_arg:
-
-    return size;
-}
-#endif
-
-#if defined(TARGET_RISCV64)
-
-bool MethodTable::IsRiscV64OnlyOneField(MethodTable * pMT)
-{
-    TypeHandle th(pMT);
-
-    bool useNativeLayout      = false;
-    bool ret                  = false;
-    MethodTable* pMethodTable = nullptr;
-
-    if (!th.IsTypeDesc())
+    if ((flags & bothFloat) == bothFloat)
      {
-        pMethodTable = th.AsMethodTable();
-        if (pMethodTable->HasLayout())
-        {
-            useNativeLayout = true;
-        }
-        else if (th.GetSize() <= 16 /*MAX_PASS_MULTIREG_BYTES*/)
-        {
-            DWORD numIntroducedFields = pMethodTable->GetNumIntroducedInstanceFields();
-
-            if (numIntroducedFields == 1)
-            {
-                FieldDesc *pFieldStart = pMethodTable->GetApproxFieldDescListRaw();
-
-                CorElementType fieldType = pFieldStart[0].GetFieldType();
-
-                if (CorTypeInfo::IsPrimitiveType_NoThrow(fieldType))
-                {
-                    ret = true;
-                }
-                else if (fieldType == ELEMENT_TYPE_VALUETYPE)
-                {
-                    pMethodTable  = pFieldStart->GetApproxFieldTypeHandleThrowing().GetMethodTable();
-                    if (pMethodTable->GetNumIntroducedInstanceFields() == 1)
-                    {
-                        ret = IsRiscV64OnlyOneField(pMethodTable);
-                    }
-                }
-            }
-            goto _End_arg;
-        }
-    }
-    else
-    {
-        _ASSERTE(th.IsNativeValueType());
-
-        useNativeLayout = true;
-        pMethodTable = th.AsNativeValueType();
-    }
-    _ASSERTE(pMethodTable != nullptr);
-
-    if (useNativeLayout)
-    {
-        if (th.GetSize() <= 16 /*MAX_PASS_MULTIREG_BYTES*/)
-        {
-            DWORD numIntroducedFields = pMethodTable->GetNativeLayoutInfo()->GetNumFields();
-            FieldDesc *pFieldStart = nullptr;
-
-            if (numIntroducedFields == 1)
-            {
-                pFieldStart = pMethodTable->GetApproxFieldDescListRaw();
-
-                CorElementType fieldType = pFieldStart->GetFieldType();
-
-                // InlineArray types and fixed buffer types have implied repeated fields.
-                // Checking if a type is an InlineArray type is cheap, so we'll do that first.
-                bool hasImpliedRepeatedFields = HasImpliedRepeatedFields(pMethodTable);
-
-                if (hasImpliedRepeatedFields)
-                {
-                    numIntroducedFields = pMethodTable->GetNumInstanceFieldBytes() / pFieldStart->GetSize();
-                    if (numIntroducedFields != 1)
-                    {
-                        goto _End_arg;
-                    }
-                }
-
-                if (CorTypeInfo::IsPrimitiveType_NoThrow(fieldType))
-                {
-                    ret = true;
-                }
-                else if (fieldType == ELEMENT_TYPE_VALUETYPE)
-                {
-                    const NativeFieldDescriptor *pNativeFieldDescs = pMethodTable->GetNativeLayoutInfo()->GetNativeFieldDescriptors();
-                    NativeFieldCategory nfc = pNativeFieldDescs->GetCategory();
-                    if (nfc == NativeFieldCategory::NESTED)
-                    {
-                        pMethodTable = pNativeFieldDescs->GetNestedNativeMethodTable();
-                        ret = IsRiscV64OnlyOneField(pMethodTable);
-                    }
-                    else if (nfc != NativeFieldCategory::ILLEGAL)
-                    {
-                        ret = true;
-                    }
-                }
-            }
-            else
-            {
-                ret = false;
-            }
-        }
+        assert(nFields == 2);
+        flags ^= (bothFloat | STRUCT_FLOAT_FIELD_ONLY_TWO); // replace bothFloat with ONLY_TWO
      }
-_End_arg:
-
-    return ret;
-}
-
-int MethodTable::GetRiscV64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE cls)
-{
-    TypeHandle th(cls);
-
-    bool useNativeLayout           = false;
-    int size = STRUCT_NO_FLOAT_FIELD;
-    MethodTable* pMethodTable      = nullptr;
-
-    if (!th.IsTypeDesc())
+    else if (nFields == 1)
      {
-        pMethodTable = th.AsMethodTable();
-        if (pMethodTable->HasLayout())
-        {
-            useNativeLayout = true;
-        }
-        else if (th.GetSize() <= 16 /*MAX_PASS_MULTIREG_BYTES*/)
-        {
-            DWORD numIntroducedFields = pMethodTable->GetNumIntroducedInstanceFields();
-
-            if (numIntroducedFields == 1)
-            {
-                FieldDesc *pFieldStart = pMethodTable->GetApproxFieldDescListRaw();
-
-                CorElementType fieldType = pFieldStart[0].GetFieldType();
-
-                if (CorTypeInfo::IsPrimitiveType_NoThrow(fieldType))
-                {
-                    if (fieldType == ELEMENT_TYPE_R4)
-                    {
-                        size = STRUCT_FLOAT_FIELD_ONLY_ONE;
-                    }
-                    else if (fieldType == ELEMENT_TYPE_R8)
-                    {
-                        size = STRUCT_FLOAT_FIELD_ONLY_ONE | STRUCT_FIRST_FIELD_SIZE_IS8;
-                    }
-                }
-                else if (fieldType == ELEMENT_TYPE_VALUETYPE)
-                {
-                    pMethodTable  = pFieldStart->GetApproxFieldTypeHandleThrowing().GetMethodTable();
-                    size = GetRiscV64PassStructInRegisterFlags((CORINFO_CLASS_HANDLE)pMethodTable);
-                }
-            }
-            else if (numIntroducedFields == 2)
-            {
-                FieldDesc *pFieldSecond;
-                FieldDesc *pFieldFirst = pMethodTable->GetApproxFieldDescListRaw();
-                if (pFieldFirst->GetOffset() == 0)
-                {
-                    pFieldSecond = pFieldFirst + 1;
-                }
-                else
-                {
-                    pFieldSecond = pFieldFirst;
-                    pFieldFirst  = pFieldFirst + 1;
-                }
-                assert(pFieldFirst->GetOffset() == 0);
-
-                if (pFieldFirst->GetSize() > 8)
-                {
-                    goto _End_arg;
-                }
-
-                CorElementType fieldType = pFieldFirst[0].GetFieldType();
-                if (CorTypeInfo::IsPrimitiveType_NoThrow(fieldType))
-                {
-                    if (fieldType == ELEMENT_TYPE_R4)
-                    {
-                        size = STRUCT_FLOAT_FIELD_FIRST;
-                    }
-                    else if (fieldType == ELEMENT_TYPE_R8)
-                    {
-                        size = STRUCT_FIRST_FIELD_DOUBLE;
-                    }
-                    else if (pFieldFirst[0].GetSize() == 8)
-                    {
-                        size = STRUCT_FIRST_FIELD_SIZE_IS8;
-                    }
-
-                }
-                else if (fieldType == ELEMENT_TYPE_VALUETYPE)
-                {
-                    pMethodTable  = pFieldFirst->GetApproxFieldTypeHandleThrowing().GetMethodTable();
-                    if (IsRiscV64OnlyOneField(pMethodTable))
-                    {
-                        size = GetRiscV64PassStructInRegisterFlags((CORINFO_CLASS_HANDLE)pMethodTable);
-                        if ((size & STRUCT_FLOAT_FIELD_ONLY_ONE) != 0)
-                        {
-                            size = pFieldFirst[0].GetSize() == 8 ? STRUCT_FIRST_FIELD_DOUBLE : STRUCT_FLOAT_FIELD_FIRST;
-                        }
-                        else if (size == STRUCT_NO_FLOAT_FIELD)
-                        {
-                            size = pFieldFirst[0].GetSize() == 8 ? STRUCT_FIRST_FIELD_SIZE_IS8: 0;
-                        }
-                        else
-                        {
-                            size = STRUCT_NO_FLOAT_FIELD;
-                            goto _End_arg;
-                        }
-                    }
-                    else
-                    {
-                        size = STRUCT_NO_FLOAT_FIELD;
-                        goto _End_arg;
-                    }
-                }
-                else if (pFieldFirst[0].GetSize() == 8)
-                {
-                    size = STRUCT_FIRST_FIELD_SIZE_IS8;
-                }
-
-                fieldType = pFieldSecond[0].GetFieldType();
-                if (pFieldSecond[0].GetSize() > 8)
-                {
-                    size = STRUCT_NO_FLOAT_FIELD;
-                    goto _End_arg;
-                }
-                else if (CorTypeInfo::IsPrimitiveType_NoThrow(fieldType))
-                {
-                    if (fieldType == ELEMENT_TYPE_R4)
-                    {
-                        size = size & STRUCT_FLOAT_FIELD_FIRST ? (size ^ STRUCT_MERGE_FIRST_SECOND) : (size | STRUCT_FLOAT_FIELD_SECOND);
-                    }
-                    else if (fieldType == ELEMENT_TYPE_R8)
-                    {
-                        size = size & STRUCT_FLOAT_FIELD_FIRST ? (size ^ STRUCT_MERGE_FIRST_SECOND_8) : (size | STRUCT_SECOND_FIELD_DOUBLE);
-                    }
-                    else if ((size & STRUCT_FLOAT_FIELD_FIRST) == 0)
-                    {
-                        size = STRUCT_NO_FLOAT_FIELD;
-                    }
-                    else if (pFieldSecond[0].GetSize() == 8)
-                    {
-                        size |= STRUCT_SECOND_FIELD_SIZE_IS8;
-                    }
-                }
-                else if (fieldType == ELEMENT_TYPE_VALUETYPE)
-                {
-                    pMethodTable  = pFieldSecond[0].GetApproxFieldTypeHandleThrowing().GetMethodTable();
-                    if (IsRiscV64OnlyOneField(pMethodTable))
-                    {
-                        int size2 = GetRiscV64PassStructInRegisterFlags((CORINFO_CLASS_HANDLE)pMethodTable);
-                        if ((size2 & STRUCT_FLOAT_FIELD_ONLY_ONE) != 0)
-                        {
-                            if (pFieldSecond[0].GetSize() == 8)
-                            {
-                                size = size & STRUCT_FLOAT_FIELD_FIRST ? (size ^ STRUCT_MERGE_FIRST_SECOND_8) : (size | STRUCT_SECOND_FIELD_DOUBLE);
-                            }
-                            else
-                            {
-                                size = size & STRUCT_FLOAT_FIELD_FIRST ? (size ^ STRUCT_MERGE_FIRST_SECOND) : (size | STRUCT_FLOAT_FIELD_SECOND);
-                            }
-                        }
-                        else if ((size & STRUCT_FLOAT_FIELD_FIRST) == 0)
-                        {
-                            size = STRUCT_NO_FLOAT_FIELD;
-                        }
-                        else if (size2 == STRUCT_NO_FLOAT_FIELD)
-                        {
-                            size |= pFieldSecond[0].GetSize() == 8 ? STRUCT_SECOND_FIELD_SIZE_IS8 : 0;
-                        }
-                        else
-                        {
-                            size = STRUCT_NO_FLOAT_FIELD;
-                        }
-                    }
-                    else
-                    {
-                        size = STRUCT_NO_FLOAT_FIELD;
-                    }
-                }
-                else if ((size & STRUCT_FLOAT_FIELD_FIRST) == 0)
-                {
-                    size = STRUCT_NO_FLOAT_FIELD;
-                }
-                else if (pFieldSecond[0].GetSize() == 8)
-                {
-                    size |= STRUCT_SECOND_FIELD_SIZE_IS8;
-                }
-            }
-
-            goto _End_arg;
-        }
-    }
-    else
-    {
-        _ASSERTE(th.IsNativeValueType());
-
-        useNativeLayout = true;
-        pMethodTable = th.AsNativeValueType();
-    }
-    _ASSERTE(pMethodTable != nullptr);
-
-    if (useNativeLayout)
-    {
-        if (th.GetSize() <= 16 /*MAX_PASS_MULTIREG_BYTES*/)
-        {
-            DWORD numIntroducedFields = pMethodTable->GetNativeLayoutInfo()->GetNumFields();
-            FieldDesc *pFieldStart = nullptr;
-
-            if (numIntroducedFields == 1)
-            {
-                pFieldStart = pMethodTable->GetApproxFieldDescListRaw();
-
-                CorElementType fieldType = pFieldStart->GetFieldType();
-
-                // InlineArray types and fixed buffer types have implied repeated fields.
-                // Checking if a type is an InlineArray type is cheap, so we'll do that first.
-                bool hasImpliedRepeatedFields = HasImpliedRepeatedFields(pMethodTable);
-
-                if (hasImpliedRepeatedFields)
-                {
-                    numIntroducedFields = pMethodTable->GetNumInstanceFieldBytes() / pFieldStart->GetSize();
-                    if (numIntroducedFields > 2)
-                    {
-                        goto _End_arg;
-                    }
-
-                    if (fieldType == ELEMENT_TYPE_R4)
-                    {
-                        if (numIntroducedFields == 1)
-                        {
-                            size = STRUCT_FLOAT_FIELD_ONLY_ONE;
-                        }
-                        else if (numIntroducedFields == 2)
-                        {
-                            size = STRUCT_FLOAT_FIELD_ONLY_TWO;
-                        }
-                        goto _End_arg;
-                    }
-                    else if (fieldType == ELEMENT_TYPE_R8)
-                    {
-                        if (numIntroducedFields == 1)
-                        {
-                            size = STRUCT_FLOAT_FIELD_ONLY_ONE | STRUCT_FIRST_FIELD_SIZE_IS8;
-                        }
-                        else if (numIntroducedFields == 2)
-                        {
-                            size = STRUCT_FIELD_TWO_DOUBLES;
-                        }
-                        goto _End_arg;
-                    }
-                }
-
-                if (CorTypeInfo::IsPrimitiveType_NoThrow(fieldType))
-                {
-                    if (fieldType == ELEMENT_TYPE_R4)
-                    {
-                        size = STRUCT_FLOAT_FIELD_ONLY_ONE;
-                    }
-                    else if (fieldType == ELEMENT_TYPE_R8)
-                    {
-                        size = STRUCT_FLOAT_FIELD_ONLY_ONE | STRUCT_FIRST_FIELD_SIZE_IS8;
-                    }
-                }
-                else if (fieldType == ELEMENT_TYPE_VALUETYPE)
-                {
-                    const NativeFieldDescriptor *pNativeFieldDescs = pMethodTable->GetNativeLayoutInfo()->GetNativeFieldDescriptors();
-                    NativeFieldCategory nfc = pNativeFieldDescs->GetCategory();
-                    if (nfc == NativeFieldCategory::NESTED)
-                    {
-                        pMethodTable = pNativeFieldDescs->GetNestedNativeMethodTable();
-                        size = GetRiscV64PassStructInRegisterFlags((CORINFO_CLASS_HANDLE)pMethodTable);
-                        return size;
-                    }
-                    else if (nfc == NativeFieldCategory::FLOAT)
-                    {
-                        if (pFieldStart->GetSize() == 4)
-                        {
-                            size = STRUCT_FLOAT_FIELD_ONLY_ONE;
-                        }
-                        else if (pFieldStart->GetSize() == 8)
-                        {
-                            size = STRUCT_FLOAT_FIELD_ONLY_ONE | STRUCT_FIRST_FIELD_SIZE_IS8;
-                        }
-                    }
-                }
-            }
-            else if (numIntroducedFields == 2)
-            {
-                pFieldStart = pMethodTable->GetApproxFieldDescListRaw();
-
-                if (pFieldStart->GetSize() > 8)
-                {
-                    goto _End_arg;
-                }
-
-                if (pFieldStart->GetOffset() || !pFieldStart[1].GetOffset() || (pFieldStart[0].GetSize() > pFieldStart[1].GetOffset()))
-                {
-                    goto _End_arg;
-                }
-
-                CorElementType fieldType = pFieldStart[0].GetFieldType();
-                if (CorTypeInfo::IsPrimitiveType_NoThrow(fieldType))
-                {
-                    if (fieldType == ELEMENT_TYPE_R4)
-                    {
-                        size = STRUCT_FLOAT_FIELD_FIRST;
-                    }
-                    else if (fieldType == ELEMENT_TYPE_R8)
-                    {
-                        size = STRUCT_FIRST_FIELD_DOUBLE;
-                    }
-                    else if (pFieldStart[0].GetSize() == 8)
-                    {
-                        size = STRUCT_FIRST_FIELD_SIZE_IS8;
-                    }
-
-                    fieldType = pFieldStart[1].GetFieldType();
-                    if (CorTypeInfo::IsPrimitiveType_NoThrow(fieldType))
-                    {
-                        if (fieldType == ELEMENT_TYPE_R4)
-                        {
-                            size = size & STRUCT_FLOAT_FIELD_FIRST ? (size ^ STRUCT_MERGE_FIRST_SECOND) : (size | STRUCT_FLOAT_FIELD_SECOND);
-                        }
-                        else if (fieldType == ELEMENT_TYPE_R8)
-                        {
-                            size = size & STRUCT_FLOAT_FIELD_FIRST ? (size ^ STRUCT_MERGE_FIRST_SECOND_8) : (size | STRUCT_SECOND_FIELD_DOUBLE);
-                        }
-                        else if ((size & STRUCT_FLOAT_FIELD_FIRST) == 0)
-                        {
-                            size = STRUCT_NO_FLOAT_FIELD;
-                        }
-                        else if (pFieldStart[1].GetSize() == 8)
-                        {
-                            size |= STRUCT_SECOND_FIELD_SIZE_IS8;
-                        }
-                        goto _End_arg;
-                    }
-                }
-                else if (fieldType == ELEMENT_TYPE_VALUETYPE)
-                {
-                    const NativeFieldDescriptor *pNativeFieldDescs = pMethodTable->GetNativeLayoutInfo()->GetNativeFieldDescriptors();
-
-                    NativeFieldCategory nfc = pNativeFieldDescs->GetCategory();
-
-                    if (nfc == NativeFieldCategory::NESTED)
-                    {
-                        if (pNativeFieldDescs->GetNumElements() != 1)
-                        {
-                            size = STRUCT_NO_FLOAT_FIELD;
-                            goto _End_arg;
-                        }
-
-                        MethodTable* pMethodTable2 = pNativeFieldDescs->GetNestedNativeMethodTable();
-
-                        if (!IsRiscV64OnlyOneField(pMethodTable2))
-                        {
-                            size = STRUCT_NO_FLOAT_FIELD;
-                            goto _End_arg;
-                        }
-
-                        size = GetRiscV64PassStructInRegisterFlags((CORINFO_CLASS_HANDLE)pMethodTable2);
-                        if ((size & STRUCT_FLOAT_FIELD_ONLY_ONE) != 0)
-                        {
-                            if (pFieldStart->GetSize() == 8)
-                            {
-                                size = STRUCT_FIRST_FIELD_DOUBLE;
-                            }
-                            else
-                            {
-                                size = STRUCT_FLOAT_FIELD_FIRST;
-                            }
-                        }
-                        else if (pFieldStart->GetSize() == 8)
-                        {
-                            size = STRUCT_FIRST_FIELD_SIZE_IS8;
-                        }
-                        else
-                        {
-                            size = STRUCT_NO_FLOAT_FIELD;
-                            goto _End_arg;
-                        }
-                    }
-                    else if (nfc == NativeFieldCategory::FLOAT)
-                    {
-                        if (pFieldStart[0].GetSize() == 4)
-                        {
-                            size = STRUCT_FLOAT_FIELD_FIRST;
-                        }
-                        else if (pFieldStart[0].GetSize() == 8)
-                        {
-                            _ASSERTE((pMethodTable->GetNativeSize() == 8) || (pMethodTable->GetNativeSize() == 16));
-                            size = STRUCT_FIRST_FIELD_DOUBLE;
-                        }
-                    }
-                    else if (pFieldStart[0].GetSize() == 8)
-                    {
-                        size = STRUCT_FIRST_FIELD_SIZE_IS8;
-                    }
-                }
-                else if (pFieldStart[0].GetSize() == 8)
-                {
-                    size = STRUCT_FIRST_FIELD_SIZE_IS8;
-                }
-
-                fieldType = pFieldStart[1].GetFieldType();
-                if (pFieldStart[1].GetSize() > 8)
-                {
-                    size = STRUCT_NO_FLOAT_FIELD;
-                    goto _End_arg;
-                }
-                else if (CorTypeInfo::IsPrimitiveType_NoThrow(fieldType))
-                {
-                    if (fieldType == ELEMENT_TYPE_R4)
-                    {
-                        size = size & STRUCT_FLOAT_FIELD_FIRST ? (size ^ STRUCT_MERGE_FIRST_SECOND) : (size | STRUCT_FLOAT_FIELD_SECOND);
-                    }
-                    else if (fieldType == ELEMENT_TYPE_R8)
-                    {
-                        size = size & STRUCT_FLOAT_FIELD_FIRST ? (size ^ STRUCT_MERGE_FIRST_SECOND_8) : (size | STRUCT_SECOND_FIELD_DOUBLE);
-                    }
-                    else if ((size & STRUCT_FLOAT_FIELD_FIRST) == 0)
-                    {
-                        size = STRUCT_NO_FLOAT_FIELD;
-                    }
-                    else if (pFieldStart[1].GetSize() == 8)
-                    {
-                        size |= STRUCT_SECOND_FIELD_SIZE_IS8;
-                    }
-
-                    // Pass with two integer registers in `struct {int a, int b, float/double c}` cases
-                    if ((size | STRUCT_FIRST_FIELD_SIZE_IS8 | STRUCT_FLOAT_FIELD_SECOND) == size)
-                    {
-                        size = STRUCT_NO_FLOAT_FIELD;
-                    }
-                }
-                else if (fieldType == ELEMENT_TYPE_VALUETYPE)
-                {
-                    const NativeFieldDescriptor *pNativeFieldDescs = pMethodTable->GetNativeLayoutInfo()->GetNativeFieldDescriptors();
-                    NativeFieldCategory nfc = pNativeFieldDescs[1].GetCategory();
-
-                    if (nfc == NativeFieldCategory::NESTED)
-                    {
-                        if (pNativeFieldDescs[1].GetNumElements() != 1)
-                        {
-                            size = STRUCT_NO_FLOAT_FIELD;
-                            goto _End_arg;
-                        }
-
-                        MethodTable* pMethodTable2 = pNativeFieldDescs[1].GetNestedNativeMethodTable();
-
-                        if (!IsRiscV64OnlyOneField(pMethodTable2))
-                        {
-                            size = STRUCT_NO_FLOAT_FIELD;
-                            goto _End_arg;
-                        }
-
-                        if ((GetRiscV64PassStructInRegisterFlags((CORINFO_CLASS_HANDLE)pMethodTable2) & STRUCT_FLOAT_FIELD_ONLY_ONE) != 0)
-                        {
-                            if (pFieldStart[1].GetSize() == 4)
-                            {
-                                size = size & STRUCT_FLOAT_FIELD_FIRST ? (size ^ STRUCT_MERGE_FIRST_SECOND) : (size | STRUCT_FLOAT_FIELD_SECOND);
-                            }
-                            else if (pFieldStart[1].GetSize() == 8)
-                            {
-                                size = size & STRUCT_FLOAT_FIELD_FIRST ? (size ^ STRUCT_MERGE_FIRST_SECOND_8) : (size | STRUCT_SECOND_FIELD_DOUBLE);
-                            }
-                        }
-                        else if ((size & STRUCT_FLOAT_FIELD_FIRST) == 0)
-                        {
-                            size = STRUCT_NO_FLOAT_FIELD;
-                        }
-                        else if (pFieldStart[1].GetSize() == 8)
-                        {
-                            size |= STRUCT_SECOND_FIELD_SIZE_IS8;
-                        }
-                    }
-                    else if (nfc == NativeFieldCategory::FLOAT)
-                    {
-                        if (pFieldStart[1].GetSize() == 4)
-                        {
-                            size = size & STRUCT_FLOAT_FIELD_FIRST ? (size ^ STRUCT_MERGE_FIRST_SECOND) : (size | STRUCT_FLOAT_FIELD_SECOND);
-                        }
-                        else if (pFieldStart[1].GetSize() == 8)
-                        {
-                            size = size & STRUCT_FLOAT_FIELD_FIRST ? (size ^ STRUCT_MERGE_FIRST_SECOND_8) : (size | STRUCT_SECOND_FIELD_DOUBLE);
-                        }
-                    }
-                    else if ((size & STRUCT_FLOAT_FIELD_FIRST) == 0)
-                    {
-                        size = STRUCT_NO_FLOAT_FIELD;
-                    }
-                    else if (pFieldStart[1].GetSize() == 8)
-                    {
-                        size |= STRUCT_SECOND_FIELD_SIZE_IS8;
-                    }
-                }
-                else if ((size & STRUCT_FLOAT_FIELD_FIRST) == 0)
-                {
-                    size = STRUCT_NO_FLOAT_FIELD;
-                }
-                else if (pFieldStart[1].GetSize() == 8)
-                {
-                    size |= STRUCT_SECOND_FIELD_SIZE_IS8;
-                }
-            }
-        }
+        assert((flags & STRUCT_FLOAT_FIELD_FIRST) != 0);
+        flags ^= (STRUCT_FLOAT_FIELD_FIRST | STRUCT_FLOAT_FIELD_ONLY_ONE); // replace FIRST with ONLY_ONE
      }
-_End_arg:
  
-    return size;
+    return flags;
  }
  #endif
  
diff --git a/src/coreclr/vm/methodtable.h b/src/coreclr/vm/methodtable.h

index b12542a..87ffe77 100644 (file)
--- a/src/coreclr/vm/methodtable.h
+++ b/src/coreclr/vm/methodtable.h
@@ -770,13 +770,9 @@ public:
      void CheckRunClassInitAsIfConstructingThrowing();
  
  #if defined(TARGET_LOONGARCH64)
-    static bool IsLoongArch64OnlyOneField(MethodTable * pMT);
-    static int GetLoongArch64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE clh);
-#endif
-
-#if defined(TARGET_RISCV64)
-    static bool IsRiscV64OnlyOneField(MethodTable * pMT);
-    static int GetRiscV64PassStructInRegisterFlags(CORINFO_CLASS_HANDLE clh);
+    static int GetLoongArch64PassStructInRegisterFlags(TypeHandle th);
+#elif defined(TARGET_RISCV64)
+    static int GetRiscV64PassStructInRegisterFlags(TypeHandle th);
  #endif
  
  #if defined(UNIX_AMD64_ABI_ITF)
diff --git a/src/coreclr/vm/methodtablebuilder.cpp b/src/coreclr/vm/methodtablebuilder.cpp

index 4702410..765c1f9 100644 (file)
--- a/src/coreclr/vm/methodtablebuilder.cpp
+++ b/src/coreclr/vm/methodtablebuilder.cpp
@@ -10023,6 +10023,11 @@ void MethodTableBuilder::CheckForSystemTypes()
                      // 16-byte alignment for __m256.
  
                      pLayout->m_ManagedLargestAlignmentRequirementOfAllMembers = 16;
+    #elif defined(TARGET_RISCV64)
+                    // TODO-RISCV64: Update alignment to proper value when we implement RISC-V intrinsic.
+                    // RISC-V Vector Extension Intrinsic Document
+                    // https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/vector_type_infos.adoc
+                    pLayout->m_ManagedLargestAlignmentRequirementOfAllMembers = 16;
      #else
                      pLayout->m_ManagedLargestAlignmentRequirementOfAllMembers = 32; // sizeof(__m256)
      #endif // TARGET_ARM elif TARGET_ARM64
@@ -10039,6 +10044,12 @@ void MethodTableBuilder::CheckForSystemTypes()
                      // 16-byte alignment for __m256.
  
                      pLayout->m_ManagedLargestAlignmentRequirementOfAllMembers = 16;
+
+    #elif defined(TARGET_RISCV64)
+                    // TODO-RISCV64: Update alignment to proper value when we implement RISC-V intrinsic.
+                    // RISC-V Vector Extenstion Intrinsic Document
+                    // https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/vector_type_infos.adoc
+                    pLayout->m_ManagedLargestAlignmentRequirementOfAllMembers = 16;
      #else
                      pLayout->m_ManagedLargestAlignmentRequirementOfAllMembers = 64; // sizeof(__m512)
      #endif // TARGET_ARM elif TARGET_ARM64
diff --git a/src/coreclr/vm/riscv64/asmhelpers.S b/src/coreclr/vm/riscv64/asmhelpers.S

index b64ac87..8035a64 100644 (file)
--- a/src/coreclr/vm/riscv64/asmhelpers.S
+++ b/src/coreclr/vm/riscv64/asmhelpers.S
@@ -382,7 +382,7 @@ NESTED_ENTRY ThePreStub, _TEXT, NoHandler
      EPILOG_BRANCH_REG  t4
  NESTED_END ThePreStub, _TEXT
  
-// ------------------------------------------------------------------\
+// ------------------------------------------------------------------
  
  // EXTERN_C int __fastcall HelperMethodFrameRestoreState(
  // INDEBUG_COMMA(HelperMethodFrame *pFrame)
diff --git a/src/coreclr/vm/riscv64/profiler.cpp b/src/coreclr/vm/riscv64/profiler.cpp

index 1cff4df..fc8eff4 100644 (file)
--- a/src/coreclr/vm/riscv64/profiler.cpp
+++ b/src/coreclr/vm/riscv64/profiler.cpp
@@ -112,10 +112,10 @@ LPVOID ProfileArgIterator::CopyStructFromRegisters(const ArgLocDesc* sir)
      PROFILE_PLATFORM_SPECIFIC_DATA* pData = reinterpret_cast<PROFILE_PLATFORM_SPECIFIC_DATA*>(m_handle);
  
      struct { bool isFloat, is8; } fields[] = {
-        { sir->m_structFields & (STRUCT_FLOAT_FIELD_FIRST | STRUCT_FLOAT_FIELD_ONLY_TWO | STRUCT_FLOAT_FIELD_ONLY_ONE),
-          sir->m_structFields & STRUCT_FIRST_FIELD_SIZE_IS8 },
-        { sir->m_structFields & (STRUCT_FLOAT_FIELD_SECOND | STRUCT_FLOAT_FIELD_ONLY_TWO),
-          sir->m_structFields & STRUCT_SECOND_FIELD_SIZE_IS8 },
+        { (bool) (sir->m_structFields & (STRUCT_FLOAT_FIELD_FIRST | STRUCT_FLOAT_FIELD_ONLY_TWO | STRUCT_FLOAT_FIELD_ONLY_ONE)),
+          (bool) (sir->m_structFields & STRUCT_FIRST_FIELD_SIZE_IS8) },
+        { (bool) (sir->m_structFields & (STRUCT_FLOAT_FIELD_SECOND | STRUCT_FLOAT_FIELD_ONLY_TWO)),
+          (bool) (sir->m_structFields & STRUCT_SECOND_FIELD_SIZE_IS8) },
      };
      int fieldCount = (sir->m_structFields & STRUCT_FLOAT_FIELD_ONLY_ONE) ? 1 : 2;
      UINT64 bufferPosBegin = m_bufferPos;
@@ -191,11 +191,11 @@ LPVOID ProfileArgIterator::GetNextArgAddr()
          }
      }
  
-    int argSize = m_argIterator.IsArgPassedByRef() ? sizeof(void*) : m_argIterator.GetArgSize();
+    int argSize = m_argIterator.IsArgPassedByRef() ? (int)sizeof(void*) : m_argIterator.GetArgSize();
      if (TransitionBlock::IsFloatArgumentRegisterOffset(argOffset))
      {
          int offset = argOffset - TransitionBlock::GetOffsetOfFloatArgumentRegisters();
-        _ASSERTE(offset + argSize <= sizeof(pData->floatArgumentRegisters));
+        _ASSERTE(offset + argSize <= (int)sizeof(pData->floatArgumentRegisters));
          return (LPBYTE)&pData->floatArgumentRegisters + offset;
      }
  
@@ -204,7 +204,7 @@ LPVOID ProfileArgIterator::GetNextArgAddr()
      if (TransitionBlock::IsArgumentRegisterOffset(argOffset))
      {
          int offset = argOffset - TransitionBlock::GetOffsetOfArgumentRegisters();
-        if (offset + argSize > sizeof(pData->argumentRegisters))
+        if (offset + argSize > (int)sizeof(pData->argumentRegisters))
          {
              // Struct partially spilled on stack
              const int regIndex = NUM_ARGUMENT_REGISTERS - 1;  // first part of struct must be in last register
diff --git a/src/coreclr/vm/riscv64/stubs.cpp b/src/coreclr/vm/riscv64/stubs.cpp

index 42c0230..f9f72e0 100644 (file)
--- a/src/coreclr/vm/riscv64/stubs.cpp
+++ b/src/coreclr/vm/riscv64/stubs.cpp
@@ -1062,7 +1062,7 @@ void StubLinkerCPU::EmitMovConstant(IntReg reg, UINT64 imm)
      if (high31 & 0x800)
      {
          // EmitAddImm does not allow negative immediate values, so use EmitSubImm.
-        EmitSubImm(reg, reg, ~high31 + 1 & 0xFFF);
+        EmitSubImm(reg, reg, (~high31 + 1) & 0xFFF);
      }
      else
      {
@@ -1560,7 +1560,24 @@ PCODE DynamicHelpers::CreateHelper(LoaderAllocator * pAllocator, TADDR arg, PCOD
  
      BEGIN_DYNAMIC_HELPER_EMIT(32);
  
-    EmitHelperWithArg(p, rxOffset, pAllocator, arg, target);
+    const IntReg RegR0 = 0, RegT0 = 5, RegA0 = 10;
+
+    *(DWORD*)p = UTypeInstr(0x17, RegT0, 0);// auipc t0, 0
+    p += 4;
+    *(DWORD*)p = ITypeInstr(0x3, 0x3, RegA0, RegT0, 16);// ld a0, 16(t0)
+    p += 4;
+    *(DWORD*)p = ITypeInstr(0x3, 0x3, RegT0, RegT0, 24);// ld t0, 24(t0)
+    p += 4;
+    *(DWORD*)p = ITypeInstr(0x67, 0, RegR0, RegT0, 0);// jalr zero, 0(t0)
+    p += 4;
+
+    // label:
+    // arg
+    *(TADDR*)p = arg;
+    p += 8;
+    // target
+    *(PCODE*)p = target;
+    p += 8;
  
      END_DYNAMIC_HELPER_EMIT();
  }
@@ -1570,13 +1587,13 @@ void DynamicHelpers::EmitHelperWithArg(BYTE*& p, size_t rxOffset, LoaderAllocato
  {
      STANDARD_VM_CONTRACT;
  
-    const IntReg RegR0 = 0, RegT0 = 5, RegA0 = 10;
+    const IntReg RegR0 = 0, RegT0 = 5, RegA1 = 11;
  
      *(DWORD*)p = UTypeInstr(0x17, RegT0, 0);// auipc t0, 0
      p += 4;
-    *(DWORD*)p = ITypeInstr(0x3, 0x3, RegA0, RegT0, 16);// ld a0, 16(t0)
+    *(DWORD*)p = ITypeInstr(0x3, 0x3, RegA1, RegT0, 16);// ld a1, 16(t0)
      p += 4;
-    *(DWORD*)p = ITypeInstr(0x3, 0x3, RegT0, RegT0, 24);;// ld t0, 24(t0)
+    *(DWORD*)p = ITypeInstr(0x3, 0x3, RegT0, RegT0, 24);// ld t0, 24(t0)
      p += 4;
      *(DWORD*)p = ITypeInstr(0x67, 0, RegR0, RegT0, 0);// jalr zero, 0(t0)
      p += 4;
@@ -1772,7 +1789,7 @@ PCODE DynamicHelpers::CreateHelperWithTwoArgs(LoaderAllocator * pAllocator, TADD
  
      BEGIN_DYNAMIC_HELPER_EMIT(48);
  
-    const IntReg RegR0 = 0, RegT0 = 5, RegA2 = 12, RegA3 = 1;
+    const IntReg RegR0 = 0, RegT0 = 5, RegA2 = 12, RegA3 = 13;
  
      *(DWORD*)p = UTypeInstr(0x17, RegT0, 0);// auipc t0, 0
      p += 4;
@@ -1780,7 +1797,7 @@ PCODE DynamicHelpers::CreateHelperWithTwoArgs(LoaderAllocator * pAllocator, TADD
      p += 4;
      *(DWORD*)p = ITypeInstr(0x3, 0x3, RegA3, RegT0, 32);// ld a3,32(t0)
      p += 4;
-    *(DWORD*)p = ITypeInstr(0x3, 0x3, RegT0, RegT0, 40);;// ld t0,40(t0)
+    *(DWORD*)p = ITypeInstr(0x3, 0x3, RegT0, RegT0, 40);// ld t0,40(t0)
      p += 4;
      *(DWORD*)p = ITypeInstr(0x67, 0, RegR0, RegT0, 0);// jalr x0, 0(t0)
      p += 4;
@@ -1892,7 +1909,7 @@ PCODE DynamicHelpers::CreateDictionaryLookupHelper(LoaderAllocator * pAllocator,
              p += 4;
          }
  
-        BYTE* pBLTCall = NULL;
+        BYTE* pBLECall = NULL;
  
          for (WORD i = 0; i < pLookup->indirections; i++)
          {
@@ -1922,8 +1939,8 @@ PCODE DynamicHelpers::CreateDictionaryLookupHelper(LoaderAllocator * pAllocator,
                  p += 4;
                  *(DWORD*)p = ITypeInstr(0x13, 0, RegT4, RegT4, slotOffset & 0xfff);// addi  t4, t4, (slotOffset&0xfff)
                  p += 4;
-                // blt  t4, t5, CALL HELPER
-                pBLTCall = p;       // Offset filled later
+                // bge  t4, t5, CALL HELPER
+                pBLECall = p;       // Offset filled later
                  p += 4;
              }
  
@@ -1965,8 +1982,8 @@ PCODE DynamicHelpers::CreateDictionaryLookupHelper(LoaderAllocator * pAllocator,
              p += 4;
  
              // CALL HELPER:
-            if (pBLTCall != NULL)
-                *(DWORD*)pBLTCall = BTypeInstr(0x63, 0x4, RegT4, RegT5, (UINT32)(p - pBLTCall));
+            if (pBLECall != NULL)
+                *(DWORD*)pBLECall = BTypeInstr(0x63, 0x5, RegT4, RegT5, (UINT32)(p - pBLECall));
  
              *(DWORD*)p = ITypeInstr(0x13, 0, RegA0, RegT2, 0);// addi  a0, t2, 0
              p += 4;
diff --git a/src/coreclr/vm/riscv64/virtualcallstubcpu.hpp b/src/coreclr/vm/riscv64/virtualcallstubcpu.hpp

index 0491dd8..2f39e4f 100644 (file)
--- a/src/coreclr/vm/riscv64/virtualcallstubcpu.hpp
+++ b/src/coreclr/vm/riscv64/virtualcallstubcpu.hpp
@@ -156,9 +156,9 @@ struct ResolveStub
  
  private:
      friend struct ResolveHolder;
-    const static int resolveEntryPointLen = 20;
-    const static int slowEntryPointLen = 4;
-    const static int failEntryPointLen = 9;
+    constexpr static int resolveEntryPointLen = 20;
+    constexpr static int slowEntryPointLen = 4;
+    constexpr static int failEntryPointLen = 9;
  
      DWORD _resolveEntryPoint[resolveEntryPointLen];
      DWORD _slowEntryPoint[slowEntryPointLen];
@@ -226,10 +226,12 @@ struct ResolveHolder
          //  addi t0, t0, -12
          _stub._resolveEntryPoint[n++] = 0xff428293;
  
+        constexpr size_t entryPointsLen = ResolveStub::resolveEntryPointLen+ResolveStub::slowEntryPointLen+ResolveStub::failEntryPointLen;
+        constexpr size_t hashedTokenOffset = offsetof(ResolveStub, _hashedToken);
          //     lw  t6, 0(t0)  #t6 = this._hashedToken
-        _stub._resolveEntryPoint[n++] = 0x0002af83 | (33 << 22); //(20+4+9)*4<<20;
-        _ASSERTE((ResolveStub::resolveEntryPointLen+ResolveStub::slowEntryPointLen+ResolveStub::failEntryPointLen) == 33);
-        _ASSERTE((33<<2) == (offsetof(ResolveStub, _hashedToken) -offsetof(ResolveStub, _resolveEntryPoint[0])));
+        _stub._resolveEntryPoint[n++] = 0x0002af83 | (hashedTokenOffset << 20);
+        static_assert_no_msg(entryPointsLen << 2 == hashedTokenOffset);
+        static_assert_no_msg(offsetof(ResolveStub, _resolveEntryPoint[0]) == 0);
  
          //     xor      t1, t1, t6
          _stub._resolveEntryPoint[n++] = 0x01f34333;
@@ -241,10 +243,10 @@ struct ResolveHolder
          _stub._resolveEntryPoint[n++] = 0x00cfdf9b;
          //     and  t1, t1, t6
          _stub._resolveEntryPoint[n++] = 0x01f37333;
+        constexpr size_t cacheAddressOffset = offsetof(ResolveStub, _cacheAddress);
          //     ld  t6, 0(t0)    # t6 = this._cacheAddress
-        _stub._resolveEntryPoint[n++] = 0x0002bf83 | (36 << 22); //(20+4+9+1+2)*4<<20;
-        _ASSERTE((ResolveStub::resolveEntryPointLen+ResolveStub::slowEntryPointLen+ResolveStub::failEntryPointLen+1+2) == 36);
-        _ASSERTE((36<<2) == (offsetof(ResolveStub, _cacheAddress) -offsetof(ResolveStub, _resolveEntryPoint[0])));
+        _stub._resolveEntryPoint[n++] = 0x0002bf83 | (cacheAddressOffset << 20);
+        static_assert_no_msg((entryPointsLen+1+2) << 2 == cacheAddressOffset);
          //  add t1, t6, t1
          _stub._resolveEntryPoint[n++] = 0x006f8333;
          //     ld  t1, 0(t1)    # t1 = e = this._cacheAddress[i]
@@ -252,10 +254,10 @@ struct ResolveHolder
  
          //     ld  t6, 0(t1)    # t6 = Check mt == e.pMT;
          _stub._resolveEntryPoint[n++] = 0x00033f83 | ((offsetof(ResolveCacheElem, pMT) & 0xfff) << 20);
+        constexpr size_t tokenOffset = offsetof(ResolveStub, _token);
          //     ld  t2, 0(t0)  #  $t2 = this._token
-        _stub._resolveEntryPoint[n++] = 0x0002b383 | (38<<22);//(20+4+9+1+2+2)*4<<20;
-        _ASSERTE((ResolveStub::resolveEntryPointLen+ResolveStub::slowEntryPointLen+ResolveStub::failEntryPointLen+1+4) == 38);
-        _ASSERTE((38<<2) == (offsetof(ResolveStub, _token) -offsetof(ResolveStub, _resolveEntryPoint[0])));
+        _stub._resolveEntryPoint[n++] = 0x0002b383 | (tokenOffset << 20);
+        static_assert_no_msg((entryPointsLen+1+4) << 2 == tokenOffset);
  
          //     bne  t6, t3, next
          _stub._resolveEntryPoint[n++] = 0x01cf9a63;// | PC_REL_OFFSET(_slowEntryPoint[0], n);
@@ -288,19 +290,19 @@ struct ResolveHolder
          //     auipc t0, 0
          _stub._slowEntryPoint[0] = 0x00000297;
          //     ld  t6, 0(t0)    # r21 = _resolveWorkerTarget;
-        _ASSERTE((0x14*4) == ((INT32)(offsetof(ResolveStub, _resolveWorkerTarget) - (offsetof(ResolveStub, _slowEntryPoint[0])))));
-        _ASSERTE((ResolveStub::slowEntryPointLen + ResolveStub::failEntryPointLen+1+3*2) == 0x14);
+        static_assert_no_msg((0x14*4) == ((INT32)(offsetof(ResolveStub, _resolveWorkerTarget) - (offsetof(ResolveStub, _slowEntryPoint[0])))));
+        static_assert_no_msg((ResolveStub::slowEntryPointLen + ResolveStub::failEntryPointLen+1+3*2) == 0x14);
          _stub._slowEntryPoint[1] = 0x0002bf83 | ((0x14 * 4) << 20);
  
          //     ld  t2, 0(t0)    # t2 = this._token;
          _stub._slowEntryPoint[2] = 0x0002b383 | ((0x12 * 4) << 20); //(18*4=72=0x48)<<20
-        _ASSERTE((ResolveStub::slowEntryPointLen+ResolveStub::failEntryPointLen+1+4)*4 == (0x12 * 4));
-        _ASSERTE((0x12 * 4) == (offsetof(ResolveStub, _token) -offsetof(ResolveStub, _slowEntryPoint[0])));
+        static_assert_no_msg((ResolveStub::slowEntryPointLen+ResolveStub::failEntryPointLen+1+4)*4 == (0x12 * 4));
+        static_assert_no_msg((0x12 * 4) == (offsetof(ResolveStub, _token) -offsetof(ResolveStub, _slowEntryPoint[0])));
  
          //     jalr  x0, t6, 0
          _stub._slowEntryPoint[3] = 0x000f8067;
  
-         _ASSERTE(4 == ResolveStub::slowEntryPointLen);
+        static_assert_no_msg(4 == ResolveStub::slowEntryPointLen);
  
          // ResolveStub._failEntryPoint(a0:MethodToken, a1,.., a7, t5:IndirectionCellAndFlags)
          // {
@@ -315,8 +317,8 @@ struct ResolveHolder
          _stub._failEntryPoint[0] = 0x00000297;
          //     ld  t1, 0(t0)    # t1 = _pCounter;  0x2800000=((failEntryPointLen+1)*4)<<20.
          _stub._failEntryPoint[1] = 0x0002b303 | 0x2800000;
-        _ASSERTE((((ResolveStub::failEntryPointLen+1)*4)<<20) == 0x2800000);
-        _ASSERTE((0x2800000>>20) == ((INT32)(offsetof(ResolveStub, _pCounter) - (offsetof(ResolveStub, _failEntryPoint[0])))));
+        static_assert_no_msg((((ResolveStub::failEntryPointLen+1)*4)<<20) == 0x2800000);
+        static_assert_no_msg((0x2800000>>20) == ((INT32)(offsetof(ResolveStub, _pCounter) - (offsetof(ResolveStub, _failEntryPoint[0])))));
          //     lw  t6, 0(t1)
          _stub._failEntryPoint[2] = 0x00032f83;
          //     addi  t6, t6, -1
@@ -325,7 +327,7 @@ struct ResolveHolder
          //     sw  t6, 0(t1)
          _stub._failEntryPoint[4] = 0x01f32023;
  
-        _ASSERTE(SDF_ResolveBackPatch == 0x1);
+        static_assert_no_msg(SDF_ResolveBackPatch == 0x1);
          // ;; ori t5, t5, t6 >=0 ? SDF_ResolveBackPatch:0;
          //     slti t6, t6, 0
          _stub._failEntryPoint[5] = 0x000faf93;
@@ -337,8 +339,8 @@ struct ResolveHolder
          //     j       _resolveEntryPoint   // pc - 128 = pc + 4 - resolveEntryPointLen * 4 - slowEntryPointLen * 4 - failEntryPointLen * 4;
          _stub._failEntryPoint[8] = 0xf81ff06f;
  
-        _ASSERTE(9 == ResolveStub::failEntryPointLen);
-         _stub._pCounter = counterAddr;
+        static_assert_no_msg(9 == ResolveStub::failEntryPointLen);
+        _stub._pCounter = counterAddr;
          _stub._hashedToken         = hashedToken << LOG2_PTRSIZE;
          _stub._cacheAddress        = (size_t) cacheAddr;
          _stub._token               = dispatchToken;
@@ -372,8 +374,30 @@ struct VTableCallStub
  
      inline size_t size()
      {
-        _ASSERTE(!"RISCV64:NYI");
-        return 0;
+        LIMITED_METHOD_CONTRACT;
+
+        BYTE* pStubCode = (BYTE *)this;
+
+
+        if ((*(DWORD*)(&pStubCode[12])) == 0x000e8067)
+        {
+            // jalr x0, t4, 0
+            return 20;//4*ins + slot = 4*4 + 4;
+        }
+
+        //auipc t1, 0
+        assert((*(DWORD*)(&pStubCode[4])) == 0x00000317);
+
+        size_t cbSize = 36;
+
+        // ld t4, 0(t4)
+        if ((*(DWORD*)(&pStubCode[16])) == 0x000ebe83)
+        {
+            if ((*(DWORD*)(&pStubCode[28])) == 0x000ebe83)
+                cbSize += 12;
+        }
+
+        return cbSize;
      }
  
      inline PCODE        entryPoint()        const { LIMITED_METHOD_CONTRACT;  return (PCODE)&_entryPoint[0]; }
@@ -402,8 +426,8 @@ struct VTableCallHolder
          STATIC_CONTRACT_WRAPPER;
          unsigned offsetOfIndirection = MethodTable::GetVtableOffset() + MethodTable::GetIndexOfVtableIndirection(slot) * TARGET_POINTER_SIZE;
          unsigned offsetAfterIndirection = MethodTable::GetIndexAfterVtableIndirection(slot) * TARGET_POINTER_SIZE;
-        int indirectionsCodeSize = (offsetOfIndirection >= 0x1000 ? 12 : 4) + (offsetAfterIndirection >= 0x1000 ? 12 : 4);
-        int indirectionsDataSize = (offsetOfIndirection >= 0x1000 ? 4 : 0) + (offsetAfterIndirection >= 0x1000 ? 4 : 0);
+        int indirectionsCodeSize = (offsetOfIndirection > 2047 ? 12 : 4) + (offsetAfterIndirection > 2047 ? 12 : 4);
+        int indirectionsDataSize = (offsetOfIndirection > 2047 ? 4 : 0) + (offsetAfterIndirection > 2047 ? 4 : 0);
          return 12 + indirectionsCodeSize + ((indirectionsDataSize > 0) ? (indirectionsDataSize + 4) : 0);
      }
  
@@ -444,15 +468,15 @@ void VTableCallHolder::Initialize(unsigned slot)
      *(UINT32*)p = 0x00053e83; // VTABLECALL_STUB_FIRST_DWORD
      p += 4;
  
-    if ((offsetOfIndirection >= 0x1000) || (offsetAfterIndirection >= 0x1000))
+    if ((offsetOfIndirection > 2047) || (offsetAfterIndirection > 2047))
      {
          *(UINT32*)p = 0x00000317; // auipc t1, 0
          p += 4;
      }
  
-    if (offsetOfIndirection >= 0x1000)
+    if (offsetOfIndirection > 2047)
      {
-        uint dataOffset = 20 + (offsetAfterIndirection >= 0x1000 ? 12 : 4);
+        uint dataOffset = 20 + (offsetAfterIndirection > 2047 ? 12 : 4);
  
          // lwu t3,dataOffset(t1)
          *(DWORD*)p = 0x00036e03 | ((UINT32)dataOffset << 20); p += 4;
@@ -467,13 +491,13 @@ void VTableCallHolder::Initialize(unsigned slot)
          *(DWORD*)p = 0x000ebe83 | ((UINT32)offsetOfIndirection << 20); p += 4;
      }
  
-    if (offsetAfterIndirection >= 0x1000)
+    if (offsetAfterIndirection > 2047)
      {
-        uint indirectionsCodeSize = (offsetOfIndirection >= 0x1000 ? 12 : 4);
-        uint indirectionsDataSize = (offsetOfIndirection >= 0x1000 ? 4 : 0);
+        uint indirectionsCodeSize = (offsetOfIndirection > 2047 ? 12 : 4);
+        uint indirectionsDataSize = (offsetOfIndirection > 2047 ? 4 : 0);
          uint dataOffset = 20 + indirectionsCodeSize + indirectionsDataSize;
  
-        // ldw t3,dataOffset(t1)
+        // lwu t3,dataOffset(t1)
          *(DWORD*)p = 0x00036e03 | ((UINT32)dataOffset << 20); p += 4;
          // add t4, t4, t3
          *(DWORD*)p = 0x01ce8eb3; p += 4;
@@ -490,12 +514,12 @@ void VTableCallHolder::Initialize(unsigned slot)
      *(UINT32*)p = 0x000e8067; p += 4;
  
      // data labels:
-    if (offsetOfIndirection >= 0x1000)
+    if (offsetOfIndirection > 2047)
      {
          *(UINT32*)p = (UINT32)offsetOfIndirection;
          p += 4;
      }
-    if (offsetAfterIndirection >= 0x1000)
+    if (offsetAfterIndirection > 2047)
      {
          *(UINT32*)p = (UINT32)offsetAfterIndirection;
          p += 4;
diff --git a/src/coreclr/vm/virtualcallstub.cpp b/src/coreclr/vm/virtualcallstub.cpp

index e82f8b8..f1c0860 100644 (file)
--- a/src/coreclr/vm/virtualcallstub.cpp
+++ b/src/coreclr/vm/virtualcallstub.cpp
@@ -769,9 +769,7 @@ void VirtualCallStubManager::InitStatic()
      VirtualCallStubManagerManager::InitStatic();
  }
  
-// Static shutdown code.
-// At the moment, this doesn't do anything more than log statistics.
-void VirtualCallStubManager::UninitStatic()
+void VirtualCallStubManager::LogFinalStats()
  {
      CONTRACTL
      {
diff --git a/src/coreclr/vm/virtualcallstub.h b/src/coreclr/vm/virtualcallstub.h

index e6d89dc..f7e1921 100644 (file)
--- a/src/coreclr/vm/virtualcallstub.h
+++ b/src/coreclr/vm/virtualcallstub.h
@@ -236,13 +236,13 @@ public:
  
      // Set up static data structures - called during EEStartup
      static void InitStatic();
-    static void UninitStatic();
+    static void LogFinalStats();
  
      // Per instance initialization - called during AppDomain::Init and ::Uninit and for collectible loader allocators
      void Init(BaseDomain* pDomain, LoaderAllocator *pLoaderAllocator);
      void Uninit();
  
-    //@TODO: the logging should be tied into the VMs normal loggin mechanisms,
+    //@TODO: the logging should be tied into the VMs normal logging mechanisms,
      //@TODO: for now we just always write a short log file called "StubLog_<pid>.log"
      static void StartupLogging();
      static void LoggingDump();
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs

index 9c3360a..550cb0e 100644 (file)
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs
@@ -36,6 +36,9 @@ namespace System.Runtime.Intrinsics
          internal const int Alignment = 8;
  #elif TARGET_ARM64
          internal const int Alignment = 16;
+#elif TARGET_RISCV64
+        // TODO-RISCV64: Update alignment to the proper value once RISC-V intrinsics are implemented.
+        internal const int Alignment = 16;
  #else
          internal const int Alignment = 32;
  #endif
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs

index f95c2d3..98ddc2b 100644 (file)
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs
@@ -36,6 +36,9 @@ namespace System.Runtime.Intrinsics
          internal const int Alignment = 8;
  #elif TARGET_ARM64
          internal const int Alignment = 16;
+#elif TARGET_RISCV64
+        // TODO-RISCV64: Update alignment to the proper value once RISC-V intrinsics are implemented.
+        internal const int Alignment = 16;
  #else
          internal const int Alignment = 64;
  #endif
diff --git a/src/native/eventpipe/ep-event-source.c b/src/native/eventpipe/ep-event-source.c

index 8bbe3b6..d1d5424 100644 (file)
--- a/src/native/eventpipe/ep-event-source.c
+++ b/src/native/eventpipe/ep-event-source.c
@@ -48,6 +48,8 @@ const ep_char8_t* _ep_arch_info = "s390x";
  const ep_char8_t* _ep_arch_info = "loongarch64";
  #elif defined(TARGET_POWERPC64)
  const ep_char8_t* _ep_arch_info = "ppc64le";
+#elif defined(TARGET_RISCV64)
+const ep_char8_t* _ep_arch_info = "riscv64";
  #else
  const ep_char8_t* _ep_arch_info = "Unknown";
  #endif
diff --git a/src/tests/Interop/StructPacking/StructPacking.cs b/src/tests/Interop/StructPacking/StructPacking.cs

index 06b0450..0d5e1ff 100644 (file)
--- a/src/tests/Interop/StructPacking/StructPacking.cs
+++ b/src/tests/Interop/StructPacking/StructPacking.cs
@@ -1322,9 +1322,9 @@ unsafe class Program
                  expectedOffsetValue: 8
              );
          }
-        else if (RuntimeInformation.ProcessArchitecture == Architecture.Arm64)
+        else if (RuntimeInformation.ProcessArchitecture == Architecture.Arm64 || RuntimeInformation.ProcessArchitecture == Architecture.RiscV64)
          {
-            // The Procedure Call Standard for ARM64 defines this type as having 16-byte alignment
+            // The Procedure Call Standards for ARM64 and RiscV64 define this type as having 16-byte alignment
  
              succeeded &= Test<DefaultLayoutDefaultPacking<Vector256<byte>>>(
                  expectedSize: 48,
diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_87393/Runtime_87393.fsproj b/src/tests/JIT/Regression/JitBlue/Runtime_87393/Runtime_87393.fsproj

index 5da8e9a..01cde35 100644 (file)
--- a/src/tests/JIT/Regression/JitBlue/Runtime_87393/Runtime_87393.fsproj
+++ b/src/tests/JIT/Regression/JitBlue/Runtime_87393/Runtime_87393.fsproj
@@ -7,6 +7,7 @@
      <Optimize>True</Optimize>
      <TargetFramework>$(NetCoreAppToolCurrent)</TargetFramework>
      <GCStressIncompatible>True</GCStressIncompatible>
+    <OtherFlags>--tailcalls+</OtherFlags>
    </PropertyGroup>
    <ItemGroup>
      <Compile Include="$(MSBuildProjectName).fs" />
diff --git a/src/tests/Regressions/coreclr/GitHub_22888/test22888resources.csproj b/src/tests/Regressions/coreclr/GitHub_22888/test22888resources.csproj

index d6ca55a..29fc6f8 100644 (file)
--- a/src/tests/Regressions/coreclr/GitHub_22888/test22888resources.csproj
+++ b/src/tests/Regressions/coreclr/GitHub_22888/test22888resources.csproj
@@ -8,7 +8,7 @@
      <Compile Include="test22888resources.cs" />
    </ItemGroup>
    <ItemGroup>
-    <EmbeddedResource Include="test22888.resx">
+    <EmbeddedResource Include="test22888.resx" LogicalName="test22888resources.test22888.resources">
        <Generator>ResXFileCodeGenerator</Generator>
      </EmbeddedResource>
    </ItemGroup>
diff --git a/src/tests/issues.targets b/src/tests/issues.targets

index 5c9868e..60e82cd 100644 (file)
--- a/src/tests/issues.targets
+++ b/src/tests/issues.targets
@@ -5,9 +5,6 @@
          <ExcludeList Include="$(XunitTestBinBase)/readytorun/DynamicMethodGCStress/DynamicMethodGCStress/*">
              <Issue>timeout</Issue>
          </ExcludeList>
-        <ExcludeList Include="$(XunitTestBinBase)/Regressions/coreclr/GitHub_22888/test22888/*">
-            <Issue>https://github.com/dotnet/runtime/issues/13703</Issue>
-        </ExcludeList>
          <ExcludeList Include="$(XunitTestBinBase)/Interop/PInvoke/Int128/Int128Test/*">
              <Issue>https://github.com/dotnet/runtime/issues/74209</Issue>
          </ExcludeList>
@@ -1472,6 +1469,9 @@
          <ExcludeList Include="$(XunitTestBinBase)/Interop/ICustomMarshaler/ConflictingNames/MultipleALCs/**">
              <Issue>https://github.com/dotnet/runtime/issues/34072</Issue>
          </ExcludeList>
+        <ExcludeList Include="$(XunitTestBinBase)/Regressions/coreclr/GitHub_22888/test22888/*">
+            <Issue>https://github.com/dotnet/runtime/issues/34072</Issue>
+        </ExcludeList>
          <ExcludeList Include="$(XunitTestBinBase)/Interop/ICustomMarshaler/Primitives/ICustomMarshaler_TargetUnix/**">
              <Issue>https://github.com/dotnet/runtime/issues/34374</Issue>
          </ExcludeList>
author	Timur Mustafin/Advanced System SW Lab /SRR/Staff Engineer/Samsung Electronics <t.mustafin@partner.samsung.com>
	Thu, 6 Jun 2024 22:12:52 +0000 (01:12 +0300)
committer	GitHub Enterprise <noreply-CODE@samsung.com>
	Thu, 6 Jun 2024 22:12:52 +0000 (07:12 +0900)
src/coreclr/clrdefinitions.cmake		patch \| blob \| history
src/coreclr/debug/ee/controller.cpp		patch \| blob \| history
src/coreclr/gcinfo/CMakeLists.txt		patch \| blob \| history
src/coreclr/inc/clrconfigvalues.h		patch \| blob \| history
src/coreclr/inc/stdmacros.h		patch \| blob \| history
src/coreclr/jit/codegen.h		patch \| blob \| history
src/coreclr/jit/codegencommon.cpp		patch \| blob \| history
src/coreclr/jit/codegenlinear.cpp		patch \| blob \| history
src/coreclr/jit/codegenriscv64.cpp		patch \| blob \| history
src/coreclr/jit/compiler.cpp		patch \| blob \| history
src/coreclr/jit/compiler.h		patch \| blob \| history
src/coreclr/jit/ee_il_dll.cpp		patch \| blob \| history
src/coreclr/jit/emit.cpp		patch \| blob \| history
src/coreclr/jit/emit.h		patch \| blob \| history
src/coreclr/jit/emitriscv64.cpp		patch \| blob \| history
src/coreclr/jit/emitriscv64.h		patch \| blob \| history
src/coreclr/jit/gcencode.cpp		patch \| blob \| history
src/coreclr/jit/gentree.h		patch \| blob \| history
src/coreclr/jit/instrsriscv64.h		patch \| blob \| history
src/coreclr/jit/jitconfigvalues.h		patch \| blob \| history
src/coreclr/jit/lclvars.cpp		patch \| blob \| history
src/coreclr/jit/lower.cpp		patch \| blob \| history
src/coreclr/jit/lowerriscv64.cpp		patch \| blob \| history
src/coreclr/jit/lsrariscv64.cpp		patch \| blob \| history
src/coreclr/jit/regalloc.cpp		patch \| blob \| history
src/coreclr/jit/targetriscv64.h		patch \| blob \| history
src/coreclr/pal/inc/pal_endian.h		patch \| blob \| history
src/coreclr/pal/inc/rt/ntimage.h		patch \| blob \| history
src/coreclr/pal/inc/unixasmmacrosriscv64.inc		patch \| blob \| history
src/coreclr/vm/callingconvention.h		patch \| blob \| history
src/coreclr/vm/ceemain.cpp		patch \| blob \| history
src/coreclr/vm/jitinterface.cpp		patch \| blob \| history
src/coreclr/vm/methodtable.cpp		patch \| blob \| history
src/coreclr/vm/methodtable.h		patch \| blob \| history
src/coreclr/vm/methodtablebuilder.cpp		patch \| blob \| history
src/coreclr/vm/riscv64/asmhelpers.S		patch \| blob \| history
src/coreclr/vm/riscv64/profiler.cpp		patch \| blob \| history
src/coreclr/vm/riscv64/stubs.cpp		patch \| blob \| history
src/coreclr/vm/riscv64/virtualcallstubcpu.hpp		patch \| blob \| history
src/coreclr/vm/virtualcallstub.cpp		patch \| blob \| history
src/coreclr/vm/virtualcallstub.h		patch \| blob \| history
src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs		patch \| blob \| history
src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs		patch \| blob \| history
src/native/eventpipe/ep-event-source.c		patch \| blob \| history
src/tests/Interop/StructPacking/StructPacking.cs		patch \| blob \| history
src/tests/JIT/Regression/JitBlue/Runtime_87393/Runtime_87393.fsproj		patch \| blob \| history
src/tests/Regressions/coreclr/GitHub_22888/test22888resources.csproj		patch \| blob \| history
src/tests/issues.targets		patch \| blob \| history