[RISCV] Use max pushed register to get pushed register number.
author Yeting Kuo <yeting.kuo@sifive.com>
Thu, 3 Aug 2023 06:35:09 +0000 (14:35 +0800)
committer Tobias Hieta <tobias@hieta.se>
Mon, 7 Aug 2023 07:04:13 +0000 (09:04 +0200)
Previously we used the number of pushable registers that need to be saved as
the number of pushed registers. We also use the pushed register number to
calculate the stack size. This is not correct because Zcmp pushes registers
from $ra up to the max register that needs to be saved, and there is no
guarantee that the registers needing to be saved form a contiguous list
starting from $ra.

Here is an example. PushPopRegs should be 6 (ra, s0-s4) instead of 1.
```
; llc -mtriple=riscv32 -mattr=+zcmp
define void @foo() {
entry:
; Old:    .cfi_def_cfa_offset 16
; New:    .cfi_def_cfa_offset 32
  tail call void asm sideeffect "li s4, 0", "~{s4}"()
  ret void
}
```

Reviewed By: Jim, kito-cheng

Differential Revision: https://reviews.llvm.org/D156407

(cherry picked from commit f68c6879ad0e08e6509b89f60ed436d3be409f9c)

llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
llvm/test/CodeGen/RISCV/callee-saved-gprs.ll

index b38c7ae..cb2a49d 100644 (file)
@@ -262,22 +262,16 @@ static unsigned getPushPopEncoding(const Register MaxReg) {
 
 // Get the max reg of Push/Pop for restoring callee saved registers.
 static Register getMaxPushPopReg(const MachineFunction &MF,
-                                 const std::vector<CalleeSavedInfo> &CSI,
-                                 unsigned &PushPopRegs) {
+                                 const std::vector<CalleeSavedInfo> &CSI) {
   Register MaxPushPopReg = RISCV::NoRegister;
-  PushPopRegs = 0;
   for (auto &CS : CSI) {
     Register Reg = CS.getReg();
-    if (RISCV::PGPRRegClass.contains(Reg)) {
+    if (RISCV::PGPRRegClass.contains(Reg))
       MaxPushPopReg = std::max(MaxPushPopReg.id(), Reg.id());
-      PushPopRegs += 1;
-    }
   }
   // if rlist is {rs, s0-s10}, then s11 will also be included
-  if (MaxPushPopReg == RISCV::X26) {
+  if (MaxPushPopReg == RISCV::X26)
     MaxPushPopReg = RISCV::X27;
-    PushPopRegs = 13;
-  }
   return MaxPushPopReg;
 }
 
@@ -1332,10 +1326,11 @@ bool RISCVFrameLowering::spillCalleeSavedRegisters(
   // Emit CM.PUSH with base SPimm & evaluate Push stack
   RISCVMachineFunctionInfo *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();
   if (RVFI->isPushable(*MF)) {
-    unsigned PushPopRegs = 0;
-    Register MaxReg = getMaxPushPopReg(*MF, CSI, PushPopRegs);
-    RVFI->setRVPushRegs(PushPopRegs);
-    RVFI->setRVPushStackSize(alignTo((STI.getXLen() / 8) * PushPopRegs, 16));
+    Register MaxReg = getMaxPushPopReg(*MF, CSI);
+    unsigned PushedRegNum =
+        getPushPopEncoding(MaxReg) - llvm::RISCVZC::RLISTENCODE::RA + 1;
+    RVFI->setRVPushRegs(PushedRegNum);
+    RVFI->setRVPushStackSize(alignTo((STI.getXLen() / 8) * PushedRegNum, 16));
 
     if (MaxReg != RISCV::NoRegister) {
       // Use encoded number to represent registers to spill.
@@ -1347,7 +1342,7 @@ bool RISCVFrameLowering::spillCalleeSavedRegisters(
       PushBuilder.addImm((int64_t)RegEnc);
       PushBuilder.addImm(0);
 
-      for (unsigned i = 0; i < PushPopRegs; i++)
+      for (unsigned i = 0; i < PushedRegNum; i++)
         PushBuilder.addUse(AllPopRegs[i], RegState::Implicit);
     }
   } else if (const char *SpillLibCall = getSpillLibCallName(*MF, CSI)) {
index ab1af17..7013c27 100644 (file)
@@ -1990,3 +1990,140 @@ define void @caller() nounwind {
   store volatile [32 x i32] %val, ptr @var
   ret void
 }
+
+; This function tests if the stack size is correctly calculated when
+; callee-saved registers are not a sequential list from $ra
+define void @foo() {
+; RV32I-LABEL: foo:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    .cfi_def_cfa_offset 16
+; RV32I-NEXT:    sw s4, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    .cfi_offset s4, -4
+; RV32I-NEXT:    #APP
+; RV32I-NEXT:    li s4, 0
+; RV32I-NEXT:    #NO_APP
+; RV32I-NEXT:    lw s4, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32I-WITH-FP-LABEL: foo:
+; RV32I-WITH-FP:       # %bb.0: # %entry
+; RV32I-WITH-FP-NEXT:    addi sp, sp, -16
+; RV32I-WITH-FP-NEXT:    .cfi_def_cfa_offset 16
+; RV32I-WITH-FP-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-WITH-FP-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-WITH-FP-NEXT:    sw s4, 4(sp) # 4-byte Folded Spill
+; RV32I-WITH-FP-NEXT:    .cfi_offset ra, -4
+; RV32I-WITH-FP-NEXT:    .cfi_offset s0, -8
+; RV32I-WITH-FP-NEXT:    .cfi_offset s4, -12
+; RV32I-WITH-FP-NEXT:    addi s0, sp, 16
+; RV32I-WITH-FP-NEXT:    .cfi_def_cfa s0, 0
+; RV32I-WITH-FP-NEXT:    #APP
+; RV32I-WITH-FP-NEXT:    li s4, 0
+; RV32I-WITH-FP-NEXT:    #NO_APP
+; RV32I-WITH-FP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    lw s4, 4(sp) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    addi sp, sp, 16
+; RV32I-WITH-FP-NEXT:    ret
+;
+; RV32IZCMP-LABEL: foo:
+; RV32IZCMP:       # %bb.0: # %entry
+; RV32IZCMP-NEXT:    cm.push {ra, s0-s4}, -32
+; RV32IZCMP-NEXT:    .cfi_def_cfa_offset 32
+; RV32IZCMP-NEXT:    .cfi_offset s4, -4
+; RV32IZCMP-NEXT:    #APP
+; RV32IZCMP-NEXT:    li s4, 0
+; RV32IZCMP-NEXT:    #NO_APP
+; RV32IZCMP-NEXT:    cm.popret {ra, s0-s4}, 32
+;
+; RV32IZCMP-WITH-FP-LABEL: foo:
+; RV32IZCMP-WITH-FP:       # %bb.0: # %entry
+; RV32IZCMP-WITH-FP-NEXT:    addi sp, sp, -16
+; RV32IZCMP-WITH-FP-NEXT:    .cfi_def_cfa_offset 16
+; RV32IZCMP-WITH-FP-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZCMP-WITH-FP-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZCMP-WITH-FP-NEXT:    sw s4, 4(sp) # 4-byte Folded Spill
+; RV32IZCMP-WITH-FP-NEXT:    .cfi_offset ra, -4
+; RV32IZCMP-WITH-FP-NEXT:    .cfi_offset s0, -8
+; RV32IZCMP-WITH-FP-NEXT:    .cfi_offset s4, -12
+; RV32IZCMP-WITH-FP-NEXT:    addi s0, sp, 16
+; RV32IZCMP-WITH-FP-NEXT:    .cfi_def_cfa s0, 0
+; RV32IZCMP-WITH-FP-NEXT:    #APP
+; RV32IZCMP-WITH-FP-NEXT:    li s4, 0
+; RV32IZCMP-WITH-FP-NEXT:    #NO_APP
+; RV32IZCMP-WITH-FP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZCMP-WITH-FP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IZCMP-WITH-FP-NEXT:    lw s4, 4(sp) # 4-byte Folded Reload
+; RV32IZCMP-WITH-FP-NEXT:    addi sp, sp, 16
+; RV32IZCMP-WITH-FP-NEXT:    ret
+;
+; RV64I-LABEL: foo:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    addi sp, sp, -16
+; RV64I-NEXT:    .cfi_def_cfa_offset 16
+; RV64I-NEXT:    sd s4, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    .cfi_offset s4, -8
+; RV64I-NEXT:    #APP
+; RV64I-NEXT:    li s4, 0
+; RV64I-NEXT:    #NO_APP
+; RV64I-NEXT:    ld s4, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 16
+; RV64I-NEXT:    ret
+;
+; RV64I-WITH-FP-LABEL: foo:
+; RV64I-WITH-FP:       # %bb.0: # %entry
+; RV64I-WITH-FP-NEXT:    addi sp, sp, -32
+; RV64I-WITH-FP-NEXT:    .cfi_def_cfa_offset 32
+; RV64I-WITH-FP-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    sd s4, 8(sp) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    .cfi_offset ra, -8
+; RV64I-WITH-FP-NEXT:    .cfi_offset s0, -16
+; RV64I-WITH-FP-NEXT:    .cfi_offset s4, -24
+; RV64I-WITH-FP-NEXT:    addi s0, sp, 32
+; RV64I-WITH-FP-NEXT:    .cfi_def_cfa s0, 0
+; RV64I-WITH-FP-NEXT:    #APP
+; RV64I-WITH-FP-NEXT:    li s4, 0
+; RV64I-WITH-FP-NEXT:    #NO_APP
+; RV64I-WITH-FP-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    ld s4, 8(sp) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    addi sp, sp, 32
+; RV64I-WITH-FP-NEXT:    ret
+;
+; RV64IZCMP-LABEL: foo:
+; RV64IZCMP:       # %bb.0: # %entry
+; RV64IZCMP-NEXT:    cm.push {ra, s0-s4}, -48
+; RV64IZCMP-NEXT:    .cfi_def_cfa_offset 48
+; RV64IZCMP-NEXT:    .cfi_offset s4, -8
+; RV64IZCMP-NEXT:    #APP
+; RV64IZCMP-NEXT:    li s4, 0
+; RV64IZCMP-NEXT:    #NO_APP
+; RV64IZCMP-NEXT:    cm.popret {ra, s0-s4}, 48
+;
+; RV64IZCMP-WITH-FP-LABEL: foo:
+; RV64IZCMP-WITH-FP:       # %bb.0: # %entry
+; RV64IZCMP-WITH-FP-NEXT:    addi sp, sp, -32
+; RV64IZCMP-WITH-FP-NEXT:    .cfi_def_cfa_offset 32
+; RV64IZCMP-WITH-FP-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64IZCMP-WITH-FP-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; RV64IZCMP-WITH-FP-NEXT:    sd s4, 8(sp) # 8-byte Folded Spill
+; RV64IZCMP-WITH-FP-NEXT:    .cfi_offset ra, -8
+; RV64IZCMP-WITH-FP-NEXT:    .cfi_offset s0, -16
+; RV64IZCMP-WITH-FP-NEXT:    .cfi_offset s4, -24
+; RV64IZCMP-WITH-FP-NEXT:    addi s0, sp, 32
+; RV64IZCMP-WITH-FP-NEXT:    .cfi_def_cfa s0, 0
+; RV64IZCMP-WITH-FP-NEXT:    #APP
+; RV64IZCMP-WITH-FP-NEXT:    li s4, 0
+; RV64IZCMP-WITH-FP-NEXT:    #NO_APP
+; RV64IZCMP-WITH-FP-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64IZCMP-WITH-FP-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64IZCMP-WITH-FP-NEXT:    ld s4, 8(sp) # 8-byte Folded Reload
+; RV64IZCMP-WITH-FP-NEXT:    addi sp, sp, 32
+; RV64IZCMP-WITH-FP-NEXT:    ret
+entry:
+  tail call void asm sideeffect "li s4, 0", "~{s4}"()
+  ret void
+}