From 3f02d269433e1c9b12dee9bf81ea3ec98c212548 Mon Sep 17 00:00:00 2001
From: Fraser Cormack
Date: Tue, 20 Apr 2021 11:41:32 +0100
Subject: [PATCH] [RISCV] Further fixes for RVV stack offset computation

This patch fixes a case missed out by D100574, in which RVV scalable
stack offset computations may require three live registers in the case
where the offset's fixed component is 12 bits or larger and has a
scalable component.

Instead of adding an additional emergency spill slot, this patch further
optimizes the scalable stack offset computation sequences to reduce
register usage.

By emitting the sequence to compute the scalable component before the
fixed component, we can free up one scratch register to be reallocated
by the sequence for the fixed component. Doing this saves one register
and thus one additional emergency spill slot.

Compare:

    $x5 = LUI 1
    $x1 = ADDIW killed $x5, -1896
    $x1 = ADD $x2, killed $x1
    $x5 = PseudoReadVLENB
    $x6 = ADDI $x0, 50
    $x5 = MUL killed $x5, killed $x6
    $x1 = ADD killed $x1, killed $x5

versus:

    $x5 = PseudoReadVLENB
    $x1 = ADDI $x0, 50
    $x5 = MUL killed $x5, killed $x1
    $x1 = LUI 1
    $x1 = ADDIW killed $x1, -1896
    $x1 = ADD $x2, killed $x1
    $x1 = ADD killed $x1, killed $x5

Reviewed By: HsiangKai

Differential Revision: https://reviews.llvm.org/D100847
--- llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp | 39 ++++++++++++++-------- .../CodeGen/RISCV/rvv/addi-scalable-offset.mir | 2 +- llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir | 30 ++++++++--------- 3 files changed, 41 insertions(+), 30 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp index cb501ed..8edfa9f 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -213,6 +213,24 @@ void RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MachineBasicBlock &MBB = *MI.getParent(); bool FrameRegIsKill = false; + // If required, pre-compute the scalable factor amount 
which will be used in + // later offset computation. Since this sequence requires up to two scratch + // registers -- after which one is made free -- this grants us better + // scavenging of scratch registers as only up to two are live at one time, + // rather than three. + Register ScalableFactorRegister; + unsigned ScalableAdjOpc = RISCV::ADD; + if (Offset.getScalable()) { + int64_t ScalableValue = Offset.getScalable(); + if (ScalableValue < 0) { + ScalableValue = -ScalableValue; + ScalableAdjOpc = RISCV::SUB; + } + // 1. Get vlenb && multiply vlen with the number of vector registers. + ScalableFactorRegister = + TII->getVLENFactoredAmount(MF, MBB, II, ScalableValue); + } + if (!isInt<12>(Offset.getFixed())) { // The offset won't fit in an immediate, so use a scratch register instead // Modify Offset and FrameReg appropriately @@ -251,29 +269,22 @@ void RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } } else { // Offset = (fixed offset, scalable offset) - unsigned Opc = RISCV::ADD; - int64_t ScalableValue = Offset.getScalable(); - if (ScalableValue < 0) { - ScalableValue = -ScalableValue; - Opc = RISCV::SUB; - } - - // 1. Get vlenb && multiply vlen with number of vector register. - Register FactorRegister = - TII->getVLENFactoredAmount(MF, MBB, II, ScalableValue); + // Step 1, the scalable offset, has already been computed. + assert(ScalableFactorRegister && + "Expected pre-computation of scalable factor in earlier step"); // 2. 
Calculate address: FrameReg + result of multiply if (MI.getOpcode() == RISCV::ADDI && !Offset.getFixed()) { - BuildMI(MBB, II, DL, TII->get(Opc), MI.getOperand(0).getReg()) + BuildMI(MBB, II, DL, TII->get(ScalableAdjOpc), MI.getOperand(0).getReg()) .addReg(FrameReg, getKillRegState(FrameRegIsKill)) - .addReg(FactorRegister, RegState::Kill); + .addReg(ScalableFactorRegister, RegState::Kill); MI.eraseFromParent(); return; } Register VL = MRI.createVirtualRegister(&RISCV::GPRRegClass); - BuildMI(MBB, II, DL, TII->get(Opc), VL) + BuildMI(MBB, II, DL, TII->get(ScalableAdjOpc), VL) .addReg(FrameReg, getKillRegState(FrameRegIsKill)) - .addReg(FactorRegister, RegState::Kill); + .addReg(ScalableFactorRegister, RegState::Kill); if (isRVV && Offset.getFixed()) { // Scalable load/store has no immediate argument. We need to add the diff --git a/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir b/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir index efb6656..fcba9fe 100644 --- a/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir +++ b/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir @@ -40,10 +40,10 @@ body: | ; CHECK: $x2 = SUB $x2, killed $x12 ; CHECK: dead renamable $x11 = PseudoVSETVLI killed renamable $x11, 88, implicit-def $vl, implicit-def $vtype ; CHECK: renamable $v25 = PseudoVLE64_V_M1 killed renamable $x10, $noreg, 64, implicit $vl, implicit $vtype :: (load unknown-size from %ir.pa, align 8) + ; CHECK: $x11 = PseudoReadVLENB ; CHECK: $x10 = LUI 1048575 ; CHECK: $x10 = ADDIW killed $x10, 1824 ; CHECK: $x10 = ADD $x8, killed $x10 - ; CHECK: $x11 = PseudoReadVLENB ; CHECK: $x10 = SUB killed $x10, killed $x11 ; CHECK: VS1R_V killed renamable $v25, killed renamable $x10 ; CHECK: $x10 = PseudoReadVLENB diff --git a/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir b/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir index ca1dbc2..44fc7c3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir +++ b/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir @@ -90,12 +90,12 
@@ body: | ; CHECK: $x2 = ANDI $x2, -128 ; CHECK: dead renamable $x15 = PseudoVSETIVLI 1, 72, implicit-def $vl, implicit-def $vtype ; CHECK: renamable $v25 = PseudoVMV_V_X_M1 killed renamable $x12, $noreg, 16, implicit $vl, implicit $vtype + ; CHECK: $x11 = PseudoReadVLENB + ; CHECK: $x10 = ADDI $x0, 50 + ; CHECK: $x11 = MUL killed $x11, killed $x10 ; CHECK: $x10 = LUI 1 ; CHECK: $x10 = ADDIW killed $x10, -1896 ; CHECK: $x10 = ADD $x2, killed $x10 - ; CHECK: $x11 = PseudoReadVLENB - ; CHECK: $x12 = ADDI $x0, 50 - ; CHECK: $x11 = MUL killed $x11, killed $x12 ; CHECK: $x10 = ADD killed $x10, killed $x11 ; CHECK: PseudoVSPILL_M1 killed renamable $v25, killed $x10 :: (store unknown-size into %stack.1, align 8) ; CHECK: renamable $x1 = ADDI $x0, 255 @@ -133,25 +133,25 @@ body: | ; CHECK: renamable $x9 = SRLI killed renamable $x9, 62 ; CHECK: renamable $x9 = ADD renamable $x13, killed renamable $x9 ; CHECK: renamable $x9 = ANDI killed renamable $x9, -4 - ; CHECK: renamable $x16 = SUB killed renamable $x13, killed renamable $x9 + ; CHECK: renamable $x16 = SUB killed renamable $x13, renamable $x9 ; CHECK: dead renamable $x13 = PseudoVSETIVLI 1, 64, implicit-def $vl, implicit-def $vtype ; CHECK: renamable $x13 = nsw ADDI renamable $x16, -2 - ; CHECK: $x5 = LUI 1 - ; CHECK: $x9 = ADDIW killed $x5, -1896 - ; CHECK: $x9 = ADD $x2, killed $x9 - ; CHECK: $x1 = PseudoReadVLENB - ; CHECK: $x5 = ADDI $x0, 50 - ; CHECK: $x1 = MUL killed $x1, killed $x5 + ; CHECK: $x5 = PseudoReadVLENB + ; CHECK: $x1 = ADDI $x0, 50 + ; CHECK: $x5 = MUL killed $x5, killed $x1 + ; CHECK: $x1 = LUI 1 + ; CHECK: $x1 = ADDIW killed $x1, -1896 + ; CHECK: $x1 = ADD $x2, killed $x1 + ; CHECK: $x1 = ADD killed $x1, killed $x5 ; CHECK: $x5 = LD $x2, 0 :: (load 8 from %stack.17) - ; CHECK: $x9 = ADD killed $x9, killed $x1 + ; CHECK: renamable $v0 = PseudoVRELOAD_M1 killed $x1 :: (load unknown-size from %stack.1, align 8) ; CHECK: $x1 = LD $x2, 8 :: (load 8 from %stack.16) - ; CHECK: renamable $v0 = 
PseudoVRELOAD_M1 killed $x9 :: (load unknown-size from %stack.1, align 8) ; CHECK: renamable $v0 = PseudoVSLIDEDOWN_VX_M1 undef renamable $v0, killed renamable $v0, killed renamable $x13, $noreg, 8, implicit $vl, implicit $vtype ; CHECK: renamable $x13 = PseudoVMV_X_S_M1 killed renamable $v0, 8, implicit $vl, implicit $vtype ; CHECK: BLT killed renamable $x16, renamable $x27, %bb.2 ; CHECK: bb.1: ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: liveins: $x1, $x5, $x6, $x7, $x10, $x11, $x12, $x13, $x14, $x15, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $x29, $x30, $x31 + ; CHECK: liveins: $x1, $x5, $x6, $x7, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $x29, $x30, $x31 ; CHECK: renamable $x9 = COPY killed renamable $x13 ; CHECK: PseudoBR %bb.2 ; CHECK: bb.2: @@ -212,7 +212,7 @@ body: | renamable $x9 = SRLI killed renamable $x9, 62 renamable $x9 = ADD renamable $x13, killed renamable $x9 renamable $x9 = ANDI killed renamable $x9, -4 - renamable $x16 = SUB killed renamable $x13, killed renamable $x9 + renamable $x16 = SUB killed renamable $x13, renamable $x9 dead renamable $x13 = PseudoVSETIVLI 1, 64, implicit-def $vl, implicit-def $vtype renamable $x13 = nsw ADDI renamable $x16, -2 renamable $v0 = PseudoVRELOAD_M1 %stack.1 :: (load unknown-size from %stack.1, align 8) @@ -222,7 +222,7 @@ body: | bb.1: successors: %bb.2 - liveins: $x1, $x5, $x6, $x7, $x10, $x11, $x12, $x13, $x14, $x15, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $x29, $x30, $x31 + liveins: $x1, $x5, $x6, $x7, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $x29, $x30, $x31 renamable $x9 = COPY killed renamable $x13 PseudoBR %bb.2 -- 2.7.4