From aa8d33a6d6346e1ed444a59d0655f4a43ba96875 Mon Sep 17 00:00:00 2001
From: Hsiangkai Wang
Date: Mon, 15 Mar 2021 13:58:11 +0800
Subject: [PATCH] [RISCV] Spilling for Zvlsseg registers.

For Zvlsseg, we create several tuple register classes. When spilling for
these tuple register classes, we need to iterate NF times to load/store
these tuple registers.

Differential Revision: https://reviews.llvm.org/D98629
---
 llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp  | 116 +++++++++
 llvm/lib/Target/RISCV/RISCVInstrInfo.cpp          | 165 +++++++++---
 llvm/lib/Target/RISCV/RISCVInstrInfo.h            |   3 +
 llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td   |  14 +
 llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp       |  13 +-
 llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll | 299 ++++++++++++++++++++++
 llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll | 299 ++++++++++++++++++++++
 7 files changed, 879 insertions(+), 30 deletions(-)
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll

diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
index ec9a395..581f26c6 100644
--- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -62,6 +62,8 @@ private:
   bool expandVSetVL(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
   bool expandVMSET_VMCLR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI, unsigned Opcode);
+  bool expandVSPILL(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
+  bool expandVRELOAD(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
 };
 
 char RISCVExpandPseudo::ID = 0;
@@ -123,6 +125,30 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
   case RISCV::PseudoVMSET_M_B64:
     // vmset.m vd => vmxnor.mm vd, vd, vd
     return expandVMSET_VMCLR(MBB, MBBI, RISCV::VMXNOR_MM);
+  case RISCV::PseudoVSPILL2_M1:
+  case RISCV::PseudoVSPILL2_M2:
+  case RISCV::PseudoVSPILL2_M4:
+  case RISCV::PseudoVSPILL3_M1:
+  case RISCV::PseudoVSPILL3_M2:
+  case RISCV::PseudoVSPILL4_M1:
+  case RISCV::PseudoVSPILL4_M2:
+  case RISCV::PseudoVSPILL5_M1:
+  case RISCV::PseudoVSPILL6_M1:
+  case RISCV::PseudoVSPILL7_M1:
+  case RISCV::PseudoVSPILL8_M1:
+    return expandVSPILL(MBB, MBBI);
+  case RISCV::PseudoVRELOAD2_M1:
+  case RISCV::PseudoVRELOAD2_M2:
+  case RISCV::PseudoVRELOAD2_M4:
+  case RISCV::PseudoVRELOAD3_M1:
+  case RISCV::PseudoVRELOAD3_M2:
+  case RISCV::PseudoVRELOAD4_M1:
+  case RISCV::PseudoVRELOAD4_M2:
+  case RISCV::PseudoVRELOAD5_M1:
+  case RISCV::PseudoVRELOAD6_M1:
+  case RISCV::PseudoVRELOAD7_M1:
+  case RISCV::PseudoVRELOAD8_M1:
+    return expandVRELOAD(MBB, MBBI);
   }
 
   return false;
@@ -253,6 +279,96 @@ bool RISCVExpandPseudo::expandVMSET_VMCLR(MachineBasicBlock &MBB,
   return true;
 }
 
+bool RISCVExpandPseudo::expandVSPILL(MachineBasicBlock &MBB,
+                                     MachineBasicBlock::iterator MBBI) {
+  const TargetRegisterInfo *TRI =
+      MBB.getParent()->getSubtarget().getRegisterInfo();
+  DebugLoc DL = MBBI->getDebugLoc();
+  Register SrcReg = MBBI->getOperand(0).getReg();
+  Register Base = MBBI->getOperand(1).getReg();
+  Register VL = MBBI->getOperand(2).getReg();
+  auto ZvlssegInfo = TII->isRVVSpillForZvlsseg(MBBI->getOpcode());
+  if (!ZvlssegInfo)
+    return false;
+  unsigned NF = ZvlssegInfo->first;
+  unsigned LMUL = ZvlssegInfo->second;
+  assert(NF * LMUL <= 8 && "Invalid NF/LMUL combinations.");
+  unsigned Opcode = RISCV::VS1R_V;
+  unsigned SubRegIdx = RISCV::sub_vrm1_0;
+  static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
+                "Unexpected 
subreg numbering"); + if (LMUL == 2) { + Opcode = RISCV::VS2R_V; + SubRegIdx = RISCV::sub_vrm2_0; + static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3, + "Unexpected subreg numbering"); + } else if (LMUL == 4) { + Opcode = RISCV::VS4R_V; + SubRegIdx = RISCV::sub_vrm4_0; + static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1, + "Unexpected subreg numbering"); + } else + assert(LMUL == 1 && "LMUL must be 1, 2, or 4."); + + for (unsigned I = 0; I < NF; ++I) { + BuildMI(MBB, MBBI, DL, TII->get(Opcode)) + .addReg(TRI->getSubReg(SrcReg, SubRegIdx + I)) + .addReg(Base) + .addMemOperand(*(MBBI->memoperands_begin())); + if (I != NF - 1) + BuildMI(MBB, MBBI, DL, TII->get(RISCV::ADD), Base) + .addReg(Base) + .addReg(VL); + } + MBBI->eraseFromParent(); + return true; +} + +bool RISCVExpandPseudo::expandVRELOAD(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI) { + const TargetRegisterInfo *TRI = + MBB.getParent()->getSubtarget().getRegisterInfo(); + DebugLoc DL = MBBI->getDebugLoc(); + Register DestReg = MBBI->getOperand(0).getReg(); + Register Base = MBBI->getOperand(1).getReg(); + Register VL = MBBI->getOperand(2).getReg(); + auto ZvlssegInfo = TII->isRVVSpillForZvlsseg(MBBI->getOpcode()); + if (!ZvlssegInfo) + return false; + unsigned NF = ZvlssegInfo->first; + unsigned LMUL = ZvlssegInfo->second; + assert(NF * LMUL <= 8 && "Invalid NF/LMUL combinations."); + unsigned Opcode = RISCV::VL1RE8_V; + unsigned SubRegIdx = RISCV::sub_vrm1_0; + static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7, + "Unexpected subreg numbering"); + if (LMUL == 2) { + Opcode = RISCV::VL2RE8_V; + SubRegIdx = RISCV::sub_vrm2_0; + static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3, + "Unexpected subreg numbering"); + } else if (LMUL == 4) { + Opcode = RISCV::VL4RE8_V; + SubRegIdx = RISCV::sub_vrm4_0; + static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1, + "Unexpected subreg numbering"); + } else + assert(LMUL == 1 && "LMUL must be 1, 2, or 4."); + + for (unsigned I = 0; I < NF; ++I) { + BuildMI(MBB, MBBI, DL, TII->get(Opcode), + TRI->getSubReg(DestReg, SubRegIdx + I)) + .addReg(Base) + .addMemOperand(*(MBBI->memoperands_begin())); + if (I != NF - 1) + BuildMI(MBB, MBBI, DL, TII->get(RISCV::ADD), Base) + .addReg(Base) + .addReg(VL); + } + MBBI->eraseFromParent(); + return true; +} + } // end of anonymous namespace INITIALIZE_PASS(RISCVExpandPseudo, "riscv-expand-pseudo", diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index a2ce359..7d205d7 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -167,29 +167,56 @@ void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineFrameInfo &MFI = MF->getFrameInfo(); unsigned Opcode; - bool IsScalableVector = false; - if (RISCV::GPRRegClass.hasSubClassEq(RC)) + bool IsScalableVector = true; + bool IsZvlsseg = true; + if (RISCV::GPRRegClass.hasSubClassEq(RC)) { Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ? 
RISCV::SW : RISCV::SD; - else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) + IsScalableVector = false; + } else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) { Opcode = RISCV::FSH; - else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) + IsScalableVector = false; + } else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) { Opcode = RISCV::FSW; - else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) + IsScalableVector = false; + } else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) { Opcode = RISCV::FSD; - else if (RISCV::VRRegClass.hasSubClassEq(RC)) { + IsScalableVector = false; + } else if (RISCV::VRRegClass.hasSubClassEq(RC)) { Opcode = RISCV::PseudoVSPILL_M1; - IsScalableVector = true; + IsZvlsseg = false; } else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) { Opcode = RISCV::PseudoVSPILL_M2; - IsScalableVector = true; + IsZvlsseg = false; } else if (RISCV::VRM4RegClass.hasSubClassEq(RC)) { Opcode = RISCV::PseudoVSPILL_M4; - IsScalableVector = true; + IsZvlsseg = false; } else if (RISCV::VRM8RegClass.hasSubClassEq(RC)) { Opcode = RISCV::PseudoVSPILL_M8; - IsScalableVector = true; - } else + IsZvlsseg = false; + } else if (RISCV::VRN2M1RegClass.hasSubClassEq(RC)) + Opcode = RISCV::PseudoVSPILL2_M1; + else if (RISCV::VRN2M2RegClass.hasSubClassEq(RC)) + Opcode = RISCV::PseudoVSPILL2_M2; + else if (RISCV::VRN2M4RegClass.hasSubClassEq(RC)) + Opcode = RISCV::PseudoVSPILL2_M4; + else if (RISCV::VRN3M1RegClass.hasSubClassEq(RC)) + Opcode = RISCV::PseudoVSPILL3_M1; + else if (RISCV::VRN3M2RegClass.hasSubClassEq(RC)) + Opcode = RISCV::PseudoVSPILL3_M2; + else if (RISCV::VRN4M1RegClass.hasSubClassEq(RC)) + Opcode = RISCV::PseudoVSPILL4_M1; + else if (RISCV::VRN4M2RegClass.hasSubClassEq(RC)) + Opcode = RISCV::PseudoVSPILL4_M2; + else if (RISCV::VRN5M1RegClass.hasSubClassEq(RC)) + Opcode = RISCV::PseudoVSPILL5_M1; + else if (RISCV::VRN6M1RegClass.hasSubClassEq(RC)) + Opcode = RISCV::PseudoVSPILL6_M1; + else if (RISCV::VRN7M1RegClass.hasSubClassEq(RC)) + Opcode = RISCV::PseudoVSPILL7_M1; + else if (RISCV::VRN8M1RegClass.hasSubClassEq(RC)) + Opcode = RISCV::PseudoVSPILL8_M1; + else llvm_unreachable("Can't store this register to stack slot"); if (IsScalableVector) { @@ -198,10 +225,16 @@ void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MemoryLocation::UnknownSize, MFI.getObjectAlign(FI)); MFI.setStackID(FI, TargetStackID::ScalableVector); - BuildMI(MBB, I, DL, get(Opcode)) - .addReg(SrcReg, getKillRegState(IsKill)) - .addFrameIndex(FI) - .addMemOperand(MMO); + auto MIB = BuildMI(MBB, I, DL, get(Opcode)) + .addReg(SrcReg, getKillRegState(IsKill)) + .addFrameIndex(FI) + .addMemOperand(MMO); + if (IsZvlsseg) { + // For spilling/reloading Zvlsseg registers, append the dummy field for + // the scaled vector length. The argument will be used when expanding + // these pseudo instructions. + MIB.addReg(RISCV::X0); + } } else { MachineMemOperand *MMO = MF->getMachineMemOperand( MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore, @@ -228,29 +261,56 @@ void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MachineFrameInfo &MFI = MF->getFrameInfo(); unsigned Opcode; - bool IsScalableVector = false; - if (RISCV::GPRRegClass.hasSubClassEq(RC)) + bool IsScalableVector = true; + bool IsZvlsseg = true; + if (RISCV::GPRRegClass.hasSubClassEq(RC)) { Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ? 
RISCV::LW : RISCV::LD; - else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) + IsScalableVector = false; + } else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) { Opcode = RISCV::FLH; - else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) + IsScalableVector = false; + } else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) { Opcode = RISCV::FLW; - else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) + IsScalableVector = false; + } else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) { Opcode = RISCV::FLD; - else if (RISCV::VRRegClass.hasSubClassEq(RC)) { + IsScalableVector = false; + } else if (RISCV::VRRegClass.hasSubClassEq(RC)) { Opcode = RISCV::PseudoVRELOAD_M1; - IsScalableVector = true; + IsZvlsseg = false; } else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) { Opcode = RISCV::PseudoVRELOAD_M2; - IsScalableVector = true; + IsZvlsseg = false; } else if (RISCV::VRM4RegClass.hasSubClassEq(RC)) { Opcode = RISCV::PseudoVRELOAD_M4; - IsScalableVector = true; + IsZvlsseg = false; } else if (RISCV::VRM8RegClass.hasSubClassEq(RC)) { Opcode = RISCV::PseudoVRELOAD_M8; - IsScalableVector = true; - } else + IsZvlsseg = false; + } else if (RISCV::VRN2M1RegClass.hasSubClassEq(RC)) + Opcode = RISCV::PseudoVRELOAD2_M1; + else if (RISCV::VRN2M2RegClass.hasSubClassEq(RC)) + Opcode = RISCV::PseudoVRELOAD2_M2; + else if (RISCV::VRN2M4RegClass.hasSubClassEq(RC)) + Opcode = RISCV::PseudoVRELOAD2_M4; + else if (RISCV::VRN3M1RegClass.hasSubClassEq(RC)) + Opcode = RISCV::PseudoVRELOAD3_M1; + else if (RISCV::VRN3M2RegClass.hasSubClassEq(RC)) + Opcode = RISCV::PseudoVRELOAD3_M2; + else if (RISCV::VRN4M1RegClass.hasSubClassEq(RC)) + Opcode = RISCV::PseudoVRELOAD4_M1; + else if (RISCV::VRN4M2RegClass.hasSubClassEq(RC)) + Opcode = RISCV::PseudoVRELOAD4_M2; + else if (RISCV::VRN5M1RegClass.hasSubClassEq(RC)) + Opcode = RISCV::PseudoVRELOAD5_M1; + else if (RISCV::VRN6M1RegClass.hasSubClassEq(RC)) + Opcode = RISCV::PseudoVRELOAD6_M1; + else if (RISCV::VRN7M1RegClass.hasSubClassEq(RC)) + Opcode = RISCV::PseudoVRELOAD7_M1; + else if (RISCV::VRN8M1RegClass.hasSubClassEq(RC)) + Opcode = RISCV::PseudoVRELOAD8_M1; + else llvm_unreachable("Can't load this register from stack slot"); if (IsScalableVector) { @@ -259,9 +319,15 @@ void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MemoryLocation::UnknownSize, MFI.getObjectAlign(FI)); MFI.setStackID(FI, TargetStackID::ScalableVector); - BuildMI(MBB, I, DL, get(Opcode), DstReg) - .addFrameIndex(FI) - .addMemOperand(MMO); + auto MIB = BuildMI(MBB, I, DL, get(Opcode), DstReg) + .addFrameIndex(FI) + .addMemOperand(MMO); + if (IsZvlsseg) { + // For spilling/reloading Zvlsseg registers, append the dummy field for + // the scaled vector length. The argument will be used when expanding + // these pseudo instructions. 
+      MIB.addReg(RISCV::X0);
+    }
   } else {
     MachineMemOperand *MMO = MF->getMachineMemOperand(
         MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,
@@ -1217,3 +1283,44 @@ Register RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF,
 
   return FactorRegister;
 }
+
+Optional<std::pair<unsigned, unsigned>>
+RISCVInstrInfo::isRVVSpillForZvlsseg(unsigned Opcode) const {
+  switch (Opcode) {
+  default:
+    return None;
+  case RISCV::PseudoVSPILL2_M1:
+  case RISCV::PseudoVRELOAD2_M1:
+    return std::make_pair(2u, 1u);
+  case RISCV::PseudoVSPILL2_M2:
+  case RISCV::PseudoVRELOAD2_M2:
+    return std::make_pair(2u, 2u);
+  case RISCV::PseudoVSPILL2_M4:
+  case RISCV::PseudoVRELOAD2_M4:
+    return std::make_pair(2u, 4u);
+  case RISCV::PseudoVSPILL3_M1:
+  case RISCV::PseudoVRELOAD3_M1:
+    return std::make_pair(3u, 1u);
+  case RISCV::PseudoVSPILL3_M2:
+  case RISCV::PseudoVRELOAD3_M2:
+    return std::make_pair(3u, 2u);
+  case RISCV::PseudoVSPILL4_M1:
+  case RISCV::PseudoVRELOAD4_M1:
+    return std::make_pair(4u, 1u);
+  case RISCV::PseudoVSPILL4_M2:
+  case RISCV::PseudoVRELOAD4_M2:
+    return std::make_pair(4u, 2u);
+  case RISCV::PseudoVSPILL5_M1:
+  case RISCV::PseudoVRELOAD5_M1:
+    return std::make_pair(5u, 1u);
+  case RISCV::PseudoVSPILL6_M1:
+  case RISCV::PseudoVRELOAD6_M1:
+    return std::make_pair(6u, 1u);
+  case RISCV::PseudoVSPILL7_M1:
+  case RISCV::PseudoVRELOAD7_M1:
+    return std::make_pair(7u, 1u);
+  case RISCV::PseudoVSPILL8_M1:
+  case RISCV::PseudoVRELOAD8_M1:
+    return std::make_pair(8u, 1u);
+  }
+}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index f15d61e..ae03d12 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -147,6 +147,9 @@ public:
                                  MachineBasicBlock::iterator II,
                                  int64_t Amount) const;
 
+  Optional<std::pair<unsigned, unsigned>>
+  isRVVSpillForZvlsseg(unsigned Opcode) const;
+
 protected:
   const RISCVSubtarget &STI;
 };
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 006703e..583b639 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -3171,6 +3171,20 @@ let hasSideEffects = 0, mayLoad = 1, mayStore = 0, isCodeGenOnly = 1 in {
   def PseudoVRELOAD_M8 : VPseudo;
 }
 
+foreach lmul = MxList.m in {
+  foreach nf = NFSet<lmul>.L in {
+    defvar vreg = SegRegClass<lmul, nf>.RC;
+    let hasSideEffects = 0, mayLoad = 0, mayStore = 1, isCodeGenOnly = 1 in {
+      def "PseudoVSPILL" # nf # "_" # lmul.MX :
+        Pseudo<(outs), (ins vreg:$rs1, GPR:$rs2, GPR:$vlenb), []>;
+    }
+    let hasSideEffects = 0, mayLoad = 1, mayStore = 0, isCodeGenOnly = 1 in {
+      def "PseudoVRELOAD" # nf # "_" # lmul.MX :
+        Pseudo<(outs vreg:$rs1), (ins GPR:$rs2, GPR:$vlenb), []>;
+    }
+  }
+}
+
 //===----------------------------------------------------------------------===//
 // 6. 
Configuration-Setting Instructions //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp index e1cd29c..ad6d3af 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -195,7 +195,8 @@ void RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, StackOffset Offset = getFrameLowering(MF)->getFrameIndexReference(MF, FrameIndex, FrameReg); bool isRVV = RISCVVPseudosTable::getPseudoInfo(MI.getOpcode()) || - isRVVWholeLoadStore(MI.getOpcode()); + isRVVWholeLoadStore(MI.getOpcode()) || + TII->isRVVSpillForZvlsseg(MI.getOpcode()); if (!isRVV) Offset += StackOffset::getFixed(MI.getOperand(FIOperandNum + 1).getImm()); @@ -268,6 +269,16 @@ void RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, if (!isRVV) MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset.getFixed()); } + + MachineFrameInfo &MFI = MF.getFrameInfo(); + auto ZvlssegInfo = TII->isRVVSpillForZvlsseg(MI.getOpcode()); + if (ZvlssegInfo) { + int64_t ScalableValue = MFI.getObjectSize(FrameIndex) / ZvlssegInfo->first; + Register FactorRegister = + TII->getVLENFactoredAmount(MF, MBB, II, ScalableValue); + MI.getOperand(FIOperandNum + 1) + .ChangeToRegister(FactorRegister, /*isDef=*/false); + } } Register RISCVRegisterInfo::getFrameRegister(const MachineFunction &MF) const { diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll new file mode 100644 index 0000000..d549c03 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll @@ -0,0 +1,299 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -mattr=+m -O0 < %s \ +; RUN: | FileCheck --check-prefix=SPILL-O0 %s +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -mattr=+m -O2 < %s \ +; RUN: | FileCheck --check-prefix=SPILL-O2 %s + +define @spill_zvlsseg_nxv1i32(i32* %base, i32 %vl) nounwind { +; SPILL-O0-LABEL: spill_zvlsseg_nxv1i32: +; SPILL-O0: # %bb.0: # %entry +; SPILL-O0-NEXT: addi sp, sp, -16 +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: sub sp, sp, a2 +; SPILL-O0-NEXT: vsetvli a1, a1, e32,mf2,ta,mu +; SPILL-O0-NEXT: vlseg2e32.v v0, (a0) +; SPILL-O0-NEXT: vmv1r.v v25, v1 +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: #APP +; SPILL-O0-NEXT: #NO_APP +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: add sp, sp, a0 +; SPILL-O0-NEXT: addi sp, sp, 16 +; SPILL-O0-NEXT: ret +; +; SPILL-O2-LABEL: spill_zvlsseg_nxv1i32: +; SPILL-O2: # %bb.0: # %entry +; SPILL-O2-NEXT: addi sp, sp, -16 +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a2, a2, 1 +; SPILL-O2-NEXT: sub sp, sp, a2 +; SPILL-O2-NEXT: vsetvli a1, a1, e32,mf2,ta,mu +; SPILL-O2-NEXT: vlseg2e32.v v0, (a0) +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: vs1r.v v0, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: add a0, a0, a1 +; SPILL-O2-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: #APP +; SPILL-O2-NEXT: #NO_APP +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: add a0, a0, a1 +; SPILL-O2-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload 
+; SPILL-O2-NEXT: # kill: def $v8 killed $v8 killed $v7_v8 +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 1 +; SPILL-O2-NEXT: add sp, sp, a0 +; SPILL-O2-NEXT: addi sp, sp, 16 +; SPILL-O2-NEXT: ret +entry: + %0 = tail call {,} @llvm.riscv.vlseg2.nxv1i32(i32* %base, i32 %vl) + call void asm sideeffect "", + "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + %1 = extractvalue {,} %0, 1 + ret %1 +} + +define @spill_zvlsseg_nxv2i32(i32* %base, i32 %vl) nounwind { +; SPILL-O0-LABEL: spill_zvlsseg_nxv2i32: +; SPILL-O0: # %bb.0: # %entry +; SPILL-O0-NEXT: addi sp, sp, -16 +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: sub sp, sp, a2 +; SPILL-O0-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; SPILL-O0-NEXT: vlseg2e32.v v0, (a0) +; SPILL-O0-NEXT: vmv1r.v v25, v1 +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: #APP +; SPILL-O0-NEXT: #NO_APP +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: add sp, sp, a0 +; SPILL-O0-NEXT: addi sp, sp, 16 +; SPILL-O0-NEXT: ret +; +; SPILL-O2-LABEL: spill_zvlsseg_nxv2i32: +; SPILL-O2: # %bb.0: # %entry +; SPILL-O2-NEXT: addi sp, sp, -16 +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a2, a2, 1 +; SPILL-O2-NEXT: sub sp, sp, a2 +; SPILL-O2-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; SPILL-O2-NEXT: vlseg2e32.v v0, (a0) +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: vs1r.v v0, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: add a0, a0, a1 +; SPILL-O2-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: #APP +; SPILL-O2-NEXT: #NO_APP +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: add a0, a0, a1 +; SPILL-O2-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: # kill: def $v8 killed $v8 killed $v7_v8 +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 1 +; SPILL-O2-NEXT: add sp, sp, a0 +; SPILL-O2-NEXT: addi sp, sp, 16 +; SPILL-O2-NEXT: ret +entry: + %0 = tail call {,} @llvm.riscv.vlseg2.nxv2i32(i32* %base, i32 %vl) + call void asm sideeffect "", + "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + %1 = extractvalue {,} %0, 1 + ret %1 +} + +define @spill_zvlsseg_nxv4i32(i32* %base, i32 %vl) nounwind { +; SPILL-O0-LABEL: spill_zvlsseg_nxv4i32: +; SPILL-O0: # %bb.0: # %entry +; SPILL-O0-NEXT: addi sp, sp, -16 +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a2, a2, 1 +; SPILL-O0-NEXT: sub sp, sp, a2 +; SPILL-O0-NEXT: vsetvli a1, a1, e32,m2,ta,mu +; SPILL-O0-NEXT: vlseg2e32.v v0, (a0) +; SPILL-O0-NEXT: vmv2r.v v26, v2 +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vs2r.v v26, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: #APP +; SPILL-O0-NEXT: #NO_APP +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vl2re8.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 1 +; SPILL-O0-NEXT: add sp, sp, a0 +; SPILL-O0-NEXT: addi sp, sp, 16 +; SPILL-O0-NEXT: ret +; +; SPILL-O2-LABEL: spill_zvlsseg_nxv4i32: +; SPILL-O2: # %bb.0: # %entry 
+; SPILL-O2-NEXT: addi sp, sp, -16 +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a2, a2, 2 +; SPILL-O2-NEXT: sub sp, sp, a2 +; SPILL-O2-NEXT: vsetvli a1, a1, e32,m2,ta,mu +; SPILL-O2-NEXT: vlseg2e32.v v0, (a0) +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: slli a1, a1, 1 +; SPILL-O2-NEXT: vs2r.v v0, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: add a0, a0, a1 +; SPILL-O2-NEXT: vs2r.v v2, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: #APP +; SPILL-O2-NEXT: #NO_APP +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: slli a1, a1, 1 +; SPILL-O2-NEXT: vl2r.v v6, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: add a0, a0, a1 +; SPILL-O2-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: # kill: def $v8m2 killed $v8m2 killed $v6m2_v8m2 +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 2 +; SPILL-O2-NEXT: add sp, sp, a0 +; SPILL-O2-NEXT: addi sp, sp, 16 +; SPILL-O2-NEXT: ret +entry: + %0 = tail call {,} @llvm.riscv.vlseg2.nxv4i32(i32* %base, i32 %vl) + call void asm sideeffect "", + "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + %1 = extractvalue {,} %0, 1 + ret %1 +} + +define @spill_zvlsseg_nxv8i32(i32* %base, i32 %vl) nounwind { +; SPILL-O0-LABEL: spill_zvlsseg_nxv8i32: +; SPILL-O0: # %bb.0: # %entry +; SPILL-O0-NEXT: addi sp, sp, -16 +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a2, a2, 2 +; SPILL-O0-NEXT: sub sp, sp, a2 +; SPILL-O0-NEXT: vsetvli a1, a1, e32,m4,ta,mu +; SPILL-O0-NEXT: vlseg2e32.v v0, (a0) +; SPILL-O0-NEXT: vmv4r.v v28, v4 +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vs4r.v v28, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: #APP +; SPILL-O0-NEXT: #NO_APP +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vl4re8.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 2 +; SPILL-O0-NEXT: add sp, sp, a0 +; SPILL-O0-NEXT: addi sp, sp, 16 +; SPILL-O0-NEXT: ret +; +; SPILL-O2-LABEL: spill_zvlsseg_nxv8i32: +; SPILL-O2: # %bb.0: # %entry +; SPILL-O2-NEXT: addi sp, sp, -16 +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a2, a2, 3 +; SPILL-O2-NEXT: sub sp, sp, a2 +; SPILL-O2-NEXT: vsetvli a1, a1, e32,m4,ta,mu +; SPILL-O2-NEXT: vlseg2e32.v v0, (a0) +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: slli a1, a1, 2 +; SPILL-O2-NEXT: vs4r.v v0, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: add a0, a0, a1 +; SPILL-O2-NEXT: vs4r.v v4, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: #APP +; SPILL-O2-NEXT: #NO_APP +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: slli a1, a1, 2 +; SPILL-O2-NEXT: vl4r.v v4, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: add a0, a0, a1 +; SPILL-O2-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: # kill: def $v8m4 killed $v8m4 killed $v4m4_v8m4 +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 3 +; SPILL-O2-NEXT: add sp, sp, a0 +; SPILL-O2-NEXT: addi sp, sp, 16 +; SPILL-O2-NEXT: ret +entry: + %0 = tail call {,} @llvm.riscv.vlseg2.nxv8i32(i32* %base, i32 %vl) + call void asm sideeffect "", + 
"~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + %1 = extractvalue {,} %0, 1 + ret %1 +} + +define @spill_zvlsseg3_nxv4i32(i32* %base, i32 %vl) nounwind { +; SPILL-O0-LABEL: spill_zvlsseg3_nxv4i32: +; SPILL-O0: # %bb.0: # %entry +; SPILL-O0-NEXT: addi sp, sp, -16 +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a2, a2, 1 +; SPILL-O0-NEXT: sub sp, sp, a2 +; SPILL-O0-NEXT: vsetvli a1, a1, e32,m2,ta,mu +; SPILL-O0-NEXT: vlseg3e32.v v0, (a0) +; SPILL-O0-NEXT: vmv2r.v v26, v2 +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vs2r.v v26, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: #APP +; SPILL-O0-NEXT: #NO_APP +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vl2re8.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 1 +; SPILL-O0-NEXT: add sp, sp, a0 +; SPILL-O0-NEXT: addi sp, sp, 16 +; SPILL-O0-NEXT: ret +; +; SPILL-O2-LABEL: spill_zvlsseg3_nxv4i32: +; SPILL-O2: # %bb.0: # %entry +; SPILL-O2-NEXT: addi sp, sp, -16 +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: addi a3, zero, 6 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: sub sp, sp, a2 +; SPILL-O2-NEXT: vsetvli a1, a1, e32,m2,ta,mu +; SPILL-O2-NEXT: vlseg3e32.v v0, (a0) +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: slli a1, a1, 1 +; SPILL-O2-NEXT: vs2r.v v0, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: add a0, a0, a1 +; SPILL-O2-NEXT: vs2r.v v2, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: add a0, a0, a1 +; SPILL-O2-NEXT: vs2r.v v4, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: #APP +; SPILL-O2-NEXT: #NO_APP +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: slli a1, a1, 1 +; SPILL-O2-NEXT: vl2r.v v6, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: add a0, a0, a1 +; SPILL-O2-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: add a0, a0, a1 +; SPILL-O2-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: # kill: def $v8m2 killed $v8m2 killed $v6m2_v8m2_v10m2 +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: addi a1, zero, 6 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add sp, sp, a0 +; SPILL-O2-NEXT: addi sp, sp, 16 +; SPILL-O2-NEXT: ret +entry: + %0 = tail call {,,} @llvm.riscv.vlseg3.nxv4i32(i32* %base, i32 %vl) + call void asm sideeffect "", + "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + %1 = extractvalue {,,} %0, 1 + ret %1 +} + +declare {,} @llvm.riscv.vlseg2.nxv1i32(i32* , i32) +declare {,} @llvm.riscv.vlseg2.nxv2i32(i32* , i32) +declare {,} @llvm.riscv.vlseg2.nxv4i32(i32* , i32) +declare {,} @llvm.riscv.vlseg2.nxv8i32(i32* , i32) +declare {,,} @llvm.riscv.vlseg3.nxv4i32(i32* , i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll new file mode 100644 index 0000000..bbda998 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll @@ -0,0 +1,299 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -mattr=+m -O0 < %s \ +; RUN: | FileCheck --check-prefix=SPILL-O0 %s +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -mattr=+m -O2 < 
%s \ +; RUN: | FileCheck --check-prefix=SPILL-O2 %s + +define @spill_zvlsseg_nxv1i32(i32* %base, i64 %vl) nounwind { +; SPILL-O0-LABEL: spill_zvlsseg_nxv1i32: +; SPILL-O0: # %bb.0: # %entry +; SPILL-O0-NEXT: addi sp, sp, -16 +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: sub sp, sp, a2 +; SPILL-O0-NEXT: vsetvli a1, a1, e32,mf2,ta,mu +; SPILL-O0-NEXT: vlseg2e32.v v0, (a0) +; SPILL-O0-NEXT: vmv1r.v v25, v1 +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: #APP +; SPILL-O0-NEXT: #NO_APP +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: add sp, sp, a0 +; SPILL-O0-NEXT: addi sp, sp, 16 +; SPILL-O0-NEXT: ret +; +; SPILL-O2-LABEL: spill_zvlsseg_nxv1i32: +; SPILL-O2: # %bb.0: # %entry +; SPILL-O2-NEXT: addi sp, sp, -16 +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a2, a2, 1 +; SPILL-O2-NEXT: sub sp, sp, a2 +; SPILL-O2-NEXT: vsetvli a1, a1, e32,mf2,ta,mu +; SPILL-O2-NEXT: vlseg2e32.v v0, (a0) +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: vs1r.v v0, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: add a0, a0, a1 +; SPILL-O2-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: #APP +; SPILL-O2-NEXT: #NO_APP +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: add a0, a0, a1 +; SPILL-O2-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: # kill: def $v8 killed $v8 killed $v7_v8 +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 1 +; SPILL-O2-NEXT: add sp, sp, a0 +; SPILL-O2-NEXT: addi sp, sp, 16 +; SPILL-O2-NEXT: ret +entry: + %0 = tail call {,} @llvm.riscv.vlseg2.nxv1i32(i32* %base, i64 %vl) + call void asm sideeffect "", + "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + %1 = extractvalue {,} %0, 1 + ret %1 +} + +define @spill_zvlsseg_nxv2i32(i32* %base, i64 %vl) nounwind { +; SPILL-O0-LABEL: spill_zvlsseg_nxv2i32: +; SPILL-O0: # %bb.0: # %entry +; SPILL-O0-NEXT: addi sp, sp, -16 +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: sub sp, sp, a2 +; SPILL-O0-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; SPILL-O0-NEXT: vlseg2e32.v v0, (a0) +; SPILL-O0-NEXT: vmv1r.v v25, v1 +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: #APP +; SPILL-O0-NEXT: #NO_APP +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: add sp, sp, a0 +; SPILL-O0-NEXT: addi sp, sp, 16 +; SPILL-O0-NEXT: ret +; +; SPILL-O2-LABEL: spill_zvlsseg_nxv2i32: +; SPILL-O2: # %bb.0: # %entry +; SPILL-O2-NEXT: addi sp, sp, -16 +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a2, a2, 1 +; SPILL-O2-NEXT: sub sp, sp, a2 +; SPILL-O2-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; SPILL-O2-NEXT: vlseg2e32.v v0, (a0) +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: vs1r.v v0, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: add a0, a0, a1 +; SPILL-O2-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: #APP +; SPILL-O2-NEXT: #NO_APP +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: vl1r.v v7, (a0) # 
Unknown-size Folded Reload +; SPILL-O2-NEXT: add a0, a0, a1 +; SPILL-O2-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: # kill: def $v8 killed $v8 killed $v7_v8 +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 1 +; SPILL-O2-NEXT: add sp, sp, a0 +; SPILL-O2-NEXT: addi sp, sp, 16 +; SPILL-O2-NEXT: ret +entry: + %0 = tail call {,} @llvm.riscv.vlseg2.nxv2i32(i32* %base, i64 %vl) + call void asm sideeffect "", + "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + %1 = extractvalue {,} %0, 1 + ret %1 +} + +define @spill_zvlsseg_nxv4i32(i32* %base, i64 %vl) nounwind { +; SPILL-O0-LABEL: spill_zvlsseg_nxv4i32: +; SPILL-O0: # %bb.0: # %entry +; SPILL-O0-NEXT: addi sp, sp, -16 +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a2, a2, 1 +; SPILL-O0-NEXT: sub sp, sp, a2 +; SPILL-O0-NEXT: vsetvli a1, a1, e32,m2,ta,mu +; SPILL-O0-NEXT: vlseg2e32.v v0, (a0) +; SPILL-O0-NEXT: vmv2r.v v26, v2 +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vs2r.v v26, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: #APP +; SPILL-O0-NEXT: #NO_APP +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vl2re8.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 1 +; SPILL-O0-NEXT: add sp, sp, a0 +; SPILL-O0-NEXT: addi sp, sp, 16 +; SPILL-O0-NEXT: ret +; +; SPILL-O2-LABEL: spill_zvlsseg_nxv4i32: +; SPILL-O2: # %bb.0: # %entry +; SPILL-O2-NEXT: addi sp, sp, -16 +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a2, a2, 2 +; SPILL-O2-NEXT: sub sp, sp, a2 +; SPILL-O2-NEXT: vsetvli a1, a1, e32,m2,ta,mu +; SPILL-O2-NEXT: vlseg2e32.v v0, (a0) +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: slli a1, a1, 1 +; SPILL-O2-NEXT: vs2r.v v0, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: add a0, a0, a1 +; SPILL-O2-NEXT: vs2r.v v2, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: #APP +; SPILL-O2-NEXT: #NO_APP +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: slli a1, a1, 1 +; SPILL-O2-NEXT: vl2r.v v6, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: add a0, a0, a1 +; SPILL-O2-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: # kill: def $v8m2 killed $v8m2 killed $v6m2_v8m2 +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 2 +; SPILL-O2-NEXT: add sp, sp, a0 +; SPILL-O2-NEXT: addi sp, sp, 16 +; SPILL-O2-NEXT: ret +entry: + %0 = tail call {,} @llvm.riscv.vlseg2.nxv4i32(i32* %base, i64 %vl) + call void asm sideeffect "", + "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + %1 = extractvalue {,} %0, 1 + ret %1 +} + +define @spill_zvlsseg_nxv8i32(i32* %base, i64 %vl) nounwind { +; SPILL-O0-LABEL: spill_zvlsseg_nxv8i32: +; SPILL-O0: # %bb.0: # %entry +; SPILL-O0-NEXT: addi sp, sp, -16 +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a2, a2, 2 +; SPILL-O0-NEXT: sub sp, sp, a2 +; SPILL-O0-NEXT: vsetvli a1, a1, e32,m4,ta,mu +; SPILL-O0-NEXT: vlseg2e32.v v0, (a0) +; SPILL-O0-NEXT: vmv4r.v v28, v4 +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vs4r.v v28, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: #APP +; SPILL-O0-NEXT: #NO_APP +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vl4re8.v v8, 
(a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 2 +; SPILL-O0-NEXT: add sp, sp, a0 +; SPILL-O0-NEXT: addi sp, sp, 16 +; SPILL-O0-NEXT: ret +; +; SPILL-O2-LABEL: spill_zvlsseg_nxv8i32: +; SPILL-O2: # %bb.0: # %entry +; SPILL-O2-NEXT: addi sp, sp, -16 +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a2, a2, 3 +; SPILL-O2-NEXT: sub sp, sp, a2 +; SPILL-O2-NEXT: vsetvli a1, a1, e32,m4,ta,mu +; SPILL-O2-NEXT: vlseg2e32.v v0, (a0) +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: slli a1, a1, 2 +; SPILL-O2-NEXT: vs4r.v v0, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: add a0, a0, a1 +; SPILL-O2-NEXT: vs4r.v v4, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: #APP +; SPILL-O2-NEXT: #NO_APP +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: slli a1, a1, 2 +; SPILL-O2-NEXT: vl4r.v v4, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: add a0, a0, a1 +; SPILL-O2-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: # kill: def $v8m4 killed $v8m4 killed $v4m4_v8m4 +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 3 +; SPILL-O2-NEXT: add sp, sp, a0 +; SPILL-O2-NEXT: addi sp, sp, 16 +; SPILL-O2-NEXT: ret +entry: + %0 = tail call {,} @llvm.riscv.vlseg2.nxv8i32(i32* %base, i64 %vl) + call void asm sideeffect "", + "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + %1 = extractvalue {,} %0, 1 + ret %1 +} + +define @spill_zvlsseg3_nxv4i32(i32* %base, i64 %vl) nounwind { +; SPILL-O0-LABEL: spill_zvlsseg3_nxv4i32: +; SPILL-O0: # %bb.0: # %entry +; SPILL-O0-NEXT: addi sp, sp, -16 +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a2, a2, 1 +; SPILL-O0-NEXT: sub sp, sp, a2 +; SPILL-O0-NEXT: vsetvli a1, a1, e32,m2,ta,mu +; SPILL-O0-NEXT: vlseg3e32.v v0, (a0) +; SPILL-O0-NEXT: vmv2r.v v26, v2 +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vs2r.v v26, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: #APP +; SPILL-O0-NEXT: #NO_APP +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vl2re8.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 1 +; SPILL-O0-NEXT: add sp, sp, a0 +; SPILL-O0-NEXT: addi sp, sp, 16 +; SPILL-O0-NEXT: ret +; +; SPILL-O2-LABEL: spill_zvlsseg3_nxv4i32: +; SPILL-O2: # %bb.0: # %entry +; SPILL-O2-NEXT: addi sp, sp, -16 +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: addi a3, zero, 6 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: sub sp, sp, a2 +; SPILL-O2-NEXT: vsetvli a1, a1, e32,m2,ta,mu +; SPILL-O2-NEXT: vlseg3e32.v v0, (a0) +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: slli a1, a1, 1 +; SPILL-O2-NEXT: vs2r.v v0, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: add a0, a0, a1 +; SPILL-O2-NEXT: vs2r.v v2, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: add a0, a0, a1 +; SPILL-O2-NEXT: vs2r.v v4, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: #APP +; SPILL-O2-NEXT: #NO_APP +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: slli a1, a1, 1 +; SPILL-O2-NEXT: vl2r.v v6, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: add a0, a0, a1 +; SPILL-O2-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: add a0, a0, a1 +; SPILL-O2-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: # kill: def 
$v8m2 killed $v8m2 killed $v6m2_v8m2_v10m2
+; SPILL-O2-NEXT:    csrr a0, vlenb
+; SPILL-O2-NEXT:    addi a1, zero, 6
+; SPILL-O2-NEXT:    mul a0, a0, a1
+; SPILL-O2-NEXT:    add sp, sp, a0
+; SPILL-O2-NEXT:    addi sp, sp, 16
+; SPILL-O2-NEXT:    ret
+entry:
+  %0 = tail call {<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>} @llvm.riscv.vlseg3.nxv4i32(i32* %base, i64 %vl)
+  call void asm sideeffect "",
+  "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+  %1 = extractvalue {<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>} %0, 1
+  ret <vscale x 4 x i32> %1
+}
+
+declare {<vscale x 1 x i32>,<vscale x 1 x i32>} @llvm.riscv.vlseg2.nxv1i32(i32* , i64)
+declare {<vscale x 2 x i32>,<vscale x 2 x i32>} @llvm.riscv.vlseg2.nxv2i32(i32* , i64)
+declare {<vscale x 4 x i32>,<vscale x 4 x i32>} @llvm.riscv.vlseg2.nxv4i32(i32* , i64)
+declare {<vscale x 8 x i32>,<vscale x 8 x i32>} @llvm.riscv.vlseg2.nxv8i32(i32* , i64)
+declare {<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>} @llvm.riscv.vlseg3.nxv4i32(i32* , i64)
-- 
2.7.4