From eb3e09c9bf1d8808acf7d21f40ab0103121a0d60 Mon Sep 17 00:00:00 2001 From: Neumann Hon Date: Fri, 25 Feb 2022 02:37:05 -0500 Subject: [PATCH] [SystemZ] [z/OS] Add support for generating huge (1 MiB) stack frames in XPLINK64 This patch extends support for generating huge stack frames on 64-bit XPLINK by implementing the ABI-mandated call to the stack extension routine. Reviewed By: uweigand Differential Revision: https://reviews.llvm.org/D120450 --- llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp | 7 ++ llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp | 86 ++++++++++++++++++++++-- llvm/lib/Target/SystemZ/SystemZFrameLowering.h | 3 + llvm/lib/Target/SystemZ/SystemZInstrInfo.td | 8 +++ llvm/lib/Target/SystemZ/SystemZScheduleZ13.td | 2 +- llvm/lib/Target/SystemZ/SystemZScheduleZ14.td | 2 +- llvm/lib/Target/SystemZ/SystemZScheduleZ15.td | 2 +- llvm/lib/Target/SystemZ/SystemZScheduleZ196.td | 2 +- llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td | 2 +- llvm/test/CodeGen/SystemZ/zos-prologue-epilog.ll | 16 +++++ 10 files changed, 119 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp index 46538f9..75f19e0 100644 --- a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp +++ b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp @@ -262,6 +262,13 @@ void SystemZAsmPrinter::emitInstruction(const MachineInstr *MI) { emitCallInformation(CallType::BASR76); return; + case SystemZ::CallBASR_STACKEXT: + EmitToStreamer(*OutStreamer, MCInstBuilder(SystemZ::BASR) + .addReg(SystemZ::R3D) + .addReg(MI->getOperand(0).getReg())); + emitCallInformation(CallType::BASR33); + return; + case SystemZ::CallBRASL: LoweredMI = MCInstBuilder(SystemZ::BRASL) .addReg(SystemZ::R14D) diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp index 610627e..b22e1b2 100644 --- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp +++ 
b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp @@ -1153,12 +1153,6 @@ void SystemZXPLINKFrameLowering::emitPrologue(MachineFunction &MF, MFFrame.setStackSize(MFFrame.getStackSize() + Regs.getCallFrameSize()); uint64_t StackSize = MFFrame.getStackSize(); - // FIXME: Implement support for large stack sizes, when the stack extension - // routine needs to be called. - if (StackSize > 1024 * 1024) { - llvm_unreachable("Huge Stack Frame not yet supported on z/OS"); - } - if (ZFI->getSpillGPRRegs().LowGPR) { // Skip over the GPR saves. if ((MBBI != MBB.end()) && ((MBBI->getOpcode() == SystemZ::STMG))) { @@ -1201,6 +1195,18 @@ void SystemZXPLINKFrameLowering::emitPrologue(MachineFunction &MF, emitIncrement(MBB, InsertPt, DL, Regs.getStackPointerRegister(), Delta, ZII); + + // If the requested stack size is larger than the guard page, then we need + // to check if we need to call the stack extender. This requires adding a + // conditional branch, but splitting the prologue block is not possible at + // this point since it would invalidate the SaveBlocks / RestoreBlocks sets + // of PEI in the single block function case. Build a pseudo to be handled + // later by inlineStackProbe(). + const uint64_t GuardPageSize = 1024 * 1024; + if (StackSize > GuardPageSize) { + assert(StoreInstr && "Wrong insertion point"); + BuildMI(MBB, InsertPt, DL, ZII->get(SystemZ::XPLINK_STACKALLOC)); + } } if (HasFP) { @@ -1239,6 +1245,74 @@ void SystemZXPLINKFrameLowering::emitEpilogue(MachineFunction &MF, } } +// Emit a compare of the stack pointer against the stack floor, and a call to +// the LE stack extender if needed. 
+void SystemZXPLINKFrameLowering::inlineStackProbe( + MachineFunction &MF, MachineBasicBlock &PrologMBB) const { + auto *ZII = + static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo()); + + MachineInstr *StackAllocMI = nullptr; + for (MachineInstr &MI : PrologMBB) + if (MI.getOpcode() == SystemZ::XPLINK_STACKALLOC) { + StackAllocMI = &MI; + break; + } + if (StackAllocMI == nullptr) + return; + + MachineBasicBlock &MBB = PrologMBB; + const DebugLoc DL = StackAllocMI->getDebugLoc(); + + // The 2nd half of block MBB after split. + MachineBasicBlock *NextMBB; + + // Add new basic block for the call to the stack overflow function. + MachineBasicBlock *StackExtMBB = + MF.CreateMachineBasicBlock(MBB.getBasicBlock()); + MF.push_back(StackExtMBB); + + // LG r3,72(,r3) + BuildMI(StackExtMBB, DL, ZII->get(SystemZ::LG), SystemZ::R3D) + .addReg(SystemZ::R3D) + .addImm(72) + .addReg(0); + // BASR r3,r3 + BuildMI(StackExtMBB, DL, ZII->get(SystemZ::CallBASR_STACKEXT)) + .addReg(SystemZ::R3D); + + // LLGT r3,1208 + BuildMI(MBB, StackAllocMI, DL, ZII->get(SystemZ::LLGT), SystemZ::R3D) + .addReg(0) + .addImm(1208) + .addReg(0); + // CG r4,64(,r3) + BuildMI(MBB, StackAllocMI, DL, ZII->get(SystemZ::CG)) + .addReg(SystemZ::R4D) + .addReg(SystemZ::R3D) + .addImm(64) + .addReg(0); + // JLL b'0100',F'37' + BuildMI(MBB, StackAllocMI, DL, ZII->get(SystemZ::BRC)) + .addImm(SystemZ::CCMASK_ICMP) + .addImm(SystemZ::CCMASK_CMP_LT) + .addMBB(StackExtMBB); + + NextMBB = SystemZ::splitBlockBefore(StackAllocMI, &MBB); + MBB.addSuccessor(NextMBB); + MBB.addSuccessor(StackExtMBB); + + // Add jump back from stack extension BB. + BuildMI(StackExtMBB, DL, ZII->get(SystemZ::J)).addMBB(NextMBB); + StackExtMBB->addSuccessor(NextMBB); + + StackAllocMI->eraseFromParent(); + + // Compute the live-in lists for the new blocks. 
+ recomputeLiveIns(*NextMBB); + recomputeLiveIns(*StackExtMBB); +} + bool SystemZXPLINKFrameLowering::hasFP(const MachineFunction &MF) const { return (MF.getFrameInfo().hasVarSizedObjects()); } diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h index 2b3d7ef..bec83a9 100644 --- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h +++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h @@ -127,6 +127,9 @@ public: void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; + void inlineStackProbe(MachineFunction &MF, + MachineBasicBlock &PrologMBB) const override; + bool hasFP(const MachineFunction &MF) const override; void processFunctionBeforeFrameFinalized(MachineFunction &MF, diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td index c47731b..ed7e3c0 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td @@ -42,6 +42,10 @@ let Defs = [R1D, R15D, CC], Uses = [R15D], hasNoSchedulingInfo = 1, hasSideEffects = 1 in def PROBED_STACKALLOC : Pseudo<(outs), (ins i64imm:$stacksize), []>; +let Defs = [R3D, CC], Uses = [R3D, R4D], hasNoSchedulingInfo = 1, + hasSideEffects = 1 in + def XPLINK_STACKALLOC : Pseudo<(outs), (ins), []>; + //===----------------------------------------------------------------------===// // Branch instructions //===----------------------------------------------------------------------===// @@ -285,6 +289,10 @@ let Predicates = [IsTargetXPLINK64] in { def CallBASR_XPLINK64 : Alias<4, (outs), (ins ADDR64:$R2, variable_ops), [(z_call ADDR64:$R2)]>; } + + let isCall = 1, Defs = [R3D, CC], Uses = [FPC] in { + def CallBASR_STACKEXT : Alias<4, (outs), (ins ADDR64:$R2), []>; + } } // Regular calls. 
diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td index ac92501..fd01a8a 100644 --- a/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td +++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td @@ -168,7 +168,7 @@ def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "CL(G)?T(Asm.*)?$")>; // Call def : InstRW<[WLat1, VBU, FXa2, GroupAlone], (instregex "(Call)?BRAS$")>; def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BRASL(_XPLINK64)?$")>; -def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BAS(R)?(_XPLINK64)?$")>; +def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BAS(R)?(_XPLINK64|_STACKEXT)?$")>; def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "TLS_(G|L)DCALL$")>; // Return diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td index 683b66a..3f40673 100644 --- a/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td +++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ14.td @@ -169,7 +169,7 @@ def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "CL(G)?T(Asm.*)?$")>; // Call def : InstRW<[WLat1, VBU, FXa2, GroupAlone], (instregex "(Call)?BRAS$")>; def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BRASL(_XPLINK64)?$")>; -def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BAS(R)?(_XPLINK64)?$")>; +def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BAS(R)?(_XPLINK64|_STACKEXT)?$")>; def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "TLS_(G|L)DCALL$")>; // Return diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td index 2ebdf50..6ae911c 100644 --- a/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td +++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ15.td @@ -169,7 +169,7 @@ def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "CL(G)?T(Asm.*)?$")>; // Call def : InstRW<[WLat1, VBU, FXa2, GroupAlone], (instregex "(Call)?BRAS$")>; def : 
InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BRASL(_XPLINK64)?$")>; -def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BAS(R)?(_XPLINK64)?$")>; +def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BAS(R)?(_XPLINK64|_STACKEXT)?$")>; def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "TLS_(G|L)DCALL$")>; // Return diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td b/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td index 51c87c2..173cf96 100644 --- a/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td +++ b/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td @@ -147,7 +147,7 @@ def : InstRW<[WLat1, FXU, NormalGr], (instregex "CL(F|G)IT(Asm.*)?$")>; // Call def : InstRW<[WLat1, LSU, FXU2, GroupAlone], (instregex "(Call)?BRAS$")>; def : InstRW<[WLat1, LSU, FXU2, GroupAlone], (instregex "(Call)?BRASL(_XPLINK64)?$")>; -def : InstRW<[WLat1, LSU, FXU2, GroupAlone], (instregex "(Call)?BAS(R)?(_XPLINK64)?$")>; +def : InstRW<[WLat1, LSU, FXU2, GroupAlone], (instregex "(Call)?BAS(R)?(_XPLINK64|_STACKEXT)?$")>; def : InstRW<[WLat1, LSU, FXU2, GroupAlone], (instregex "TLS_(G|L)DCALL$")>; // Return diff --git a/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td b/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td index 8f2379c..d206047 100644 --- a/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td +++ b/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td @@ -152,7 +152,7 @@ def : InstRW<[WLat1, FXU, LSU, NormalGr], (instregex "CL(G)?T(Asm.*)?$")>; // Call def : InstRW<[WLat1, FXU2, VBU, GroupAlone], (instregex "(Call)?BRAS$")>; def : InstRW<[WLat1, FXU2, LSU, GroupAlone], (instregex "(Call)?BRASL(_XPLINK64)?$")>; -def : InstRW<[WLat1, FXU2, LSU, GroupAlone], (instregex "(Call)?BAS(R)?(_XPLINK64)?$")>; +def : InstRW<[WLat1, FXU2, LSU, GroupAlone], (instregex "(Call)?BAS(R)?(_XPLINK64|_STACKEXT)?$")>; def : InstRW<[WLat1, FXU2, LSU, GroupAlone], (instregex "TLS_(G|L)DCALL$")>; // Return diff --git a/llvm/test/CodeGen/SystemZ/zos-prologue-epilog.ll 
b/llvm/test/CodeGen/SystemZ/zos-prologue-epilog.ll index 563896f..c7d7cdf 100644 --- a/llvm/test/CodeGen/SystemZ/zos-prologue-epilog.ll +++ b/llvm/test/CodeGen/SystemZ/zos-prologue-epilog.ll @@ -312,6 +312,22 @@ define i64 @func5(i64 %n) { ret i64 %call } +; CHECK-LABEL: large_stack +; CHECK64: agfi 4, -1048768 +; CHECK64-NEXT: llgt 3, 1208 +; CHECK64-NEXT: cg 4, 64(3) +; CHECK64-NEXT: jhe +; CHECK64: * %bb.1: +; CHECK64: lg 3, 72(3) +; CHECK64: basr 3, 3 +; CHECK64: stmg 6, 7, 2064(4) +define void @large_stack() { + %arr = alloca [131072 x i64], align 8 + %ptr = bitcast [131072 x i64]* %arr to i8* + call i64 (i8*) @fun1(i8* %ptr) + ret void +} + declare i64 @fun(i64 %arg0) declare i64 @fun1(i8* %ptr) declare i64 @fun2(i64 %n, i64* %arr0, i64* %arr1) -- 2.7.4