From ea475c77ff9eab1de7d44684c8fb453b39f70081 Mon Sep 17 00:00:00 2001 From: Jonas Paulsson Date: Tue, 8 Dec 2020 01:33:28 +0100 Subject: [PATCH] [SystemZFrameLowering] Don't overwrite R1D (backchain) when probing. The loop-based probing done for stack clash protection altered R1D which corrupted the backchain value to be stored after the probing was done. By using R0D instead for the loop exit value, R1D is not modified. Review: Ulrich Weigand. Differential Revision: https://reviews.llvm.org/D92803 --- llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp | 52 ++++++++++-------- .../CodeGen/SystemZ/stack-clash-dynamic-alloca.ll | 8 +-- .../test/CodeGen/SystemZ/stack-clash-protection.ll | 63 ++++++++++++++++------ 3 files changed, 82 insertions(+), 41 deletions(-) diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp index 57529c8..0bfab12 100644 --- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp @@ -488,15 +488,6 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF, MFFrame.setStackSize(StackSize); if (StackSize) { - // Determine if we want to store a backchain. - bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain"); - - // If we need backchain, save current stack pointer. R1 is free at this - // point. - if (StoreBackchain) - BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::LGR)) - .addReg(SystemZ::R1D, RegState::Define).addReg(SystemZ::R15D); - // Allocate StackSize bytes. int64_t Delta = -int64_t(StackSize); const unsigned ProbeSize = TLI.getStackProbeSize(MF); @@ -512,18 +503,23 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF, .addImm(StackSize); } else { + bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain"); + // If we need backchain, save current stack pointer. R1 is free at + // this point. 
+ if (StoreBackchain) + BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::LGR)) + .addReg(SystemZ::R1D, RegState::Define).addReg(SystemZ::R15D); emitIncrement(MBB, MBBI, DL, SystemZ::R15D, Delta, ZII); buildCFAOffs(MBB, MBBI, DL, SPOffsetFromCFA + Delta, ZII); + if (StoreBackchain) { + // The back chain is stored topmost with packed-stack. + int Offset = usePackedStack(MF) ? SystemZMC::CallFrameSize - 8 : 0; + BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::STG)) + .addReg(SystemZ::R1D, RegState::Kill).addReg(SystemZ::R15D) + .addImm(Offset).addReg(0); + } } SPOffsetFromCFA += Delta; - - if (StoreBackchain) { - // The back chain is stored topmost with packed-stack. - int Offset = usePackedStack(MF) ? SystemZMC::CallFrameSize - 8 : 0; - BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::STG)) - .addReg(SystemZ::R1D, RegState::Kill).addReg(SystemZ::R15D) - .addImm(Offset).addReg(0); - } } if (HasFP) { @@ -668,6 +664,11 @@ void SystemZFrameLowering::inlineStackProbe(MachineFunction &MF, .addMemOperand(MMO); }; + bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain"); + if (StoreBackchain) + BuildMI(*MBB, MBBI, DL, ZII->get(SystemZ::LGR)) + .addReg(SystemZ::R1D, RegState::Define).addReg(SystemZ::R15D); + if (NumFullBlocks < 3) { // Emit unrolled probe statements. for (unsigned int i = 0; i < NumFullBlocks; i++) @@ -677,10 +678,11 @@ void SystemZFrameLowering::inlineStackProbe(MachineFunction &MF, uint64_t LoopAlloc = ProbeSize * NumFullBlocks; SPOffsetFromCFA -= LoopAlloc; - BuildMI(*MBB, MBBI, DL, ZII->get(SystemZ::LGR), SystemZ::R1D) + // Use R0D to hold the exit value. 
+ BuildMI(*MBB, MBBI, DL, ZII->get(SystemZ::LGR), SystemZ::R0D) .addReg(SystemZ::R15D); - buildDefCFAReg(*MBB, MBBI, DL, SystemZ::R1D, ZII); - emitIncrement(*MBB, MBBI, DL, SystemZ::R1D, -int64_t(LoopAlloc), ZII); + buildDefCFAReg(*MBB, MBBI, DL, SystemZ::R0D, ZII); + emitIncrement(*MBB, MBBI, DL, SystemZ::R0D, -int64_t(LoopAlloc), ZII); buildCFAOffs(*MBB, MBBI, DL, -int64_t(SystemZMC::CallFrameSize + LoopAlloc), ZII); @@ -693,7 +695,7 @@ void SystemZFrameLowering::inlineStackProbe(MachineFunction &MF, MBB = LoopMBB; allocateAndProbe(*MBB, MBB->end(), ProbeSize, false/*EmitCFI*/); BuildMI(*MBB, MBB->end(), DL, ZII->get(SystemZ::CLGR)) - .addReg(SystemZ::R15D).addReg(SystemZ::R1D); + .addReg(SystemZ::R15D).addReg(SystemZ::R0D); BuildMI(*MBB, MBB->end(), DL, ZII->get(SystemZ::BRC)) .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_GT).addMBB(MBB); @@ -708,6 +710,14 @@ void SystemZFrameLowering::inlineStackProbe(MachineFunction &MF, if (Residual) allocateAndProbe(*MBB, MBBI, Residual, true/*EmitCFI*/); + if (StoreBackchain) { + // The back chain is stored topmost with packed-stack. + int Offset = usePackedStack(MF) ? 
SystemZMC::CallFrameSize - 8 : 0; + BuildMI(*MBB, MBBI, DL, ZII->get(SystemZ::STG)) + .addReg(SystemZ::R1D, RegState::Kill).addReg(SystemZ::R15D) + .addImm(Offset).addReg(0); + } + StackAllocMI->eraseFromParent(); } diff --git a/llvm/test/CodeGen/SystemZ/stack-clash-dynamic-alloca.ll b/llvm/test/CodeGen/SystemZ/stack-clash-dynamic-alloca.ll index 748f441..7b31876 100644 --- a/llvm/test/CodeGen/SystemZ/stack-clash-dynamic-alloca.ll +++ b/llvm/test/CodeGen/SystemZ/stack-clash-dynamic-alloca.ll @@ -92,14 +92,14 @@ define i32 @fun2(i32 %n) #0 "stack-probe-size"="4" { ; CHECK-NEXT: stmg %r11, %r15, 88(%r15) ; CHECK-NEXT: .cfi_offset %r11, -72 ; CHECK-NEXT: .cfi_offset %r15, -40 -; CHECK-NEXT: lgr %r1, %r15 -; CHECK-NEXT: .cfi_def_cfa_register %r1 -; CHECK-NEXT: aghi %r1, -160 +; CHECK-NEXT: lgr %r0, %r15 +; CHECK-NEXT: .cfi_def_cfa_register %r0 +; CHECK-NEXT: aghi %r0, -160 ; CHECK-NEXT: .cfi_def_cfa_offset 320 ; CHECK-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: aghi %r15, -8 ; CHECK-NEXT: cg %r0, 0(%r15) -; CHECK-NEXT: clgrjh %r15, %r1, .LBB2_1 +; CHECK-NEXT: clgrjh %r15, %r0, .LBB2_1 ; CHECK-NEXT: # %bb.2: ; CHECK-NEXT: .cfi_def_cfa_register %r15 ; CHECK-NEXT: lgr %r11, %r15 diff --git a/llvm/test/CodeGen/SystemZ/stack-clash-protection.ll b/llvm/test/CodeGen/SystemZ/stack-clash-protection.ll index 8166af3..b105306 100644 --- a/llvm/test/CodeGen/SystemZ/stack-clash-protection.ll +++ b/llvm/test/CodeGen/SystemZ/stack-clash-protection.ll @@ -48,14 +48,14 @@ define i32 @fun1() #0 { define i32 @fun2() #0 { ; CHECK-LABEL: fun2: ; CHECK: # %bb.0: -; CHECK-NEXT: lgr %r1, %r15 -; CHECK-NEXT: .cfi_def_cfa_register %r1 -; CHECK-NEXT: agfi %r1, -69632 +; CHECK-NEXT: lgr %r0, %r15 +; CHECK-NEXT: .cfi_def_cfa_register %r0 +; CHECK-NEXT: agfi %r0, -69632 ; CHECK-NEXT: .cfi_def_cfa_offset 69792 ; CHECK-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: aghi %r15, -4096 ; CHECK-NEXT: cg %r0, 4088(%r15) -; CHECK-NEXT: clgrjh %r15, %r1, .LBB2_1 +; 
CHECK-NEXT: clgrjh %r15, %r0, .LBB2_1 ; CHECK-NEXT: # %bb.2: ; CHECK-NEXT: .cfi_def_cfa_register %r15 ; CHECK-NEXT: aghi %r15, -2544 @@ -81,15 +81,15 @@ define i32 @fun2() #0 { define void @fun3() #0 { ; CHECK-LABEL: fun3: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lgr %r1, %r15 -; CHECK-NEXT: .cfi_def_cfa_register %r1 -; CHECK-NEXT: aghi %r1, -28672 +; CHECK-NEXT: lgr %r0, %r15 +; CHECK-NEXT: .cfi_def_cfa_register %r0 +; CHECK-NEXT: aghi %r0, -28672 ; CHECK-NEXT: .cfi_def_cfa_offset 28832 ; CHECK-NEXT: .LBB3_1: # %entry ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: aghi %r15, -4096 ; CHECK-NEXT: cg %r0, 4088(%r15) -; CHECK-NEXT: clgrjh %r15, %r1, .LBB3_1 +; CHECK-NEXT: clgrjh %r15, %r0, .LBB3_1 ; CHECK-NEXT: # %bb.2: # %entry ; CHECK-NEXT: .cfi_def_cfa_register %r15 ; CHECK-NEXT: mvhi 180(%r15), 0 @@ -110,15 +110,15 @@ entry: define void @fun4() #0 "stack-probe-size"="8192" { ; CHECK-LABEL: fun4: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lgr %r1, %r15 -; CHECK-NEXT: .cfi_def_cfa_register %r1 -; CHECK-NEXT: aghi %r1, -24576 +; CHECK-NEXT: lgr %r0, %r15 +; CHECK-NEXT: .cfi_def_cfa_register %r0 +; CHECK-NEXT: aghi %r0, -24576 ; CHECK-NEXT: .cfi_def_cfa_offset 24736 ; CHECK-NEXT: .LBB4_1: # %entry ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: aghi %r15, -8192 ; CHECK-NEXT: cg %r0, 8184(%r15) -; CHECK-NEXT: clgrjh %r15, %r1, .LBB4_1 +; CHECK-NEXT: clgrjh %r15, %r0, .LBB4_1 ; CHECK-NEXT: # %bb.2: # %entry ; CHECK-NEXT: .cfi_def_cfa_register %r15 ; CHECK-NEXT: aghi %r15, -7608 @@ -166,15 +166,15 @@ entry: define void @fun6() #0 "stack-probe-size"="5" { ; CHECK-LABEL: fun6: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lgr %r1, %r15 -; CHECK-NEXT: .cfi_def_cfa_register %r1 -; CHECK-NEXT: aghi %r1, -4184 +; CHECK-NEXT: lgr %r0, %r15 +; CHECK-NEXT: .cfi_def_cfa_register %r0 +; CHECK-NEXT: aghi %r0, -4184 ; CHECK-NEXT: .cfi_def_cfa_offset 4344 ; CHECK-NEXT: .LBB6_1: # %entry ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: aghi 
%r15, -8 ; CHECK-NEXT: cg %r0, 0(%r15) -; CHECK-NEXT: clgrjh %r15, %r1, .LBB6_1 +; CHECK-NEXT: clgrjh %r15, %r0, .LBB6_1 ; CHECK-NEXT: # %bb.2: # %entry ; CHECK-NEXT: .cfi_def_cfa_register %r15 ; CHECK-NEXT: mvhi 180(%r15), 0 @@ -237,6 +237,37 @@ define i32 @fun8() #0 { ret i32 %c } +define void @fun9() #0 "backchain" { +; CHECK-LABEL: fun9: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lgr %r0, %r15 +; CHECK-NEXT: lgr %r1, %r15 +; CHECK-NEXT: .cfi_def_cfa_register %r0 +; CHECK-NEXT: aghi %r0, -28672 +; CHECK-NEXT: .cfi_def_cfa_offset 28832 +; CHECK-NEXT: .LBB9_1: # %entry +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: aghi %r15, -4096 +; CHECK-NEXT: cg %r0, 4088(%r15) +; CHECK-NEXT: clgrjh %r15, %r0, .LBB9_1 +; CHECK-NEXT: # %bb.2: # %entry +; CHECK-NEXT: .cfi_def_cfa_register %r15 +; CHECK-NEXT: stg %r1, 0(%r15) +; CHECK-NEXT: mvhi 180(%r15), 0 +; CHECK-NEXT: l %r0, 180(%r15) +; CHECK-NEXT: aghi %r15, 28672 +; CHECK-NEXT: br %r14 +entry: + %stack = alloca [7122 x i32], align 4 + %i = alloca i32, align 4 + %0 = bitcast [7122 x i32]* %stack to i8* + %i.0.i.0..sroa_cast = bitcast i32* %i to i8* + store volatile i32 0, i32* %i, align 4 + %i.0.i.0.6 = load volatile i32, i32* %i, align 4 + ret void +} + + declare i32 @foo() attributes #0 = { "probe-stack"="inline-asm" } -- 2.7.4