From: serge-sans-paille Date: Tue, 27 Oct 2020 09:59:42 +0000 (+0100) Subject: [stack-clash] Fix probing of dynamic alloca X-Git-Tag: llvmorg-13-init~7608 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=0f60bcc36c34522618bd1425a45f8c6006568fb6;p=platform%2Fupstream%2Fllvm.git [stack-clash] Fix probing of dynamic alloca - Perform the probing in the correct direction. Related to https://github.com/rust-lang/rust/pull/77885#issuecomment-711062924 - The first touch on a dynamic alloca cannot use a mov because it clobbers existing space. Use a xor 0 instead Differential Revision: https://reviews.llvm.org/D90216 --- diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index fb9fc84..28f4a3b 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -32316,7 +32316,7 @@ X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI, BuildMI(testMBB, DL, TII->get(X86::JCC_1)) .addMBB(tailMBB) - .addImm(X86::COND_LE); + .addImm(X86::COND_GE); testMBB->addSuccessor(blockMBB); testMBB->addSuccessor(tailMBB); @@ -32332,9 +32332,9 @@ X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI, // // The property we want to enforce is to never have more than [page alloc] between two probes. - const unsigned MovMIOpc = - TFI.Uses64BitFramePtr ? X86::MOV64mi32 : X86::MOV32mi; - addRegOffset(BuildMI(blockMBB, DL, TII->get(MovMIOpc)), physSPReg, false, 0) + const unsigned XORMIOpc = + TFI.Uses64BitFramePtr ? X86::XOR64mi8 : X86::XOR32mi8; + addRegOffset(BuildMI(blockMBB, DL, TII->get(XORMIOpc)), physSPReg, false, 0) .addImm(0); BuildMI(blockMBB, DL, diff --git a/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll b/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll index 82fd678..6dd8b6a 100644 --- a/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll +++ b/llvm/test/CodeGen/X86/stack-clash-dynamic-alloca.ll @@ -24,12 +24,12 @@ attributes #0 = {"probe-stack"="inline-asm"} ; CHECK-X86-64-NEXT: andq $-16, %rcx ; CHECK-X86-64-NEXT: subq %rcx, %rax ; CHECK-X86-64-NEXT: cmpq %rsp, %rax -; CHECK-X86-64-NEXT: jle .LBB0_3 +; CHECK-X86-64-NEXT: jge .LBB0_3 ; CHECK-X86-64-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1 -; CHECK-X86-64-NEXT: movq $0, (%rsp) +; CHECK-X86-64-NEXT: xorq $0, (%rsp) ; CHECK-X86-64-NEXT: subq $4096, %rsp # imm = 0x1000 ; CHECK-X86-64-NEXT: cmpq %rsp, %rax -; CHECK-X86-64-NEXT: jg .LBB0_2 +; CHECK-X86-64-NEXT: jl .LBB0_2 ; CHECK-X86-64-NEXT: .LBB0_3: ; CHECK-X86-64-NEXT: movq %rax, %rsp ; CHECK-X86-64-NEXT: movl $1, 4792(%rax) @@ -54,12 +54,12 @@ attributes #0 = {"probe-stack"="inline-asm"} ; CHECK-X86-32-NEXT: andl $-16, %ecx ; CHECK-X86-32-NEXT: subl %ecx, %eax ; CHECK-X86-32-NEXT: cmpl %esp, %eax -; CHECK-X86-32-NEXT: jle .LBB0_3 +; CHECK-X86-32-NEXT: jge .LBB0_3 ; CHECK-X86-32-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1 -; CHECK-X86-32-NEXT: movl $0, (%esp) +; CHECK-X86-32-NEXT: xorl $0, (%esp) ; CHECK-X86-32-NEXT: subl $4096, %esp # imm = 0x1000 ; CHECK-X86-32-NEXT: cmpl %esp, %eax -; CHECK-X86-32-NEXT: jg .LBB0_2 +; CHECK-X86-32-NEXT: jl .LBB0_2 ; CHECK-X86-32-NEXT: .LBB0_3: ; CHECK-X86-32-NEXT: movl %eax, %esp ; CHECK-X86-32-NEXT: movl $1, 4792(%eax) diff --git a/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll b/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll index eafa86f..39b6c36 100644 --- a/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll +++ b/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll @@ -106,12 +106,12 @@ define i32 @foo4(i64 %i) local_unnamed_addr #0 { ; CHECK-NEXT: andq $-16, %rcx ; CHECK-NEXT: subq %rcx, %rax ; CHECK-NEXT: cmpq %rsp, %rax -; CHECK-NEXT: jle .LBB3_3 +; CHECK-NEXT: jge .LBB3_3 ; CHECK-NEXT:.LBB3_2: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: movq $0, (%rsp) +; CHECK-NEXT: xorq $0, (%rsp) ; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 ; CHECK-NEXT: cmpq %rsp, %rax -; CHECK-NEXT: jg .LBB3_2 +; CHECK-NEXT: jl .LBB3_2 ; CHECK-NEXT:.LBB3_3: ; CHECK-NEXT: andq $-64, %rax ; CHECK-NEXT: movq %rax, %rsp