From 9c5bda652cc27596ef920015d31604f1cd569b26 Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Wed, 24 Oct 2018 21:09:34 +0000 Subject: [PATCH] [X86] Add *SP to tailcall register class to fix verifier error It's possible to do a tail call to a stack argument. LLVM already calculates the right stack offset to call through. Fixes the sibcall* and musttail* verifier failures tracked at PR27481. llvm-svn: 345197 --- llvm/lib/Target/X86/X86RegisterInfo.td | 7 ++-- llvm/test/CodeGen/X86/musttail-indirect.ll | 4 +- llvm/test/CodeGen/X86/musttail-thiscall.ll | 4 +- llvm/test/CodeGen/X86/musttail-varargs.ll | 4 +- llvm/test/CodeGen/X86/sibcall-2.ll | 4 +- llvm/test/CodeGen/X86/sibcall.ll | 63 ++++++++++++++++++++---------- 6 files changed, 54 insertions(+), 32 deletions(-) diff --git a/llvm/lib/Target/X86/X86RegisterInfo.td b/llvm/lib/Target/X86/X86RegisterInfo.td index 31b9396..0c1b05f 100644 --- a/llvm/lib/Target/X86/X86RegisterInfo.td +++ b/llvm/lib/Target/X86/X86RegisterInfo.td @@ -436,11 +436,12 @@ def GR8_ABCD_H : RegisterClass<"X86", [i8], 8, (add AH, CH, DH, BH)>; def GR16_ABCD : RegisterClass<"X86", [i16], 16, (add AX, CX, DX, BX)>; def GR32_ABCD : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX, EBX)>; def GR64_ABCD : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RBX)>; -def GR32_TC : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX)>; +def GR32_TC : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX, ESP)>; def GR64_TC : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RSI, RDI, - R8, R9, R11, RIP)>; + R8, R9, R11, RIP, RSP)>; def GR64_TCW64 : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, - R8, R9, R10, R11, RIP)>; + R8, R9, R10, R11, + RIP, RSP)>; // GR8_NOREX - GR8 registers which do not require a REX prefix. def GR8_NOREX : RegisterClass<"X86", [i8], 8, diff --git a/llvm/test/CodeGen/X86/musttail-indirect.ll b/llvm/test/CodeGen/X86/musttail-indirect.ll index 7bb71c3..c142ffa 100644 --- a/llvm/test/CodeGen/X86/musttail-indirect.ll +++ b/llvm/test/CodeGen/X86/musttail-indirect.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=i686-win32 | FileCheck %s -; RUN: llc < %s -mtriple=i686-win32 -O0 | FileCheck %s +; RUN: llc -verify-machineinstrs < %s -mtriple=i686-win32 | FileCheck %s +; RUN: llc -verify-machineinstrs < %s -mtriple=i686-win32 -O0 | FileCheck %s ; IR simplified from the following C++ snippet compiled for i686-windows-msvc: diff --git a/llvm/test/CodeGen/X86/musttail-thiscall.ll b/llvm/test/CodeGen/X86/musttail-thiscall.ll index 454c66c..a1ddbd5 100644 --- a/llvm/test/CodeGen/X86/musttail-thiscall.ll +++ b/llvm/test/CodeGen/X86/musttail-thiscall.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=i686-- < %s | FileCheck %s -; RUN: llc -mtriple=i686-- -O0 < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=i686-- < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=i686-- -O0 < %s | FileCheck %s ; CHECK-LABEL: t1: ; CHECK: jmp {{_?}}t1_callee diff --git a/llvm/test/CodeGen/X86/musttail-varargs.ll b/llvm/test/CodeGen/X86/musttail-varargs.ll index 080e5e5..6a338c5c 100644 --- a/llvm/test/CodeGen/X86/musttail-varargs.ll +++ b/llvm/test/CodeGen/X86/musttail-varargs.ll @@ -83,7 +83,6 @@ define void @f_thunk(i8* %this, ...) { ; LINUX-NEXT: movq %rbp, %rdx ; LINUX-NEXT: movq %r13, %rcx ; LINUX-NEXT: movq %r12, %r8 -; LINUX-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; LINUX-NEXT: movq %r15, %r9 ; LINUX-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload ; LINUX-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload @@ -93,6 +92,7 @@ define void @f_thunk(i8* %this, ...) { ; LINUX-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm5 # 16-byte Reload ; LINUX-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Reload ; LINUX-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload +; LINUX-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; LINUX-NEXT: addq $360, %rsp # imm = 0x168 ; LINUX-NEXT: .cfi_def_cfa_offset 56 ; LINUX-NEXT: popq %rbx @@ -177,7 +177,6 @@ define void @f_thunk(i8* %this, ...) { ; LINUX-X32-NEXT: movq %rbp, %rdx ; LINUX-X32-NEXT: movq %r13, %rcx ; LINUX-X32-NEXT: movq %r12, %r8 -; LINUX-X32-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload ; LINUX-X32-NEXT: movq %r15, %r9 ; LINUX-X32-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload ; LINUX-X32-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload @@ -187,6 +186,7 @@ define void @f_thunk(i8* %this, ...) { ; LINUX-X32-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm5 # 16-byte Reload ; LINUX-X32-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm6 # 16-byte Reload ; LINUX-X32-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm7 # 16-byte Reload +; LINUX-X32-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload ; LINUX-X32-NEXT: addl $344, %esp # imm = 0x158 ; LINUX-X32-NEXT: .cfi_def_cfa_offset 56 ; LINUX-X32-NEXT: popq %rbx diff --git a/llvm/test/CodeGen/X86/sibcall-2.ll b/llvm/test/CodeGen/X86/sibcall-2.ll index 1b9d2db..6ed7b5a 100644 --- a/llvm/test/CodeGen/X86/sibcall-2.ll +++ b/llvm/test/CodeGen/X86/sibcall-2.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin -disable-fp-elim | FileCheck %s -check-prefix=32 -; RUN: llc < %s -mtriple=x86_64-apple-darwin -disable-fp-elim | FileCheck %s -check-prefix=64 +; RUN: llc -verify-machineinstrs < %s -mtriple=i386-apple-darwin -disable-fp-elim | FileCheck %s -check-prefix=32 +; RUN: llc -verify-machineinstrs < %s -mtriple=x86_64-apple-darwin -disable-fp-elim | FileCheck %s -check-prefix=64 ; Tail call should not use ebp / rbp after it's popped. Use esp / rsp. diff --git a/llvm/test/CodeGen/X86/sibcall.ll b/llvm/test/CodeGen/X86/sibcall.ll index 784b10b..2b4af2e 100644 --- a/llvm/test/CodeGen/X86/sibcall.ll +++ b/llvm/test/CodeGen/X86/sibcall.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-linux -mcpu=core2 -mattr=+sse2 | FileCheck %s --check-prefix=X86 -; RUN: llc < %s -mtriple=x86_64-linux -mcpu=core2 -mattr=+sse2 | FileCheck %s --check-prefix=X64 -; RUN: llc < %s -mtriple=x86_64-linux-gnux32 -mcpu=core2 -mattr=+sse2 | FileCheck %s --check-prefix=X32 +; RUN: llc -verify-machineinstrs < %s -mtriple=i686-linux -mcpu=core2 -mattr=+sse2 | FileCheck %s --check-prefix=X86 +; RUN: llc -verify-machineinstrs < %s -mtriple=x86_64-linux -mcpu=core2 -mattr=+sse2 | FileCheck %s --check-prefix=X64 +; RUN: llc -verify-machineinstrs < %s -mtriple=x86_64-linux-gnux32 -mcpu=core2 -mattr=+sse2 | FileCheck %s --check-prefix=X32 define void @t1(i32 %x) nounwind ssp { ; X86-LABEL: t1: @@ -101,41 +101,62 @@ define void @t5(void ()* nocapture %x) nounwind ssp { ret void } +; Basically the same test as t5, except pass the function pointer on the stack +; for x86_64. + +define void @t5_x64(i32, i32, i32, i32, i32, i32, void ()* nocapture %x) nounwind ssp { +; X86-LABEL: t5_x64: +; X86: # %bb.0: +; X86-NEXT: jmpl *{{[0-9]+}}(%esp) # TAILCALL +; +; X64-LABEL: t5_x64: +; X64: # %bb.0: +; X64-NEXT: jmpq *{{[0-9]+}}(%rsp) # TAILCALL +; +; X32-LABEL: t5_x64: +; X32: # %bb.0: +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: jmpq *%rax # TAILCALL + tail call void %x() nounwind + ret void +} + + define i32 @t6(i32 %x) nounwind ssp { ; X86-LABEL: t6: ; X86: # %bb.0: ; X86-NEXT: subl $12, %esp ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: cmpl $9, %eax -; X86-NEXT: jg .LBB5_2 +; X86-NEXT: jg .LBB6_2 ; X86-NEXT: # %bb.1: # %bb ; X86-NEXT: decl %eax ; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: calll t6 ; X86-NEXT: addl $12, %esp ; X86-NEXT: retl -; X86-NEXT: .LBB5_2: # %bb1 +; X86-NEXT: .LBB6_2: # %bb1 ; X86-NEXT: addl $12, %esp ; X86-NEXT: jmp bar # TAILCALL ; ; X64-LABEL: t6: ; X64: # %bb.0: ; X64-NEXT: cmpl $9, %edi -; X64-NEXT: jg .LBB5_2 +; X64-NEXT: jg .LBB6_2 ; X64-NEXT: # %bb.1: # %bb ; X64-NEXT: decl %edi ; X64-NEXT: jmp t6 # TAILCALL -; X64-NEXT: .LBB5_2: # %bb1 +; X64-NEXT: .LBB6_2: # %bb1 ; X64-NEXT: jmp bar # TAILCALL ; ; X32-LABEL: t6: ; X32: # %bb.0: ; X32-NEXT: cmpl $9, %edi -; X32-NEXT: jg .LBB5_2 +; X32-NEXT: jg .LBB6_2 ; X32-NEXT: # %bb.1: # %bb ; X32-NEXT: decl %edi ; X32-NEXT: jmp t6 # TAILCALL -; X32-NEXT: .LBB5_2: # %bb1 +; X32-NEXT: .LBB6_2: # %bb1 ; X32-NEXT: jmp bar # TAILCALL %t0 = icmp slt i32 %x, 10 br i1 %t0, label %bb, label %bb1 @@ -245,30 +266,30 @@ define i32 @t11(i32 %x, i32 %y, i32 %z.0, i32 %z.1, i32 %z.2) nounwind ssp { ; X86: # %bb.0: # %entry ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: testl %eax, %eax -; X86-NEXT: je .LBB10_1 +; X86-NEXT: je .LBB11_1 ; X86-NEXT: # %bb.2: # %bb ; X86-NEXT: jmp foo5 # TAILCALL -; X86-NEXT: .LBB10_1: # %bb6 +; X86-NEXT: .LBB11_1: # %bb6 ; X86-NEXT: xorl %eax, %eax ; X86-NEXT: retl ; ; X64-LABEL: t11: ; X64: # %bb.0: # %entry ; X64-NEXT: testl %edi, %edi -; X64-NEXT: je .LBB10_1 +; X64-NEXT: je .LBB11_1 ; X64-NEXT: # %bb.2: # %bb ; X64-NEXT: jmp foo5 # TAILCALL -; X64-NEXT: .LBB10_1: # %bb6 +; X64-NEXT: .LBB11_1: # %bb6 ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: retq ; ; X32-LABEL: t11: ; X32: # %bb.0: # %entry ; X32-NEXT: testl %edi, %edi -; X32-NEXT: je .LBB10_1 +; X32-NEXT: je .LBB11_1 ; X32-NEXT: # %bb.2: # %bb ; X32-NEXT: jmp foo5 # TAILCALL -; X32-NEXT: .LBB10_1: # %bb6 +; X32-NEXT: .LBB11_1: # %bb6 ; X32-NEXT: xorl %eax, %eax ; X32-NEXT: retq entry: @@ -292,30 +313,30 @@ define i32 @t12(i32 %x, i32 %y, %struct.t* byval align 4 %z) nounwind ssp { ; X86: # %bb.0: # %entry ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: testl %eax, %eax -; X86-NEXT: je .LBB11_1 +; X86-NEXT: je .LBB12_1 ; X86-NEXT: # %bb.2: # %bb ; X86-NEXT: jmp foo6 # TAILCALL -; X86-NEXT: .LBB11_1: # %bb2 +; X86-NEXT: .LBB12_1: # %bb2 ; X86-NEXT: xorl %eax, %eax ; X86-NEXT: retl ; ; X64-LABEL: t12: ; X64: # %bb.0: # %entry ; X64-NEXT: testl %edi, %edi -; X64-NEXT: je .LBB11_1 +; X64-NEXT: je .LBB12_1 ; X64-NEXT: # %bb.2: # %bb ; X64-NEXT: jmp foo6 # TAILCALL -; X64-NEXT: .LBB11_1: # %bb2 +; X64-NEXT: .LBB12_1: # %bb2 ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: retq ; ; X32-LABEL: t12: ; X32: # %bb.0: # %entry ; X32-NEXT: testl %edi, %edi -; X32-NEXT: je .LBB11_1 +; X32-NEXT: je .LBB12_1 ; X32-NEXT: # %bb.2: # %bb ; X32-NEXT: jmp foo6 # TAILCALL -; X32-NEXT: .LBB11_1: # %bb2 +; X32-NEXT: .LBB12_1: # %bb2 ; X32-NEXT: xorl %eax, %eax ; X32-NEXT: retq entry: -- 2.7.4