From 09d5d1533910aee67eff44b5d407b76ccdb4a048 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 12 Aug 2019 17:43:26 +0000 Subject: [PATCH] [X86] Disable use of zmm registers for varargs musttail calls under prefer-vector-width=256 and min-legal-vector-width=256. Under this config, the v16f32 type we try to use isn't mapped to a register class so the getRegClassFor call will fail. llvm-svn: 368594 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 2 +- llvm/test/CodeGen/X86/musttail-fastcall.ll | 90 +++++++++++++++++++++++++++++- 2 files changed, 90 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 91091d9..54d2ac3 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -3348,7 +3348,7 @@ SDValue X86TargetLowering::LowerFormalArguments( // Find the largest legal vector type. MVT VecVT = MVT::Other; // FIXME: Only some x86_32 calling conventions support AVX512. - if (Subtarget.hasAVX512() && + if (Subtarget.useAVX512Regs() && (Is64Bit || (CallConv == CallingConv::X86_VectorCall || CallConv == CallingConv::Intel_OCL_BI))) VecVT = MVT::v16f32; diff --git a/llvm/test/CodeGen/X86/musttail-fastcall.ll b/llvm/test/CodeGen/X86/musttail-fastcall.ll index a95e0ff..22a2ec6 100644 --- a/llvm/test/CodeGen/X86/musttail-fastcall.ll +++ b/llvm/test/CodeGen/X86/musttail-fastcall.ll @@ -1,6 +1,7 @@ ; RUN: llc < %s -mtriple=i686-pc-win32 -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE2 ; RUN: llc < %s -mtriple=i686-pc-win32 -mattr=+sse2,+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX -; RUN: llc < %s -mtriple=i686-pc-win32 -mattr=+sse2,+avx,+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 +; RUN: llc < %s -mtriple=i686-pc-win32 -mattr=+sse2,+avx,+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F +; RUN: llc < %s -mtriple=i686-pc-win32 -mattr=+sse2,+avx,+avx512vl | FileCheck %s 
--check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512VL ; While we don't support varargs with fastcall, we do support forwarding. @@ -107,3 +108,90 @@ define x86_vectorcallcc i32 @vector_target(i32 inreg %a, i32 inreg %b, i32 %c) { %a1 = add i32 %a0, %c ret i32 %a1 } + +; Repeat the test for vectorcall, which has XMM registers. + +define i32 @call_vector_thunk_prefer256() "min-legal-vector-width"="256" "prefer-vector-width"="256" { + %r = call x86_vectorcallcc i32 (...) @vector_thunk_prefer256(i32 inreg 1, i32 inreg 2, i32 3) + ret i32 %r +} + +define x86_vectorcallcc i32 @vector_thunk_prefer256(...) "min-legal-vector-width"="256" "prefer-vector-width"="256" { + call void @puts(i8* getelementptr ([4 x i8], [4 x i8]* @asdf, i32 0, i32 0)) + %r = musttail call x86_vectorcallcc i32 (...) bitcast (i32 (i32, i32, i32)* @vector_target_prefer256 to i32 (...)*) (...) + ret i32 %r +} + +; Check that we spill and fill SSE registers around the call to puts. + +; CHECK-LABEL: vector_thunk_prefer256@@0: +; CHECK-DAG: movl %ecx, {{.*}} +; CHECK-DAG: movl %edx, {{.*}} + +; SSE2-DAG: movups %xmm0, {{.*}} +; SSE2-DAG: movups %xmm1, {{.*}} +; SSE2-DAG: movups %xmm2, {{.*}} +; SSE2-DAG: movups %xmm3, {{.*}} +; SSE2-DAG: movups %xmm4, {{.*}} +; SSE2-DAG: movups %xmm5, {{.*}} + +; AVX-DAG: vmovups %ymm0, {{.*}} +; AVX-DAG: vmovups %ymm1, {{.*}} +; AVX-DAG: vmovups %ymm2, {{.*}} +; AVX-DAG: vmovups %ymm3, {{.*}} +; AVX-DAG: vmovups %ymm4, {{.*}} +; AVX-DAG: vmovups %ymm5, {{.*}} + +; AVX512F-DAG: vmovups %zmm0, {{.*}} +; AVX512F-DAG: vmovups %zmm1, {{.*}} +; AVX512F-DAG: vmovups %zmm2, {{.*}} +; AVX512F-DAG: vmovups %zmm3, {{.*}} +; AVX512F-DAG: vmovups %zmm4, {{.*}} +; AVX512F-DAG: vmovups %zmm5, {{.*}} + +; AVX512VL-DAG: vmovups %ymm0, {{.*}} +; AVX512VL-DAG: vmovups %ymm1, {{.*}} +; AVX512VL-DAG: vmovups %ymm2, {{.*}} +; AVX512VL-DAG: vmovups %ymm3, {{.*}} +; AVX512VL-DAG: vmovups %ymm4, {{.*}} +; AVX512VL-DAG: vmovups %ymm5, {{.*}} + +; CHECK: calll _puts + +; SSE2-DAG: 
movups {{.*}}, %xmm0 +; SSE2-DAG: movups {{.*}}, %xmm1 +; SSE2-DAG: movups {{.*}}, %xmm2 +; SSE2-DAG: movups {{.*}}, %xmm3 +; SSE2-DAG: movups {{.*}}, %xmm4 +; SSE2-DAG: movups {{.*}}, %xmm5 + +; AVX-DAG: vmovups {{.*}}, %ymm0 +; AVX-DAG: vmovups {{.*}}, %ymm1 +; AVX-DAG: vmovups {{.*}}, %ymm2 +; AVX-DAG: vmovups {{.*}}, %ymm3 +; AVX-DAG: vmovups {{.*}}, %ymm4 +; AVX-DAG: vmovups {{.*}}, %ymm5 + +; AVX512F-DAG: vmovups {{.*}}, %zmm0 +; AVX512F-DAG: vmovups {{.*}}, %zmm1 +; AVX512F-DAG: vmovups {{.*}}, %zmm2 +; AVX512F-DAG: vmovups {{.*}}, %zmm3 +; AVX512F-DAG: vmovups {{.*}}, %zmm4 +; AVX512F-DAG: vmovups {{.*}}, %zmm5 + +; AVX512VL-DAG: vmovups {{.*}}, %ymm0 +; AVX512VL-DAG: vmovups {{.*}}, %ymm1 +; AVX512VL-DAG: vmovups {{.*}}, %ymm2 +; AVX512VL-DAG: vmovups {{.*}}, %ymm3 +; AVX512VL-DAG: vmovups {{.*}}, %ymm4 +; AVX512VL-DAG: vmovups {{.*}}, %ymm5 + +; CHECK-DAG: movl {{.*}}, %ecx +; CHECK-DAG: movl {{.*}}, %edx +; CHECK: jmp vector_target_prefer256@@12 + +define x86_vectorcallcc i32 @vector_target_prefer256(i32 inreg %a, i32 inreg %b, i32 %c) "min-legal-vector-width"="256" "prefer-vector-width"="256" { + %a0 = add i32 %a, %b + %a1 = add i32 %a0, %c + ret i32 %a1 +} -- 2.7.4