From b76ccacfb14439a587b028704fadfbf4a6044b80 Mon Sep 17 00:00:00 2001
From: Yunzhong Gao
Date: Tue, 2 Feb 2016 21:39:23 +0000
Subject: [PATCH] Disable the vzeroupper insertion pass on PS4.

See comments in test/CodeGen/X86/avx-vzeroupper.ll for more explanation.

Original patch by: Sean Silva

llvm-svn: 259576
---
 llvm/lib/Target/X86/X86TargetMachine.cpp | 3 +++
 llvm/test/CodeGen/X86/avx-vzeroupper.ll  | 9 +++++++++
 2 files changed, 12 insertions(+)

diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index 0e7e4c0..fac3a45 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -270,6 +270,9 @@ void X86PassConfig::addPreEmitPass() {
   if (getOptLevel() != CodeGenOpt::None)
     addPass(createExecutionDependencyFixPass(&X86::VR128RegClass));
 
+  if (TM->getTargetTriple().isPS4CPU())
+    UseVZeroUpper = false;
+
   if (UseVZeroUpper)
     addPass(createX86IssueVZeroUpperPass());
 
diff --git a/llvm/test/CodeGen/X86/avx-vzeroupper.ll b/llvm/test/CodeGen/X86/avx-vzeroupper.ll
index a16dc70..976c9a8 100644
--- a/llvm/test/CodeGen/X86/avx-vzeroupper.ll
+++ b/llvm/test/CodeGen/X86/avx-vzeroupper.ll
@@ -1,4 +1,13 @@
 ; RUN: llc < %s -x86-use-vzeroupper -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-scei-ps4 -mattr=+avx | FileCheck --check-prefix=PS4 %s
+
+; The Jaguar (AMD Family 16h) cores in the PS4 don't benefit from vzeroupper.
+; At most, the benefit is "garbage collecting" def'd upper parts of the ymm
+; registers, but the core has so many FP phys regs that this benefit of freeing
+; up the upper parts is for now not worth it. Unlike Intel, there is no
+; performance hazard to def'ing the lower parts of a ymm without clearing the
+; upper part.
+; PS4-NOT: vzeroupper
 
 declare i32 @foo()
 declare <4 x float> @do_sse(<4 x float>)
-- 
2.7.4