From 09b2c890af21a187591bf4caa2531569593116e0 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 22 Jun 2015 18:29:44 +0000 Subject: [PATCH] [x86] set default reciprocal (division and square root) codegen to match GCC D8982 ( checked in at http://reviews.llvm.org/rL239001 ) added command-line options to allow reciprocal estimate instructions to be used in place of divisions and square roots. This patch changes the default settings for x86 targets to allow that recip codegen (except for scalar division because that breaks too much code) when using -ffast-math or its equivalent. This matches GCC behavior for this kind of codegen. Differential Revision: http://reviews.llvm.org/D10396 llvm-svn: 240310 --- llvm/lib/Target/X86/X86TargetMachine.cpp | 15 ++++--- llvm/test/CodeGen/X86/recip-fastmath.ll | 38 ++++++++--------- llvm/test/CodeGen/X86/sqrt-fastmath.ll | 70 ++++++++++++++++---------------- 3 files changed, 63 insertions(+), 60 deletions(-) diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp index 3d6eb4f..fb9cb4b 100644 --- a/llvm/lib/Target/X86/X86TargetMachine.cpp +++ b/llvm/lib/Target/X86/X86TargetMachine.cpp @@ -110,12 +110,15 @@ X86TargetMachine::X86TargetMachine(const Target &T, const Triple &TT, if (Subtarget.isTargetWin64()) this->Options.TrapUnreachable = true; - // TODO: By default, all reciprocal estimate operations are off because - // that matches the behavior before TargetRecip was added (except for btver2 - // which used subtarget features to enable this type of codegen). - // We should change this to match GCC behavior where everything but - // scalar division estimates are turned on by default with -ffast-math. - this->Options.Reciprocals.setDefaults("all", false, 1); + // By default (and when -ffast-math is on), enable estimate codegen for + // everything except scalar division. By default, use 1 refinement step for + // all operations. Defaults may be overridden by using command-line options. 
+ // Scalar division estimates are disabled because they break too much + // real-world code. These defaults match GCC behavior. + this->Options.Reciprocals.setDefaults("sqrtf", true, 1); + this->Options.Reciprocals.setDefaults("divf", false, 1); + this->Options.Reciprocals.setDefaults("vec-sqrtf", true, 1); + this->Options.Reciprocals.setDefaults("vec-divf", true, 1); initAsmInfo(); } diff --git a/llvm/test/CodeGen/X86/recip-fastmath.ll b/llvm/test/CodeGen/X86/recip-fastmath.ll index 7f1521a..8e02dad 100644 --- a/llvm/test/CodeGen/X86/recip-fastmath.ll +++ b/llvm/test/CodeGen/X86/recip-fastmath.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 -recip=!divf,!vec-divf | FileCheck %s --check-prefix=NORECIP ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx -recip=divf,vec-divf | FileCheck %s --check-prefix=RECIP ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx -recip=divf:2,vec-divf:2 | FileCheck %s --check-prefix=REFINE @@ -14,11 +14,11 @@ define float @reciprocal_estimate(float %x) #0 { %div = fdiv fast float 1.0, %x ret float %div -; CHECK-LABEL: reciprocal_estimate: -; CHECK: movss -; CHECK-NEXT: divss -; CHECK-NEXT: movaps -; CHECK-NEXT: retq +; NORECIP-LABEL: reciprocal_estimate: +; NORECIP: movss +; NORECIP-NEXT: divss +; NORECIP-NEXT: movaps +; NORECIP-NEXT: retq ; RECIP-LABEL: reciprocal_estimate: ; RECIP: vrcpss @@ -45,11 +45,11 @@ define <4 x float> @reciprocal_estimate_v4f32(<4 x float> %x) #0 { %div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x ret <4 x float> %div -; CHECK-LABEL: reciprocal_estimate_v4f32: -; CHECK: movaps -; CHECK-NEXT: divps -; CHECK-NEXT: movaps -; CHECK-NEXT: retq +; NORECIP-LABEL: reciprocal_estimate_v4f32: +; NORECIP: movaps +; NORECIP-NEXT: divps +; NORECIP-NEXT: movaps +; NORECIP-NEXT: retq ; RECIP-LABEL: reciprocal_estimate_v4f32: ; RECIP: vrcpps @@ -76,14 +76,14 @@ define <8 x float> @reciprocal_estimate_v8f32(<8 x float> 
%x) #0 { %div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x ret <8 x float> %div -; CHECK-LABEL: reciprocal_estimate_v8f32: -; CHECK: movaps -; CHECK: movaps -; CHECK-NEXT: divps -; CHECK-NEXT: divps -; CHECK-NEXT: movaps -; CHECK-NEXT: movaps -; CHECK-NEXT: retq +; NORECIP-LABEL: reciprocal_estimate_v8f32: +; NORECIP: movaps +; NORECIP: movaps +; NORECIP-NEXT: divps +; NORECIP-NEXT: divps +; NORECIP-NEXT: movaps +; NORECIP-NEXT: movaps +; NORECIP-NEXT: retq ; RECIP-LABEL: reciprocal_estimate_v8f32: ; RECIP: vrcpps diff --git a/llvm/test/CodeGen/X86/sqrt-fastmath.ll b/llvm/test/CodeGen/X86/sqrt-fastmath.ll index 373fa53..0f8d9f4 100644 --- a/llvm/test/CodeGen/X86/sqrt-fastmath.ll +++ b/llvm/test/CodeGen/X86/sqrt-fastmath.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 -recip=!sqrtf,!vec-sqrtf,!divf,!vec-divf | FileCheck %s --check-prefix=NORECIP ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx -recip=sqrtf,vec-sqrtf | FileCheck %s --check-prefix=ESTIMATE declare double @__sqrt_finite(double) #0 @@ -10,10 +10,10 @@ declare <8 x float> @llvm.sqrt.v8f32(<8 x float>) #0 define double @fd(double %d) #0 { -; CHECK-LABEL: fd: -; CHECK: # BB#0: -; CHECK-NEXT: sqrtsd %xmm0, %xmm0 -; CHECK-NEXT: retq +; NORECIP-LABEL: fd: +; NORECIP: # BB#0: +; NORECIP-NEXT: sqrtsd %xmm0, %xmm0 +; NORECIP-NEXT: retq ; ; ESTIMATE-LABEL: fd: ; ESTIMATE: # BB#0: @@ -25,10 +25,10 @@ define double @fd(double %d) #0 { define float @ff(float %f) #0 { -; CHECK-LABEL: ff: -; CHECK: # BB#0: -; CHECK-NEXT: sqrtss %xmm0, %xmm0 -; CHECK-NEXT: retq +; NORECIP-LABEL: ff: +; NORECIP: # BB#0: +; NORECIP-NEXT: sqrtss %xmm0, %xmm0 +; NORECIP-NEXT: retq ; ; ESTIMATE-LABEL: ff: ; ESTIMATE: # BB#0: @@ -49,11 +49,11 @@ define float @ff(float %f) #0 { define x86_fp80 @fld(x86_fp80 %ld) #0 { -; CHECK-LABEL: fld: -; CHECK: # BB#0: -; CHECK-NEXT: fldt {{[0-9]+}}(%rsp) -; CHECK-NEXT: fsqrt -; CHECK-NEXT: retq +; 
NORECIP-LABEL: fld: +; NORECIP: # BB#0: +; NORECIP-NEXT: fldt {{[0-9]+}}(%rsp) +; NORECIP-NEXT: fsqrt +; NORECIP-NEXT: retq ; ; ESTIMATE-LABEL: fld: ; ESTIMATE: # BB#0: @@ -67,12 +67,12 @@ define x86_fp80 @fld(x86_fp80 %ld) #0 { define float @reciprocal_square_root(float %x) #0 { -; CHECK-LABEL: reciprocal_square_root: -; CHECK: # BB#0: -; CHECK-NEXT: sqrtss %xmm0, %xmm1 -; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; CHECK-NEXT: divss %xmm1, %xmm0 -; CHECK-NEXT: retq +; NORECIP-LABEL: reciprocal_square_root: +; NORECIP: # BB#0: +; NORECIP-NEXT: sqrtss %xmm0, %xmm1 +; NORECIP-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; NORECIP-NEXT: divss %xmm1, %xmm0 +; NORECIP-NEXT: retq ; ; ESTIMATE-LABEL: reciprocal_square_root: ; ESTIMATE: # BB#0: @@ -89,12 +89,12 @@ define float @reciprocal_square_root(float %x) #0 { } define <4 x float> @reciprocal_square_root_v4f32(<4 x float> %x) #0 { -; CHECK-LABEL: reciprocal_square_root_v4f32: -; CHECK: # BB#0: -; CHECK-NEXT: sqrtps %xmm0, %xmm1 -; CHECK-NEXT: movaps {{.*#+}} xmm0 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] -; CHECK-NEXT: divps %xmm1, %xmm0 -; CHECK-NEXT: retq +; NORECIP-LABEL: reciprocal_square_root_v4f32: +; NORECIP: # BB#0: +; NORECIP-NEXT: sqrtps %xmm0, %xmm1 +; NORECIP-NEXT: movaps {{.*#+}} xmm0 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] +; NORECIP-NEXT: divps %xmm1, %xmm0 +; NORECIP-NEXT: retq ; ; ESTIMATE-LABEL: reciprocal_square_root_v4f32: ; ESTIMATE: # BB#0: @@ -111,15 +111,15 @@ define <4 x float> @reciprocal_square_root_v4f32(<4 x float> %x) #0 { } define <8 x float> @reciprocal_square_root_v8f32(<8 x float> %x) #0 { -; CHECK-LABEL: reciprocal_square_root_v8f32: -; CHECK: # BB#0: -; CHECK-NEXT: sqrtps %xmm1, %xmm2 -; CHECK-NEXT: sqrtps %xmm0, %xmm3 -; CHECK-NEXT: movaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] -; CHECK-NEXT: movaps %xmm1, %xmm0 -; CHECK-NEXT: divps %xmm3, %xmm0 -; CHECK-NEXT: divps %xmm2, %xmm1 -; CHECK-NEXT: retq 
+; NORECIP-LABEL: reciprocal_square_root_v8f32: +; NORECIP: # BB#0: +; NORECIP-NEXT: sqrtps %xmm1, %xmm2 +; NORECIP-NEXT: sqrtps %xmm0, %xmm3 +; NORECIP-NEXT: movaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] +; NORECIP-NEXT: movaps %xmm1, %xmm0 +; NORECIP-NEXT: divps %xmm3, %xmm0 +; NORECIP-NEXT: divps %xmm2, %xmm1 +; NORECIP-NEXT: retq ; ; ESTIMATE-LABEL: reciprocal_square_root_v8f32: ; ESTIMATE: # BB#0: -- 2.7.4