From b34eef7b4139f6bee2f5b5c1b740bf2ad51627d4 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 6 Dec 2016 08:08:12 +0000 Subject: [PATCH] [X86] Remove another weird scalar sqrt/rcp/rsqrt pattern. This pattern turned a vector sqrt/rcp/rsqrt operation of sse_load_f32/f64 into the scalar instruction for the operation and put undef into the upper bits. For correctness, the resulting code should still perform the sqrt/rcp/rsqrt on the upper bits after the load is extended since that's what the operation asked for. In particular, when the upper bits are 0, we need to calculate the sqrt/rcp/rsqrt of the zeroes and keep the result in the upper bits. This implies we should be using the packed instruction still. The only test case for this pattern is one I just added so there was no coverage of this. llvm-svn: 288784 --- llvm/lib/Target/X86/X86InstrSSE.td | 6 ------ llvm/test/CodeGen/X86/avx-arith.ll | 3 ++- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 1a8e001..3adcf9a 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -3414,9 +3414,6 @@ multiclass sse_fp_unop_s opc, string OpcodeStr, RegisterClass RC, } let Predicates = [target] in { - def : Pat<(vt (OpNode mem_cpat:$src)), - (vt (COPY_TO_REGCLASS (vt (!cast(NAME#Suffix##m_Int) - (vt (IMPLICIT_DEF)), mem_cpat:$src)), RC))>; // These are unary operations, but they are modeled as having 2 source operands // because the high elements of the destination are unchanged in SSE. 
def : Pat<(Intr VR128:$src), @@ -3490,9 +3487,6 @@ multiclass avx_fp_unop_s opc, string OpcodeStr, RegisterClass RC, def : Pat<(ScalarVT (OpNode (load addr:$src))), (!cast("V"#NAME#Suffix##m) (ScalarVT (IMPLICIT_DEF)), addr:$src)>; - def : Pat<(vt (OpNode mem_cpat:$src)), - (!cast("V"#NAME#Suffix##m_Int) (vt (IMPLICIT_DEF)), - mem_cpat:$src)>; } } diff --git a/llvm/test/CodeGen/X86/avx-arith.ll b/llvm/test/CodeGen/X86/avx-arith.ll index 0f3b0c9..66c09e0 100644 --- a/llvm/test/CodeGen/X86/avx-arith.ll +++ b/llvm/test/CodeGen/X86/avx-arith.ll @@ -364,7 +364,8 @@ define <4 x float> @int_sqrt_ss() { define <2 x double> @vector_sqrt_scalar_load(double* %a0) optsize { ; CHECK-LABEL: vector_sqrt_scalar_load: ; CHECK: ## BB#0: -; CHECK-NEXT: vsqrtsd (%rdi), %xmm0, %xmm0 +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vsqrtpd %xmm0, %xmm0 ; CHECK-NEXT: retq %a1 = load double, double* %a0 %a2 = insertelement <2 x double> undef, double %a1, i32 0 -- 2.7.4