From 5750c1c07ef350f0901c1e46000481d28787743f Mon Sep 17 00:00:00 2001 From: Manman Ren Date: Fri, 26 Oct 2012 00:25:10 +0000 Subject: [PATCH] X86 SSE Intrinsics: update header for sqrt_ss, rsqrt_ss and rcp_ss. These intrinsics pass through the upper FP values from the input. rdar://12558838 llvm-svn: 166743 --- clang/lib/Headers/xmmintrin.h | 9 ++++++--- clang/test/CodeGen/sse-builtins.c | 31 +++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h index e616157b047c..e2480ec7a0e3 100644 --- a/clang/lib/Headers/xmmintrin.h +++ b/clang/lib/Headers/xmmintrin.h @@ -95,7 +95,8 @@ _mm_div_ps(__m128 a, __m128 b) static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_sqrt_ss(__m128 a) { - return __builtin_ia32_sqrtss(a); + __m128 c = __builtin_ia32_sqrtss(a); + return (__m128) { c[0], a[1], a[2], a[3] }; } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) @@ -107,7 +108,8 @@ _mm_sqrt_ps(__m128 a) static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_rcp_ss(__m128 a) { - return __builtin_ia32_rcpss(a); + __m128 c = __builtin_ia32_rcpss(a); + return (__m128) { c[0], a[1], a[2], a[3] }; } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) @@ -119,7 +121,8 @@ _mm_rcp_ps(__m128 a) static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) _mm_rsqrt_ss(__m128 a) { - return __builtin_ia32_rsqrtss(a); + __m128 c = __builtin_ia32_rsqrtss(a); + return (__m128) { c[0], a[1], a[2], a[3] }; } static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) diff --git a/clang/test/CodeGen/sse-builtins.c b/clang/test/CodeGen/sse-builtins.c index 0e48560b0869..400209fca291 100644 --- a/clang/test/CodeGen/sse-builtins.c +++ b/clang/test/CodeGen/sse-builtins.c @@ -1,8 +1,39 @@ // RUN: %clang_cc1 -ffreestanding -triple i386-apple-darwin9 -target-cpu pentium4 -target-feature +sse4.1 
-g -emit-llvm %s -o - | FileCheck %s +#include <xmmintrin.h> #include <emmintrin.h> #include <smmintrin.h> +__m128 test_rsqrt_ss(__m128 x) { + // CHECK: define {{.*}} @test_rsqrt_ss + // CHECK: call <4 x float> @llvm.x86.sse.rsqrt.ss + // CHECK: extractelement <4 x float> {{.*}}, i32 0 + // CHECK: extractelement <4 x float> {{.*}}, i32 1 + // CHECK: extractelement <4 x float> {{.*}}, i32 2 + // CHECK: extractelement <4 x float> {{.*}}, i32 3 + return _mm_rsqrt_ss(x); +} + +__m128 test_rcp_ss(__m128 x) { + // CHECK: define {{.*}} @test_rcp_ss + // CHECK: call <4 x float> @llvm.x86.sse.rcp.ss + // CHECK: extractelement <4 x float> {{.*}}, i32 0 + // CHECK: extractelement <4 x float> {{.*}}, i32 1 + // CHECK: extractelement <4 x float> {{.*}}, i32 2 + // CHECK: extractelement <4 x float> {{.*}}, i32 3 + return _mm_rcp_ss(x); +} + +__m128 test_sqrt_ss(__m128 x) { + // CHECK: define {{.*}} @test_sqrt_ss + // CHECK: call <4 x float> @llvm.x86.sse.sqrt.ss + // CHECK: extractelement <4 x float> {{.*}}, i32 0 + // CHECK: extractelement <4 x float> {{.*}}, i32 1 + // CHECK: extractelement <4 x float> {{.*}}, i32 2 + // CHECK: extractelement <4 x float> {{.*}}, i32 3 + return _mm_sqrt_ss(x); +} + __m128 test_loadl_pi(__m128 x, void* y) { // CHECK: define {{.*}} @test_loadl_pi // CHECK: load <2 x float>* {{.*}}, align 1{{$}} -- 2.34.1