From: Craig Topper Date: Sun, 13 May 2018 23:03:30 +0000 (+0000) Subject: [X86] Emit better code for _mm_cvtu32_sd, _mm_cvtu64_sd, _mm_cvtu32_ss, and _mm_cvtu64_ss. X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=6fa91254e463f333ce102e16b27b3b42a6108d5a;p=platform%2Fupstream%2Fllvm.git [X86] Emit better code for _mm_cvtu32_sd, _mm_cvtu64_sd, _mm_cvtu32_ss, and _mm_cvtu64_ss. We can use direct C code for these that will use uitofp and insertelement instructions. For the versions that take an explicit rounding mode we can't do this. llvm-svn: 332203 --- diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def index 72d5f17..c4cb5ae 100644 --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -1840,7 +1840,6 @@ TARGET_BUILTIN(__builtin_ia32_cvtw2mask256, "UsV16s", "nc", "avx512bw,avx512vl") TARGET_BUILTIN(__builtin_ia32_cvtsd2ss_round_mask, "V4fV4fV2dV4fUcIi", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_cvtsi2ss32, "V4fV4fiIi", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_cvtss2sd_round_mask, "V2dV2dV4fV2dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cvtusi2sd32, "V2dV2dUi", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_cvtusi2ss32, "V4fV4fUiIi", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb512_mask, "V64cV64cV64cV64cULLi", "nc", "avx512vbmi") TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb128_mask, "V16cV16cV16cV16cUs", "nc", "avx512vbmi,avx512vl") diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 1d5cc35..7592bc0 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -9521,7 +9521,8 @@ _mm_maskz_cvtss_sd (__mmask8 __U, __m128d __A, __m128 __B) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtu32_sd (__m128d __A, unsigned __B) { - return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B); + __A[0] = __B; + return __A; } #ifdef __x86_64__ @@ -9532,8 +9533,8 @@ 
_mm_cvtu32_sd (__m128d __A, unsigned __B) static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtu64_sd (__m128d __A, unsigned long long __B) { - return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, - _MM_FROUND_CUR_DIRECTION); + __A[0] = __B; + return __A; } #endif @@ -9544,8 +9545,8 @@ _mm_cvtu64_sd (__m128d __A, unsigned long long __B) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtu32_ss (__m128 __A, unsigned __B) { - return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, - _MM_FROUND_CUR_DIRECTION); + __A[0] = __B; + return __A; } #ifdef __x86_64__ @@ -9556,8 +9557,8 @@ _mm_cvtu32_ss (__m128 __A, unsigned __B) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtu64_ss (__m128 __A, unsigned long long __B) { - return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, - _MM_FROUND_CUR_DIRECTION); + __A[0] = __B; + return __A; } #endif diff --git a/clang/test/CodeGen/avx512f-builtins.c b/clang/test/CodeGen/avx512f-builtins.c index 4699335d..c97a42f 100644 --- a/clang/test/CodeGen/avx512f-builtins.c +++ b/clang/test/CodeGen/avx512f-builtins.c @@ -7007,7 +7007,8 @@ __m128d test_mm_maskz_cvt_roundss_sd( __mmask8 __U, __m128d __A, __m128 __B) { __m128d test_mm_cvtu32_sd(__m128d __A, unsigned __B) { // CHECK-LABEL: @test_mm_cvtu32_sd - // CHECK: @llvm.x86.avx512.cvtusi2sd + // CHECK: uitofp i32 %{{.*}} to double + // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 return _mm_cvtu32_sd(__A, __B); } @@ -7020,7 +7021,8 @@ __m128d test_mm_cvt_roundu64_sd(__m128d __A, unsigned long long __B) { __m128d test_mm_cvtu64_sd(__m128d __A, unsigned long long __B) { // CHECK-LABEL: @test_mm_cvtu64_sd - // CHECK: @llvm.x86.avx512.cvtusi642sd + // CHECK: uitofp i64 %{{.*}} to double + // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 return _mm_cvtu64_sd(__A, __B); } #endif @@ -7033,7 +7035,8 @@ __m128 test_mm_cvt_roundu32_ss(__m128 __A, unsigned __B) { __m128 test_mm_cvtu32_ss(__m128 __A, unsigned __B) { // CHECK-LABEL: 
@test_mm_cvtu32_ss - // CHECK: @llvm.x86.avx512.cvtusi2ss + // CHECK: uitofp i32 %{{.*}} to float + // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0 return _mm_cvtu32_ss(__A, __B); } @@ -7046,7 +7049,8 @@ __m128 test_mm_cvt_roundu64_ss(__m128 __A, unsigned long long __B) { __m128 test_mm_cvtu64_ss(__m128 __A, unsigned long long __B) { // CHECK-LABEL: @test_mm_cvtu64_ss - // CHECK: @llvm.x86.avx512.cvtusi642ss + // CHECK: uitofp i64 %{{.*}} to float + // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0 return _mm_cvtu64_ss(__A, __B); } #endif