From f893edeaea990011df26afa0313d4dc16983e2df Mon Sep 17 00:00:00 2001
From: Adam Nemet
Date: Mon, 19 Jan 2015 20:12:05 +0000
Subject: [PATCH] [AVX512] Add sub-vector FP extracts

Analogous to AVX2, these need to be implemented as macros to properly
propagate the immediate index operand.

Part of llvm-svn: 226496
---
 clang/include/clang/Basic/BuiltinsX86.def |  2 ++
 clang/lib/Headers/avx512fintrin.h         | 24 ++++++++++++++++++++++++
 clang/test/CodeGen/avx512f-builtins.c     | 14 ++++++++++++++
 3 files changed, 40 insertions(+)

diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index 5a28aaa..9a6b685 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -912,6 +912,8 @@ BUILTIN(__builtin_ia32_vpermt2varps512_mask, "V16fV16iV16fV16fUs", "")
 BUILTIN(__builtin_ia32_vpermt2varpd512_mask, "V8dV8LLiV8dV8dUc", "")
 BUILTIN(__builtin_ia32_alignq512_mask, "V8LLiV8LLiV8LLiUcV8LLiUc", "")
 BUILTIN(__builtin_ia32_alignd512_mask, "V16iV16iV16iUcV16iUc", "")
+BUILTIN(__builtin_ia32_extractf64x4_mask, "V4dV8dIcV4dUc", "")
+BUILTIN(__builtin_ia32_extractf32x4_mask, "V4fV16fIcV4fUc", "")
 BUILTIN(__builtin_ia32_gathersiv8df, "V8dV8dvC*V8iUciC", "")
 BUILTIN(__builtin_ia32_gathersiv16sf, "V16fV16fvC*UsiC", "")
 BUILTIN(__builtin_ia32_gatherdiv8df, "V8dV8dvC*V8LLiUciC", "")
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index e6f128d..eda1b5c 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -633,6 +633,30 @@ _mm512_valign_epi32(__m512i __A, __m512i __B, const int __I)
                (__mmask16) -1);
 }
 
+/* Vector Extract */
+
+/* These are macros, not inline functions, so that the index I reaches the
+ * builtin as an immediate.  A is cast in place instead of being copied into
+ * a local: a temporary named __A would be initialized from itself whenever
+ * the caller's argument is also spelled __A.  Both macros pass an all-ones
+ * mask together with a zero vector as the builtin's remaining operand. */
+
+/* Extract four doubles from A as a __m256d; I selects the sub-vector. */
+#define _mm512_extractf64x4_pd(A, I) __extension__ ({ \
+  (__m256d) \
+  __builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), \
+                                   (I), \
+                                   (__v4df)_mm256_setzero_pd(), \
+                                   (__mmask8) -1); })
+
+/* Extract four floats from A as a __m128; I selects the sub-vector. */
+#define _mm512_extractf32x4_ps(A, I) __extension__ ({ \
+  (__m128) \
+  __builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), \
+                                   (I), \
+                                   (__v4sf)_mm_setzero_ps(), \
+                                   (__mmask8) -1); })
+
 /* Vector Blend */
 
 static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
diff --git a/clang/test/CodeGen/avx512f-builtins.c b/clang/test/CodeGen/avx512f-builtins.c
index 5caf314..04cf3cc 100644
--- a/clang/test/CodeGen/avx512f-builtins.c
+++ b/clang/test/CodeGen/avx512f-builtins.c
@@ -264,3 +264,17 @@ __mmask8 test_mm512_cmp_pd_mask(__m512 __a, __m512 __b) {
   // CHECK: @llvm.x86.avx512.mask.cmp.pd.512
   return _mm512_cmp_pd_mask(__a, __b, 0);
 }
+
+__m256d test_mm512_extractf64x4_pd(__m512d a)
+{
+  // CHECK-LABEL: @test_mm512_extractf64x4_pd
+  // CHECK: @llvm.x86.avx512.mask.vextractf64x4.512
+  return _mm512_extractf64x4_pd(a, 1);
+}
+
+__m128 test_mm512_extractf32x4_ps(__m512 a)
+{
+  // CHECK-LABEL: @test_mm512_extractf32x4_ps
+  // CHECK: @llvm.x86.avx512.mask.vextractf32x4.512
+  return _mm512_extractf32x4_ps(a, 1);
+}
-- 
2.7.4