From 96d02f550397c46278924dd1081b5b33da270a05 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 29 Apr 2017 17:17:06 +0000 Subject: [PATCH] [X86][AVX] Added support for _mm256_zext* helper intrinsics (PR32839) llvm-svn: 301749 --- clang/lib/Headers/avx512fintrin.h | 110 ++++++++++++++++++++++++++++++++++ clang/lib/Headers/avxintrin.h | 55 +++++++++++++++++ clang/test/CodeGen/avx-builtins.c | 21 +++++++ clang/test/CodeGen/avx512f-builtins.c | 41 +++++++++++++ 4 files changed, 227 insertions(+) diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index d8535f7..b556d04 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -528,6 +528,116 @@ _mm512_mask2int(__mmask16 __a) return (int)__a; } +/// \brief Constructs a 512-bit floating-point vector of [8 x double] from a +/// 128-bit floating-point vector of [2 x double]. The lower 128 bits +/// contain the value of the source vector. The upper 384 bits are set +/// to zero. +/// +/// \headerfile +/// +/// This intrinsic has no corresponding instruction. +/// +/// \param __a +/// A 128-bit vector of [2 x double]. +/// \returns A 512-bit floating-point vector of [8 x double]. The lower 128 bits +/// contain the value of the parameter. The upper 384 bits are set to zero. +static __inline __m512d __DEFAULT_FN_ATTRS +_mm512_zextpd128_pd512(__m128d __a) +{ + return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3, 2, 3, 2, 3); +} + +/// \brief Constructs a 512-bit floating-point vector of [8 x double] from a +/// 256-bit floating-point vector of [4 x double]. The lower 256 bits +/// contain the value of the source vector. The upper 256 bits are set +/// to zero. +/// +/// \headerfile +/// +/// This intrinsic has no corresponding instruction. +/// +/// \param __a +/// A 256-bit vector of [4 x double]. +/// \returns A 512-bit floating-point vector of [8 x double]. The lower 256 bits +/// contain the value of the parameter. The upper 256 bits are set to zero. +static __inline __m512d __DEFAULT_FN_ATTRS +_mm512_zextpd256_pd512(__m256d __a) +{ + return __builtin_shufflevector((__v4df)__a, (__v4df)_mm256_setzero_pd(), 0, 1, 2, 3, 4, 5, 6, 7); +} + +/// \brief Constructs a 512-bit floating-point vector of [16 x float] from a +/// 128-bit floating-point vector of [4 x float]. The lower 128 bits contain +/// the value of the source vector. The upper 384 bits are set to zero. +/// +/// \headerfile +/// +/// This intrinsic has no corresponding instruction. +/// +/// \param __a +/// A 128-bit vector of [4 x float]. +/// \returns A 512-bit floating-point vector of [16 x float]. The lower 128 bits +/// contain the value of the parameter. The upper 384 bits are set to zero. +static __inline __m512 __DEFAULT_FN_ATTRS +_mm512_zextps128_ps512(__m128 __a) +{ + return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7); +} + +/// \brief Constructs a 512-bit floating-point vector of [16 x float] from a +/// 256-bit floating-point vector of [8 x float]. The lower 256 bits contain +/// the value of the source vector. The upper 256 bits are set to zero. +/// +/// \headerfile +/// +/// This intrinsic has no corresponding instruction. +/// +/// \param __a +/// A 256-bit vector of [8 x float]. +/// \returns A 512-bit floating-point vector of [16 x float]. The lower 256 bits +/// contain the value of the parameter. The upper 256 bits are set to zero. +static __inline __m512 __DEFAULT_FN_ATTRS +_mm512_zextps256_ps512(__m256 __a) +{ + return __builtin_shufflevector((__v8sf)__a, (__v8sf)_mm256_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); +} + +/// \brief Constructs a 512-bit integer vector from a 128-bit integer vector. +/// The lower 128 bits contain the value of the source vector. The upper +/// 384 bits are set to zero. +/// +/// \headerfile +/// +/// This intrinsic has no corresponding instruction. +/// +/// \param __a +/// A 128-bit integer vector. +/// \returns A 512-bit integer vector. The lower 128 bits contain the value of +/// the parameter. The upper 384 bits are set to zero. +static __inline __m512i __DEFAULT_FN_ATTRS +_mm512_zextsi128_si512(__m128i __a) +{ + return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3, 2, 3, 2, 3); +} + +/// \brief Constructs a 512-bit integer vector from a 256-bit integer vector. +/// The lower 256 bits contain the value of the source vector. The upper +/// 256 bits are set to zero. +/// +/// \headerfile +/// +/// This intrinsic has no corresponding instruction. +/// +/// \param __a +/// A 256-bit integer vector. +/// \returns A 512-bit integer vector. The lower 256 bits contain the value of +/// the parameter. The upper 256 bits are set to zero. +static __inline __m512i __DEFAULT_FN_ATTRS +_mm512_zextsi256_si512(__m256i __a) +{ + return __builtin_shufflevector((__v4di)__a, (__v4di)_mm256_setzero_si256(), 0, 1, 2, 3, 4, 5, 6, 7); +} + /* Bitwise operators */ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_and_epi32(__m512i __a, __m512i __b) diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index 5381878..cdb7aa4 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -4523,6 +4523,61 @@ _mm256_castsi128_si256(__m128i __a) return __builtin_shufflevector((__v2di)__a, (__v2di)__a, 0, 1, -1, -1); } +/// \brief Constructs a 256-bit floating-point vector of [4 x double] from a +/// 128-bit floating-point vector of [2 x double]. The lower 128 bits +/// contain the value of the source vector. The upper 128 bits are set +/// to zero. +/// +/// \headerfile +/// +/// This intrinsic has no corresponding instruction. +/// +/// \param __a +/// A 128-bit vector of [2 x double]. +/// \returns A 256-bit floating-point vector of [4 x double]. The lower 128 bits +/// contain the value of the parameter. The upper 128 bits are set to zero. +static __inline __m256d __DEFAULT_FN_ATTRS +_mm256_zextpd128_pd256(__m128d __a) +{ + return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3); +} + +/// \brief Constructs a 256-bit floating-point vector of [8 x float] from a +/// 128-bit floating-point vector of [4 x float]. The lower 128 bits contain +/// the value of the source vector. The upper 128 bits are set to zero. +/// +/// \headerfile +/// +/// This intrinsic has no corresponding instruction. +/// +/// \param __a +/// A 128-bit vector of [4 x float]. +/// \returns A 256-bit floating-point vector of [8 x float]. The lower 128 bits +/// contain the value of the parameter. The upper 128 bits are set to zero. +static __inline __m256 __DEFAULT_FN_ATTRS +_mm256_zextps128_ps256(__m128 __a) +{ + return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7); +} + +/// \brief Constructs a 256-bit integer vector from a 128-bit integer vector. +/// The lower 128 bits contain the value of the source vector. The upper +/// 128 bits are set to zero. +/// +/// \headerfile +/// +/// This intrinsic has no corresponding instruction. +/// +/// \param __a +/// A 128-bit integer vector. +/// \returns A 256-bit integer vector. The lower 128 bits contain the value of +/// the parameter. The upper 128 bits are set to zero. +static __inline __m256i __DEFAULT_FN_ATTRS +_mm256_zextsi128_si256(__m128i __a) +{ + return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3); +} + /* Vector insert. We use macros rather than inlines because we only want to accept diff --git a/clang/test/CodeGen/avx-builtins.c b/clang/test/CodeGen/avx-builtins.c index 4832664..90f428e 100644 --- a/clang/test/CodeGen/avx-builtins.c +++ b/clang/test/CodeGen/avx-builtins.c @@ -1386,6 +1386,27 @@ void test_mm256_zeroupper() { return _mm256_zeroupper(); } +__m256d test_mm256_zextpd128_pd256(__m128d A) { + // CHECK-LABEL: test_mm256_zextpd128_pd256 + // CHECK: store <2 x double> zeroinitializer + // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <4 x i32> + return _mm256_zextpd128_pd256(A); +} + +__m256 test_mm256_zextps128_ps256(__m128 A) { + // CHECK-LABEL: test_mm256_zextps128_ps256 + // CHECK: store <4 x float> zeroinitializer + // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <8 x i32> + return _mm256_zextps128_ps256(A); +} + +__m256i test_mm256_zextsi128_si256(__m128i A) { + // CHECK-LABEL: test_mm256_zextsi128_si256 + // CHECK: store <2 x i64> zeroinitializer + // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <4 x i32> + return _mm256_zextsi128_si256(A); +} + double test_mm256_cvtsd_f64(__m256d __a) { // CHECK-LABEL: @test_mm256_cvtsd_f64 diff --git a/clang/test/CodeGen/avx512f-builtins.c b/clang/test/CodeGen/avx512f-builtins.c index 3ae8014..c66d836 100644 --- a/clang/test/CodeGen/avx512f-builtins.c +++ b/clang/test/CodeGen/avx512f-builtins.c @@ -8387,3 +8387,44 @@ __m512 test_mm512_mask_abs_ps(__m512 __W, __mmask16 __U, __m512 __A){ return _mm512_mask_abs_ps( __W, __U, __A); } +__m512d test_mm512_zextpd128_pd512(__m128d A) { + // CHECK-LABEL: test_mm512_zextpd128_pd512 + // CHECK: store <2 x double> zeroinitializer + // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <8 x i32> + return _mm512_zextpd128_pd512(A); +} + +__m512d test_mm512_zextpd256_pd512(__m256d A) { + // CHECK-LABEL: test_mm512_zextpd256_pd512 + // CHECK: store <4 x double> zeroinitializer + // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <8 x i32> + return _mm512_zextpd256_pd512(A); +} + +__m512 test_mm512_zextps128_ps512(__m128 A) { + // CHECK-LABEL: test_mm512_zextps128_ps512 + // CHECK: store <4 x float> zeroinitializer + // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <16 x i32> + return _mm512_zextps128_ps512(A); +} + +__m512 test_mm512_zextps256_ps512(__m256 A) { + // CHECK-LABEL: test_mm512_zextps256_ps512 + // CHECK: store <8 x float> zeroinitializer + // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <16 x i32> + return _mm512_zextps256_ps512(A); +} + +__m512i test_mm512_zextsi128_si512(__m128i A) { + // CHECK-LABEL: test_mm512_zextsi128_si512 + // CHECK: store <2 x i64> zeroinitializer + // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <8 x i32> + return _mm512_zextsi128_si512(A); +} + +__m512i test_mm512_zextsi256_si512(__m256i A) { + // CHECK-LABEL: test_mm512_zextsi256_si512 + // CHECK: store <4 x i64> zeroinitializer + // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <8 x i32> + return _mm512_zextsi256_si512(A); +} -- 2.7.4