From 48e298b8c4cc519ed4d497d6848a8105974fc12e Mon Sep 17 00:00:00 2001 From: Jina Nahias Date: Mon, 6 Nov 2017 07:04:12 +0000 Subject: [PATCH] lowering broadcastm Change-Id: I0661abea3e3742860e0a03ff9e4fcdc367eff7db llvm-svn: 317456 --- clang/include/clang/Basic/BuiltinsX86.def | 6 ---- clang/lib/Headers/avx512cdintrin.h | 5 +-- clang/lib/Headers/avx512vlcdintrin.h | 10 +++--- clang/test/CodeGen/avx512cdintrin.c | 38 +++++++++++++++++---- clang/test/CodeGen/avx512vlcd-builtins.c | 56 +++++++++++++++++++++++-------- 5 files changed, 82 insertions(+), 33 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def index d57ea8f..cbf0778 100644 --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -1580,12 +1580,6 @@ TARGET_BUILTIN(__builtin_ia32_cvtmask2q128, "V2LLiUc","","avx512dq,avx512vl") TARGET_BUILTIN(__builtin_ia32_cvtmask2q256, "V4LLiUc","","avx512dq,avx512vl") TARGET_BUILTIN(__builtin_ia32_cvtq2mask128, "UcV2LLi","","avx512dq,avx512vl") TARGET_BUILTIN(__builtin_ia32_cvtq2mask256, "UcV4LLi","","avx512dq,avx512vl") -TARGET_BUILTIN(__builtin_ia32_broadcastmb512, "V8LLiUc","","avx512cd") -TARGET_BUILTIN(__builtin_ia32_broadcastmw512, "V16iUs","","avx512cd") -TARGET_BUILTIN(__builtin_ia32_broadcastmb128, "V2LLiUc","","avx512cd,avx512vl") -TARGET_BUILTIN(__builtin_ia32_broadcastmb256, "V4LLiUc","","avx512cd,avx512vl") -TARGET_BUILTIN(__builtin_ia32_broadcastmw128, "V4iUs","","avx512cd,avx512vl") -TARGET_BUILTIN(__builtin_ia32_broadcastmw256, "V8iUs","","avx512cd,avx512vl") TARGET_BUILTIN(__builtin_ia32_pmovsdb512_mask, "V16cV16iV16cUs","","avx512f") TARGET_BUILTIN(__builtin_ia32_pmovsdb512mem_mask, "vV16c*V16iUs","","avx512f") TARGET_BUILTIN(__builtin_ia32_pmovswb512mem_mask, "vV32c*V32sUi","","avx512bw") diff --git a/clang/lib/Headers/avx512cdintrin.h b/clang/lib/Headers/avx512cdintrin.h index 23c4235..ec7e0cd 100644 --- a/clang/lib/Headers/avx512cdintrin.h +++ b/clang/lib/Headers/avx512cdintrin.h @@ -130,13 +130,14 @@ _mm512_maskz_lzcnt_epi64 (__mmask8 __U, __m512i __A) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_broadcastmb_epi64 (__mmask8 __A) { - return (__m512i) __builtin_ia32_broadcastmb512 (__A); + return (__m512i) _mm512_set1_epi64((long long) __A); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_broadcastmw_epi32 (__mmask16 __A) { - return (__m512i) __builtin_ia32_broadcastmw512 (__A); + return (__m512i) _mm512_set1_epi32((int) __A); + } #undef __DEFAULT_FN_ATTRS diff --git a/clang/lib/Headers/avx512vlcdintrin.h b/clang/lib/Headers/avx512vlcdintrin.h index 7b02e2e1..8f1cd25 100644 --- a/clang/lib/Headers/avx512vlcdintrin.h +++ b/clang/lib/Headers/avx512vlcdintrin.h @@ -33,26 +33,26 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_broadcastmb_epi64 (__mmask8 __A) -{ - return (__m128i) __builtin_ia32_broadcastmb128 (__A); +{ + return (__m128i) _mm_set1_epi64x((long long) __A); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_broadcastmb_epi64 (__mmask8 __A) { - return (__m256i) __builtin_ia32_broadcastmb256 (__A); + return (__m256i) _mm256_set1_epi64x((long long)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_broadcastmw_epi32 (__mmask16 __A) { - return (__m128i) __builtin_ia32_broadcastmw128 (__A); + return (__m128i) _mm_set1_epi32((int)__A); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_broadcastmw_epi32 (__mmask16 __A) { - return (__m256i) __builtin_ia32_broadcastmw256 (__A); + return (__m256i) _mm256_set1_epi32((int)__A); } diff --git a/clang/test/CodeGen/avx512cdintrin.c b/clang/test/CodeGen/avx512cdintrin.c index a286018..e01d277 100644 --- a/clang/test/CodeGen/avx512cdintrin.c +++ b/clang/test/CodeGen/avx512cdintrin.c @@ -68,14 +68,40 @@ __m512i test_mm512_maskz_lzcnt_epi64(__mmask8 __U, __m512i __A) { return _mm512_maskz_lzcnt_epi64(__U,__A); } -__m512i test_mm512_broadcastmb_epi64(__mmask8 __A) { +__m512i test_mm512_broadcastmb_epi64(__m512i a, __m512i b) { // CHECK-LABEL: @test_mm512_broadcastmb_epi64 - // CHECK: @llvm.x86.avx512.broadcastmb.512 - return _mm512_broadcastmb_epi64(__A); + // CHECK: icmp eq <8 x i64> %{{.*}}, %{{.*}} + // CHECK: zext i8 %{{.*}} to i64 + // CHECK: insertelement <8 x i64> undef, i64 %{{.*}}, i32 0 + // CHECK: insertelement <8 x i64> %{{.*}}, i64 %{{.*}}, i32 1 + // CHECK: insertelement <8 x i64> %{{.*}}, i64 %{{.*}}, i32 2 + // CHECK: insertelement <8 x i64> %{{.*}}, i64 %{{.*}}, i32 3 + // CHECK: insertelement <8 x i64> %{{.*}}, i64 %{{.*}}, i32 4 + // CHECK: insertelement <8 x i64> %{{.*}}, i64 %{{.*}}, i32 5 + // CHECK: insertelement <8 x i64> %{{.*}}, i64 %{{.*}}, i32 6 + // CHECK: insertelement <8 x i64> %{{.*}}, i64 %{{.*}}, i32 7 + return _mm512_broadcastmb_epi64(_mm512_cmpeq_epu64_mask ( a, b)); } -__m512i test_mm512_broadcastmw_epi32(__mmask16 __A) { +__m512i test_mm512_broadcastmw_epi32(__m512i a, __m512i b) { // CHECK-LABEL: @test_mm512_broadcastmw_epi32 - // CHECK: @llvm.x86.avx512.broadcastmw.512 - return _mm512_broadcastmw_epi32(__A); + // CHECK: icmp eq <16 x i32> %{{.*}}, %{{.*}} + // CHECK: zext i16 %{{.*}} to i32 + // CHECK: insertelement <16 x i32> undef, i32 %{{.*}} + // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}} + // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}} + // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}} + // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}} + // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}} + // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}} + // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}} + // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}} + // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}} + // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}} + // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}} + // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}} + // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}} + // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}} + // CHECK: insertelement <16 x i32> %{{.*}}, i32 %{{.*}} + return _mm512_broadcastmw_epi32(_mm512_cmpeq_epi32_mask ( a, b)); } diff --git a/clang/test/CodeGen/avx512vlcd-builtins.c b/clang/test/CodeGen/avx512vlcd-builtins.c index 643f24f..376a342 100644 --- a/clang/test/CodeGen/avx512vlcd-builtins.c +++ b/clang/test/CodeGen/avx512vlcd-builtins.c @@ -3,28 +3,56 @@ #include -__m128i test_mm_broadcastmb_epi64(__mmask8 __A) { +__m128i test_mm_broadcastmb_epi64(__m128i a,__m128i b) { // CHECK-LABEL: @test_mm_broadcastmb_epi64 - // CHECK: @llvm.x86.avx512.broadcastmb.128 - return _mm_broadcastmb_epi64(__A); + // CHECK: icmp eq <4 x i32> %{{.*}}, %{{.*}} + // CHECK: shufflevector <4 x i1> %{{.*}}, <4 x i1> zeroinitializer, <8 x i32> + // CHECK: bitcast <8 x i1> %{{.*}} to i8 + // CHECK: zext i8 %{{.*}} to i64 + // CHECK: insertelement <2 x i64> undef, i64 %{{.*}}, i32 0 + // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1 + return _mm_broadcastmb_epi64(_mm_cmpeq_epi32_mask (a, b)); } -__m256i test_mm256_broadcastmb_epi64(__mmask8 __A) { +__m256i test_mm256_broadcastmb_epi64(__m256i a, __m256i b) { // CHECK-LABEL: @test_mm256_broadcastmb_epi64 - // CHECK: @llvm.x86.avx512.broadcastmb.256 - return _mm256_broadcastmb_epi64(__A); -} - -__m128i test_mm_broadcastmw_epi32(__mmask16 __A) { + // CHECK: icmp eq <4 x i64> %{{.*}}, %{{.*}} + // CHECK: shufflevector <4 x i1> %{{.*}}, <4 x i1> zeroinitializer, <8 x i32> + // CHECK: bitcast <8 x i1> %{{.*}} to i8 + // CHECK: zext i8 %{{.*}} to i64 + // CHECK: insertelement <4 x i64> undef, i64 %{{.*}}, i32 0 + // CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i32 1 + // CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i32 2 + // CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i32 3 + return _mm256_broadcastmb_epi64(_mm256_cmpeq_epi64_mask ( a, b)); +} + +__m128i test_mm_broadcastmw_epi32(__m512i a, __m512i b) { // CHECK-LABEL: @test_mm_broadcastmw_epi32 - // CHECK: @llvm.x86.avx512.broadcastmw.128 - return _mm_broadcastmw_epi32(__A); + // CHECK: icmp eq <16 x i32> %{{.*}}, %{{.*}} + // CHECK: bitcast <16 x i1> %{{.*}} to i16 + // CHECK: zext i16 %{{.*}} to i32 + // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 0 + // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 1 + // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 2 + // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 3 + return _mm_broadcastmw_epi32(_mm512_cmpeq_epi32_mask ( a, b)); } -__m256i test_mm256_broadcastmw_epi32(__mmask16 __A) { +__m256i test_mm256_broadcastmw_epi32(__m512i a, __m512i b) { // CHECK-LABEL: @test_mm256_broadcastmw_epi32 - // CHECK: @llvm.x86.avx512.broadcastmw.256 - return _mm256_broadcastmw_epi32(__A); + // CHECK: icmp eq <16 x i32> %{{.*}}, %{{.*}} + // CHECK: bitcast <16 x i1> %{{.*}} to i16 + // CHECK: zext i16 %{{.*}} to i32 + // CHECK: insertelement <8 x i32> undef, i32 %{{.*}}, i32 0 + // CHECK: insertelement <8 x i32> %{{.*}}, i32 %{{.*}}, i32 1 + // CHECK: insertelement <8 x i32> %{{.*}}, i32 %{{.*}}, i32 2 + // CHECK: insertelement <8 x i32> %{{.*}}, i32 %{{.*}}, i32 3 + // CHECK: insertelement <8 x i32> %{{.*}}, i32 %{{.*}}, i32 4 + // CHECK: insertelement <8 x i32> %{{.*}}, i32 %{{.*}}, i32 5 + // CHECK: insertelement <8 x i32> %{{.*}}, i32 %{{.*}}, i32 6 + // CHECK: insertelement <8 x i32> %{{.*}}, i32 %{{.*}}, i32 7 + return _mm256_broadcastmw_epi32(_mm512_cmpeq_epi32_mask ( a, b)); } __m128i test_mm_conflict_epi64(__m128i __A) { -- 2.7.4