[AVX-512] Remove masked vector extract builtins and replace with native shufflevector...
authorCraig Topper <craig.topper@gmail.com>
Mon, 31 Oct 2016 04:30:56 +0000 (04:30 +0000)
committerCraig Topper <craig.topper@gmail.com>
Mon, 31 Oct 2016 04:30:56 +0000 (04:30 +0000)
Unfortunately, the backend currently doesn't fold masks into the instructions correctly when they come from these shufflevectors. I'll work on that in a future commit.

llvm-svn: 285540

clang/include/clang/Basic/BuiltinsX86.def
clang/lib/Headers/avx512dqintrin.h
clang/lib/Headers/avx512fintrin.h
clang/lib/Headers/avx512vldqintrin.h
clang/lib/Headers/avx512vlintrin.h
clang/lib/Sema/SemaChecking.cpp
clang/test/CodeGen/avx512dq-builtins.c
clang/test/CodeGen/avx512f-builtins.c
clang/test/CodeGen/avx512vl-builtins.c
clang/test/CodeGen/avx512vldq-builtins.c

index e028b3b..ba1d51d 100644 (file)
@@ -1004,8 +1004,6 @@ TARGET_BUILTIN(__builtin_ia32_alignd128_mask, "V4iV4iV4iIiV4iUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_alignd256_mask, "V8iV8iV8iIiV8iUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_alignq128_mask, "V2LLiV2LLiV2LLiIiV2LLiUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_alignq256_mask, "V4LLiV4LLiV4LLiIiV4LLiUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_extractf64x4_mask, "V4dV8dIiV4dUc", "", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_extractf32x4_mask, "V4fV16fIiV4fUc", "", "avx512f")
 
 TARGET_BUILTIN(__builtin_ia32_gather3div2df, "V2dV2ddC*V2LLiUci","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_gather3div2di, "V2LLiV2LLiLLiC*V2LLiUci","","avx512vl")
@@ -1727,16 +1725,6 @@ TARGET_BUILTIN(__builtin_ia32_pmovqw128_mask, "V8sV2LLiV8sUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmovqw128mem_mask, "vV8s*V2LLiUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmovqw256_mask, "V8sV4LLiV8sUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmovqw256mem_mask, "vV8s*V4LLiUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_extractf32x8_mask, "V8fV16fIiV8fUc","","avx512dq")
-TARGET_BUILTIN(__builtin_ia32_extractf64x2_512_mask, "V2dV8dIiV2dUc","","avx512dq")
-TARGET_BUILTIN(__builtin_ia32_extracti32x8_mask, "V8iV16iIiV8iUc","","avx512dq")
-TARGET_BUILTIN(__builtin_ia32_extracti64x2_512_mask, "V2LLiV8LLiIiV2LLiUc","","avx512dq")
-TARGET_BUILTIN(__builtin_ia32_extracti32x4_mask, "V4iV16iIiV4iUc","","avx512f")
-TARGET_BUILTIN(__builtin_ia32_extracti64x4_mask, "V4LLiV8LLiIiV4LLiUc","","avx512f")
-TARGET_BUILTIN(__builtin_ia32_extractf64x2_256_mask, "V2dV4dIiV2dUc","","avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_extracti64x2_256_mask, "V2LLiV4LLiIiV2LLiUc","","avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_extractf32x4_256_mask, "V4fV8fIiV4fUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_extracti32x4_256_mask, "V4iV8iIiV4iUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_insertf32x8_mask, "V16fV16fV8fIiV16fUs","","avx512dq")
 TARGET_BUILTIN(__builtin_ia32_insertf64x2_512_mask, "V8dV8dV2dIiV8dUc","","avx512dq")
 TARGET_BUILTIN(__builtin_ia32_inserti32x8_mask, "V16iV16iV8iIiV16iUs","","avx512dq")
index a24a91c..c5ae250 100644 (file)
@@ -1116,70 +1116,80 @@ _mm512_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
 }
 
 #define _mm512_extractf32x8_ps(A, imm) __extension__ ({ \
-  (__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
-                                           (__v8sf)_mm256_setzero_ps(), \
-                                           (__mmask8)-1); })
+  (__m256)__builtin_shufflevector((__v16sf)(__m512)(A),           \
+                                  (__v16sf)_mm512_undefined_ps(), \
+                                  ((imm) & 1) ?  8 : 0,           \
+                                  ((imm) & 1) ?  9 : 1,           \
+                                  ((imm) & 1) ? 10 : 2,           \
+                                  ((imm) & 1) ? 11 : 3,           \
+                                  ((imm) & 1) ? 12 : 4,           \
+                                  ((imm) & 1) ? 13 : 5,           \
+                                  ((imm) & 1) ? 14 : 6,           \
+                                  ((imm) & 1) ? 15 : 7); })
 
 #define _mm512_mask_extractf32x8_ps(W, U, A, imm) __extension__ ({ \
-  (__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
-                                           (__v8sf)(__m256)(W), \
-                                           (__mmask8)(U)); })
+  (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
+                                   (__v8sf)_mm512_extractf32x8_ps((A), (imm)), \
+                                   (__v8sf)(W)); })
 
 #define _mm512_maskz_extractf32x8_ps(U, A, imm) __extension__ ({ \
-  (__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
-                                           (__v8sf)_mm256_setzero_ps(), \
-                                           (__mmask8)(U)); })
+  (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
+                                   (__v8sf)_mm512_extractf32x8_ps((A), (imm)), \
+                                   (__v8sf)_mm256_setzero_ps()); })
 
 #define _mm512_extractf64x2_pd(A, imm) __extension__ ({ \
-  (__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
-                                                (int)(imm), \
-                                                (__v2df)_mm_setzero_pd(), \
-                                                (__mmask8)-1); })
+  (__m128d)__builtin_shufflevector((__v8df)(__m512d)(A),          \
+                                   (__v8df)_mm512_undefined_pd(), \
+                                   0 + ((imm) & 0x3) * 2,         \
+                                   1 + ((imm) & 0x3) * 2); })
 
 #define _mm512_mask_extractf64x2_pd(W, U, A, imm) __extension__ ({ \
-  (__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
-                                                (int)(imm), \
-                                                (__v2df)(__m128d)(W), \
-                                                (__mmask8)(U)); })
+  (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
+                                   (__v2df)_mm512_extractf64x2_pd((A), (imm)), \
+                                   (__v2df)(W)); })
 
 #define _mm512_maskz_extractf64x2_pd(U, A, imm) __extension__ ({ \
-  (__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
-                                                (int)(imm), \
-                                                (__v2df)_mm_setzero_pd(), \
-                                                (__mmask8)(U)); })
+  (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
+                                   (__v2df)_mm512_extractf64x2_pd((A), (imm)), \
+                                   (__v2df)_mm_setzero_pd()); })
 
 #define _mm512_extracti32x8_epi32(A, imm) __extension__ ({ \
-  (__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
-                                            (__v8si)_mm256_setzero_si256(), \
-                                            (__mmask8)-1); })
+  (__m256i)__builtin_shufflevector((__v16si)(__m512i)(A),             \
+                                   (__v16si)_mm512_undefined_epi32(), \
+                                   ((imm) & 1) ?  8 : 0,              \
+                                   ((imm) & 1) ?  9 : 1,              \
+                                   ((imm) & 1) ? 10 : 2,              \
+                                   ((imm) & 1) ? 11 : 3,              \
+                                   ((imm) & 1) ? 12 : 4,              \
+                                   ((imm) & 1) ? 13 : 5,              \
+                                   ((imm) & 1) ? 14 : 6,              \
+                                   ((imm) & 1) ? 15 : 7); })
 
 #define _mm512_mask_extracti32x8_epi32(W, U, A, imm) __extension__ ({ \
-  (__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
-                                            (__v8si)(__m256i)(W), \
-                                            (__mmask8)(U)); })
+  (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
+                                (__v8si)_mm512_extracti32x8_epi32((A), (imm)), \
+                                (__v8si)(W)); })
 
 #define _mm512_maskz_extracti32x8_epi32(U, A, imm) __extension__ ({ \
-  (__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
-                                            (__v8si)_mm256_setzero_si256(), \
-                                            (__mmask8)(U)); })
+  (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
+                                (__v8si)_mm512_extracti32x8_epi32((A), (imm)), \
+                                (__v8si)_mm256_setzero_si256()); })
 
 #define _mm512_extracti64x2_epi64(A, imm) __extension__ ({ \
-  (__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
-                                                (int)(imm), \
-                                                (__v2di)_mm_setzero_di(), \
-                                                (__mmask8)-1); })
+  (__m128i)__builtin_shufflevector((__v8di)(__m512i)(A),          \
+                                   (__v8di)_mm512_undefined_epi32(), \
+                                   0 + ((imm) & 0x3) * 2,           \
+                                   1 + ((imm) & 0x3) * 2); })
 
 #define _mm512_mask_extracti64x2_epi64(W, U, A, imm) __extension__ ({ \
-  (__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
-                                                (int)(imm), \
-                                                (__v2di)(__m128i)(W), \
-                                                (__mmask8)(U)); })
+  (__m128d)__builtin_ia32_selectq_128((__mmask8)(U), \
+                                (__v2di)_mm512_extracti64x2_epi64((A), (imm)), \
+                                (__v2di)(W)); })
 
 #define _mm512_maskz_extracti64x2_epi64(U, A, imm) __extension__ ({ \
-  (__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
-                                                (int)(imm), \
-                                                (__v2di)_mm_setzero_di(), \
-                                                (__mmask8)(U)); })
+  (__m128d)__builtin_ia32_selectq_128((__mmask8)(U), \
+                                (__v2di)_mm512_extracti64x2_epi64((A), (imm)), \
+                                (__v2di)_mm_setzero_di()); })
 
 #define _mm512_insertf32x8(A, B, imm) __extension__ ({ \
   (__m512)__builtin_ia32_insertf32x8_mask((__v16sf)(__m512)(A), \
index d08d757..1cadb86 100644 (file)
@@ -3440,35 +3440,42 @@ _mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
                                          (__mmask16)(U)); })
 /* Vector Extract */
 
-#define _mm512_extractf64x4_pd(A, I) __extension__ ({                    \
-  (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \
-                                            (__v4df)_mm256_setzero_si256(), \
-                                            (__mmask8)-1); })
+#define _mm512_extractf64x4_pd(A, I) __extension__ ({             \
+  (__m256d)__builtin_shufflevector((__v8df)(__m512d)(A),          \
+                                   (__v8df)_mm512_undefined_pd(), \
+                                   ((I) & 1) ? 4 : 0,             \
+                                   ((I) & 1) ? 5 : 1,             \
+                                   ((I) & 1) ? 6 : 2,             \
+                                   ((I) & 1) ? 7 : 3); })
 
 #define _mm512_mask_extractf64x4_pd(W, U, A, imm) __extension__ ({\
-  (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
-                                            (__v4df)(__m256d)(W), \
-                                            (__mmask8)(U)); })
+  (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
+                                   (__v4df)_mm512_extractf64x4_pd((A), (imm)), \
+                                   (__v4df)(W)); })
 
 #define _mm512_maskz_extractf64x4_pd(U, A, imm) __extension__ ({\
-  (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
-                                            (__v4df)_mm256_setzero_pd(), \
-                                            (__mmask8)(U)); })
+  (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
+                                   (__v4df)_mm512_extractf64x4_pd((A), (imm)), \
+                                   (__v4df)_mm256_setzero_pd()); })
 
-#define _mm512_extractf32x4_ps(A, I) __extension__ ({                    \
-  (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \
-                                           (__v4sf)_mm_setzero_ps(), \
-                                           (__mmask8)-1); })
+#define _mm512_extractf32x4_ps(A, I) __extension__ ({             \
+  (__m128)__builtin_shufflevector((__v16sf)(__m512)(A),           \
+                                  (__v16sf)_mm512_undefined_ps(), \
+                                  0 + ((I) & 0x3) * 4,            \
+                                  1 + ((I) & 0x3) * 4,            \
+                                  2 + ((I) & 0x3) * 4,            \
+                                  3 + ((I) & 0x3) * 4); })
 
 #define _mm512_mask_extractf32x4_ps(W, U, A, imm) __extension__ ({\
-  (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
-                                           (__v4sf)(__m128)(W), \
-                                           (__mmask8)(U)); })
+  (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
+                                   (__v4sf)_mm512_extractf32x4_ps((A), (imm)), \
+                                   (__v4sf)(W)); })
 
 #define _mm512_maskz_extractf32x4_ps(U, A, imm) __extension__ ({\
-  (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
-                                           (__v4sf)_mm_setzero_ps(), \
-                                           (__mmask8)(U)); })
+  (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
+                                   (__v4sf)_mm512_extractf32x4_ps((A), (imm)), \
+                                   (__v4sf)_mm_setzero_ps()); })
+
 /* Vector Blend */
 
 static __inline __m512d __DEFAULT_FN_ATTRS
@@ -7895,35 +7902,41 @@ _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
   __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
 }
 
-#define _mm512_extracti32x4_epi32(A, imm) __extension__ ({ \
-  (__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
-                                            (__v4si)_mm_undefined_si128(), \
-                                            (__mmask8)-1); })
+#define _mm512_extracti32x4_epi32(A, imm) __extension__ ({            \
+  (__m128i)__builtin_shufflevector((__v16si)(__m512i)(A),             \
+                                   (__v16si)_mm512_undefined_epi32(), \
+                                   0 + ((imm) & 0x3) * 4,             \
+                                   1 + ((imm) & 0x3) * 4,             \
+                                   2 + ((imm) & 0x3) * 4,             \
+                                   3 + ((imm) & 0x3) * 4); })
 
 #define _mm512_mask_extracti32x4_epi32(W, U, A, imm) __extension__ ({ \
-  (__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
-                                            (__v4si)(__m128i)(W), \
-                                            (__mmask8)(U)); })
+  (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, \
+                                (__v4si)_mm512_extracti32x4_epi32((A), (imm)), \
+                                (__v4si)__W); })
 
 #define _mm512_maskz_extracti32x4_epi32(U, A, imm) __extension__ ({ \
-  (__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
-                                            (__v4si)_mm_setzero_si128(), \
-                                            (__mmask8)(U)); })
+  (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, \
+                                (__v4si)_mm512_extracti32x4_epi32((A), (imm)), \
+                                (__v4si)_mm_setzero_si128()); })
 
-#define _mm512_extracti64x4_epi64(A, imm) __extension__ ({ \
-  (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
-                                            (__v4di)_mm256_undefined_si256(), \
-                                            (__mmask8)-1); })
+#define _mm512_extracti64x4_epi64(A, imm) __extension__ ({           \
+  (__m256i)__builtin_shufflevector((__v8di)(__m512i)(A),             \
+                                   (__v8di)_mm512_undefined_epi32(), \
+                                   ((imm) & 1) ? 4 : 0,              \
+                                   ((imm) & 1) ? 5 : 1,              \
+                                   ((imm) & 1) ? 6 : 2,              \
+                                   ((imm) & 1) ? 7 : 3); })
 
 #define _mm512_mask_extracti64x4_epi64(W, U, A, imm) __extension__ ({ \
-  (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
-                                            (__v4di)(__m256i)(W), \
-                                            (__mmask8)(U)); })
+  (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,      \
+                                (__v4di)_mm512_extracti64x4_epi64((A), (imm)), \
+                                (__v4di)__W); })
 
 #define _mm512_maskz_extracti64x4_epi64(U, A, imm) __extension__ ({ \
-  (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
-                                            (__v4di)_mm256_setzero_si256(), \
-                                            (__mmask8)(U)); })
+  (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,      \
+                                (__v4di)_mm512_extracti64x4_epi64((A), (imm)), \
+                                (__v4di)_mm256_setzero_si256()); })
 
 #define _mm512_insertf64x4(A, B, imm) __extension__ ({ \
   (__m512d)__builtin_ia32_insertf64x4_mask((__v8df)(__m512d)(A), \
index ffe5959..0f617c1 100644 (file)
@@ -1096,40 +1096,36 @@ _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
 }
 
 #define _mm256_extractf64x2_pd(A, imm) __extension__ ({ \
-  (__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
-                                                (int)(imm), \
-                                                (__v2df)_mm_setzero_pd(), \
-                                                (__mmask8)-1); })
+  (__m128d)__builtin_shufflevector((__v4df)(__m256d)(A),           \
+                                   (__v4df)_mm256_undefined_pd(), \
+                                   ((imm) & 1) ? 2 : 0,           \
+                                   ((imm) & 1) ? 3 : 1); })
 
 #define _mm256_mask_extractf64x2_pd(W, U, A, imm) __extension__ ({ \
-  (__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
-                                                (int)(imm), \
-                                                (__v2df)(__m128d)(W), \
-                                                (__mmask8)(U)); })
+  (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
+                                   (__v2df)_mm256_extractf64x2_pd((A), (imm)), \
+                                   (__v2df)(W)); })
 
 #define _mm256_maskz_extractf64x2_pd(U, A, imm) __extension__ ({ \
-  (__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
-                                                (int)(imm), \
-                                                (__v2df)_mm_setzero_pd(), \
-                                                (__mmask8)(U)); })
+  (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
+                                   (__v2df)_mm256_extractf64x2_pd((A), (imm)), \
+                                   (__v2df)_mm_setzero_pd()); })
 
 #define _mm256_extracti64x2_epi64(A, imm) __extension__ ({ \
-  (__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
-                                                (int)(imm), \
-                                                (__v2di)_mm_setzero_di(), \
-                                                (__mmask8)-1); })
+  (__m128i)__builtin_shufflevector((__v4di)(__m256i)(A),             \
+                                   (__v4di)_mm256_undefined_si256(), \
+                                   ((imm) & 1) ? 2 : 0,              \
+                                   ((imm) & 1) ? 3 : 1); })
 
 #define _mm256_mask_extracti64x2_epi64(W, U, A, imm) __extension__ ({ \
-  (__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
-                                                (int)(imm), \
-                                                (__v2di)(__m128i)(W), \
-                                                (__mmask8)(U)); })
+  (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
+                                (__v2di)_mm256_extracti64x2_epi64((A), (imm)), \
+                                (__v2di)(W)); })
 
 #define _mm256_maskz_extracti64x2_epi64(U, A, imm) __extension__ ({ \
-  (__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
-                                                (int)(imm), \
-                                                (__v2di)_mm_setzero_di(), \
-                                                (__mmask8)(U)); })
+  (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
+                                (__v2di)_mm256_extracti64x2_epi64((A), (imm)), \
+                                (__v2di)_mm_setzero_di()); })
 
 #define _mm256_insertf64x2(A, B, imm) __extension__ ({ \
   (__m256d)__builtin_ia32_insertf64x2_256_mask((__v4df)(__m256d)(A), \
index e915087..42e5d91 100644 (file)
@@ -8273,40 +8273,40 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
 }
 
 #define _mm256_extractf32x4_ps(A, imm) __extension__ ({ \
-  (__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
-                                               (int)(imm), \
-                                               (__v4sf)_mm_setzero_ps(), \
-                                               (__mmask8)-1); })
+  (__m128)__builtin_shufflevector((__v8sf)(__m256)(A),           \
+                                  (__v8sf)_mm256_undefined_ps(), \
+                                  ((imm) & 1) ? 4 : 0,           \
+                                  ((imm) & 1) ? 5 : 1,           \
+                                  ((imm) & 1) ? 6 : 2,           \
+                                  ((imm) & 1) ? 7 : 3); })
 
 #define _mm256_mask_extractf32x4_ps(W, U, A, imm) __extension__ ({ \
-  (__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
-                                               (int)(imm), \
-                                               (__v4sf)(__m128)(W), \
-                                               (__mmask8)(U)); })
+  (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
+                                   (__v4sf)_mm256_extractf32x4_ps((A), (imm)), \
+                                   (__v4sf)(W)); })
 
 #define _mm256_maskz_extractf32x4_ps(U, A, imm) __extension__ ({ \
-  (__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
-                                               (int)(imm), \
-                                               (__v4sf)_mm_setzero_ps(), \
-                                               (__mmask8)(U)); })
+  (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
+                                   (__v4sf)_mm256_extractf32x4_ps((A), (imm)), \
+                                   (__v4sf)_mm_setzero_ps()); })
 
 #define _mm256_extracti32x4_epi32(A, imm) __extension__ ({ \
-  (__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
-                                                (int)(imm), \
-                                                (__v4si)_mm_setzero_si128(), \
-                                                (__mmask8)-1); })
+  (__m128i)__builtin_shufflevector((__v8si)(__m256)(A),              \
+                                   (__v8si)_mm256_undefined_si256(), \
+                                   ((imm) & 1) ? 4 : 0,              \
+                                   ((imm) & 1) ? 5 : 1,              \
+                                   ((imm) & 1) ? 6 : 2,              \
+                                   ((imm) & 1) ? 7 : 3); })
 
 #define _mm256_mask_extracti32x4_epi32(W, U, A, imm) __extension__ ({ \
-  (__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
-                                                (int)(imm), \
-                                                (__v4si)(__m128i)(W), \
-                                                (__mmask8)(U)); })
+  (__m128i)__builtin_ia32_selectps_128((__mmask8)(U), \
+                                (__v4si)_mm256_extracti32x4_epi32((A), (imm)), \
+                                (__v4si)(W)); })
 
 #define _mm256_maskz_extracti32x4_epi32(U, A, imm) __extension__ ({ \
-  (__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
-                                                (int)(imm), \
-                                                (__v4si)_mm_setzero_si128(), \
-                                                (__mmask8)(U)); })
+  (__m128i)__builtin_ia32_selectps_128((__mmask8)(U), \
+                                (__v4si)_mm256_extracti32x4_epi32((A), (imm)), \
+                                (__v4si)_mm_setzero_si128()); })
 
 #define _mm256_insertf32x4(A, B, imm) __extension__ ({ \
   (__m256)__builtin_ia32_insertf32x4_256_mask((__v8sf)(__m256)(A), \
index c0d85c9..c73bd4c 100644 (file)
@@ -1971,21 +1971,7 @@ bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
   switch (BuiltinID) {
   default:
     return false;
-  case X86::BI__builtin_ia32_extractf64x4_mask:
-  case X86::BI__builtin_ia32_extracti64x4_mask:
-  case X86::BI__builtin_ia32_extractf32x8_mask:
-  case X86::BI__builtin_ia32_extracti32x8_mask:
-  case X86::BI__builtin_ia32_extractf64x2_256_mask:
-  case X86::BI__builtin_ia32_extracti64x2_256_mask:
-  case X86::BI__builtin_ia32_extractf32x4_256_mask:
-  case X86::BI__builtin_ia32_extracti32x4_256_mask:
-    i = 1; l = 0; u = 1;
-    break;
   case X86::BI_mm_prefetch:
-  case X86::BI__builtin_ia32_extractf32x4_mask:
-  case X86::BI__builtin_ia32_extracti32x4_mask:
-  case X86::BI__builtin_ia32_extractf64x2_512_mask:
-  case X86::BI__builtin_ia32_extracti64x2_512_mask:
     i = 1; l = 0; u = 3;
     break;
   case X86::BI__builtin_ia32_insertf32x8_mask:
index 9a2b48c..d6191ec 100644 (file)
@@ -1054,73 +1054,81 @@ __m512i test_mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) {
 }
 __m256 test_mm512_extractf32x8_ps(__m512 __A) {
   // CHECK-LABEL: @test_mm512_extractf32x8_ps
-  // CHECK: @llvm.x86.avx512.mask.vextractf32x8
+  // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   return _mm512_extractf32x8_ps(__A, 1); 
 }
 
 __m256 test_mm512_mask_extractf32x8_ps(__m256 __W, __mmask8 __U, __m512 __A) {
   // CHECK-LABEL: @test_mm512_mask_extractf32x8_ps
-  // CHECK: @llvm.x86.avx512.mask.vextractf32x8
+  // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
   return _mm512_mask_extractf32x8_ps(__W, __U, __A, 1); 
 }
 
 __m256 test_mm512_maskz_extractf32x8_ps(__mmask8 __U, __m512 __A) {
   // CHECK-LABEL: @test_mm512_maskz_extractf32x8_ps
-  // CHECK: @llvm.x86.avx512.mask.vextractf32x8
+  // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
   return _mm512_maskz_extractf32x8_ps(__U, __A, 1); 
 }
 
 __m128d test_mm512_extractf64x2_pd(__m512d __A) {
   // CHECK-LABEL: @test_mm512_extractf64x2_pd
-  // CHECK: @llvm.x86.avx512.mask.vextractf64x2
+  // CHECK: shufflevector <8 x double> %0, <8 x double> undef, <2 x i32> <i32 6, i32 7>
   return _mm512_extractf64x2_pd(__A, 3); 
 }
 
 __m128d test_mm512_mask_extractf64x2_pd(__m128d __W, __mmask8 __U, __m512d __A) {
   // CHECK-LABEL: @test_mm512_mask_extractf64x2_pd
-  // CHECK: @llvm.x86.avx512.mask.vextractf64x2
+  // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <2 x i32> <i32 6, i32 7>
+  // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
   return _mm512_mask_extractf64x2_pd(__W, __U, __A, 3); 
 }
 
 __m128d test_mm512_maskz_extractf64x2_pd(__mmask8 __U, __m512d __A) {
   // CHECK-LABEL: @test_mm512_maskz_extractf64x2_pd
-  // CHECK: @llvm.x86.avx512.mask.vextractf64x2
+  // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <2 x i32> <i32 6, i32 7>
+  // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
   return _mm512_maskz_extractf64x2_pd(__U, __A, 3); 
 }
 
 __m256i test_mm512_extracti32x8_epi32(__m512i __A) {
   // CHECK-LABEL: @test_mm512_extracti32x8_epi32
-  // CHECK: @llvm.x86.avx512.mask.vextracti32x8
+  // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   return _mm512_extracti32x8_epi32(__A, 1); 
 }
 
 __m256i test_mm512_mask_extracti32x8_epi32(__m256i __W, __mmask8 __U, __m512i __A) {
   // CHECK-LABEL: @test_mm512_mask_extracti32x8_epi32
-  // CHECK: @llvm.x86.avx512.mask.vextracti32x8
+  // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
   return _mm512_mask_extracti32x8_epi32(__W, __U, __A, 1); 
 }
 
 __m256i test_mm512_maskz_extracti32x8_epi32(__mmask8 __U, __m512i __A) {
   // CHECK-LABEL: @test_mm512_maskz_extracti32x8_epi32
-  // CHECK: @llvm.x86.avx512.mask.vextracti32x8
+  // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
   return _mm512_maskz_extracti32x8_epi32(__U, __A, 1); 
 }
 
 __m128i test_mm512_extracti64x2_epi64(__m512i __A) {
   // CHECK-LABEL: @test_mm512_extracti64x2_epi64
-  // CHECK: @llvm.x86.avx512.mask.vextracti64x2
+  // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <2 x i32> <i32 6, i32 7>
   return _mm512_extracti64x2_epi64(__A, 3); 
 }
 
 __m128i test_mm512_mask_extracti64x2_epi64(__m128i __W, __mmask8 __U, __m512i __A) {
   // CHECK-LABEL: @test_mm512_mask_extracti64x2_epi64
-  // CHECK: @llvm.x86.avx512.mask.vextracti64x2
+  // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <2 x i32> <i32 6, i32 7>
+  // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
   return _mm512_mask_extracti64x2_epi64(__W, __U, __A, 3); 
 }
 
 __m128i test_mm512_maskz_extracti64x2_epi64(__mmask8 __U, __m512i __A) {
   // CHECK-LABEL: @test_mm512_maskz_extracti64x2_epi64
-  // CHECK: @llvm.x86.avx512.mask.vextracti64x2
+  // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <2 x i32> <i32 6, i32 7>
+  // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
   return _mm512_maskz_extracti64x2_epi64(__U, __A, 3); 
 }
 
index 2858f98..55b44b8 100644 (file)
@@ -1236,38 +1236,42 @@ __mmask8 test_mm512_mask_cmpunord_ps_mask(__mmask8 k, __m512 a, __m512 b) {
 __m256d test_mm512_extractf64x4_pd(__m512d a)
 {
   // CHECK-LABEL: @test_mm512_extractf64x4_pd
-  // CHECK: @llvm.x86.avx512.mask.vextractf64x4.512
+  // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   return _mm512_extractf64x4_pd(a, 1);
 }
 
 __m256d test_mm512_mask_extractf64x4_pd(__m256d  __W,__mmask8  __U,__m512d __A){
- //CHECK-LABEL:@test_mm512_mask_extractf64x4_pd
- //CHECL:@llvm.x86.avx512.mask.vextractf64x4.512
- return _mm512_mask_extractf64x4_pd( __W, __U, __A, 1);
+  // CHECK-LABEL:@test_mm512_mask_extractf64x4_pd
+  // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
+  return _mm512_mask_extractf64x4_pd( __W, __U, __A, 1);
 }
 
 __m256d test_mm512_maskz_extractf64x4_pd(__mmask8  __U,__m512d __A){
- //CHECK-LABEL:@test_mm512_maskz_extractf64x4_pd
- //CHECL:@llvm.x86.avx512.mask.vextractf64x4.512
- return _mm512_maskz_extractf64x4_pd( __U, __A, 1);
+  // CHECK-LABEL:@test_mm512_maskz_extractf64x4_pd
+  // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
+  return _mm512_maskz_extractf64x4_pd( __U, __A, 1);
 }
 
 __m128 test_mm512_extractf32x4_ps(__m512 a)
 {
   // CHECK-LABEL: @test_mm512_extractf32x4_ps
-  // CHECK: @llvm.x86.avx512.mask.vextractf32x4.512
+  // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   return _mm512_extractf32x4_ps(a, 1);
 }
 
 __m128 test_mm512_mask_extractf32x4_ps(__m128 __W, __mmask8  __U,__m512d __A){
- //CHECK-LABEL:@test_mm512_mask_extractf32x4_ps
- //CHECL: @llvm.x86.avx512.mask.vextractf32x4.512
- return _mm512_mask_extractf32x4_ps( __W, __U, __A, 1);
+  // CHECK-LABEL:@test_mm512_mask_extractf32x4_ps
+  // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
+  return _mm512_mask_extractf32x4_ps( __W, __U, __A, 1);
 }
 
 __m128 test_mm512_maskz_extractf32x4_ps( __mmask8  __U,__m512d __A){
- //CHECK-LABEL:@test_mm512_maskz_extractf32x4_ps
- //CHECL: @llvm.x86.avx512.mask.vextractf32x4.512
+  // CHECK-LABEL:@test_mm512_maskz_extractf32x4_ps
+  // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
  return _mm512_maskz_extractf32x4_ps(  __U, __A, 1);
 }
 
@@ -5097,37 +5101,41 @@ void test_mm512_mask_cvtepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
 
 __m128i test_mm512_extracti32x4_epi32(__m512i __A) {
   // CHECK-LABEL: @test_mm512_extracti32x4_epi32
-  // CHECK: @llvm.x86.avx512.mask.vextracti32x4
+  // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
   return _mm512_extracti32x4_epi32(__A, 3); 
 }
 
 __m128i test_mm512_mask_extracti32x4_epi32(__m128i __W, __mmask8 __U, __m512i __A) {
   // CHECK-LABEL: @test_mm512_mask_extracti32x4_epi32
-  // CHECK: @llvm.x86.avx512.mask.vextracti32x4
+  // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
+  // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
   return _mm512_mask_extracti32x4_epi32(__W, __U, __A, 3); 
 }
 
 __m128i test_mm512_maskz_extracti32x4_epi32(__mmask8 __U, __m512i __A) {
   // CHECK-LABEL: @test_mm512_maskz_extracti32x4_epi32
-  // CHECK: @llvm.x86.avx512.mask.vextracti32x4
+  // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
+  // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
   return _mm512_maskz_extracti32x4_epi32(__U, __A, 3); 
 }
 
 __m256i test_mm512_extracti64x4_epi64(__m512i __A) {
   // CHECK-LABEL: @test_mm512_extracti64x4_epi64
-  // CHECK: @llvm.x86.avx512.mask.vextracti64x4
+  // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   return _mm512_extracti64x4_epi64(__A, 1); 
 }
 
 __m256i test_mm512_mask_extracti64x4_epi64(__m256i __W, __mmask8 __U, __m512i __A) {
   // CHECK-LABEL: @test_mm512_mask_extracti64x4_epi64
-  // CHECK: @llvm.x86.avx512.mask.vextracti64x4
+  // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
   return _mm512_mask_extracti64x4_epi64(__W, __U, __A, 1); 
 }
 
 __m256i test_mm512_maskz_extracti64x4_epi64(__mmask8 __U, __m512i __A) {
   // CHECK-LABEL: @test_mm512_maskz_extracti64x4_epi64
-  // CHECK: @llvm.x86.avx512.mask.vextracti64x4
+  // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
   return _mm512_maskz_extracti64x4_epi64(__U, __A, 1); 
 }
 
index b6cb5fb..147522b 100644 (file)
@@ -6555,37 +6555,41 @@ void test_mm256_mask_cvtepi64_storeu_epi16(void * __P, __mmask8 __M, __m256i __A
 
 __m128 test_mm256_extractf32x4_ps(__m256 __A) {
   // CHECK-LABEL: @test_mm256_extractf32x4_ps
-  // CHECK: @llvm.x86.avx512.mask.vextractf32x4
+  // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   return _mm256_extractf32x4_ps(__A, 1); 
 }
 
 __m128 test_mm256_mask_extractf32x4_ps(__m128 __W, __mmask8 __U, __m256 __A) {
   // CHECK-LABEL: @test_mm256_mask_extractf32x4_ps
-  // CHECK: @llvm.x86.avx512.mask.vextractf32x4
+  // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
   return _mm256_mask_extractf32x4_ps(__W, __U, __A, 1); 
 }
 
 __m128 test_mm256_maskz_extractf32x4_ps(__mmask8 __U, __m256 __A) {
   // CHECK-LABEL: @test_mm256_maskz_extractf32x4_ps
-  // CHECK: @llvm.x86.avx512.mask.vextractf32x4
+  // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
   return _mm256_maskz_extractf32x4_ps(__U, __A, 1); 
 }
 
 __m128i test_mm256_extracti32x4_epi32(__m256i __A) {
   // CHECK-LABEL: @test_mm256_extracti32x4_epi32
-  // CHECK: @llvm.x86.avx512.mask.vextracti32x4
+  // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   return _mm256_extracti32x4_epi32(__A, 1); 
 }
 
 __m128i test_mm256_mask_extracti32x4_epi32(__m128i __W, __mmask8 __U, __m256i __A) {
   // CHECK-LABEL: @test_mm256_mask_extracti32x4_epi32
-  // CHECK: @llvm.x86.avx512.mask.vextracti32x4
+  // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
   return _mm256_mask_extracti32x4_epi32(__W, __U, __A, 1); 
 }
 
 __m128i test_mm256_maskz_extracti32x4_epi32(__mmask8 __U, __m256i __A) {
   // CHECK-LABEL: @test_mm256_maskz_extracti32x4_epi32
-  // CHECK: @llvm.x86.avx512.mask.vextracti32x4
+  // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
   return _mm256_maskz_extracti32x4_epi32(__U, __A, 1); 
 }
 
index 1ac56ef..1b953ae 100644 (file)
@@ -992,37 +992,41 @@ __m256i test_mm256_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) {
 
 __m128d test_mm256_extractf64x2_pd(__m256d __A) {
   // CHECK-LABEL: @test_mm256_extractf64x2_pd
-  // CHECK: @llvm.x86.avx512.mask.vextractf64x2
+  // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <2 x i32> <i32 2, i32 3>
   return _mm256_extractf64x2_pd(__A, 1); 
 }
 
 __m128d test_mm256_mask_extractf64x2_pd(__m128d __W, __mmask8 __U, __m256d __A) {
   // CHECK-LABEL: @test_mm256_mask_extractf64x2_pd
-  // CHECK: @llvm.x86.avx512.mask.vextractf64x2
+  // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <2 x i32> <i32 2, i32 3>
+  // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
   return _mm256_mask_extractf64x2_pd(__W, __U, __A, 1); 
 }
 
 __m128d test_mm256_maskz_extractf64x2_pd(__mmask8 __U, __m256d __A) {
   // CHECK-LABEL: @test_mm256_maskz_extractf64x2_pd
-  // CHECK: @llvm.x86.avx512.mask.vextractf64x2
+  // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <2 x i32> <i32 2, i32 3>
+  // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
   return _mm256_maskz_extractf64x2_pd(__U, __A, 1); 
 }
 
 __m128i test_mm256_extracti64x2_epi64(__m256i __A) {
   // CHECK-LABEL: @test_mm256_extracti64x2_epi64
-  // CHECK: @llvm.x86.avx512.mask.vextracti64x2
+  // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
   return _mm256_extracti64x2_epi64(__A, 1); 
 }
 
 __m128i test_mm256_mask_extracti64x2_epi64(__m128i __W, __mmask8 __U, __m256i __A) {
   // CHECK-LABEL: @test_mm256_mask_extracti64x2_epi64
-  // CHECK: @llvm.x86.avx512.mask.vextracti64x2
+  // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
+  // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
   return _mm256_mask_extracti64x2_epi64(__W, __U, __A, 1); 
 }
 
 __m128i test_mm256_maskz_extracti64x2_epi64(__mmask8 __U, __m256i __A) {
   // CHECK-LABEL: @test_mm256_maskz_extracti64x2_epi64
-  // CHECK: @llvm.x86.avx512.mask.vextracti64x2
+  // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
+  // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
   return _mm256_maskz_extracti64x2_epi64(__U, __A, 1); 
 }