re PR target/78451 (FAIL: gcc.target/i386/sse-22a.c: error: inlining failed in call...
author     Jakub Jelinek <jakub@redhat.com>    Tue, 22 Nov 2016 16:53:35 +0000 (17:53 +0100)
committer  Jakub Jelinek <jakub@gcc.gnu.org>   Tue, 22 Nov 2016 16:53:35 +0000 (17:53 +0100)
PR target/78451
* config/i386/avx512vlintrin.h (_mm_setzero_di): Removed.
(_mm_maskz_mov_epi64): Use _mm_setzero_si128 instead of
_mm_setzero_di.
(_mm_maskz_load_epi64): Likewise.
(_mm_setzero_hi): Removed.
(_mm_maskz_loadu_epi64): Use _mm_setzero_si128 instead of
_mm_setzero_di.
(_mm_abs_epi64, _mm_maskz_abs_epi64, _mm_maskz_srl_epi64,
_mm_maskz_unpackhi_epi64, _mm_maskz_unpacklo_epi64,
_mm_maskz_compress_epi64, _mm_srav_epi64, _mm_maskz_srav_epi64,
_mm_maskz_sllv_epi64, _mm_maskz_srlv_epi64, _mm_rolv_epi64,
_mm_maskz_rolv_epi64, _mm_rorv_epi64, _mm_maskz_rorv_epi64,
_mm_min_epi64, _mm_max_epi64, _mm_max_epu64, _mm_min_epu64,
_mm_lzcnt_epi64, _mm_maskz_lzcnt_epi64, _mm_conflict_epi64,
_mm_maskz_conflict_epi64, _mm_sra_epi64, _mm_maskz_sra_epi64,
_mm_maskz_sll_epi64, _mm_rol_epi64, _mm_maskz_rol_epi64,
_mm_ror_epi64, _mm_maskz_ror_epi64, _mm_alignr_epi64,
_mm_maskz_alignr_epi64, _mm_srai_epi64, _mm_maskz_slli_epi64):
Likewise.
(_mm_cvtepi32_epi8, _mm256_cvtepi32_epi8, _mm_cvtsepi32_epi8,
_mm256_cvtsepi32_epi8, _mm_cvtusepi32_epi8, _mm256_cvtusepi32_epi8,
_mm_cvtepi32_epi16, _mm256_cvtepi32_epi16, _mm_cvtsepi32_epi16,
_mm256_cvtsepi32_epi16, _mm_cvtusepi32_epi16, _mm256_cvtusepi32_epi16,
_mm_cvtepi64_epi8, _mm256_cvtepi64_epi8, _mm_cvtsepi64_epi8,
_mm256_cvtsepi64_epi8, _mm_cvtusepi64_epi8, _mm256_cvtusepi64_epi8,
_mm_cvtepi64_epi16, _mm256_cvtepi64_epi16, _mm_cvtsepi64_epi16,
_mm256_cvtsepi64_epi16, _mm_cvtusepi64_epi16, _mm256_cvtusepi64_epi16,
_mm_cvtepi64_epi32, _mm256_cvtepi64_epi32, _mm_cvtsepi64_epi32,
_mm256_cvtsepi64_epi32, _mm_cvtusepi64_epi32, _mm256_cvtusepi64_epi32,
_mm_maskz_set1_epi32, _mm_maskz_set1_epi64): Formatting fixes.
(_mm_maskz_cvtps_ph, _mm256_maskz_cvtps_ph): Use _mm_setzero_si128
instead of _mm_setzero_hi.
(_mm256_permutex_pd, _mm256_maskz_permutex_epi64, _mm256_insertf32x4,
_mm256_maskz_insertf32x4, _mm256_inserti32x4, _mm256_maskz_inserti32x4,
_mm256_extractf32x4_ps, _mm256_maskz_extractf32x4_ps,
_mm256_shuffle_i32x4, _mm256_maskz_shuffle_i32x4, _mm256_shuffle_f64x2,
_mm256_maskz_shuffle_f64x2, _mm256_shuffle_f32x4,
_mm256_maskz_shuffle_f32x4, _mm256_maskz_shuffle_pd,
_mm_maskz_shuffle_pd, _mm256_maskz_shuffle_ps, _mm_maskz_shuffle_ps,
_mm256_maskz_srli_epi32, _mm_maskz_srli_epi32, _mm_maskz_srli_epi64,
_mm256_mask_slli_epi32, _mm256_maskz_slli_epi32, _mm256_mask_slli_epi64,
_mm256_maskz_slli_epi64, _mm256_roundscale_ps,
_mm256_maskz_roundscale_ps, _mm256_roundscale_pd,
_mm256_maskz_roundscale_pd, _mm_roundscale_ps, _mm_maskz_roundscale_ps,
_mm_roundscale_pd, _mm_maskz_roundscale_pd, _mm256_getmant_ps,
_mm256_maskz_getmant_ps, _mm_getmant_ps, _mm_maskz_getmant_ps,
_mm256_getmant_pd, _mm256_maskz_getmant_pd, _mm_getmant_pd,
_mm_maskz_getmant_pd, _mm256_maskz_shuffle_epi32,
_mm_maskz_shuffle_epi32, _mm256_rol_epi32, _mm256_maskz_rol_epi32,
_mm_rol_epi32, _mm_maskz_rol_epi32, _mm256_ror_epi32,
_mm256_maskz_ror_epi32, _mm_ror_epi32, _mm_maskz_ror_epi32,
_mm_maskz_alignr_epi32, _mm_maskz_alignr_epi64,
_mm256_maskz_srai_epi32, _mm_maskz_srai_epi32, _mm_srai_epi64,
_mm_maskz_srai_epi64, _mm256_maskz_permutex_pd,
_mm256_maskz_permute_pd, _mm256_maskz_permute_ps, _mm_maskz_permute_pd,
_mm_maskz_permute_ps, _mm256_permutexvar_ps): Formatting fixes.
(_mm_maskz_slli_epi64, _mm_rol_epi64, _mm_maskz_rol_epi64,
_mm_ror_epi64, _mm_maskz_ror_epi64): Use _mm_setzero_si128 instead of
_mm_setzero_di.
(_mm_maskz_cvtps_ph, _mm256_maskz_cvtps_ph): Use _mm_setzero_si128
instead of _mm_setzero_hi.
* config/i386/avx512dqintrin.h (_mm512_broadcast_f64x2,
_mm512_broadcast_i64x2, _mm512_broadcast_f32x2, _mm512_broadcast_i32x2,
_mm512_broadcast_f32x8, _mm512_broadcast_i32x8): Formatting fixes.
(_mm512_extracti64x2_epi64, _mm512_maskz_extracti64x2_epi64): Use
_mm_setzero_si128 instead of _mm_setzero_di.
(_mm512_cvtt_roundpd_epi64, _mm512_mask_cvtt_roundpd_epi64,
_mm512_maskz_cvtt_roundpd_epi64, _mm512_cvtt_roundpd_epu64,
_mm512_mask_cvtt_roundpd_epu64, _mm512_maskz_cvtt_roundpd_epu64,
_mm512_cvtt_roundps_epi64, _mm512_mask_cvtt_roundps_epi64,
_mm512_maskz_cvtt_roundps_epi64, _mm512_cvtt_roundps_epu64,
_mm512_mask_cvtt_roundps_epu64, _mm512_maskz_cvtt_roundps_epu64,
_mm512_cvt_roundpd_epi64, _mm512_mask_cvt_roundpd_epi64,
_mm512_maskz_cvt_roundpd_epi64, _mm512_cvt_roundpd_epu64,
_mm512_mask_cvt_roundpd_epu64, _mm512_maskz_cvt_roundpd_epu64,
_mm512_cvt_roundps_epi64, _mm512_mask_cvt_roundps_epi64,
_mm512_maskz_cvt_roundps_epi64, _mm512_cvt_roundps_epu64,
_mm512_mask_cvt_roundps_epu64, _mm512_maskz_cvt_roundps_epu64,
_mm512_cvt_roundepi64_ps, _mm512_mask_cvt_roundepi64_ps,
_mm512_maskz_cvt_roundepi64_ps, _mm512_cvt_roundepu64_ps,
_mm512_mask_cvt_roundepu64_ps, _mm512_maskz_cvt_roundepu64_ps,
_mm512_cvt_roundepi64_pd, _mm512_mask_cvt_roundepi64_pd,
_mm512_maskz_cvt_roundepi64_pd, _mm512_cvt_roundepu64_pd,
_mm512_mask_cvt_roundepu64_pd, _mm512_maskz_cvt_roundepu64_pd,
_mm512_reduce_pd, _mm512_maskz_reduce_pd, _mm512_reduce_ps,
_mm512_maskz_reduce_ps, _mm512_extractf32x8_ps,
_mm512_maskz_extractf32x8_ps, _mm512_extractf64x2_pd,
_mm512_maskz_extractf64x2_pd, _mm512_extracti32x8_epi32,
_mm512_maskz_extracti32x8_epi32, _mm512_range_pd,
_mm512_maskz_range_pd, _mm512_range_ps, _mm512_maskz_range_ps,
_mm512_range_round_pd, _mm512_maskz_range_round_pd,
_mm512_range_round_ps, _mm512_maskz_range_round_ps,
_mm512_maskz_insertf64x2, _mm512_insertf32x8,
_mm512_maskz_insertf32x8): Formatting fixes.
(_mm512_extracti64x2_epi64, _mm512_maskz_extracti64x2_epi64): Use
_mm_setzero_si128 instead of _mm_setzero_di.
* config/i386/avx512vldqintrin.h (_mm_cvttpd_epi64,
_mm_cvttpd_epu64, _mm_cvtpd_epi64, _mm_cvtpd_epu64,
_mm_cvttps_epi64, _mm_maskz_cvttps_epi64, _mm_cvttps_epu64,
_mm_maskz_cvttps_epu64, _mm_maskz_mullo_epi64, _mm_cvtps_epi64,
_mm_maskz_cvtps_epi64, _mm_cvtps_epu64, _mm_maskz_cvtps_epu64,
_mm256_extracti64x2_epi64, _mm256_maskz_extracti64x2_epi64): Use
_mm_setzero_si128 instead of _mm_setzero_di.
(_mm256_extracti64x2_epi64, _mm256_maskz_extracti64x2_epi64):
Likewise in macros.
* config/i386/avx512vlbwintrin.h (_mm_maskz_mov_epi8,
_mm_maskz_loadu_epi16, _mm_maskz_mov_epi16, _mm_maskz_loadu_epi8,
_mm_permutexvar_epi16, _mm_maskz_maddubs_epi16): Use
_mm_setzero_si128 instead of _mm_setzero_hi.
(_mm_maskz_min_epu16, _mm_maskz_max_epu8, _mm_maskz_max_epi8,
_mm_maskz_min_epu8, _mm_maskz_min_epi8, _mm_maskz_max_epi16,
_mm_maskz_max_epu16, _mm_maskz_min_epi16): Use _mm_setzero_si128
instead of _mm_setzero_di.
(_mm_dbsad_epu8, _mm_maskz_shufflehi_epi16,
_mm_maskz_shufflelo_epi16): Use _mm_setzero_si128 instead of
_mm_setzero_hi.
(_mm_maskz_shufflehi_epi16, _mm_maskz_shufflelo_epi16,
_mm_maskz_slli_epi16): Use _mm_setzero_si128 instead of
_mm_setzero_hi.
(_mm_maskz_alignr_epi8): Use _mm_setzero_si128 instead of
_mm_setzero_di.
(_mm_maskz_mulhi_epi16, _mm_maskz_mulhi_epu16, _mm_maskz_mulhrs_epi16,
_mm_maskz_mullo_epi16, _mm_srav_epi16, _mm_srlv_epi16,
_mm_sllv_epi16): Use _mm_setzero_si128 instead of _mm_setzero_hi.

From-SVN: r242707
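
Why the change fixes PR target/78451: _mm_setzero_di and _mm_setzero_hi were
internal always_inline helpers declared in avx512vlintrin.h under its
AVX512VL target pragma, but they were also called from headers such as
avx512dqintrin.h whose functions enable only AVX512DQ, and an always_inline
callee cannot be inlined into a caller that lacks part of the callee's
target ISA.  _mm_setzero_si128 (emmintrin.h) needs only SSE2, a subset of
every such caller's ISA, so it inlines everywhere.  A minimal sketch of the
failure mode, with a hypothetical my_setzero_di standing in for the removed
helper (not the actual gcc.target/i386/sse-22a.c testcase):

	#include <immintrin.h>

	#pragma GCC push_options
	#pragma GCC target ("avx512vl")
	/* Stand-in for the removed _mm_setzero_di: an always_inline
	   zero helper that itself requires AVX512VL.  */
	extern __inline __m128i
	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
	my_setzero_di (void)
	{
	  return __extension__ (__m128i) (__v2di) { 0LL, 0LL };
	}
	#pragma GCC pop_options

	#pragma GCC push_options
	#pragma GCC target ("avx512dq")	/* AVX512DQ does not imply AVX512VL */
	__m128i
	use (void)
	{
	  /* error: inlining failed in call to always_inline
	     'my_setzero_di': target specific option mismatch  */
	  return my_setzero_di ();
	  /* return _mm_setzero_si128 ();	-- OK, needs only SSE2  */
	}
	#pragma GCC pop_options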

gcc/ChangeLog
gcc/config/i386/avx512dqintrin.h
gcc/config/i386/avx512vlbwintrin.h
gcc/config/i386/avx512vldqintrin.h
gcc/config/i386/avx512vlintrin.h

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index a81c423..179a26d 100644
@@ -1,3 +1,131 @@
+2016-11-22  Jakub Jelinek  <jakub@redhat.com>
+
+       PR target/78451
+       * config/i386/avx512vlintrin.h (_mm_setzero_di): Removed.
+       (_mm_maskz_mov_epi64): Use _mm_setzero_si128 instead of
+       _mm_setzero_di.
+       (_mm_maskz_load_epi64): Likewise.
+       (_mm_setzero_hi): Removed.
+       (_mm_maskz_loadu_epi64): Use _mm_setzero_si128 instead of
+       _mm_setzero_di.
+       (_mm_abs_epi64, _mm_maskz_abs_epi64, _mm_maskz_srl_epi64,
+       _mm_maskz_unpackhi_epi64, _mm_maskz_unpacklo_epi64,
+       _mm_maskz_compress_epi64, _mm_srav_epi64, _mm_maskz_srav_epi64,
+       _mm_maskz_sllv_epi64, _mm_maskz_srlv_epi64, _mm_rolv_epi64,
+       _mm_maskz_rolv_epi64, _mm_rorv_epi64, _mm_maskz_rorv_epi64,
+       _mm_min_epi64, _mm_max_epi64, _mm_max_epu64, _mm_min_epu64,
+       _mm_lzcnt_epi64, _mm_maskz_lzcnt_epi64, _mm_conflict_epi64,
+       _mm_maskz_conflict_epi64, _mm_sra_epi64, _mm_maskz_sra_epi64,
+       _mm_maskz_sll_epi64, _mm_rol_epi64, _mm_maskz_rol_epi64,
+       _mm_ror_epi64, _mm_maskz_ror_epi64, _mm_alignr_epi64,
+       _mm_maskz_alignr_epi64, _mm_srai_epi64, _mm_maskz_slli_epi64):
+       Likewise.
+       (_mm_cvtepi32_epi8, _mm256_cvtepi32_epi8, _mm_cvtsepi32_epi8,
+       _mm256_cvtsepi32_epi8, _mm_cvtusepi32_epi8, _mm256_cvtusepi32_epi8,
+       _mm_cvtepi32_epi16, _mm256_cvtepi32_epi16, _mm_cvtsepi32_epi16,
+       _mm256_cvtsepi32_epi16, _mm_cvtusepi32_epi16, _mm256_cvtusepi32_epi16,
+       _mm_cvtepi64_epi8, _mm256_cvtepi64_epi8, _mm_cvtsepi64_epi8,
+       _mm256_cvtsepi64_epi8, _mm_cvtusepi64_epi8, _mm256_cvtusepi64_epi8,
+       _mm_cvtepi64_epi16, _mm256_cvtepi64_epi16, _mm_cvtsepi64_epi16,
+       _mm256_cvtsepi64_epi16, _mm_cvtusepi64_epi16, _mm256_cvtusepi64_epi16,
+       _mm_cvtepi64_epi32, _mm256_cvtepi64_epi32, _mm_cvtsepi64_epi32,
+       _mm256_cvtsepi64_epi32, _mm_cvtusepi64_epi32, _mm256_cvtusepi64_epi32,
+       _mm_maskz_set1_epi32, _mm_maskz_set1_epi64): Formatting fixes.
+       (_mm_maskz_cvtps_ph, _mm256_maskz_cvtps_ph): Use _mm_setzero_si128
+       instead of _mm_setzero_hi.
+       (_mm256_permutex_pd, _mm256_maskz_permutex_epi64, _mm256_insertf32x4,
+       _mm256_maskz_insertf32x4, _mm256_inserti32x4, _mm256_maskz_inserti32x4,
+       _mm256_extractf32x4_ps, _mm256_maskz_extractf32x4_ps,
+       _mm256_shuffle_i32x4, _mm256_maskz_shuffle_i32x4, _mm256_shuffle_f64x2,
+       _mm256_maskz_shuffle_f64x2, _mm256_shuffle_f32x4,
+       _mm256_maskz_shuffle_f32x4, _mm256_maskz_shuffle_pd,
+       _mm_maskz_shuffle_pd, _mm256_maskz_shuffle_ps, _mm_maskz_shuffle_ps,
+       _mm256_maskz_srli_epi32, _mm_maskz_srli_epi32, _mm_maskz_srli_epi64,
+       _mm256_mask_slli_epi32, _mm256_maskz_slli_epi32, _mm256_mask_slli_epi64,
+       _mm256_maskz_slli_epi64, _mm256_roundscale_ps,
+       _mm256_maskz_roundscale_ps, _mm256_roundscale_pd,
+       _mm256_maskz_roundscale_pd, _mm_roundscale_ps, _mm_maskz_roundscale_ps,
+       _mm_roundscale_pd, _mm_maskz_roundscale_pd, _mm256_getmant_ps,
+       _mm256_maskz_getmant_ps, _mm_getmant_ps, _mm_maskz_getmant_ps,
+       _mm256_getmant_pd, _mm256_maskz_getmant_pd, _mm_getmant_pd,
+       _mm_maskz_getmant_pd, _mm256_maskz_shuffle_epi32,
+       _mm_maskz_shuffle_epi32, _mm256_rol_epi32, _mm256_maskz_rol_epi32,
+       _mm_rol_epi32, _mm_maskz_rol_epi32, _mm256_ror_epi32,
+       _mm256_maskz_ror_epi32, _mm_ror_epi32, _mm_maskz_ror_epi32,
+       _mm_maskz_alignr_epi32, _mm_maskz_alignr_epi64,
+       _mm256_maskz_srai_epi32, _mm_maskz_srai_epi32, _mm_srai_epi64,
+       _mm_maskz_srai_epi64, _mm256_maskz_permutex_pd,
+       _mm256_maskz_permute_pd, _mm256_maskz_permute_ps, _mm_maskz_permute_pd,
+       _mm_maskz_permute_ps, _mm256_permutexvar_ps): Formatting fixes.
+       (_mm_maskz_slli_epi64, _mm_rol_epi64, _mm_maskz_rol_epi64,
+       _mm_ror_epi64, _mm_maskz_ror_epi64): Use _mm_setzero_si128 instead of
+       _mm_setzero_di.
+       (_mm_maskz_cvtps_ph, _mm256_maskz_cvtps_ph): Use _mm_setzero_si128
+       instead of _mm_setzero_hi.
+       * config/i386/avx512dqintrin.h (_mm512_broadcast_f64x2,
+       _mm512_broadcast_i64x2, _mm512_broadcast_f32x2, _mm512_broadcast_i32x2,
+       _mm512_broadcast_f32x8, _mm512_broadcast_i32x8): Formatting fixes.
+       (_mm512_extracti64x2_epi64, _mm512_maskz_extracti64x2_epi64): Use
+       _mm_setzero_si128 instead of _mm_setzero_di.
+       (_mm512_cvtt_roundpd_epi64, _mm512_mask_cvtt_roundpd_epi64,
+       _mm512_maskz_cvtt_roundpd_epi64, _mm512_cvtt_roundpd_epu64,
+       _mm512_mask_cvtt_roundpd_epu64, _mm512_maskz_cvtt_roundpd_epu64,
+       _mm512_cvtt_roundps_epi64, _mm512_mask_cvtt_roundps_epi64,
+       _mm512_maskz_cvtt_roundps_epi64, _mm512_cvtt_roundps_epu64,
+       _mm512_mask_cvtt_roundps_epu64, _mm512_maskz_cvtt_roundps_epu64,
+       _mm512_cvt_roundpd_epi64, _mm512_mask_cvt_roundpd_epi64,
+       _mm512_maskz_cvt_roundpd_epi64, _mm512_cvt_roundpd_epu64,
+       _mm512_mask_cvt_roundpd_epu64, _mm512_maskz_cvt_roundpd_epu64,
+       _mm512_cvt_roundps_epi64, _mm512_mask_cvt_roundps_epi64,
+       _mm512_maskz_cvt_roundps_epi64, _mm512_cvt_roundps_epu64,
+       _mm512_mask_cvt_roundps_epu64, _mm512_maskz_cvt_roundps_epu64,
+       _mm512_cvt_roundepi64_ps, _mm512_mask_cvt_roundepi64_ps,
+       _mm512_maskz_cvt_roundepi64_ps, _mm512_cvt_roundepu64_ps,
+       _mm512_mask_cvt_roundepu64_ps, _mm512_maskz_cvt_roundepu64_ps,
+       _mm512_cvt_roundepi64_pd, _mm512_mask_cvt_roundepi64_pd,
+       _mm512_maskz_cvt_roundepi64_pd, _mm512_cvt_roundepu64_pd,
+       _mm512_mask_cvt_roundepu64_pd, _mm512_maskz_cvt_roundepu64_pd,
+       _mm512_reduce_pd, _mm512_maskz_reduce_pd, _mm512_reduce_ps,
+       _mm512_maskz_reduce_ps, _mm512_extractf32x8_ps,
+       _mm512_maskz_extractf32x8_ps, _mm512_extractf64x2_pd,
+       _mm512_maskz_extractf64x2_pd, _mm512_extracti32x8_epi32,
+       _mm512_maskz_extracti32x8_epi32, _mm512_range_pd,
+       _mm512_maskz_range_pd, _mm512_range_ps, _mm512_maskz_range_ps,
+       _mm512_range_round_pd, _mm512_maskz_range_round_pd,
+       _mm512_range_round_ps, _mm512_maskz_range_round_ps,
+       _mm512_maskz_insertf64x2, _mm512_insertf32x8,
+       _mm512_maskz_insertf32x8): Formatting fixes.
+       (_mm512_extracti64x2_epi64, _mm512_maskz_extracti64x2_epi64): Use
+       _mm_setzero_si128 instead of _mm_setzero_di.
+       * config/i386/avx512vldqintrin.h (_mm_cvttpd_epi64,
+       _mm_cvttpd_epu64, _mm_cvtpd_epi64, _mm_cvtpd_epu64,
+       _mm_cvttps_epi64, _mm_maskz_cvttps_epi64, _mm_cvttps_epu64,
+       _mm_maskz_cvttps_epu64, _mm_maskz_mullo_epi64, _mm_cvtps_epi64,
+       _mm_maskz_cvtps_epi64, _mm_cvtps_epu64, _mm_maskz_cvtps_epu64,
+       _mm256_extracti64x2_epi64, _mm256_maskz_extracti64x2_epi64): Use
+       _mm_setzero_si128 instead of _mm_setzero_di.
+       (_mm256_extracti64x2_epi64, _mm256_maskz_extracti64x2_epi64):
+       Likewise in macros.
+       * config/i386/avx512vlbwintrin.h (_mm_maskz_mov_epi8,
+       _mm_maskz_loadu_epi16, _mm_maskz_mov_epi16, _mm_maskz_loadu_epi8,
+       _mm_permutexvar_epi16, _mm_maskz_maddubs_epi16): Use
+       _mm_setzero_si128 instead of _mm_setzero_hi.
+       (_mm_maskz_min_epu16, _mm_maskz_max_epu8, _mm_maskz_max_epi8,
+       _mm_maskz_min_epu8, _mm_maskz_min_epi8, _mm_maskz_max_epi16,
+       _mm_maskz_max_epu16, _mm_maskz_min_epi16): Use _mm_setzero_si128
+       instead of _mm_setzero_di.
+       (_mm_dbsad_epu8, _mm_maskz_shufflehi_epi16,
+       _mm_maskz_shufflelo_epi16): Use _mm_setzero_si128 instead of
+       _mm_setzero_hi.
+       (_mm_maskz_shufflehi_epi16, _mm_maskz_shufflelo_epi16,
+       _mm_maskz_slli_epi16): Use _mm_setzero_si128 instead of
+       _mm_setzero_hi.
+       (_mm_maskz_alignr_epi8): Use _mm_setzero_si128 instead of
+       _mm_setzero_di.
+       (_mm_maskz_mulhi_epi16, _mm_maskz_mulhi_epu16, _mm_maskz_mulhrs_epi16,
+       _mm_maskz_mullo_epi16, _mm_srav_epi16, _mm_srlv_epi16,
+       _mm_sllv_epi16): Use _mm_setzero_si128 instead of _mm_setzero_hi.
+
 2016-11-22  Carl Love  <cel@us.ibm.com>
 
        * config/rs6000/rs6000-c.c: Add built-in support for vector compare
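
Most of the "Formatting fixes" hunks below change no behavior: they add the
GNU-style space before each argument list and re-wrap overlong
__builtin_ia32_* calls to the usual line width.  A one-function illustration
of the convention (hypothetical example; compile with -mavx512f):

	#include <immintrin.h>

	__m512d
	gnu_style_zero (void)
	{
	  return _mm512_setzero_pd ();	/* GNU style: space before "()" */
	}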
diff --git a/gcc/config/i386/avx512dqintrin.h b/gcc/config/i386/avx512dqintrin.h
index 1dbb6b0..4b954f9 100644
@@ -38,10 +38,10 @@ extern __inline __m512d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_broadcast_f64x2 (__m128d __A)
 {
-  return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df)
-                                                          __A,
-                                                          _mm512_undefined_pd(),
-                                                          (__mmask8) -1);
+  return (__m512d)
+        __builtin_ia32_broadcastf64x2_512_mask ((__v2df) __A,
+                                                _mm512_undefined_pd (),
+                                                (__mmask8) -1);
 }
 
 extern __inline __m512d
@@ -69,10 +69,10 @@ extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_broadcast_i64x2 (__m128i __A)
 {
-  return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di)
-                                                          __A,
-                                                          _mm512_undefined_epi32(),
-                                                          (__mmask8) -1);
+  return (__m512i)
+        __builtin_ia32_broadcasti64x2_512_mask ((__v2di) __A,
+                                                _mm512_undefined_epi32 (),
+                                                (__mmask8) -1);
 }
 
 extern __inline __m512i
@@ -100,9 +100,10 @@ extern __inline __m512
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_broadcast_f32x2 (__m128 __A)
 {
-  return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
-                                                         (__v16sf)_mm512_undefined_ps(),
-                                                         (__mmask16) -1);
+  return (__m512)
+        __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
+                                                (__v16sf)_mm512_undefined_ps (),
+                                                (__mmask16) -1);
 }
 
 extern __inline __m512
@@ -128,10 +129,11 @@ extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_broadcast_i32x2 (__m128i __A)
 {
-  return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si)
-                                                          __A,
-                                                          (__v16si)_mm512_undefined_epi32(),
-                                                          (__mmask16) -1);
+  return (__m512i)
+        __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A,
+                                                (__v16si)
+                                                _mm512_undefined_epi32 (),
+                                                (__mmask16) -1);
 }
 
 extern __inline __m512i
@@ -159,9 +161,10 @@ extern __inline __m512
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_broadcast_f32x8 (__m256 __A)
 {
-  return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
-                                                         _mm512_undefined_ps(),
-                                                         (__mmask16) -1);
+  return (__m512)
+        __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
+                                                _mm512_undefined_ps (),
+                                                (__mmask16) -1);
 }
 
 extern __inline __m512
@@ -187,10 +190,11 @@ extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_broadcast_i32x8 (__m256i __A)
 {
-  return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si)
-                                                          __A,
-                                                          (__v16si)_mm512_undefined_epi32(),
-                                                          (__mmask16) -1);
+  return (__m512i)
+        __builtin_ia32_broadcasti32x8_512_mask ((__v8si) __A,
+                                                (__v16si)
+                                                _mm512_undefined_epi32 (),
+                                                (__mmask16) -1);
 }
 
 extern __inline __m512i
@@ -1632,7 +1636,7 @@ _mm512_extracti64x2_epi64 (__m512i __A, const int __imm)
   return (__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di) __A,
                                                         __imm,
                                                         (__v2di)
-                                                        _mm_setzero_di (),
+                                                        _mm_setzero_si128 (),
                                                         (__mmask8) -1);
 }
 
@@ -1656,7 +1660,7 @@ _mm512_maskz_extracti64x2_epi64 (__mmask8 __U, __m512i __A,
   return (__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di) __A,
                                                         __imm,
                                                         (__v2di)
-                                                        _mm_setzero_di (),
+                                                        _mm_setzero_si128 (),
                                                         (__mmask8)
                                                         __U);
 }
@@ -1946,116 +1950,118 @@ _mm512_fpclass_ps_mask (__m512 __A, const int __imm)
     (__v4sf)(__m128)(B), (int)(C), (R)))
 
 #define _mm512_cvtt_roundpd_epi64(A, B)                    \
-    ((__m512i)__builtin_ia32_cvttpd2qq512_mask((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
+  ((__m512i)__builtin_ia32_cvttpd2qq512_mask ((A), (__v8di)            \
+                                             _mm512_setzero_si512 (),  \
+                                             -1, (B)))
 
 #define _mm512_mask_cvtt_roundpd_epi64(W, U, A, B)  \
-    ((__m512i)__builtin_ia32_cvttpd2qq512_mask((A), (__v8di)(W), (U), (B)))
+    ((__m512i)__builtin_ia32_cvttpd2qq512_mask ((A), (__v8di)(W), (U), (B)))
 
 #define _mm512_maskz_cvtt_roundpd_epi64(U, A, B)    \
-    ((__m512i)__builtin_ia32_cvttpd2qq512_mask((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
+    ((__m512i)__builtin_ia32_cvttpd2qq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
 
 #define _mm512_cvtt_roundpd_epu64(A, B)                    \
-    ((__m512i)__builtin_ia32_cvttpd2uqq512_mask((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
+    ((__m512i)__builtin_ia32_cvttpd2uqq512_mask ((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
 
 #define _mm512_mask_cvtt_roundpd_epu64(W, U, A, B)  \
-    ((__m512i)__builtin_ia32_cvttpd2uqq512_mask((A), (__v8di)(W), (U), (B)))
+    ((__m512i)__builtin_ia32_cvttpd2uqq512_mask ((A), (__v8di)(W), (U), (B)))
 
 #define _mm512_maskz_cvtt_roundpd_epu64(U, A, B)    \
-    ((__m512i)__builtin_ia32_cvttpd2uqq512_mask((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
+    ((__m512i)__builtin_ia32_cvttpd2uqq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
 
 #define _mm512_cvtt_roundps_epi64(A, B)                    \
-    ((__m512i)__builtin_ia32_cvttps2qq512_mask((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
+    ((__m512i)__builtin_ia32_cvttps2qq512_mask ((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
 
 #define _mm512_mask_cvtt_roundps_epi64(W, U, A, B)  \
-    ((__m512i)__builtin_ia32_cvttps2qq512_mask((A), (__v8di)(W), (U), (B)))
+    ((__m512i)__builtin_ia32_cvttps2qq512_mask ((A), (__v8di)(W), (U), (B)))
 
 #define _mm512_maskz_cvtt_roundps_epi64(U, A, B)    \
-    ((__m512i)__builtin_ia32_cvttps2qq512_mask((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
+    ((__m512i)__builtin_ia32_cvttps2qq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
 
 #define _mm512_cvtt_roundps_epu64(A, B)                    \
-    ((__m512i)__builtin_ia32_cvttps2uqq512_mask((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
+    ((__m512i)__builtin_ia32_cvttps2uqq512_mask ((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
 
 #define _mm512_mask_cvtt_roundps_epu64(W, U, A, B)  \
-    ((__m512i)__builtin_ia32_cvttps2uqq512_mask((A), (__v8di)(W), (U), (B)))
+    ((__m512i)__builtin_ia32_cvttps2uqq512_mask ((A), (__v8di)(W), (U), (B)))
 
 #define _mm512_maskz_cvtt_roundps_epu64(U, A, B)    \
-    ((__m512i)__builtin_ia32_cvttps2uqq512_mask((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
+    ((__m512i)__builtin_ia32_cvttps2uqq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
 
 #define _mm512_cvt_roundpd_epi64(A, B)             \
-    ((__m512i)__builtin_ia32_cvtpd2qq512_mask((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
+    ((__m512i)__builtin_ia32_cvtpd2qq512_mask ((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
 
 #define _mm512_mask_cvt_roundpd_epi64(W, U, A, B)   \
-    ((__m512i)__builtin_ia32_cvtpd2qq512_mask((A), (__v8di)(W), (U), (B)))
+    ((__m512i)__builtin_ia32_cvtpd2qq512_mask ((A), (__v8di)(W), (U), (B)))
 
 #define _mm512_maskz_cvt_roundpd_epi64(U, A, B)     \
-    ((__m512i)__builtin_ia32_cvtpd2qq512_mask((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
+    ((__m512i)__builtin_ia32_cvtpd2qq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
 
 #define _mm512_cvt_roundpd_epu64(A, B)             \
-    ((__m512i)__builtin_ia32_cvtpd2uqq512_mask((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
+    ((__m512i)__builtin_ia32_cvtpd2uqq512_mask ((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
 
 #define _mm512_mask_cvt_roundpd_epu64(W, U, A, B)   \
-    ((__m512i)__builtin_ia32_cvtpd2uqq512_mask((A), (__v8di)(W), (U), (B)))
+    ((__m512i)__builtin_ia32_cvtpd2uqq512_mask ((A), (__v8di)(W), (U), (B)))
 
 #define _mm512_maskz_cvt_roundpd_epu64(U, A, B)     \
-    ((__m512i)__builtin_ia32_cvtpd2uqq512_mask((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
+    ((__m512i)__builtin_ia32_cvtpd2uqq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
 
 #define _mm512_cvt_roundps_epi64(A, B)             \
-    ((__m512i)__builtin_ia32_cvtps2qq512_mask((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
+    ((__m512i)__builtin_ia32_cvtps2qq512_mask ((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
 
 #define _mm512_mask_cvt_roundps_epi64(W, U, A, B)   \
-    ((__m512i)__builtin_ia32_cvtps2qq512_mask((A), (__v8di)(W), (U), (B)))
+    ((__m512i)__builtin_ia32_cvtps2qq512_mask ((A), (__v8di)(W), (U), (B)))
 
 #define _mm512_maskz_cvt_roundps_epi64(U, A, B)     \
-    ((__m512i)__builtin_ia32_cvtps2qq512_mask((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
+    ((__m512i)__builtin_ia32_cvtps2qq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
 
 #define _mm512_cvt_roundps_epu64(A, B)             \
-    ((__m512i)__builtin_ia32_cvtps2uqq512_mask((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
+    ((__m512i)__builtin_ia32_cvtps2uqq512_mask ((A), (__v8di)_mm512_setzero_si512 (), -1, (B)))
 
 #define _mm512_mask_cvt_roundps_epu64(W, U, A, B)   \
-    ((__m512i)__builtin_ia32_cvtps2uqq512_mask((A), (__v8di)(W), (U), (B)))
+    ((__m512i)__builtin_ia32_cvtps2uqq512_mask ((A), (__v8di)(W), (U), (B)))
 
 #define _mm512_maskz_cvt_roundps_epu64(U, A, B)     \
-    ((__m512i)__builtin_ia32_cvtps2uqq512_mask((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
+    ((__m512i)__builtin_ia32_cvtps2uqq512_mask ((A), (__v8di)_mm512_setzero_si512 (), (U), (B)))
 
 #define _mm512_cvt_roundepi64_ps(A, B)             \
-    ((__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(A), (__v8sf)_mm256_setzero_ps(), -1, (B)))
+    ((__m256)__builtin_ia32_cvtqq2ps512_mask ((__v8di)(A), (__v8sf)_mm256_setzero_ps (), -1, (B)))
 
 #define _mm512_mask_cvt_roundepi64_ps(W, U, A, B)   \
-    ((__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(A), (W), (U), (B)))
+    ((__m256)__builtin_ia32_cvtqq2ps512_mask ((__v8di)(A), (W), (U), (B)))
 
 #define _mm512_maskz_cvt_roundepi64_ps(U, A, B)     \
-    ((__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(A), (__v8sf)_mm256_setzero_ps(), (U), (B)))
+    ((__m256)__builtin_ia32_cvtqq2ps512_mask ((__v8di)(A), (__v8sf)_mm256_setzero_ps (), (U), (B)))
 
 #define _mm512_cvt_roundepu64_ps(A, B)             \
-    ((__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(A), (__v8sf)_mm256_setzero_ps(), -1, (B)))
+    ((__m256)__builtin_ia32_cvtuqq2ps512_mask ((__v8di)(A), (__v8sf)_mm256_setzero_ps (), -1, (B)))
 
 #define _mm512_mask_cvt_roundepu64_ps(W, U, A, B)   \
-    ((__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(A), (W), (U), (B)))
+    ((__m256)__builtin_ia32_cvtuqq2ps512_mask ((__v8di)(A), (W), (U), (B)))
 
 #define _mm512_maskz_cvt_roundepu64_ps(U, A, B)     \
-    ((__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(A), (__v8sf)_mm256_setzero_ps(), (U), (B)))
+    ((__m256)__builtin_ia32_cvtuqq2ps512_mask ((__v8di)(A), (__v8sf)_mm256_setzero_ps (), (U), (B)))
 
 #define _mm512_cvt_roundepi64_pd(A, B)             \
-    ((__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(A), (__v8df)_mm512_setzero_pd(), -1, (B)))
+    ((__m512d)__builtin_ia32_cvtqq2pd512_mask ((__v8di)(A), (__v8df)_mm512_setzero_pd (), -1, (B)))
 
 #define _mm512_mask_cvt_roundepi64_pd(W, U, A, B)   \
-    ((__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(A), (W), (U), (B)))
+    ((__m512d)__builtin_ia32_cvtqq2pd512_mask ((__v8di)(A), (W), (U), (B)))
 
 #define _mm512_maskz_cvt_roundepi64_pd(U, A, B)     \
-    ((__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(A), (__v8df)_mm512_setzero_pd(), (U), (B)))
+    ((__m512d)__builtin_ia32_cvtqq2pd512_mask ((__v8di)(A), (__v8df)_mm512_setzero_pd (), (U), (B)))
 
 #define _mm512_cvt_roundepu64_pd(A, B)             \
-    ((__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(A), (__v8df)_mm512_setzero_pd(), -1, (B)))
+    ((__m512d)__builtin_ia32_cvtuqq2pd512_mask ((__v8di)(A), (__v8df)_mm512_setzero_pd (), -1, (B)))
 
 #define _mm512_mask_cvt_roundepu64_pd(W, U, A, B)   \
-    ((__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(A), (W), (U), (B)))
+    ((__m512d)__builtin_ia32_cvtuqq2pd512_mask ((__v8di)(A), (W), (U), (B)))
 
 #define _mm512_maskz_cvt_roundepu64_pd(U, A, B)     \
-    ((__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(A), (__v8df)_mm512_setzero_pd(), (U), (B)))
+    ((__m512d)__builtin_ia32_cvtuqq2pd512_mask ((__v8di)(A), (__v8df)_mm512_setzero_pd (), (U), (B)))
 
 #define _mm512_reduce_pd(A, B)                                         \
   ((__m512d) __builtin_ia32_reducepd512_mask ((__v8df)(__m512d)(A),    \
-    (int)(B), (__v8df)_mm512_setzero_pd(), (__mmask8)-1))
+    (int)(B), (__v8df)_mm512_setzero_pd (), (__mmask8)-1))
 
 #define _mm512_mask_reduce_pd(W, U, A, B)                              \
   ((__m512d) __builtin_ia32_reducepd512_mask ((__v8df)(__m512d)(A),    \
@@ -2063,11 +2069,11 @@ _mm512_fpclass_ps_mask (__m512 __A, const int __imm)
 
 #define _mm512_maskz_reduce_pd(U, A, B)                                        \
   ((__m512d) __builtin_ia32_reducepd512_mask ((__v8df)(__m512d)(A),    \
-    (int)(B), (__v8df)_mm512_setzero_pd(), (__mmask8)(U)))
+    (int)(B), (__v8df)_mm512_setzero_pd (), (__mmask8)(U)))
 
 #define _mm512_reduce_ps(A, B)                                         \
   ((__m512) __builtin_ia32_reduceps512_mask ((__v16sf)(__m512)(A),     \
-    (int)(B), (__v16sf)_mm512_setzero_ps(), (__mmask16)-1))
+    (int)(B), (__v16sf)_mm512_setzero_ps (), (__mmask16)-1))
 
 #define _mm512_mask_reduce_ps(W, U, A, B)                              \
   ((__m512) __builtin_ia32_reduceps512_mask ((__v16sf)(__m512)(A),     \
@@ -2075,11 +2081,11 @@ _mm512_fpclass_ps_mask (__m512 __A, const int __imm)
 
 #define _mm512_maskz_reduce_ps(U, A, B)                                        \
   ((__m512) __builtin_ia32_reduceps512_mask ((__v16sf)(__m512)(A),     \
-    (int)(B), (__v16sf)_mm512_setzero_ps(), (__mmask16)(U)))
+    (int)(B), (__v16sf)_mm512_setzero_ps (), (__mmask16)(U)))
 
 #define _mm512_extractf32x8_ps(X, C)                                    \
   ((__m256) __builtin_ia32_extractf32x8_mask ((__v16sf)(__m512) (X),    \
-    (int) (C), (__v8sf)(__m256) _mm256_setzero_ps(), (__mmask8)-1))
+    (int) (C), (__v8sf)(__m256) _mm256_setzero_ps (), (__mmask8)-1))
 
 #define _mm512_mask_extractf32x8_ps(W, U, X, C)                         \
   ((__m256) __builtin_ia32_extractf32x8_mask ((__v16sf)(__m512) (X),    \
@@ -2087,11 +2093,11 @@ _mm512_fpclass_ps_mask (__m512 __A, const int __imm)
 
 #define _mm512_maskz_extractf32x8_ps(U, X, C)                           \
   ((__m256) __builtin_ia32_extractf32x8_mask ((__v16sf)(__m512) (X),    \
-    (int) (C), (__v8sf)(__m256) _mm256_setzero_ps(), (__mmask8) (U)))
+    (int) (C), (__v8sf)(__m256) _mm256_setzero_ps (), (__mmask8) (U)))
 
 #define _mm512_extractf64x2_pd(X, C)                                    \
   ((__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df)(__m512d) (X),\
-    (int) (C), (__v2df)(__m128d) _mm_setzero_pd(), (__mmask8)-1))
+    (int) (C), (__v2df)(__m128d) _mm_setzero_pd (), (__mmask8)-1))
 
 #define _mm512_mask_extractf64x2_pd(W, U, X, C)                         \
   ((__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df)(__m512d) (X),\
@@ -2099,11 +2105,11 @@ _mm512_fpclass_ps_mask (__m512 __A, const int __imm)
 
 #define _mm512_maskz_extractf64x2_pd(U, X, C)                           \
   ((__m128d) __builtin_ia32_extractf64x2_512_mask ((__v8df)(__m512d) (X),\
-    (int) (C), (__v2df)(__m128d) _mm_setzero_pd(), (__mmask8) (U)))
+    (int) (C), (__v2df)(__m128d) _mm_setzero_pd (), (__mmask8) (U)))
 
 #define _mm512_extracti32x8_epi32(X, C)                                 \
   ((__m256i) __builtin_ia32_extracti32x8_mask ((__v16si)(__m512i) (X),  \
-    (int) (C), (__v8si)(__m256i) _mm256_setzero_si256(), (__mmask8)-1))
+    (int) (C), (__v8si)(__m256i) _mm256_setzero_si256 (), (__mmask8)-1))
 
 #define _mm512_mask_extracti32x8_epi32(W, U, X, C)                      \
   ((__m256i) __builtin_ia32_extracti32x8_mask ((__v16si)(__m512i) (X),  \
@@ -2111,11 +2117,11 @@ _mm512_fpclass_ps_mask (__m512 __A, const int __imm)
 
 #define _mm512_maskz_extracti32x8_epi32(U, X, C)                        \
   ((__m256i) __builtin_ia32_extracti32x8_mask ((__v16si)(__m512i) (X),  \
-    (int) (C), (__v8si)(__m256i) _mm256_setzero_si256(), (__mmask8) (U)))
+    (int) (C), (__v8si)(__m256i) _mm256_setzero_si256 (), (__mmask8) (U)))
 
 #define _mm512_extracti64x2_epi64(X, C)                                 \
   ((__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di)(__m512i) (X),\
-    (int) (C), (__v2di)(__m128i) _mm_setzero_di(), (__mmask8)-1))
+    (int) (C), (__v2di)(__m128i) _mm_setzero_si128 (), (__mmask8)-1))
 
 #define _mm512_mask_extracti64x2_epi64(W, U, X, C)                      \
   ((__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di)(__m512i) (X),\
@@ -2123,12 +2129,12 @@ _mm512_fpclass_ps_mask (__m512 __A, const int __imm)
 
 #define _mm512_maskz_extracti64x2_epi64(U, X, C)                        \
   ((__m128i) __builtin_ia32_extracti64x2_512_mask ((__v8di)(__m512i) (X),\
-    (int) (C), (__v2di)(__m128i) _mm_setzero_di(), (__mmask8) (U)))
+    (int) (C), (__v2di)(__m128i) _mm_setzero_si128 (), (__mmask8) (U)))
 
 #define _mm512_range_pd(A, B, C)                                       \
   ((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A),     \
     (__v8df)(__m512d)(B), (int)(C),                                    \
-    (__v8df)_mm512_setzero_pd(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))
+    (__v8df)_mm512_setzero_pd (), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))
 
 #define _mm512_mask_range_pd(W, U, A, B, C)                            \
   ((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A),     \
@@ -2138,12 +2144,12 @@ _mm512_fpclass_ps_mask (__m512 __A, const int __imm)
 #define _mm512_maskz_range_pd(U, A, B, C)                              \
   ((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A),     \
     (__v8df)(__m512d)(B), (int)(C),                                    \
-    (__v8df)_mm512_setzero_pd(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
+    (__v8df)_mm512_setzero_pd (), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
 
 #define _mm512_range_ps(A, B, C)                                       \
   ((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A),      \
     (__v16sf)(__m512)(B), (int)(C),                                    \
-    (__v16sf)_mm512_setzero_ps(), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION))
+    (__v16sf)_mm512_setzero_ps (), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION))
 
 #define _mm512_mask_range_ps(W, U, A, B, C)                            \
   ((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A),      \
@@ -2153,12 +2159,12 @@ _mm512_fpclass_ps_mask (__m512 __A, const int __imm)
 #define _mm512_maskz_range_ps(U, A, B, C)                              \
   ((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A),      \
     (__v16sf)(__m512)(B), (int)(C),                                    \
-    (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
+    (__v16sf)_mm512_setzero_ps (), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
 
 #define _mm512_range_round_pd(A, B, C, R)                                      \
   ((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A),     \
     (__v8df)(__m512d)(B), (int)(C),                                    \
-    (__v8df)_mm512_setzero_pd(), (__mmask8)-1, (R)))
+    (__v8df)_mm512_setzero_pd (), (__mmask8)-1, (R)))
 
 #define _mm512_mask_range_round_pd(W, U, A, B, C, R)                           \
   ((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A),     \
@@ -2168,12 +2174,12 @@ _mm512_fpclass_ps_mask (__m512 __A, const int __imm)
 #define _mm512_maskz_range_round_pd(U, A, B, C, R)                             \
   ((__m512d) __builtin_ia32_rangepd512_mask ((__v8df)(__m512d)(A),     \
     (__v8df)(__m512d)(B), (int)(C),                                    \
-    (__v8df)_mm512_setzero_pd(), (__mmask8)(U), (R)))
+    (__v8df)_mm512_setzero_pd (), (__mmask8)(U), (R)))
 
 #define _mm512_range_round_ps(A, B, C, R)                                      \
   ((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A),      \
     (__v16sf)(__m512)(B), (int)(C),                                    \
-    (__v16sf)_mm512_setzero_ps(), (__mmask16)-1, (R)))
+    (__v16sf)_mm512_setzero_ps (), (__mmask16)-1, (R)))
 
 #define _mm512_mask_range_round_ps(W, U, A, B, C, R)                           \
   ((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A),      \
@@ -2183,7 +2189,7 @@ _mm512_fpclass_ps_mask (__m512 __A, const int __imm)
 #define _mm512_maskz_range_round_ps(U, A, B, C, R)                             \
   ((__m512) __builtin_ia32_rangeps512_mask ((__v16sf)(__m512)(A),      \
     (__v16sf)(__m512)(B), (int)(C),                                    \
-    (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), (R)))
+    (__v16sf)_mm512_setzero_ps (), (__mmask16)(U), (R)))
 
 #define _mm512_insertf64x2(X, Y, C)                                     \
   ((__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df)(__m512d) (X),\
@@ -2198,7 +2204,7 @@ _mm512_fpclass_ps_mask (__m512 __A, const int __imm)
 #define _mm512_maskz_insertf64x2(U, X, Y, C)                            \
   ((__m512d) __builtin_ia32_insertf64x2_512_mask ((__v8df)(__m512d) (X),\
     (__v2df)(__m128d) (Y), (int) (C),                                   \
-    (__v8df)(__m512d) _mm512_setzero_pd(), (__mmask8) (U)))
+    (__v8df)(__m512d) _mm512_setzero_pd (), (__mmask8) (U)))
 
 #define _mm512_inserti64x2(X, Y, C)                                     \
   ((__m512i) __builtin_ia32_inserti64x2_512_mask ((__v8di)(__m512i) (X),\
@@ -2217,7 +2223,7 @@ _mm512_fpclass_ps_mask (__m512 __A, const int __imm)
 #define _mm512_insertf32x8(X, Y, C)                                     \
   ((__m512) __builtin_ia32_insertf32x8_mask ((__v16sf)(__m512) (X),     \
     (__v8sf)(__m256) (Y), (int) (C),\
-    (__v16sf)(__m512)_mm512_setzero_ps(),\
+    (__v16sf)(__m512)_mm512_setzero_ps (),\
     (__mmask16)-1))
 
 #define _mm512_mask_insertf32x8(W, U, X, Y, C)                          \
@@ -2229,7 +2235,7 @@ _mm512_fpclass_ps_mask (__m512 __A, const int __imm)
 #define _mm512_maskz_insertf32x8(U, X, Y, C)                            \
   ((__m512) __builtin_ia32_insertf32x8_mask ((__v16sf)(__m512) (X),     \
     (__v8sf)(__m256) (Y), (int) (C),\
-    (__v16sf)(__m512)_mm512_setzero_ps(),\
+    (__v16sf)(__m512)_mm512_setzero_ps (),\
     (__mmask16)(U)))
 
 #define _mm512_inserti32x8(X, Y, C)                                     \
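
A usage sketch (assumed example, not part of the patch) for one of the maskz
intrinsics fixed above; the pass-through operand that the patch switches to
_mm_setzero_si128 is what supplies the zeroed lanes.  Compile with -mavx512dq:

	#include <immintrin.h>

	__m128i
	take_lane3_low (__m512i v)
	{
	  /* Extract 128-bit lane 3 (qwords 6-7); mask 0x1 keeps element 0
	     and zeroes element 1 from the zero pass-through vector.  */
	  return _mm512_maskz_extracti64x2_epi64 ((__mmask8) 0x1, v, 3);
	}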
diff --git a/gcc/config/i386/avx512vlbwintrin.h b/gcc/config/i386/avx512vlbwintrin.h
index 02bbed0..eb384d6 100644
@@ -69,7 +69,7 @@ _mm_maskz_mov_epi8 (__mmask16 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_movdquqi128_mask ((__v16qi) __A,
                                                    (__v16qi)
-                                                   _mm_setzero_hi (),
+                                                   _mm_setzero_si128 (),
                                                    (__mmask16) __U);
 }
 
@@ -125,7 +125,7 @@ _mm_maskz_loadu_epi16 (__mmask8 __U, void const *__P)
 {
   return (__m128i) __builtin_ia32_loaddquhi128_mask ((const short *) __P,
                                                     (__v8hi)
-                                                    _mm_setzero_hi (),
+                                                    _mm_setzero_si128 (),
                                                     (__mmask8) __U);
 }
 
@@ -164,7 +164,7 @@ _mm_maskz_mov_epi16 (__mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_movdquhi128_mask ((__v8hi) __A,
                                                    (__v8hi)
-                                                   _mm_setzero_hi (),
+                                                   _mm_setzero_si128 (),
                                                    (__mmask8) __U);
 }
 
@@ -202,7 +202,7 @@ _mm_maskz_loadu_epi8 (__mmask16 __U, void const *__P)
 {
   return (__m128i) __builtin_ia32_loaddquqi128_mask ((const char *) __P,
                                                     (__v16qi)
-                                                    _mm_setzero_hi (),
+                                                    _mm_setzero_si128 (),
                                                     (__mmask16) __U);
 }
 
@@ -541,7 +541,7 @@ _mm_permutexvar_epi16 (__m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_permvarhi128_mask ((__v8hi) __B,
                                                     (__v8hi) __A,
                                                     (__v8hi)
-                                                    _mm_setzero_hi (),
+                                                    _mm_setzero_si128 (),
                                                     (__mmask8) -1);
 }
 
@@ -707,7 +707,7 @@ _mm_maskz_maddubs_epi16 (__mmask8 __U, __m128i __X, __m128i __Y)
   return (__m128i) __builtin_ia32_pmaddubsw128_mask ((__v16qi) __X,
                                                     (__v16qi) __Y,
                                                     (__v8hi)
-                                                    _mm_setzero_hi (),
+                                                    _mm_setzero_si128 (),
                                                     (__mmask8) __U);
 }
 
@@ -908,7 +908,7 @@ _mm_maskz_min_epu16 (__mmask8 __M, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_pminuw128_mask ((__v8hi) __A,
                                                  (__v8hi) __B,
                                                  (__v8hi)
-                                                 _mm_setzero_di (),
+                                                 _mm_setzero_si128 (),
                                                  (__mmask8) __M);
 }
 
@@ -974,7 +974,7 @@ _mm_maskz_max_epu8 (__mmask16 __M, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_pmaxub128_mask ((__v16qi) __A,
                                                  (__v16qi) __B,
                                                  (__v16qi)
-                                                 _mm_setzero_di (),
+                                                 _mm_setzero_si128 (),
                                                  (__mmask16) __M);
 }
 
@@ -1018,7 +1018,7 @@ _mm_maskz_max_epi8 (__mmask16 __M, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_pmaxsb128_mask ((__v16qi) __A,
                                                  (__v16qi) __B,
                                                  (__v16qi)
-                                                 _mm_setzero_di (),
+                                                 _mm_setzero_si128 (),
                                                  (__mmask16) __M);
 }
 
@@ -1062,7 +1062,7 @@ _mm_maskz_min_epu8 (__mmask16 __M, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_pminub128_mask ((__v16qi) __A,
                                                  (__v16qi) __B,
                                                  (__v16qi)
-                                                 _mm_setzero_di (),
+                                                 _mm_setzero_si128 (),
                                                  (__mmask16) __M);
 }
 
@@ -1106,7 +1106,7 @@ _mm_maskz_min_epi8 (__mmask16 __M, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_pminsb128_mask ((__v16qi) __A,
                                                  (__v16qi) __B,
                                                  (__v16qi)
-                                                 _mm_setzero_di (),
+                                                 _mm_setzero_si128 (),
                                                  (__mmask16) __M);
 }
 
@@ -1150,7 +1150,7 @@ _mm_maskz_max_epi16 (__mmask8 __M, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_pmaxsw128_mask ((__v8hi) __A,
                                                  (__v8hi) __B,
                                                  (__v8hi)
-                                                 _mm_setzero_di (),
+                                                 _mm_setzero_si128 (),
                                                  (__mmask8) __M);
 }
 
@@ -1194,7 +1194,7 @@ _mm_maskz_max_epu16 (__mmask8 __M, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_pmaxuw128_mask ((__v8hi) __A,
                                                  (__v8hi) __B,
                                                  (__v8hi)
-                                                 _mm_setzero_di (),
+                                                 _mm_setzero_si128 (),
                                                  (__mmask8) __M);
 }
 
@@ -1216,7 +1216,7 @@ _mm_maskz_min_epi16 (__mmask8 __M, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_pminsw128_mask ((__v8hi) __A,
                                                  (__v8hi) __B,
                                                  (__v8hi)
-                                                 _mm_setzero_di (),
+                                                 _mm_setzero_si128 (),
                                                  (__mmask8) __M);
 }
 
@@ -1327,7 +1327,7 @@ _mm_dbsad_epu8 (__m128i __A, __m128i __B, const int __imm)
                                                    (__v16qi) __B,
                                                    __imm,
                                                    (__v8hi)
-                                                   _mm_setzero_hi (),
+                                                   _mm_setzero_si128 (),
                                                    (__mmask8) -1);
 }
 
@@ -1623,7 +1623,7 @@ _mm_maskz_shufflehi_epi16 (__mmask8 __U, __m128i __A, const int __imm)
 {
   return (__m128i) __builtin_ia32_pshufhw128_mask ((__v8hi) __A, __imm,
                                                   (__v8hi)
-                                                  _mm_setzero_hi (),
+                                                  _mm_setzero_si128 (),
                                                   (__mmask8) __U);
 }
 
@@ -1666,7 +1666,7 @@ _mm_maskz_shufflelo_epi16 (__mmask8 __U, __m128i __A, const int __imm)
 {
   return (__m128i) __builtin_ia32_pshuflw128_mask ((__v8hi) __A, __imm,
                                                   (__v8hi)
-                                                  _mm_setzero_hi (),
+                                                  _mm_setzero_si128 (),
                                                   (__mmask8) __U);
 }
 
@@ -1804,7 +1804,7 @@ _mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, int __B)
 
 #define _mm_maskz_shufflehi_epi16(U, A, B)                                          \
   ((__m128i) __builtin_ia32_pshufhw128_mask ((__v8hi)(__m128i)(A), (int)(B),        \
-                                             (__v8hi)(__m128i)_mm_setzero_hi(),     \
+                                            (__v8hi)(__m128i)_mm_setzero_si128 (), \
                                              (__mmask8)(U)))
 
 #define _mm256_mask_shufflelo_epi16(W, U, A, B)                                     \
@@ -1824,7 +1824,7 @@ _mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, int __B)
 
 #define _mm_maskz_shufflelo_epi16(U, A, B)                                          \
   ((__m128i) __builtin_ia32_pshuflw128_mask ((__v8hi)(__m128i)(A), (int)(B),        \
-                                             (__v8hi)(__m128i)_mm_setzero_hi(),     \
+                                            (__v8hi)(__m128i)_mm_setzero_si128 (), \
                                              (__mmask8)(U)))
 
 #define _mm256_maskz_alignr_epi8(U, X, Y, N)                                       \
@@ -1841,7 +1841,7 @@ _mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, int __B)
 #define _mm_maskz_alignr_epi8(U, X, Y, N)                                          \
   ((__m128i) __builtin_ia32_palignr128_mask ((__v2di)(__m128i)(X),                 \
                                            (__v2di)(__m128i)(Y), (int)(N * 8),     \
-                                           (__v2di)(__m128i)_mm_setzero_di(),      \
+                                           (__v2di)(__m128i)_mm_setzero_si128 (),  \
                                            (__mmask16)(U)))
 
 #define _mm_mask_slli_epi16(W, U, X, C)                                          \
@@ -1851,7 +1851,7 @@ _mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, int __B)
 
 #define _mm_maskz_slli_epi16(U, X, C)                                    \
   ((__m128i)__builtin_ia32_psllwi128_mask ((__v8hi)(__m128i)(X), (int)(C),\
-    (__v8hi)(__m128i)_mm_setzero_hi(),\
+    (__v8hi)(__m128i)_mm_setzero_si128 (),\
     (__mmask8)(U)))
 
 #define _mm256_dbsad_epu8(X, Y, C)                                                  \
@@ -2301,7 +2301,7 @@ _mm_maskz_mulhi_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_pmulhw128_mask ((__v8hi) __A,
                                                  (__v8hi) __B,
                                                  (__v8hi)
-                                                 _mm_setzero_hi (),
+                                                 _mm_setzero_si128 (),
                                                  (__mmask8) __U);
 }
 
@@ -2323,7 +2323,7 @@ _mm_maskz_mulhi_epu16 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_pmulhuw128_mask ((__v8hi) __A,
                                                   (__v8hi) __B,
                                                   (__v8hi)
-                                                  _mm_setzero_hi (),
+                                                  _mm_setzero_si128 (),
                                                   (__mmask8) __U);
 }
 
@@ -2345,7 +2345,7 @@ _mm_maskz_mulhrs_epi16 (__mmask8 __U, __m128i __X, __m128i __Y)
   return (__m128i) __builtin_ia32_pmulhrsw128_mask ((__v8hi) __X,
                                                    (__v8hi) __Y,
                                                    (__v8hi)
-                                                   _mm_setzero_hi (),
+                                                   _mm_setzero_si128 (),
                                                    (__mmask8) __U);
 }
 
@@ -2389,7 +2389,7 @@ _mm_maskz_mullo_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_pmullw128_mask ((__v8hi) __A,
                                                  (__v8hi) __B,
                                                  (__v8hi)
-                                                 _mm_setzero_hi (),
+                                                 _mm_setzero_si128 (),
                                                  (__mmask8) __U);
 }
 
@@ -4067,7 +4067,7 @@ _mm_srav_epi16 (__m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_psrav8hi_mask ((__v8hi) __A,
                                                 (__v8hi) __B,
                                                 (__v8hi)
-                                                _mm_setzero_hi (),
+                                                _mm_setzero_si128 (),
                                                 (__mmask8) -1);
 }
 
@@ -4133,7 +4133,7 @@ _mm_srlv_epi16 (__m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_psrlv8hi_mask ((__v8hi) __A,
                                                 (__v8hi) __B,
                                                 (__v8hi)
-                                                _mm_setzero_hi (),
+                                                _mm_setzero_si128 (),
                                                 (__mmask8) -1);
 }
 
@@ -4199,7 +4199,7 @@ _mm_sllv_epi16 (__m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_psllv8hi_mask ((__v8hi) __A,
                                                 (__v8hi) __B,
                                                 (__v8hi)
-                                                _mm_setzero_hi (),
+                                                _mm_setzero_si128 (),
                                                 (__mmask8) -1);
 }
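
Another usage sketch (assumed example) for the AVX512BW+AVX512VL variants
above; again the masked-off bytes come from the _mm_setzero_si128
pass-through.  Compile with -mavx512bw -mavx512vl:

	#include <immintrin.h>

	__m128i
	keep_low_bytes (__m128i a)
	{
	  /* Copy the low 8 bytes of a, zero the high 8.  */
	  return _mm_maskz_mov_epi8 ((__mmask16) 0x00ff, a);
	}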
 
diff --git a/gcc/config/i386/avx512vldqintrin.h b/gcc/config/i386/avx512vldqintrin.h
index 5ff0a52..cd0b714 100644
@@ -69,7 +69,7 @@ _mm_cvttpd_epi64 (__m128d __A)
 {
   return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
                                                     (__v2di)
-                                                    _mm_setzero_di (),
+                                                    _mm_setzero_si128 (),
                                                     (__mmask8) -1);
 }
 
@@ -127,7 +127,7 @@ _mm_cvttpd_epu64 (__m128d __A)
 {
   return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
                                                      (__v2di)
-                                                     _mm_setzero_di (),
+                                                     _mm_setzero_si128 (),
                                                      (__mmask8) -1);
 }
 
@@ -185,7 +185,7 @@ _mm_cvtpd_epi64 (__m128d __A)
 {
   return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
                                                    (__v2di)
-                                                   _mm_setzero_di (),
+                                                   _mm_setzero_si128 (),
                                                    (__mmask8) -1);
 }
 
@@ -243,7 +243,7 @@ _mm_cvtpd_epu64 (__m128d __A)
 {
   return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
                                                     (__v2di)
-                                                    _mm_setzero_di (),
+                                                    _mm_setzero_si128 (),
                                                     (__mmask8) -1);
 }
 
@@ -301,7 +301,7 @@ _mm_cvttps_epi64 (__m128 __A)
 {
   return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
                                                     (__v2di)
-                                                    _mm_setzero_di (),
+                                                    _mm_setzero_si128 (),
                                                     (__mmask8) -1);
 }
 
@@ -320,7 +320,7 @@ _mm_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A)
 {
   return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
                                                     (__v2di)
-                                                    _mm_setzero_di (),
+                                                    _mm_setzero_si128 (),
                                                     (__mmask8) __U);
 }
 
@@ -359,7 +359,7 @@ _mm_cvttps_epu64 (__m128 __A)
 {
   return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
                                                      (__v2di)
-                                                     _mm_setzero_di (),
+                                                     _mm_setzero_si128 (),
                                                      (__mmask8) -1);
 }
 
@@ -378,7 +378,7 @@ _mm_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A)
 {
   return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
                                                      (__v2di)
-                                                     _mm_setzero_di (),
+                                                     _mm_setzero_si128 (),
                                                      (__mmask8) __U);
 }
 
@@ -588,7 +588,7 @@ _mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A,
                                                  (__v2di) __B,
                                                  (__v2di)
-                                                 _mm_setzero_di (),
+                                                 _mm_setzero_si128 (),
                                                  (__mmask8) __U);
 }
 
@@ -714,7 +714,7 @@ _mm_cvtps_epi64 (__m128 __A)
 {
   return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
                                                    (__v2di)
-                                                   _mm_setzero_di (),
+                                                   _mm_setzero_si128 (),
                                                    (__mmask8) -1);
 }
 
@@ -733,7 +733,7 @@ _mm_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A)
 {
   return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
                                                    (__v2di)
-                                                   _mm_setzero_di (),
+                                                   _mm_setzero_si128 (),
                                                    (__mmask8) __U);
 }
 
@@ -772,7 +772,7 @@ _mm_cvtps_epu64 (__m128 __A)
 {
   return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
                                                     (__v2di)
-                                                    _mm_setzero_di (),
+                                                    _mm_setzero_si128 (),
                                                     (__mmask8) -1);
 }
 
@@ -791,7 +791,7 @@ _mm_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A)
 {
   return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
                                                     (__v2di)
-                                                    _mm_setzero_di (),
+                                                    _mm_setzero_si128 (),
                                                     (__mmask8) __U);
 }
 
@@ -1381,7 +1381,7 @@ _mm256_extracti64x2_epi64 (__m256i __A, const int __imm)
   return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A,
                                                         __imm,
                                                         (__v2di)
-                                                        _mm_setzero_di (),
+                                                        _mm_setzero_si128 (),
                                                         (__mmask8) -1);
 }
 
@@ -1405,7 +1405,7 @@ _mm256_maskz_extracti64x2_epi64 (__mmask8 __U, __m256i __A,
   return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A,
                                                         __imm,
                                                         (__v2di)
-                                                        _mm_setzero_di (),
+                                                        _mm_setzero_si128 (),
                                                         (__mmask8)
                                                         __U);
 }
@@ -1856,7 +1856,7 @@ _mm256_maskz_insertf64x2 (__mmask8 __U, __m256d __A, __m128d __B,
 
 #define _mm256_extracti64x2_epi64(X, C)                                 \
   ((__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di)(__m256i) (X),\
-    (int) (C), (__v2di)(__m128i) _mm_setzero_di(), (__mmask8)-1))
+    (int) (C), (__v2di)(__m128i) _mm_setzero_si128 (), (__mmask8)-1))
 
 #define _mm256_mask_extracti64x2_epi64(W, U, X, C)                     \
   ((__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di)(__m256i) (X),\
@@ -1864,7 +1864,7 @@ _mm256_maskz_insertf64x2 (__mmask8 __U, __m256d __A, __m128d __B,
 
 #define _mm256_maskz_extracti64x2_epi64(U, X, C)                        \
   ((__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di)(__m256i) (X),\
-    (int) (C), (__v2di)(__m128i) _mm_setzero_di(), (__mmask8) (U)))
+    (int) (C), (__v2di)(__m128i) _mm_setzero_si128 (), (__mmask8) (U)))
 
 #define _mm256_reduce_pd(A, B)                                         \
   ((__m256d) __builtin_ia32_reducepd256_mask ((__v4df)(__m256d)(A),    \
index f339ed3..f83bfe2 100644
--- a/gcc/config/i386/avx512vlintrin.h
+++ b/gcc/config/i386/avx512vlintrin.h
 #ifndef _AVX512VLINTRIN_H_INCLUDED
 #define _AVX512VLINTRIN_H_INCLUDED
 
-/* Doesn't require avx512vl target and is used in avx512dqintrin.h.  */
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_setzero_di (void)
-{
-  return __extension__ (__m128i)(__v2di){ 0LL, 0LL};
-}
-
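Removing the helper outright, rather than giving it a target attribute, is the point of the fix: per its own comment it was deliberately declared without an avx512vl target so that avx512dqintrin.h could also use it, and calling an always_inline function across mismatched target options is what produces the "inlining failed in call" error from the PR. _mm_setzero_si128 comes from emmintrin.h with baseline SSE2 requirements, so every caller can inline it. A hedged sketch of the failure mode (hypothetical functions, not the actual sse-22a.c testcase):

#include <emmintrin.h>

/* An always_inline callee whose target ISA is not a subset of its
   caller's makes GCC reject the call with
   "error: inlining failed in call to always_inline ...: target
   specific option mismatch".  */
static inline __m128i
__attribute__ ((__always_inline__, __target__ ("avx512vl")))
callee (void)
{
  return _mm_setzero_si128 ();
}

__attribute__ ((__target__ ("sse2")))
__m128i
caller (void)
{
  return callee ();	/* inlining fails here at compile time */
}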
 #ifndef __AVX512VL__
 #pragma GCC push_options
 #pragma GCC target("avx512vl")
@@ -267,7 +259,7 @@ _mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_movdqa64_128_mask ((__v2di) __A,
                                                     (__v2di)
-                                                    _mm_setzero_di (),
+                                                    _mm_setzero_si128 (),
                                                     (__mmask8) __U);
 }
 
@@ -308,7 +300,7 @@ _mm_maskz_load_epi64 (__mmask8 __U, void const *__P)
 {
   return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
                                                        (__v2di)
-                                                       _mm_setzero_di (),
+                                                       _mm_setzero_si128 (),
                                                        (__mmask8)
                                                        __U);
 }
@@ -429,15 +421,6 @@ _mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A)
                                        (__mmask8) __U);
 }
 
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_setzero_hi (void)
-{
-  return __extension__ (__m128i) (__v8hi)
-  {
-  0, 0, 0, 0, 0, 0, 0, 0};
-}
-
 extern __inline __m128d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_add_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
@@ -768,7 +751,7 @@ _mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
 {
   return (__m128i) __builtin_ia32_loaddqudi128_mask ((const long long *) __P,
                                                     (__v2di)
-                                                    _mm_setzero_di (),
+                                                    _mm_setzero_si128 (),
                                                     (__mmask8) __U);
 }
 
@@ -919,7 +902,7 @@ _mm_abs_epi64 (__m128i __A)
 {
   return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
                                                 (__v2di)
-                                                _mm_setzero_di (),
+                                                _mm_setzero_si128 (),
                                                 (__mmask8) -1);
 }
 
@@ -938,7 +921,7 @@ _mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
                                                 (__v2di)
-                                                _mm_setzero_di (),
+                                                _mm_setzero_si128 (),
                                                 (__mmask8) __U);
 }
 
@@ -1465,7 +1448,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvtepi32_epi8 (__m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
-                                                 (__v16qi)_mm_undefined_si128(),
+                                                 (__v16qi)
+                                                 _mm_undefined_si128 (),
                                                  (__mmask8) -1);
 }
 
@@ -1499,7 +1483,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_cvtepi32_epi8 (__m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
-                                                 (__v16qi)_mm_undefined_si128(),
+                                                 (__v16qi)
+                                                 _mm_undefined_si128 (),
                                                  (__mmask8) -1);
 }
 
@@ -1533,7 +1518,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvtsepi32_epi8 (__m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
-                                                  (__v16qi)_mm_undefined_si128(),
+                                                  (__v16qi)
+                                                  _mm_undefined_si128 (),
                                                   (__mmask8) -1);
 }
 
@@ -1567,7 +1553,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_cvtsepi32_epi8 (__m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
-                                                  (__v16qi)_mm_undefined_si128(),
+                                                  (__v16qi)
+                                                  _mm_undefined_si128 (),
                                                   (__mmask8) -1);
 }
 
@@ -1601,7 +1588,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvtusepi32_epi8 (__m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
-                                                   (__v16qi)_mm_undefined_si128(),
+                                                   (__v16qi)
+                                                   _mm_undefined_si128 (),
                                                    (__mmask8) -1);
 }
 
@@ -1636,7 +1624,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_cvtusepi32_epi8 (__m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
-                                                   (__v16qi)_mm_undefined_si128(),
+                                                   (__v16qi)
+                                                   _mm_undefined_si128 (),
                                                    (__mmask8) -1);
 }
 
@@ -1671,7 +1660,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvtepi32_epi16 (__m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
-                                                 (__v8hi) _mm_setzero_si128 (),
+                                                 (__v8hi)
+                                                 _mm_setzero_si128 (),
                                                  (__mmask8) -1);
 }
 
@@ -1705,7 +1695,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_cvtepi32_epi16 (__m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
-                                                 (__v8hi)_mm_setzero_si128 (),
+                                                 (__v8hi)
+                                                 _mm_setzero_si128 (),
                                                  (__mmask8) -1);
 }
 
@@ -1739,7 +1730,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvtsepi32_epi16 (__m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
-                                                  (__v8hi)_mm_setzero_si128 (),
+                                                  (__v8hi)
+                                                  _mm_setzero_si128 (),
                                                   (__mmask8) -1);
 }
 
@@ -1774,7 +1766,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_cvtsepi32_epi16 (__m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
-                                                  (__v8hi)_mm_undefined_si128(),
+                                                  (__v8hi)
+                                                  _mm_undefined_si128 (),
                                                   (__mmask8) -1);
 }
 
@@ -1808,7 +1801,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvtusepi32_epi16 (__m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
-                                                   (__v8hi)_mm_undefined_si128(),
+                                                   (__v8hi)
+                                                   _mm_undefined_si128 (),
                                                    (__mmask8) -1);
 }
 
@@ -1842,7 +1836,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_cvtusepi32_epi16 (__m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
-                                                   (__v8hi)_mm_undefined_si128(),
+                                                   (__v8hi)
+                                                   _mm_undefined_si128 (),
                                                    (__mmask8) -1);
 }
 
@@ -1876,7 +1871,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvtepi64_epi8 (__m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
-                                                 (__v16qi)_mm_undefined_si128(),
+                                                 (__v16qi)
+                                                 _mm_undefined_si128 (),
                                                  (__mmask8) -1);
 }
 
@@ -1910,7 +1906,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_cvtepi64_epi8 (__m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
-                                                 (__v16qi)_mm_undefined_si128(),
+                                                 (__v16qi)
+                                                 _mm_undefined_si128 (),
                                                  (__mmask8) -1);
 }
 
@@ -1944,7 +1941,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvtsepi64_epi8 (__m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
-                                                  (__v16qi)_mm_undefined_si128(),
+                                                  (__v16qi)
+                                                  _mm_undefined_si128 (),
                                                   (__mmask8) -1);
 }
 
@@ -1978,7 +1976,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_cvtsepi64_epi8 (__m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
-                                                  (__v16qi)_mm_undefined_si128(),
+                                                  (__v16qi)
+                                                  _mm_undefined_si128 (),
                                                   (__mmask8) -1);
 }
 
@@ -2012,7 +2011,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvtusepi64_epi8 (__m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
-                                                   (__v16qi)_mm_undefined_si128(),
+                                                   (__v16qi)
+                                                   _mm_undefined_si128 (),
                                                    (__mmask8) -1);
 }
 
@@ -2047,7 +2047,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_cvtusepi64_epi8 (__m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
-                                                   (__v16qi)_mm_undefined_si128(),
+                                                   (__v16qi)
+                                                   _mm_undefined_si128 (),
                                                    (__mmask8) -1);
 }
 
@@ -2082,7 +2083,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvtepi64_epi16 (__m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
-                                                 (__v8hi)_mm_undefined_si128(),
+                                                 (__v8hi)
+                                                 _mm_undefined_si128 (),
                                                  (__mmask8) -1);
 }
 
@@ -2117,7 +2119,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_cvtepi64_epi16 (__m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
-                                                 (__v8hi)_mm_undefined_si128(),
+                                                 (__v8hi)
+                                                 _mm_undefined_si128 (),
                                                  (__mmask8) -1);
 }
 
@@ -2151,7 +2154,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvtsepi64_epi16 (__m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
-                                                  (__v8hi)_mm_undefined_si128(),
+                                                  (__v8hi)
+                                                  _mm_undefined_si128 (),
                                                   (__mmask8) -1);
 }
 
@@ -2185,7 +2189,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_cvtsepi64_epi16 (__m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
-                                                  (__v8hi)_mm_undefined_si128(),
+                                                  (__v8hi)
+                                                  _mm_undefined_si128 (),
                                                   (__mmask8) -1);
 }
 
@@ -2219,7 +2224,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvtusepi64_epi16 (__m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
-                                                   (__v8hi)_mm_undefined_si128(),
+                                                   (__v8hi)
+                                                   _mm_undefined_si128 (),
                                                    (__mmask8) -1);
 }
 
@@ -2253,7 +2259,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_cvtusepi64_epi16 (__m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
-                                                   (__v8hi)_mm_undefined_si128(),
+                                                   (__v8hi)
+                                                   _mm_undefined_si128 (),
                                                    (__mmask8) -1);
 }
 
@@ -2287,7 +2294,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvtepi64_epi32 (__m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
-                                                 (__v4si)_mm_undefined_si128(),
+                                                 (__v4si)
+                                                 _mm_undefined_si128 (),
                                                  (__mmask8) -1);
 }
 
@@ -2321,7 +2329,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_cvtepi64_epi32 (__m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
-                                                 (__v4si)_mm_undefined_si128(),
+                                                 (__v4si)
+                                                 _mm_undefined_si128 (),
                                                  (__mmask8) -1);
 }
 
@@ -2355,7 +2364,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvtsepi64_epi32 (__m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
-                                                  (__v4si)_mm_undefined_si128(),
+                                                  (__v4si)
+                                                  _mm_undefined_si128 (),
                                                   (__mmask8) -1);
 }
 
@@ -2389,7 +2399,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_cvtsepi64_epi32 (__m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
-                                                  (__v4si)_mm_undefined_si128(),
+                                                  (__v4si)
+                                                  _mm_undefined_si128 (),
                                                   (__mmask8) -1);
 }
 
@@ -2424,7 +2435,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cvtusepi64_epi32 (__m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
-                                                   (__v4si)_mm_undefined_si128(),
+                                                   (__v4si)
+                                                   _mm_undefined_si128 (),
                                                    (__mmask8) -1);
 }
 
@@ -2458,7 +2470,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_cvtusepi64_epi32 (__m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
-                                                   (__v4si)_mm_undefined_si128(),
+                                                   (__v4si)
+                                                   _mm_undefined_si128 (),
                                                    (__mmask8) -1);
 }
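A side note on the two pass-through conventions visible in the formatting-only hunks above: the unmasked variants hand the builtin _mm_undefined_si128 () because every result lane is written and the pass-through operand is dead, while the maskz variants must supply a real zero vector. A contrasting sketch (function names hypothetical, builtin as used above):

#include <immintrin.h>

__attribute__ ((__target__ ("avx512vl")))
__m128i
narrow_all (__m128i a)
{
  /* Unmasked: the pass-through lanes are never observed.  */
  return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) a,
						  (__v4si)
						  _mm_undefined_si128 (),
						  (__mmask8) -1);
}

__attribute__ ((__target__ ("avx512vl")))
__m128i
narrow_maskz (__mmask8 m, __m128i a)
{
  /* Zero-masking: unselected lanes must read as zero.  */
  return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) a,
						  (__v4si)
						  _mm_setzero_si128 (),
						  (__mmask8) m);
}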
 
@@ -2612,10 +2625,10 @@ extern __inline __m128i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_set1_epi32 (__mmask8 __M, int __A)
 {
-  return (__m128i) __builtin_ia32_pbroadcastd128_gpr_mask (__A,
-                                                          (__v4si)
-                                                          _mm_setzero_si128 (),
-                                                          __M);
+  return (__m128i)
+        __builtin_ia32_pbroadcastd128_gpr_mask (__A,
+                                                (__v4si) _mm_setzero_si128 (),
+                                                __M);
 }
 
 extern __inline __m256i
@@ -2686,10 +2699,10 @@ extern __inline __m128i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_set1_epi64 (__mmask8 __M, long long __A)
 {
-  return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A,
-                                                          (__v2di)
-                                                          _mm_setzero_si128 (),
-                                                          __M);
+  return (__m128i)
+        __builtin_ia32_pbroadcastq128_gpr_mask (__A,
+                                                (__v2di) _mm_setzero_si128 (),
+                                                __M);
 }
 
 extern __inline __m256
@@ -3815,7 +3828,7 @@ _mm_maskz_srl_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_psrlq128_mask ((__v2di) __A,
                                                 (__v2di) __B,
                                                 (__v2di)
-                                                _mm_setzero_di (),
+                                                _mm_setzero_si128 (),
                                                 (__mmask8) __U);
 }
 
@@ -5217,7 +5230,7 @@ _mm_maskz_unpackhi_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_punpckhqdq128_mask ((__v2di) __A,
                                                      (__v2di) __B,
                                                      (__v2di)
-                                                     _mm_setzero_di (),
+                                                     _mm_setzero_si128 (),
                                                      (__mmask8) __U);
 }
 
@@ -5305,7 +5318,7 @@ _mm_maskz_unpacklo_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_punpcklqdq128_mask ((__v2di) __A,
                                                      (__v2di) __B,
                                                      (__v2di)
-                                                     _mm_setzero_di (),
+                                                     _mm_setzero_si128 (),
                                                      (__mmask8) __U);
 }
 
@@ -5894,7 +5907,7 @@ _mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
                                                      (__v2di)
-                                                     _mm_setzero_di (),
+                                                     _mm_setzero_si128 (),
                                                      (__mmask8) __U);
 }
 
@@ -6678,7 +6691,7 @@ _mm_srav_epi64 (__m128i __X, __m128i __Y)
   return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
                                                  (__v2di) __Y,
                                                  (__v2di)
-                                                 _mm_setzero_di (),
+                                                 _mm_setzero_si128 (),
                                                  (__mmask8) -1);
 }
 
@@ -6700,7 +6713,7 @@ _mm_maskz_srav_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
   return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
                                                  (__v2di) __Y,
                                                  (__v2di)
-                                                 _mm_setzero_di (),
+                                                 _mm_setzero_si128 (),
                                                  (__mmask8) __U);
 }
 
@@ -6788,7 +6801,7 @@ _mm_maskz_sllv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
   return (__m128i) __builtin_ia32_psllv2di_mask ((__v2di) __X,
                                                 (__v2di) __Y,
                                                 (__v2di)
-                                                _mm_setzero_di (),
+                                                _mm_setzero_si128 (),
                                                 (__mmask8) __U);
 }
 
@@ -6920,7 +6933,7 @@ _mm_maskz_srlv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
   return (__m128i) __builtin_ia32_psrlv2di_mask ((__v2di) __X,
                                                 (__v2di) __Y,
                                                 (__v2di)
-                                                _mm_setzero_di (),
+                                                _mm_setzero_si128 (),
                                                 (__mmask8) __U);
 }
 
@@ -7096,7 +7109,7 @@ _mm_rolv_epi64 (__m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
                                                  (__v2di) __B,
                                                  (__v2di)
-                                                 _mm_setzero_di (),
+                                                 _mm_setzero_si128 (),
                                                  (__mmask8) -1);
 }
 
@@ -7118,7 +7131,7 @@ _mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
                                                  (__v2di) __B,
                                                  (__v2di)
-                                                 _mm_setzero_di (),
+                                                 _mm_setzero_si128 (),
                                                  (__mmask8) __U);
 }
 
@@ -7162,7 +7175,7 @@ _mm_rorv_epi64 (__m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
                                                  (__v2di) __B,
                                                  (__v2di)
-                                                 _mm_setzero_di (),
+                                                 _mm_setzero_si128 (),
                                                  (__mmask8) -1);
 }
 
@@ -7184,7 +7197,7 @@ _mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
                                                  (__v2di) __B,
                                                  (__v2di)
-                                                 _mm_setzero_di (),
+                                                 _mm_setzero_si128 (),
                                                  (__mmask8) __U);
 }
 
@@ -7972,7 +7985,7 @@ _mm_min_epi64 (__m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
                                                  (__v2di) __B,
                                                  (__v2di)
-                                                 _mm_setzero_di (),
+                                                 _mm_setzero_si128 (),
                                                  (__mmask8) -1);
 }
 
@@ -8015,7 +8028,7 @@ _mm_max_epi64 (__m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
                                                  (__v2di) __B,
                                                  (__v2di)
-                                                 _mm_setzero_di (),
+                                                 _mm_setzero_si128 (),
                                                  (__mmask8) -1);
 }
 
@@ -8026,7 +8039,7 @@ _mm_max_epu64 (__m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
                                                  (__v2di) __B,
                                                  (__v2di)
-                                                 _mm_setzero_di (),
+                                                 _mm_setzero_si128 (),
                                                  (__mmask8) -1);
 }
 
@@ -8047,7 +8060,7 @@ _mm_min_epu64 (__m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
                                                  (__v2di) __B,
                                                  (__v2di)
-                                                 _mm_setzero_di (),
+                                                 _mm_setzero_si128 (),
                                                  (__mmask8) -1);
 }
 
@@ -8345,7 +8358,7 @@ _mm_lzcnt_epi64 (__m128i __A)
 {
   return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
                                                     (__v2di)
-                                                    _mm_setzero_di (),
+                                                    _mm_setzero_si128 (),
                                                     (__mmask8) -1);
 }
 
@@ -8364,7 +8377,7 @@ _mm_maskz_lzcnt_epi64 (__mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
                                                     (__v2di)
-                                                    _mm_setzero_di (),
+                                                    _mm_setzero_si128 (),
                                                     (__mmask8) __U);
 }
 
@@ -8374,7 +8387,7 @@ _mm_conflict_epi64 (__m128i __A)
 {
   return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
                                                         (__v2di)
-                                                        _mm_setzero_di (),
+                                                        _mm_setzero_si128 (),
                                                         (__mmask8) -1);
 }
 
@@ -8394,7 +8407,7 @@ _mm_maskz_conflict_epi64 (__mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
                                                         (__v2di)
-                                                        _mm_setzero_di (),
+                                                        _mm_setzero_si128 (),
                                                         (__mmask8)
                                                         __U);
 }
@@ -8730,7 +8743,7 @@ _mm_sra_epi64 (__m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
                                                 (__v2di) __B,
                                                 (__v2di)
-                                                _mm_setzero_di (),
+                                                _mm_setzero_si128 (),
                                                 (__mmask8) -1);
 }
 
@@ -8752,7 +8765,7 @@ _mm_maskz_sra_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
                                                 (__v2di) __B,
                                                 (__v2di)
-                                                _mm_setzero_di (),
+                                                _mm_setzero_si128 (),
                                                 (__mmask8) __U);
 }
 
@@ -8796,7 +8809,7 @@ _mm_maskz_sll_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
   return (__m128i) __builtin_ia32_psllq128_mask ((__v2di) __A,
                                                 (__v2di) __B,
                                                 (__v2di)
-                                                _mm_setzero_di (),
+                                                _mm_setzero_si128 (),
                                                 (__mmask8) __U);
 }
 
@@ -10923,7 +10936,7 @@ _mm_rol_epi64 (__m128i __A, const int __B)
 {
   return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
                                                 (__v2di)
-                                                _mm_setzero_di (),
+                                                _mm_setzero_si128 (),
                                                 (__mmask8) -1);
 }
 
@@ -10943,7 +10956,7 @@ _mm_maskz_rol_epi64 (__mmask8 __U, __m128i __A, const int __B)
 {
   return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
                                                 (__v2di)
-                                                _mm_setzero_di (),
+                                                _mm_setzero_si128 (),
                                                 (__mmask8) __U);
 }
 
@@ -10983,7 +10996,7 @@ _mm_ror_epi64 (__m128i __A, const int __B)
 {
   return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
                                                 (__v2di)
-                                                _mm_setzero_di (),
+                                                _mm_setzero_si128 (),
                                                 (__mmask8) -1);
 }
 
@@ -11003,7 +11016,7 @@ _mm_maskz_ror_epi64 (__mmask8 __U, __m128i __A, const int __B)
 {
   return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
                                                 (__v2di)
-                                                _mm_setzero_di (),
+                                                _mm_setzero_si128 (),
                                                 (__mmask8) __U);
 }
 
@@ -11048,7 +11061,7 @@ _mm_alignr_epi64 (__m128i __A, __m128i __B, const int __imm)
   return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
                                                  (__v2di) __B, __imm,
                                                  (__v2di)
-                                                 _mm_setzero_di (),
+                                                 _mm_setzero_si128 (),
                                                  (__mmask8) -1);
 }
 
@@ -11071,7 +11084,7 @@ _mm_maskz_alignr_epi64 (__mmask8 __U, __m128i __A, __m128i __B,
   return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
                                                  (__v2di) __B, __imm,
                                                  (__v2di)
-                                                 _mm_setzero_di (),
+                                                 _mm_setzero_si128 (),
                                                  (__mmask8) __U);
 }
 
@@ -11159,7 +11172,7 @@ _mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A, const int __I)
 {
   return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, __I,
                                                  (__v8hi)
-                                                 _mm_setzero_hi (),
+                                                 _mm_setzero_si128 (),
                                                  (__mmask8) __U);
 }
 
@@ -11179,7 +11192,7 @@ _mm256_maskz_cvtps_ph (__mmask8 __U, __m256 __A, const int __I)
 {
   return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, __I,
                                                     (__v8hi)
-                                                    _mm_setzero_hi (),
+                                                    _mm_setzero_si128 (),
                                                     (__mmask8) __U);
 }
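These two cvtps_ph hunks retire the last users of _mm_setzero_hi, whose definition is removed earlier in this file: casting the all-zero __m128i to __v8hi yields the same eight zero halfwords, so no dedicated v8hi constructor is needed. A one-line illustration (zero_v8hi is a hypothetical name):

#include <emmintrin.h>

/* Reinterpreting the zero vector as eight shorts matches what the
   removed _mm_setzero_hi returned.  */
static inline __v8hi
zero_v8hi (void)
{
  return (__v8hi) _mm_setzero_si128 ();
}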
 
@@ -11259,7 +11272,7 @@ _mm_srai_epi64 (__m128i __A, const int __imm)
 {
   return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
                                                  (__v2di)
-                                                 _mm_setzero_di (),
+                                                 _mm_setzero_si128 (),
                                                  (__mmask8) -1);
 }
 
@@ -11317,7 +11330,7 @@ _mm_maskz_slli_epi64 (__mmask8 __U, __m128i __A, int __B)
 {
   return (__m128i) __builtin_ia32_psllqi128_mask ((__v2di) __A, __B,
                                                  (__v2di)
-                                                 _mm_setzero_di (),
+                                                 _mm_setzero_si128 (),
                                                  (__mmask8) __U);
 }
 
@@ -12350,14 +12363,15 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
 #else
 #define _mm256_permutex_pd(X, M)                                               \
   ((__m256d) __builtin_ia32_permdf256_mask ((__v4df)(__m256d)(X), (int)(M),    \
-                                           (__v4df)(__m256d)_mm256_undefined_pd(),\
+                                           (__v4df)(__m256d)                   \
+                                           _mm256_undefined_pd (),             \
                                            (__mmask8)-1))
 
 #define _mm256_maskz_permutex_epi64(M, X, I)                    \
   ((__m256i) __builtin_ia32_permdi256_mask ((__v4di)(__m256i)(X),    \
                                            (int)(I),                \
                                            (__v4di)(__m256i)        \
-                                           (_mm256_setzero_si256()),\
+                                           (_mm256_setzero_si256 ()),\
                                            (__mmask8)(M)))
 
 #define _mm256_mask_permutex_epi64(W, M, X, I)               \
@@ -12369,7 +12383,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
 #define _mm256_insertf32x4(X, Y, C)                                     \
   ((__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf)(__m256) (X),  \
     (__v4sf)(__m128) (Y), (int) (C),                                   \
-    (__v8sf)(__m256)_mm256_setzero_ps(),                               \
+    (__v8sf)(__m256)_mm256_setzero_ps (),                              \
     (__mmask8)-1))
 
 #define _mm256_mask_insertf32x4(W, U, X, Y, C)                          \
@@ -12381,13 +12395,13 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
 #define _mm256_maskz_insertf32x4(U, X, Y, C)                            \
   ((__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf)(__m256) (X), \
     (__v4sf)(__m128) (Y), (int) (C),                                   \
-    (__v8sf)(__m256)_mm256_setzero_ps(),                               \
+    (__v8sf)(__m256)_mm256_setzero_ps (),                              \
     (__mmask8)(U)))
 
 #define _mm256_inserti32x4(X, Y, C)                                     \
   ((__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si)(__m256i) (X),\
     (__v4si)(__m128i) (Y), (int) (C),                                  \
-    (__v8si)(__m256i)_mm256_setzero_si256(),                           \
+    (__v8si)(__m256i)_mm256_setzero_si256 (),                          \
     (__mmask8)-1))
 
 #define _mm256_mask_inserti32x4(W, U, X, Y, C)                          \
@@ -12399,13 +12413,13 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
 #define _mm256_maskz_inserti32x4(U, X, Y, C)                            \
   ((__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si)(__m256i) (X),\
     (__v4si)(__m128i) (Y), (int) (C),                                  \
-    (__v8si)(__m256i)_mm256_setzero_si256(),                           \
+    (__v8si)(__m256i)_mm256_setzero_si256 (),                          \
     (__mmask8)(U)))
 
 #define _mm256_extractf32x4_ps(X, C)                                    \
   ((__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf)(__m256) (X), \
     (int) (C),                                                         \
-    (__v4sf)(__m128)_mm_setzero_ps(),                                  \
+    (__v4sf)(__m128)_mm_setzero_ps (),                                 \
     (__mmask8)-1))
 
 #define _mm256_mask_extractf32x4_ps(W, U, X, C)                         \
@@ -12417,7 +12431,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
 #define _mm256_maskz_extractf32x4_ps(U, X, C)                           \
   ((__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf)(__m256) (X), \
     (int) (C),                                                         \
-    (__v4sf)(__m128)_mm_setzero_ps(),                                  \
+    (__v4sf)(__m128)_mm_setzero_ps (),                                 \
     (__mmask8)(U)))
 
 #define _mm256_extracti32x4_epi32(X, C)                                 \
@@ -12453,7 +12467,8 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
 #define _mm256_shuffle_i32x4(X, Y, C)                                                   \
   ((__m256i)  __builtin_ia32_shuf_i32x4_256_mask ((__v8si)(__m256i)(X),                 \
                                                   (__v8si)(__m256i)(Y), (int)(C),       \
-                                                  (__v8si)(__m256i)_mm256_setzero_si256(), \
+                                                 (__v8si)(__m256i)                     \
+                                                 _mm256_setzero_si256 (),              \
                                                   (__mmask8)-1))
 
 #define _mm256_mask_shuffle_i32x4(W, U, X, Y, C)                                        \
@@ -12465,13 +12480,14 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
 #define _mm256_maskz_shuffle_i32x4(U, X, Y, C)                                          \
   ((__m256i)  __builtin_ia32_shuf_i32x4_256_mask ((__v8si)(__m256i)(X),                 \
                                                   (__v8si)(__m256i)(Y), (int)(C),       \
-                                                  (__v8si)(__m256i)_mm256_setzero_si256(), \
+                                                 (__v8si)(__m256i)                     \
+                                                 _mm256_setzero_si256 (),              \
                                                   (__mmask8)(U)))
 
 #define _mm256_shuffle_f64x2(X, Y, C)                                                   \
   ((__m256d)  __builtin_ia32_shuf_f64x2_256_mask ((__v4df)(__m256d)(X),                 \
                                                   (__v4df)(__m256d)(Y), (int)(C),       \
-                                                  (__v4df)(__m256d)_mm256_setzero_pd(), \
+                                                 (__v4df)(__m256d)_mm256_setzero_pd (),\
                                                   (__mmask8)-1))
 
 #define _mm256_mask_shuffle_f64x2(W, U, X, Y, C)                                        \
@@ -12483,13 +12499,13 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
 #define _mm256_maskz_shuffle_f64x2(U, X, Y, C)                                          \
   ((__m256d)  __builtin_ia32_shuf_f64x2_256_mask ((__v4df)(__m256d)(X),                 \
                                                   (__v4df)(__m256d)(Y), (int)(C),       \
-                                                  (__v4df)(__m256d)_mm256_setzero_pd(), \
+                                                 (__v4df)(__m256d)_mm256_setzero_pd (),\
                                                   (__mmask8)(U)))
 
 #define _mm256_shuffle_f32x4(X, Y, C)                                                   \
   ((__m256)  __builtin_ia32_shuf_f32x4_256_mask ((__v8sf)(__m256)(X),                   \
                                                  (__v8sf)(__m256)(Y), (int)(C),         \
-                                                 (__v8sf)(__m256)_mm256_setzero_ps(),   \
+                                                (__v8sf)(__m256)_mm256_setzero_ps (),  \
                                                  (__mmask8)-1))
 
 #define _mm256_mask_shuffle_f32x4(W, U, X, Y, C)                                        \
@@ -12501,7 +12517,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
 #define _mm256_maskz_shuffle_f32x4(U, X, Y, C)                                          \
   ((__m256)  __builtin_ia32_shuf_f32x4_256_mask ((__v8sf)(__m256)(X),                   \
                                                  (__v8sf)(__m256)(Y), (int)(C),         \
-                                                 (__v8sf)(__m256)_mm256_setzero_ps(),   \
+                                                (__v8sf)(__m256)_mm256_setzero_ps (),  \
                                                  (__mmask8)(U)))
 
 #define _mm256_mask_shuffle_pd(W, U, A, B, C)                                   \
@@ -12513,7 +12529,8 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
 #define _mm256_maskz_shuffle_pd(U, A, B, C)                                     \
   ((__m256d)__builtin_ia32_shufpd256_mask ((__v4df)(__m256d)(A),                \
                                            (__v4df)(__m256d)(B), (int)(C),      \
-                                           (__v4df)(__m256d)_mm256_setzero_pd(),\
+                                          (__v4df)(__m256d)                    \
+                                          _mm256_setzero_pd (),                \
                                            (__mmask8)(U)))
 
 #define _mm_mask_shuffle_pd(W, U, A, B, C)                                      \
@@ -12525,7 +12542,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
 #define _mm_maskz_shuffle_pd(U, A, B, C)                                        \
   ((__m128d)__builtin_ia32_shufpd128_mask ((__v2df)(__m128d)(A),                \
                                            (__v2df)(__m128d)(B), (int)(C),      \
-                                           (__v2df)(__m128d)_mm_setzero_pd(),   \
+                                          (__v2df)(__m128d)_mm_setzero_pd (),  \
                                            (__mmask8)(U)))
 
 #define _mm256_mask_shuffle_ps(W, U, A, B, C)                                   \
@@ -12537,7 +12554,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
 #define _mm256_maskz_shuffle_ps(U, A, B, C)                                     \
   ((__m256) __builtin_ia32_shufps256_mask ((__v8sf)(__m256)(A),                 \
                                            (__v8sf)(__m256)(B), (int)(C),       \
-                                           (__v8sf)(__m256)_mm256_setzero_ps(), \
+                                          (__v8sf)(__m256)_mm256_setzero_ps (),\
                                            (__mmask8)(U)))
 
 #define _mm_mask_shuffle_ps(W, U, A, B, C)                                      \
@@ -12549,7 +12566,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
 #define _mm_maskz_shuffle_ps(U, A, B, C)                                        \
   ((__m128) __builtin_ia32_shufps128_mask ((__v4sf)(__m128)(A),                 \
                                            (__v4sf)(__m128)(B), (int)(C),       \
-                                           (__v4sf)(__m128)_mm_setzero_ps(),    \
+                                          (__v4sf)(__m128)_mm_setzero_ps (),   \
                                            (__mmask8)(U)))
 
 #define _mm256_fixupimm_pd(X, Y, Z, C)                                          \
@@ -12632,7 +12649,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
 
 #define _mm256_maskz_srli_epi32(U, A, B)                               \
   ((__m256i) __builtin_ia32_psrldi256_mask ((__v8si)(__m256i)(A),      \
-    (int)(B), (__v8si)_mm256_setzero_si256(), (__mmask8)(U)))
+    (int)(B), (__v8si)_mm256_setzero_si256 (), (__mmask8)(U)))
 
 #define _mm_mask_srli_epi32(W, U, A, B)                                 \
   ((__m128i) __builtin_ia32_psrldi128_mask ((__v4si)(__m128i)(A),       \
@@ -12640,7 +12657,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
 
 #define _mm_maskz_srli_epi32(U, A, B)                                   \
   ((__m128i) __builtin_ia32_psrldi128_mask ((__v4si)(__m128i)(A),       \
-    (int)(B), (__v4si)_mm_setzero_si128(), (__mmask8)(U)))
+    (int)(B), (__v4si)_mm_setzero_si128 (), (__mmask8)(U)))
 
 #define _mm256_mask_srli_epi64(W, U, A, B)                             \
   ((__m256i) __builtin_ia32_psrlqi256_mask ((__v4di)(__m256i)(A),      \
@@ -12656,26 +12673,26 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
 
 #define _mm_maskz_srli_epi64(U, A, B)                                   \
   ((__m128i) __builtin_ia32_psrlqi128_mask ((__v2di)(__m128i)(A),       \
-    (int)(B), (__v2di)_mm_setzero_si128(), (__mmask8)(U)))
+    (int)(B), (__v2di)_mm_setzero_si128 (), (__mmask8)(U)))
 
 #define _mm256_mask_slli_epi32(W, U, X, C)                                \
   ((__m256i)__builtin_ia32_pslldi256_mask ((__v8si)(__m256i)(X), (int)(C),\
-    (__v8si)(__m256i)(W),\
+    (__v8si)(__m256i)(W),                                                \
     (__mmask8)(U)))
 
 #define _mm256_maskz_slli_epi32(U, X, C)                                  \
   ((__m256i)__builtin_ia32_pslldi256_mask ((__v8si)(__m256i)(X), (int)(C),\
-    (__v8si)(__m256i)_mm256_setzero_si256(),\
+    (__v8si)(__m256i)_mm256_setzero_si256 (),                            \
     (__mmask8)(U)))
 
 #define _mm256_mask_slli_epi64(W, U, X, C)                                \
   ((__m256i)__builtin_ia32_psllqi256_mask ((__v4di)(__m256i)(X), (int)(C),\
-    (__v4di)(__m256i)(W),\
+    (__v4di)(__m256i)(W),                                                \
     (__mmask8)(U)))
 
 #define _mm256_maskz_slli_epi64(U, X, C)                                  \
   ((__m256i)__builtin_ia32_psllqi256_mask ((__v4di)(__m256i)(X), (int)(C),\
-    (__v4di)(__m256i)_mm256_setzero_si256 (),\
+    (__v4di)(__m256i)_mm256_setzero_si256 (),                            \
     (__mmask8)(U)))
 
 #define _mm_mask_slli_epi32(W, U, X, C)                                          \
@@ -12695,7 +12712,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
 
 #define _mm_maskz_slli_epi64(U, X, C)                                    \
   ((__m128i)__builtin_ia32_psllqi128_mask ((__v2di)(__m128i)(X), (int)(C),\
-    (__v2di)(__m128i)_mm_setzero_di(),\
+    (__v2di)(__m128i)_mm_setzero_si128 (),\
     (__mmask8)(U)))
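The same stale reference also had to be fixed in this #else branch, where the intrinsics are preprocessor macros for non-__OPTIMIZE__ builds; a leftover _mm_setzero_di() here would only break users compiling at -O0. A sketch of what the macro call now expands to (wrapper name hypothetical):

#include <immintrin.h>

__attribute__ ((__target__ ("avx512vl")))
__m128i
shift_maskz (__mmask8 m, __m128i x)
{
  /* _mm_maskz_slli_epi64 (m, x, 3) at -O0 expands to:  */
  return (__m128i) __builtin_ia32_psllqi128_mask ((__v2di)(__m128i)(x),
						  (int)(3),
						  (__v2di)(__m128i)
						  _mm_setzero_si128 (),
						  (__mmask8)(m));
}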
 
 #define _mm256_ternarylogic_epi64(A, B, C, I)                           \
@@ -12748,7 +12765,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
 
 #define _mm256_roundscale_ps(A, B)                                     \
   ((__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf)(__m256)(A),    \
-    (int)(B), (__v8sf)(__m256)_mm256_setzero_ps(), (__mmask8)-1))
+    (int)(B), (__v8sf)(__m256)_mm256_setzero_ps (), (__mmask8)-1))
 
 #define _mm256_mask_roundscale_ps(W, U, A, B)                          \
   ((__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf)(__m256)(A),    \
@@ -12756,11 +12773,11 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
 
 #define _mm256_maskz_roundscale_ps(U, A, B)                            \
   ((__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf)(__m256)(A),    \
-    (int)(B), (__v8sf)(__m256)_mm256_setzero_ps(), (__mmask8)(U)))
+    (int)(B), (__v8sf)(__m256)_mm256_setzero_ps (), (__mmask8)(U)))
 
 #define _mm256_roundscale_pd(A, B)                                     \
   ((__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df)(__m256d)(A),  \
-    (int)(B), (__v4df)(__m256d)_mm256_setzero_pd(), (__mmask8)-1))
+    (int)(B), (__v4df)(__m256d)_mm256_setzero_pd (), (__mmask8)-1))
 
 #define _mm256_mask_roundscale_pd(W, U, A, B)                          \
   ((__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df)(__m256d)(A),  \
@@ -12768,11 +12785,11 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
 
 #define _mm256_maskz_roundscale_pd(U, A, B)                            \
   ((__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df)(__m256d)(A),  \
-    (int)(B), (__v4df)(__m256d)_mm256_setzero_pd(), (__mmask8)(U)))
+    (int)(B), (__v4df)(__m256d)_mm256_setzero_pd (), (__mmask8)(U)))
 
 #define _mm_roundscale_ps(A, B)                                                \
   ((__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf)(__m128)(A),    \
-    (int)(B), (__v4sf)(__m128)_mm_setzero_ps(), (__mmask8)-1))
+    (int)(B), (__v4sf)(__m128)_mm_setzero_ps (), (__mmask8)-1))
 
 #define _mm_mask_roundscale_ps(W, U, A, B)                             \
   ((__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf)(__m128)(A),    \
@@ -12780,11 +12797,11 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
 
 #define _mm_maskz_roundscale_ps(U, A, B)                               \
   ((__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf)(__m128)(A),    \
-    (int)(B), (__v4sf)(__m128)_mm_setzero_ps(), (__mmask8)(U)))
+    (int)(B), (__v4sf)(__m128)_mm_setzero_ps (), (__mmask8)(U)))
 
 #define _mm_roundscale_pd(A, B)                                                \
   ((__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df)(__m128d)(A),  \
-    (int)(B), (__v2df)(__m128d)_mm_setzero_pd(), (__mmask8)-1))
+    (int)(B), (__v2df)(__m128d)_mm_setzero_pd (), (__mmask8)-1))
 
 #define _mm_mask_roundscale_pd(W, U, A, B)                             \
   ((__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df)(__m128d)(A),  \
@@ -12792,12 +12809,12 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
 
 #define _mm_maskz_roundscale_pd(U, A, B)                               \
   ((__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df)(__m128d)(A),  \
-    (int)(B), (__v2df)(__m128d)_mm_setzero_pd(), (__mmask8)(U)))
+    (int)(B), (__v2df)(__m128d)_mm_setzero_pd (), (__mmask8)(U)))
 
 #define _mm256_getmant_ps(X, B, C)                                              \
   ((__m256) __builtin_ia32_getmantps256_mask ((__v8sf)(__m256) (X),             \
                                          (int)(((C)<<2) | (B)),                 \
-                                         (__v8sf)(__m256)_mm256_setzero_ps(),   \
+                                        (__v8sf)(__m256)_mm256_setzero_ps (),  \
                                          (__mmask8)-1))
 
 #define _mm256_mask_getmant_ps(W, U, X, B, C)                                   \
@@ -12809,13 +12826,13 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
 #define _mm256_maskz_getmant_ps(U, X, B, C)                                     \
   ((__m256) __builtin_ia32_getmantps256_mask ((__v8sf)(__m256) (X),             \
                                          (int)(((C)<<2) | (B)),                 \
-                                         (__v8sf)(__m256)_mm256_setzero_ps(),   \
+                                        (__v8sf)(__m256)_mm256_setzero_ps (),  \
                                          (__mmask8)(U)))
 
 #define _mm_getmant_ps(X, B, C)                                                 \
   ((__m128) __builtin_ia32_getmantps128_mask ((__v4sf)(__m128) (X),             \
                                          (int)(((C)<<2) | (B)),                 \
-                                         (__v4sf)(__m128)_mm_setzero_ps(),      \
+                                        (__v4sf)(__m128)_mm_setzero_ps (),     \
                                          (__mmask8)-1))
 
 #define _mm_mask_getmant_ps(W, U, X, B, C)                                      \
@@ -12827,13 +12844,13 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
 #define _mm_maskz_getmant_ps(U, X, B, C)                                        \
   ((__m128) __builtin_ia32_getmantps128_mask ((__v4sf)(__m128) (X),             \
                                          (int)(((C)<<2) | (B)),                 \
-                                         (__v4sf)(__m128)_mm_setzero_ps(),      \
+                                        (__v4sf)(__m128)_mm_setzero_ps (),     \
                                          (__mmask8)(U)))
 
 #define _mm256_getmant_pd(X, B, C)                                              \
   ((__m256d) __builtin_ia32_getmantpd256_mask ((__v4df)(__m256d) (X),           \
                                          (int)(((C)<<2) | (B)),                 \
-                                          (__v4df)(__m256d)_mm256_setzero_pd(), \
+                                         (__v4df)(__m256d)_mm256_setzero_pd (),\
                                           (__mmask8)-1))
 
 #define _mm256_mask_getmant_pd(W, U, X, B, C)                                   \
@@ -12845,13 +12862,13 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
 #define _mm256_maskz_getmant_pd(U, X, B, C)                                     \
   ((__m256d) __builtin_ia32_getmantpd256_mask ((__v4df)(__m256d) (X),           \
                                          (int)(((C)<<2) | (B)),                 \
-                                          (__v4df)(__m256d)_mm256_setzero_pd(), \
+                                         (__v4df)(__m256d)_mm256_setzero_pd (),\
                                           (__mmask8)(U)))
 
 #define _mm_getmant_pd(X, B, C)                                                 \
   ((__m128d) __builtin_ia32_getmantpd128_mask ((__v2df)(__m128d) (X),           \
                                          (int)(((C)<<2) | (B)),                 \
-                                          (__v2df)(__m128d)_mm_setzero_pd(),    \
+                                         (__v2df)(__m128d)_mm_setzero_pd (),   \
                                           (__mmask8)-1))
 
 #define _mm_mask_getmant_pd(W, U, X, B, C)                                      \
@@ -12863,7 +12880,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
 #define _mm_maskz_getmant_pd(U, X, B, C)                                        \
   ((__m128d) __builtin_ia32_getmantpd128_mask ((__v2df)(__m128d) (X),           \
                                          (int)(((C)<<2) | (B)),                 \
-                                          (__v2df)(__m128d)_mm_setzero_pd(),    \
+                                         (__v2df)(__m128d)_mm_setzero_pd (),   \
                                           (__mmask8)(U)))
 
 #define _mm256_mmask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE)     \
@@ -13129,7 +13146,8 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
 
 #define _mm256_maskz_shuffle_epi32(U, X, C)                                         \
   ((__m256i)  __builtin_ia32_pshufd256_mask ((__v8si)(__m256i)(X), (int)(C),        \
-                                             (__v8si)(__m256i)_mm256_setzero_si256(),  \
+                                            (__v8si)(__m256i)                      \
+                                            _mm256_setzero_si256 (),               \
                                              (__mmask8)(U)))
 
 #define _mm_mask_shuffle_epi32(W, U, X, C)                                          \
@@ -13139,7 +13157,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
 
 #define _mm_maskz_shuffle_epi32(U, X, C)                                            \
   ((__m128i)  __builtin_ia32_pshufd128_mask ((__v4si)(__m128i)(X), (int)(C),        \
-                                             (__v4si)(__m128i)_mm_setzero_si128 (),     \
+                                            (__v4si)(__m128i)_mm_setzero_si128 (), \
                                              (__mmask8)(U)))
 
 #define _mm256_rol_epi64(A, B)                                                 \
@@ -13159,7 +13177,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
 
 #define _mm_rol_epi64(A, B)                                                    \
   ((__m128i)__builtin_ia32_prolq128_mask ((__v2di)(__m128i)(A), (int)(B),      \
-                                          (__v2di)(__m128i)_mm_setzero_di(),   \
+                                         (__v2di)(__m128i)_mm_setzero_si128 (),\
                                           (__mmask8)-1))
 
 #define _mm_mask_rol_epi64(W, U, A, B)                                         \
@@ -13169,7 +13187,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
 
 #define _mm_maskz_rol_epi64(U, A, B)                                           \
   ((__m128i)__builtin_ia32_prolq128_mask ((__v2di)(__m128i)(A), (int)(B),      \
-                                          (__v2di)(__m128i)_mm_setzero_di(),   \
+                                         (__v2di)(__m128i)_mm_setzero_si128 (),\
                                           (__mmask8)(U)))
 
 #define _mm256_ror_epi64(A, B)                                                 \
@@ -13189,7 +13207,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
 
 #define _mm_ror_epi64(A, B)                                                    \
   ((__m128i)__builtin_ia32_prorq128_mask ((__v2di)(__m128i)(A), (int)(B),      \
-                                          (__v2di)(__m128i)_mm_setzero_di(),   \
+                                         (__v2di)(__m128i)_mm_setzero_si128 (),\
                                           (__mmask8)-1))
 
 #define _mm_mask_ror_epi64(W, U, A, B)                                         \
@@ -13199,12 +13217,12 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
 
 #define _mm_maskz_ror_epi64(U, A, B)                                           \
   ((__m128i)__builtin_ia32_prorq128_mask ((__v2di)(__m128i)(A), (int)(B),      \
-                                          (__v2di)(__m128i)_mm_setzero_di(),   \
+                                         (__v2di)(__m128i)_mm_setzero_si128 (),\
                                           (__mmask8)(U)))
 
 #define _mm256_rol_epi32(A, B)                                                 \
   ((__m256i)__builtin_ia32_prold256_mask ((__v8si)(__m256i)(A), (int)(B),      \
-                                          (__v8si)(__m256i)_mm256_setzero_si256(),\
+                                         (__v8si)(__m256i)_mm256_setzero_si256 (),\
                                           (__mmask8)-1))
 
 #define _mm256_mask_rol_epi32(W, U, A, B)                                      \
@@ -13214,12 +13232,12 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
 
 #define _mm256_maskz_rol_epi32(U, A, B)                                        \
   ((__m256i)__builtin_ia32_prold256_mask ((__v8si)(__m256i)(A), (int)(B),      \
-                                          (__v8si)(__m256i)_mm256_setzero_si256(),\
+                                         (__v8si)(__m256i)_mm256_setzero_si256 (),\
                                           (__mmask8)(U)))
 
 #define _mm_rol_epi32(A, B)                                                    \
   ((__m128i)__builtin_ia32_prold128_mask ((__v4si)(__m128i)(A), (int)(B),      \
-                                          (__v4si)(__m128i)_mm_setzero_si128 (),   \
+                                         (__v4si)(__m128i)_mm_setzero_si128 (),\
                                           (__mmask8)-1))
 
 #define _mm_mask_rol_epi32(W, U, A, B)                                         \
@@ -13229,12 +13247,12 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
 
 #define _mm_maskz_rol_epi32(U, A, B)                                           \
   ((__m128i)__builtin_ia32_prold128_mask ((__v4si)(__m128i)(A), (int)(B),      \
-                                          (__v4si)(__m128i)_mm_setzero_si128 (),   \
+                                         (__v4si)(__m128i)_mm_setzero_si128 (),\
                                           (__mmask8)(U)))
 
 #define _mm256_ror_epi32(A, B)                                                 \
   ((__m256i)__builtin_ia32_prord256_mask ((__v8si)(__m256i)(A), (int)(B),      \
-                                          (__v8si)(__m256i)_mm256_setzero_si256(),\
+                                         (__v8si)(__m256i)_mm256_setzero_si256 (),\
                                           (__mmask8)-1))
 
 #define _mm256_mask_ror_epi32(W, U, A, B)                                      \
@@ -13244,12 +13262,13 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
 
 #define _mm256_maskz_ror_epi32(U, A, B)                                        \
   ((__m256i)__builtin_ia32_prord256_mask ((__v8si)(__m256i)(A), (int)(B),      \
-                                          (__v8si)(__m256i)_mm256_setzero_si256(),\
+                                         (__v8si)(__m256i)                    \
+                                         _mm256_setzero_si256 (),             \
                                           (__mmask8)(U)))
 
 #define _mm_ror_epi32(A, B)                                                    \
   ((__m128i)__builtin_ia32_prord128_mask ((__v4si)(__m128i)(A), (int)(B),      \
-                                          (__v4si)(__m128i)_mm_setzero_si128 (),   \
+                                         (__v4si)(__m128i)_mm_setzero_si128 (),\
                                           (__mmask8)-1))
 
 #define _mm_mask_ror_epi32(W, U, A, B)                                         \
@@ -13259,7 +13278,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
 
 #define _mm_maskz_ror_epi32(U, A, B)                                           \
   ((__m128i)__builtin_ia32_prord128_mask ((__v4si)(__m128i)(A), (int)(B),      \
-                                          (__v4si)(__m128i)_mm_setzero_si128 (),   \
+                                         (__v4si)(__m128i)_mm_setzero_si128 (),\
                                           (__mmask8)(U)))
 
 #define _mm256_alignr_epi32(X, Y, C)                                        \
@@ -13298,7 +13317,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
 
 #define _mm_maskz_alignr_epi32(U, X, Y, C)                                  \
     ((__m128i)__builtin_ia32_alignd128_mask ((__v4si)(__m128i)(X),          \
-        (__v4si)(__m128i)(Y), (int)(C), (__v4si)(__m128i)_mm_setzero_si128(),\
+       (__v4si)(__m128i)(Y), (int)(C), (__v4si)(__m128i)_mm_setzero_si128 (),\
         (__mmask8)(U)))
 
 #define _mm_alignr_epi64(X, Y, C)                                           \
@@ -13311,7 +13330,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
 
 #define _mm_maskz_alignr_epi64(U, X, Y, C)                                  \
     ((__m128i)__builtin_ia32_alignq128_mask ((__v2di)(__m128i)(X),          \
-        (__v2di)(__m128i)(Y), (int)(C), (__v2di)(__m128i)_mm_setzero_si128(),\
+       (__v2di)(__m128i)(Y), (int)(C), (__v2di)(__m128i)_mm_setzero_si128 (),\
         (__mmask8)(U)))
 
 #define _mm_mask_cvtps_ph(W, U, A, I)                                          \
@@ -13320,7 +13339,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
 
 #define _mm_maskz_cvtps_ph(U, A, I)                                            \
   ((__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf)(__m128) A, (int) (I),      \
-      (__v8hi)(__m128i) _mm_setzero_hi(), (__mmask8) (U)))
+      (__v8hi)(__m128i) _mm_setzero_si128 (), (__mmask8) (U)))
 
 #define _mm256_mask_cvtps_ph(W, U, A, I)                                       \
   ((__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf)(__m256) A, (int) (I),  \
@@ -13328,7 +13347,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
 
 #define _mm256_maskz_cvtps_ph(U, A, I)                                         \
   ((__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf)(__m256) A, (int) (I),   \
-      (__v8hi)(__m128i) _mm_setzero_hi(), (__mmask8) (U)))
+      (__v8hi)(__m128i) _mm_setzero_si128 (), (__mmask8) (U)))
 
 #define _mm256_mask_srai_epi32(W, U, A, B)                             \
   ((__m256i) __builtin_ia32_psradi256_mask ((__v8si)(__m256i)(A),      \
@@ -13336,7 +13355,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
 
 #define _mm256_maskz_srai_epi32(U, A, B)                               \
   ((__m256i) __builtin_ia32_psradi256_mask ((__v8si)(__m256i)(A),      \
-    (int)(B), (__v8si)_mm256_setzero_si256(), (__mmask8)(U)))
+    (int)(B), (__v8si)_mm256_setzero_si256 (), (__mmask8)(U)))
 
 #define _mm_mask_srai_epi32(W, U, A, B)                                 \
   ((__m128i) __builtin_ia32_psradi128_mask ((__v4si)(__m128i)(A),       \
@@ -13344,7 +13363,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
 
 #define _mm_maskz_srai_epi32(U, A, B)                                   \
   ((__m128i) __builtin_ia32_psradi128_mask ((__v4si)(__m128i)(A),       \
-    (int)(B), (__v4si)_mm_setzero_si128(), (__mmask8)(U)))
+    (int)(B), (__v4si)_mm_setzero_si128 (), (__mmask8)(U)))
 
 #define _mm256_srai_epi64(A, B)                                                \
   ((__m256i) __builtin_ia32_psraqi256_mask ((__v4di)(__m256i)(A),      \
@@ -13360,7 +13379,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
 
 #define _mm_srai_epi64(A, B)                                           \
   ((__m128i) __builtin_ia32_psraqi128_mask ((__v2di)(__m128i)(A),       \
-    (int)(B), (__v2di)_mm_setzero_si128(), (__mmask8)-1))
+    (int)(B), (__v2di)_mm_setzero_si128 (), (__mmask8)-1))
 
 #define _mm_mask_srai_epi64(W, U, A, B)                                 \
   ((__m128i) __builtin_ia32_psraqi128_mask ((__v2di)(__m128i)(A),       \
@@ -13368,7 +13387,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
 
 #define _mm_maskz_srai_epi64(U, A, B)                                   \
   ((__m128i) __builtin_ia32_psraqi128_mask ((__v2di)(__m128i)(A),       \
-    (int)(B), (__v2di)_mm_setzero_si128(), (__mmask8)(U)))
+    (int)(B), (__v2di)_mm_setzero_si128 (), (__mmask8)(U)))
 
 #define _mm256_mask_permutex_pd(W, U, A, B)                             \
   ((__m256d) __builtin_ia32_permdf256_mask ((__v4df)(__m256d)(A),       \
@@ -13376,7 +13395,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
 
 #define _mm256_maskz_permutex_pd(U, A, B)                              \
   ((__m256d) __builtin_ia32_permdf256_mask ((__v4df)(__m256d)(A),       \
-    (int)(B), (__v4df)(__m256d)_mm256_setzero_pd(), (__mmask8)(U)))
+    (int)(B), (__v4df)(__m256d)_mm256_setzero_pd (), (__mmask8)(U)))
 
 #define _mm256_mask_permute_pd(W, U, X, C)                                         \
   ((__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df)(__m256d)(X), (int)(C),     \
@@ -13385,7 +13404,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
 
 #define _mm256_maskz_permute_pd(U, X, C)                                           \
   ((__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df)(__m256d)(X), (int)(C),     \
-                                             (__v4df)(__m256d)_mm256_setzero_pd(), \
+                                             (__v4df)(__m256d)_mm256_setzero_pd (),\
                                              (__mmask8)(U)))
 
 #define _mm256_mask_permute_ps(W, U, X, C)                                         \
@@ -13394,7 +13413,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
 
 #define _mm256_maskz_permute_ps(U, X, C)                                           \
   ((__m256) __builtin_ia32_vpermilps256_mask ((__v8sf)(__m256)(X), (int)(C),       \
-                                             (__v8sf)(__m256)_mm256_setzero_ps(),  \
+                                             (__v8sf)(__m256)_mm256_setzero_ps (), \
                                              (__mmask8)(U)))
 
 #define _mm_mask_permute_pd(W, U, X, C)                                                    \
@@ -13403,7 +13422,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
 
 #define _mm_maskz_permute_pd(U, X, C)                                              \
   ((__m128d) __builtin_ia32_vpermilpd_mask ((__v2df)(__m128d)(X), (int)(C),        \
-                                           (__v2df)(__m128d)_mm_setzero_pd(),      \
+                                           (__v2df)(__m128d)_mm_setzero_pd (),     \
                                            (__mmask8)(U)))
 
 #define _mm_mask_permute_ps(W, U, X, C)                                                    \
@@ -13412,7 +13431,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
 
 #define _mm_maskz_permute_ps(U, X, C)                                              \
   ((__m128) __builtin_ia32_vpermilps_mask ((__v4sf)(__m128)(X), (int)(C),          \
-                                         (__v4sf)(__m128)_mm_setzero_ps(),         \
+                                         (__v4sf)(__m128)_mm_setzero_ps (),        \
                                          (__mmask8)(U)))
 
 #define _mm256_mask_blend_pd(__U, __A, __W)                          \
@@ -13577,7 +13596,7 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
 
 #endif
 
-#define _mm256_permutexvar_ps(A, B)    _mm256_permutevar8x32_ps((B), (A))
+#define _mm256_permutexvar_ps(A, B)    _mm256_permutevar8x32_ps ((B), (A))
 
 #ifdef __DISABLE_AVX512VL__
 #undef __DISABLE_AVX512VL__
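
For reference, a minimal usage sketch (not part of the patch) of one of the rewritten zero-masking macros; the chosen intrinsic, input values and expected output are illustrative assumptions only.  It needs -mavx512vl to compile and a CPU with AVX512VL to run:

/* Zero-masked rotate: lanes whose mask bit is clear are filled from
   _mm_setzero_si128 (), the zero source the macros above now use.  */
#include <immintrin.h>
#include <stdio.h>

int
main (void)
{
  __m128i a = _mm_set_epi64x (2LL, (long long) 0x8000000000000001ULL);
  /* Rotate both 64-bit lanes left by 1, but keep only lane 0
     (mask 0x1); lane 1 is taken from _mm_setzero_si128 ().  */
  __m128i r = _mm_maskz_rol_epi64 ((__mmask8) 0x1, a, 1);
  printf ("%llx %llx\n",
          (unsigned long long) _mm_extract_epi64 (r, 0),
          (unsigned long long) _mm_extract_epi64 (r, 1));
  /* Prints "3 0": 0x8000000000000001 rotated left by 1 is 3, and the
     masked-out upper lane is zeroed rather than left undefined.  */
  return 0;
}

Since _mm_setzero_si128 is a plain SSE2 intrinsic, callable from functions with any target attribute, it is a safe stand-in for the removed AVX512VL-only zero helpers.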