Usages of these intrinsics in a 32-bit build result in assertions in the backend.
llvm-svn: 276249
(__v64qi) _mm512_setzero_hi ());
}
+#ifdef __x86_64__
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_set1_epi8 (__m512i __O, __mmask64 __M, char __A)
{
_mm512_setzero_qi(),
__M);
}
+#endif
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_mm512_kunpackd (__mmask64 __A, __mmask64 __B)
(int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
(int)(P), (int)(R)); })
+#ifdef __x86_64__
#define _mm_cvt_roundsd_si64(A, R) __extension__ ({ \
(long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)); })
+#endif
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
(__v8di)(__m512i)(C), (int)(imm), \
(__mmask8)(U)); })
+#ifdef __x86_64__
#define _mm_cvt_roundsd_i64(A, R) __extension__ ({ \
(long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)); })
+#endif
#define _mm_cvt_roundsd_si32(A, R) __extension__ ({ \
(int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)); })
_MM_FROUND_CUR_DIRECTION);
}
+#ifdef __x86_64__
#define _mm_cvt_roundsd_u64(A, R) __extension__ ({ \
(unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \
(int)(R)); })
__A,
_MM_FROUND_CUR_DIRECTION);
}
+#endif
#define _mm_cvt_roundss_si32(A, R) __extension__ ({ \
(int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)); })
#define _mm_cvt_roundss_i32(A, R) __extension__ ({ \
(int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)); })
+#ifdef __x86_64__
#define _mm_cvt_roundss_si64(A, R) __extension__ ({ \
(long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)); })
#define _mm_cvt_roundss_i64(A, R) __extension__ ({ \
(long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)); })
+#endif
#define _mm_cvt_roundss_u32(A, R) __extension__ ({ \
(unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R)); })
_MM_FROUND_CUR_DIRECTION);
}
+#ifdef __x86_64__
#define _mm_cvt_roundss_u64(A, R) __extension__ ({ \
(unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \
(int)(R)); })
__A,
_MM_FROUND_CUR_DIRECTION);
}
+#endif
#define _mm_cvtt_roundsd_i32(A, R) __extension__ ({ \
(int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)); })
_MM_FROUND_CUR_DIRECTION);
}
+#ifdef __x86_64__
#define _mm_cvtt_roundsd_si64(A, R) __extension__ ({ \
(long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)); })
return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
_MM_FROUND_CUR_DIRECTION);
}
+#endif
#define _mm_cvtt_roundsd_u32(A, R) __extension__ ({ \
(unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R)); })
_MM_FROUND_CUR_DIRECTION);
}
+#ifdef __x86_64__
#define _mm_cvtt_roundsd_u64(A, R) __extension__ ({ \
(unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \
(int)(R)); })
__A,
_MM_FROUND_CUR_DIRECTION);
}
+#endif
#define _mm_cvtt_roundss_i32(A, R) __extension__ ({ \
(int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)); })
_MM_FROUND_CUR_DIRECTION);
}
+#ifdef __x86_64__
#define _mm_cvtt_roundss_i64(A, R) __extension__ ({ \
(long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)); })
return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
_MM_FROUND_CUR_DIRECTION);
}
+#endif
#define _mm_cvtt_roundss_u32(A, R) __extension__ ({ \
(unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R)); })
_MM_FROUND_CUR_DIRECTION);
}
+#ifdef __x86_64__
#define _mm_cvtt_roundss_u64(A, R) __extension__ ({ \
(unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \
(int)(R)); })
__A,
_MM_FROUND_CUR_DIRECTION);
}
+#endif
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
}
#define _mm_cvtss_i32 _mm_cvtss_si32
-#define _mm_cvtss_i64 _mm_cvtss_si64
#define _mm_cvtsd_i32 _mm_cvtsd_si32
-#define _mm_cvtsd_i64 _mm_cvtsd_si64
#define _mm_cvti32_sd _mm_cvtsi32_sd
-#define _mm_cvti64_sd _mm_cvtsi64_sd
#define _mm_cvti32_ss _mm_cvtsi32_ss
+#ifdef __x86_64__
+#define _mm_cvtss_i64 _mm_cvtss_si64
+#define _mm_cvtsd_i64 _mm_cvtsd_si64
+#define _mm_cvti64_sd _mm_cvtsi64_sd
#define _mm_cvti64_ss _mm_cvtsi64_ss
+#endif
+#ifdef __x86_64__
#define _mm_cvt_roundi64_sd(A, B, R) __extension__ ({ \
(__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
(int)(R)); })
#define _mm_cvt_roundsi64_sd(A, B, R) __extension__ ({ \
(__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
(int)(R)); })
+#endif
#define _mm_cvt_roundsi32_ss(A, B, R) __extension__ ({ \
(__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)); })
#define _mm_cvt_roundi32_ss(A, B, R) __extension__ ({ \
(__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)); })
+#ifdef __x86_64__
#define _mm_cvt_roundsi64_ss(A, B, R) __extension__ ({ \
(__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
(int)(R)); })
#define _mm_cvt_roundi64_ss(A, B, R) __extension__ ({ \
(__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
(int)(R)); })
+#endif
#define _mm_cvt_roundss_sd(A, B, R) __extension__ ({ \
(__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
}
+#ifdef __x86_64__
#define _mm_cvt_roundu64_sd(A, B, R) __extension__ ({ \
(__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \
(unsigned long long)(B), (int)(R)); })
return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
_MM_FROUND_CUR_DIRECTION);
}
+#endif
#define _mm_cvt_roundu32_ss(A, B, R) __extension__ ({ \
(__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \
_MM_FROUND_CUR_DIRECTION);
}
+#ifdef __x86_64__
#define _mm_cvt_roundu64_ss(A, B, R) __extension__ ({ \
(__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \
(unsigned long long)(B), (int)(R)); })
return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
_MM_FROUND_CUR_DIRECTION);
}
+#endif
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
__M);
}
+#ifdef __x86_64__
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
{
return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O,
__M);
}
+#endif
static __inline __m512i __DEFAULT_FN_ATTRS
_mm512_set_epi32 (int __A, int __B, int __C, int __D,
}
+#ifdef __x86_64__
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mask_set1_epi8 (__m128i __O, __mmask16 __M, char __A)
{
_mm256_setzero_si256 (),
__M);
}
+#endif
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mask_loadu_epi16 (__m128i __W, __mmask8 __U, void const *__P)
(__v8si)_mm256_setzero_si256(), \
(__mmask8)(M)); })
+#ifdef __x86_64__
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A)
{
_mm256_setzero_si256 (),
__M);
}
+#endif
#define _mm_fixupimm_pd(A, B, C, imm) __extension__ ({ \
(__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \
}
static __inline__ void __DEFAULT_FN_ATTRS
-_fxsave64(void *__p) {
- return __builtin_ia32_fxsave64(__p);
+_fxrstor(void *__p) {
+ return __builtin_ia32_fxrstor(__p);
}
+#ifdef __x86_64__
static __inline__ void __DEFAULT_FN_ATTRS
-_fxrstor(void *__p) {
- return __builtin_ia32_fxrstor(__p);
+_fxsave64(void *__p) {
+ return __builtin_ia32_fxsave64(__p);
}
static __inline__ void __DEFAULT_FN_ATTRS
_fxrstor64(void *__p) {
return __builtin_ia32_fxrstor64(__p);
}
+#endif
#undef __DEFAULT_FN_ATTRS
return _mm_cvttss_si32(__a);
}
+#ifdef __x86_64__
/// \brief Converts a float value contained in the lower 32 bits of a vector of
/// [4 x float] into a 64-bit integer, truncating the result when it is
/// inexact.
{
return __builtin_ia32_cvttss2si64((__v4sf)__a);
}
+#endif
/// \brief Converts two low-order float values in a 128-bit vector of
/// [4 x float] into a 64-bit vector of [2 x i32], truncating the result
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_mask_unpackhi_pd(__W, __U, __A, __B);
}
+#ifdef __x86_64__
unsigned long long test_mm_cvt_roundsd_si64(__m128d __A) {
// CHECK-LABEL: @test_mm_cvt_roundsd_si64
// CHECK: @llvm.x86.avx512.vcvtsd2si64
return _mm_cvt_roundsd_si64(__A, _MM_FROUND_CUR_DIRECTION);
}
+#endif
__m512i test_mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U, __m512i __B) {
// CHECK-LABEL: @test_mm512_mask2_permutex2var_epi32
// CHECK: @llvm.x86.avx512.mask.vpermi2var.d.512
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_maskz_unpackhi_pd(__U, __A, __B);
}
+#ifdef __x86_64__
long long test_mm_cvt_roundsd_i64(__m128d __A) {
// CHECK-LABEL: @test_mm_cvt_roundsd_i64
// CHECK: @llvm.x86.avx512.vcvtsd2si64
return _mm_cvt_roundsd_i64(__A, _MM_FROUND_CUR_DIRECTION);
}
+#endif
__m512d test_mm512_mask2_permutex2var_pd(__m512d __A, __m512i __I, __mmask8 __U, __m512d __B) {
// CHECK-LABEL: @test_mm512_mask2_permutex2var_pd
// CHECK: @llvm.x86.avx512.mask.vpermi2var.pd.512
return _mm_cvtsd_u32(__A);
}
+#ifdef __x86_64__
unsigned long long test_mm_cvt_roundsd_u64(__m128d __A) {
// CHECK-LABEL: @test_mm_cvt_roundsd_u64
// CHECK: @llvm.x86.avx512.vcvtsd2usi64
// CHECK: @llvm.x86.avx512.vcvtsd2usi64
return _mm_cvtsd_u64(__A);
}
+#endif
int test_mm_cvt_roundss_si32(__m128 __A) {
// CHECK-LABEL: @test_mm_cvt_roundss_si32
return _mm_cvt_roundss_i32(__A, _MM_FROUND_CUR_DIRECTION);
}
+#ifdef __x86_64__
int test_mm_cvt_roundss_si64(__m128 __A) {
// CHECK-LABEL: @test_mm_cvt_roundss_si64
// CHECK: @llvm.x86.avx512.vcvtss2si64
// CHECK: @llvm.x86.avx512.vcvtss2si64
return _mm_cvt_roundss_i64(__A, _MM_FROUND_CUR_DIRECTION);
}
+#endif
unsigned test_mm_cvt_roundss_u32(__m128 __A) {
// CHECK-LABEL: @test_mm_cvt_roundss_u32
return _mm_cvtss_u32(__A);
}
+#ifdef __x86_64__
unsigned long long test_mm_cvt_roundss_u64(__m128 __A) {
// CHECK-LABEL: @test_mm_cvt_roundss_u64
// CHECK: @llvm.x86.avx512.vcvtss2usi64
// CHECK: @llvm.x86.avx512.vcvtss2usi64
return _mm_cvtss_u64(__A);
}
+#endif
int test_mm_cvtt_roundsd_i32(__m128d __A) {
// CHECK-LABEL: @test_mm_cvtt_roundsd_i32
return _mm_cvttsd_i32(__A);
}
+#ifdef __x86_64__
unsigned long long test_mm_cvtt_roundsd_si64(__m128d __A) {
// CHECK-LABEL: @test_mm_cvtt_roundsd_si64
// CHECK: @llvm.x86.avx512.cvttsd2si64
// CHECK: @llvm.x86.avx512.cvttsd2si64
return _mm_cvttsd_i64(__A);
}
+#endif
unsigned test_mm_cvtt_roundsd_u32(__m128d __A) {
// CHECK-LABEL: @test_mm_cvtt_roundsd_u32
return _mm_cvttsd_u32(__A);
}
+#ifdef __x86_64__
unsigned long long test_mm_cvtt_roundsd_u64(__m128d __A) {
// CHECK-LABEL: @test_mm_cvtt_roundsd_u64
// CHECK: @llvm.x86.avx512.cvttsd2usi64
// CHECK: @llvm.x86.avx512.cvttsd2usi64
return _mm_cvttsd_u64(__A);
}
+#endif
int test_mm_cvtt_roundss_i32(__m128 __A) {
// CHECK-LABEL: @test_mm_cvtt_roundss_i32
return _mm_cvttss_i32(__A);
}
+#ifdef __x86_64__
float test_mm_cvtt_roundss_i64(__m128 __A) {
// CHECK-LABEL: @test_mm_cvtt_roundss_i64
// CHECK: @llvm.x86.avx512.cvttss2si64
// CHECK: @llvm.x86.avx512.cvttss2si64
return _mm_cvttss_i64(__A);
}
+#endif
unsigned test_mm_cvtt_roundss_u32(__m128 __A) {
// CHECK-LABEL: @test_mm_cvtt_roundss_u32
return _mm_cvttss_u32(__A);
}
+#ifdef __x86_64__
unsigned long long test_mm_cvtt_roundss_u64(__m128 __A) {
// CHECK-LABEL: @test_mm_cvtt_roundss_u64
// CHECK: @llvm.x86.avx512.cvttss2usi64
// CHECK: @llvm.x86.avx512.cvttss2usi64
return _mm_cvttss_u64(__A);
}
+#endif
__m512i test_mm512_cvtt_roundps_epu32(__m512 __A)
{
return _mm_maskz_cvt_roundsd_ss(__U, __A, __B, _MM_FROUND_CUR_DIRECTION);
}
+#ifdef __x86_64__
__m128d test_mm_cvt_roundi64_sd(__m128d __A, long long __B) {
// CHECK-LABEL: @test_mm_cvt_roundi64_sd
// CHECK: @llvm.x86.avx512.cvtsi2sd64
// CHECK: @llvm.x86.avx512.cvtsi2sd64
return _mm_cvt_roundsi64_sd(__A, __B, _MM_FROUND_CUR_DIRECTION);
}
+#endif
__m128 test_mm_cvt_roundsi32_ss(__m128 __A, int __B) {
// CHECK-LABEL: @test_mm_cvt_roundsi32_ss
return _mm_cvt_roundi32_ss(__A, __B, _MM_FROUND_CUR_DIRECTION);
}
+#ifdef __x86_64__
__m128 test_mm_cvt_roundsi64_ss(__m128 __A, long long __B) {
// CHECK-LABEL: @test_mm_cvt_roundsi64_ss
// CHECK: @llvm.x86.avx512.cvtsi2ss64
// CHECK: @llvm.x86.avx512.cvtsi2ss64
return _mm_cvt_roundi64_ss(__A, __B, _MM_FROUND_CUR_DIRECTION);
}
+#endif
__m128d test_mm_cvt_roundss_sd(__m128d __A, __m128 __B) {
// CHECK-LABEL: @test_mm_cvt_roundss_sd
return _mm_cvtu32_sd(__A, __B);
}
+#ifdef __x86_64__
__m128d test_mm_cvt_roundu64_sd(__m128d __A, unsigned long long __B) {
// CHECK-LABEL: @test_mm_cvt_roundu64_sd
// CHECK: @llvm.x86.avx512.cvtusi642sd
// CHECK: @llvm.x86.avx512.cvtusi642sd
return _mm_cvtu64_sd(__A, __B);
}
+#endif
__m128 test_mm_cvt_roundu32_ss(__m128 __A, unsigned __B) {
// CHECK-LABEL: @test_mm_cvt_roundu32_ss
return _mm_cvtu32_ss(__A, __B);
}
+#ifdef __x86_64__
__m128 test_mm_cvt_roundu64_ss(__m128 __A, unsigned long long __B) {
// CHECK-LABEL: @test_mm_cvt_roundu64_ss
// CHECK: @llvm.x86.avx512.cvtusi642ss
// CHECK: @llvm.x86.avx512.cvtusi642ss
return _mm_cvtu64_ss(__A, __B);
}
+#endif
__m512i test_mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
{
__I, __J, __K, __L,__M, __N, __O, __P);
}
+#ifdef __x86_64__
__m512i test_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
{
//CHECK-LABEL: @test_mm512_mask_set1_epi64
//CHECK: @llvm.x86.avx512.mask.pbroadcast.q.gpr.512
return _mm512_mask_set1_epi64 (__O, __M, __A);
}
+#endif
__m512i test_mm512_set_epi64 (long long __A, long long __B, long long __C,
long long __D, long long __E, long long __F,
return _mm_cvtss_i32(A);
}
+#ifdef __x86_64__
long long test_mm_cvtss_i64(__m128 A) {
// CHECK-LABEL: test_mm_cvtss_i64
// CHECK: call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %{{.*}})
return _mm_cvtss_i64(A);
}
+#endif
__m128d test_mm_cvti32_sd(__m128d A, int B) {
// CHECK-LABEL: test_mm_cvti32_sd
return _mm_cvti32_sd(A, B);
}
+#ifdef __x86_64__
__m128d test_mm_cvti64_sd(__m128d A, long long B) {
// CHECK-LABEL: test_mm_cvti64_sd
// CHECK: sitofp i64 %{{.*}} to double
// CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
return _mm_cvti64_sd(A, B);
}
+#endif
__m128 test_mm_cvti32_ss(__m128 A, int B) {
// CHECK-LABEL: test_mm_cvti32_ss
return _mm_cvti32_ss(A, B);
}
+#ifdef __x86_64__
__m128 test_mm_cvti64_ss(__m128 A, long long B) {
// CHECK-LABEL: test_mm_cvti64_ss
// CHECK: sitofp i64 %{{.*}} to float
// CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
return _mm_cvti64_ss(A, B);
}
+#endif
int test_mm_cvtsd_i32(__m128d A) {
// CHECK-LABEL: test_mm_cvtsd_i32
return _mm_cvtsd_i32(A);
}
+#ifdef __x86_64__
long long test_mm_cvtsd_i64(__m128d A) {
// CHECK-LABEL: test_mm_cvtsd_i64
// CHECK: call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %{{.*}})
return _mm_cvtsd_i64(A);
}
+#endif
__m128d test_mm_mask_cvtss_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128 __B) {
// CHECK-LABEL: @test_mm_mask_cvtss_sd
return _mm256_maskz_set1_epi32(__M, 5);
}
+#ifdef __x86_64__
__m128i test_mm_mask_set1_epi64(__m128i __O, __mmask8 __M, long long __A) {
// CHECK-LABEL: @test_mm_mask_set1_epi64
// CHECK: @llvm.x86.avx512.mask.pbroadcast.q.gpr.128
// CHECK: @llvm.x86.avx512.mask.pbroadcast.q.gpr.256
return _mm256_maskz_set1_epi64(__M, __A);
}
+#endif
__m128d test_mm_fixupimm_pd(__m128d __A, __m128d __B, __m128i __C) {
// CHECK-LABEL: @test_mm_fixupimm_pd