This is similar to what we do for loadl_pi and loadh_pi.
llvm-svn: 365669
// Fragment of clang's BuiltinsX86.def: SSE builtin records.
//   TARGET_BUILTIN(name, type-signature, attributes, target-feature)
//   TARGET_HEADER_BUILTIN additionally names the header and language modes
//   in which the builtin is recognized by name.
TARGET_HEADER_BUILTIN(_mm_getcsr, "Ui", "nh", "xmmintrin.h", ALL_LANGUAGES, "sse")
TARGET_BUILTIN(__builtin_ia32_cvtss2si, "iV4f", "ncV:128:", "sse")
TARGET_BUILTIN(__builtin_ia32_cvttss2si, "iV4f", "ncV:128:", "sse")
// NOTE(review): the next two lines carry a leading '-' — they are diff
// deletions: this patch removes the storehps/storelps builtins, whose only
// users (_mm_storeh_pi/_mm_storel_pi in xmmintrin.h) now emit plain
// vector code instead.
-TARGET_BUILTIN(__builtin_ia32_storehps, "vV2i*V4f", "nV:128:", "sse")
-TARGET_BUILTIN(__builtin_ia32_storelps, "vV2i*V4f", "nV:128:", "sse")
TARGET_BUILTIN(__builtin_ia32_movmskps, "iV4f", "nV:128:", "sse")
TARGET_BUILTIN(__builtin_ia32_sfence, "v", "n", "sse")
TARGET_HEADER_BUILTIN(_mm_sfence, "v", "nh", "xmmintrin.h", ALL_LANGUAGES, "sse")
return Builder.CreateCall(Intr, Ops);
}
- case X86::BI__builtin_ia32_storehps:
- case X86::BI__builtin_ia32_storelps: {
- llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
- llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
-
- // cast val v2i64
- Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast");
-
- // extract (0, 1)
- unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
- Ops[1] = Builder.CreateExtractElement(Ops[1], Index, "extract");
-
- // cast pointer to i64 & store
- Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
- return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
- }
case X86::BI__builtin_ia32_vextractf128_pd256:
case X86::BI__builtin_ia32_vextractf128_ps256:
case X86::BI__builtin_ia32_vextractf128_si256:
/// Stores the upper 64 bits (elements 2 and 3) of a 128-bit vector of
/// [4 x float] to a 64-bit memory location.
///
/// \param __p
///    A pointer to the 64-bit destination. The store goes through a
///    __packed__ struct, so the location may be unaligned (align 1).
/// \param __a
///    A 128-bit vector of [4 x float]; its high two floats are stored.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_storeh_pi(__m64 *__p, __m128 __a)
{
  // Replaces the removed __builtin_ia32_storehps builtin with portable
  // vector code: shuffle the high pair down into a 2 x float vector, then
  // store it through a packed, may_alias struct so the access is both
  // unaligned-safe and strict-aliasing-safe.
  typedef float __mm_storeh_pi_v2f32 __attribute__((__vector_size__(8)));
  struct __mm_storeh_pi_struct {
    __mm_storeh_pi_v2f32 __u;
  } __attribute__((__packed__, __may_alias__));
  ((struct __mm_storeh_pi_struct*)__p)->__u = __builtin_shufflevector(__a, __a, 2, 3);
}
/// Stores the lower 64 bits of a 128-bit vector of [4 x float] to a
///    64-bit memory location.
/// Stores the lower 64 bits (elements 0 and 1) of a 128-bit vector of
/// [4 x float] to a 64-bit memory location.
///
/// \param __p
///    A pointer to the 64-bit destination. The store goes through a
///    __packed__ struct, so the location may be unaligned (align 1).
/// \param __a
///    A 128-bit vector of [4 x float]; its low two floats are stored.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_storel_pi(__m64 *__p, __m128 __a)
{
  // Replaces the removed __builtin_ia32_storelps builtin with portable
  // vector code, mirroring _mm_storeh_pi but selecting elements 0 and 1.
  // The helper typedef/struct are renamed from the copy-pasted
  // "__mm_storeh_pi_*" names to match this function (local only; no
  // visible interface change).
  typedef float __mm_storel_pi_v2f32 __attribute__((__vector_size__(8)));
  struct __mm_storel_pi_struct {
    __mm_storel_pi_v2f32 __u;
  } __attribute__((__packed__, __may_alias__));
  ((struct __mm_storel_pi_struct*)__p)->__u = __builtin_shufflevector(__a, __a, 0, 1);
}
/// Stores the lower 32 bits of a 128-bit vector of [4 x float] to a
///    32-bit memory location.
#endif
tmp_V2i = __builtin_ia32_cvttps2pi(tmp_V4f);
(void) __builtin_ia32_maskmovq(tmp_V8c, tmp_V8c, tmp_cp);
- (void) __builtin_ia32_storehps(tmp_V2ip, tmp_V4f);
- (void) __builtin_ia32_storelps(tmp_V2ip, tmp_V4f);
tmp_i = __builtin_ia32_movmskps(tmp_V4f);
tmp_i = __builtin_ia32_pmovmskb(tmp_V8c);
(void) __builtin_ia32_movntq(tmp_V1LLip, tmp_V1LLi);
// Verifies the IR emitted for _mm_storeh_pi: after removing the
// storehps builtin it must lower to a shufflevector of the high two
// floats followed by an align-1 <2 x float> store (the {{$}} anchor
// rejects any trailing !nontemporal or other metadata after "align 1").
void test_mm_storeh_pi(__m64* x, __m128 y) {
  // CHECK-LABEL: test_mm_storeh_pi
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <2 x i32> <i32 2, i32 3>
  // CHECK: store <2 x float> %{{.*}}, <2 x float>* %{{.*}}, align 1{{$}}
  _mm_storeh_pi(x, y);
}
// Verifies the IR emitted for _mm_storel_pi: after removing the
// storelps builtin it must lower to a shufflevector of the low two
// floats followed by an align-1 <2 x float> store.
void test_mm_storel_pi(__m64* x, __m128 y) {
  // CHECK-LABEL: test_mm_storel_pi
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <2 x i32> <i32 0, i32 1>
  // CHECK: store <2 x float> %{{.*}}, <2 x float>* %{{.*}}, align 1{{$}}
  _mm_storel_pi(x, y);
}