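[X86] Change the IR sequence for _mm_storeh_pi and _mm_storel_pi to perform the store as a <2 x float> extracted from the <4 x float> source vector (via shufflevector) instead of as an i64 extracted from a <2 x i64> bitcast.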

author Craig Topper <craig.topper@intel.com>
Wed, 10 Jul 2019 17:11:29 +0000 (17:11 +0000)

committer Craig Topper <craig.topper@intel.com>
Wed, 10 Jul 2019 17:11:29 +0000 (17:11 +0000)
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def

index bc6e382..a0ba0ec 100644 (file)
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -306,8 +306,6 @@ TARGET_BUILTIN(__builtin_ia32_stmxcsr, "Ui", "n", "sse")
  TARGET_HEADER_BUILTIN(_mm_getcsr, "Ui", "nh", "xmmintrin.h", ALL_LANGUAGES, "sse")
  TARGET_BUILTIN(__builtin_ia32_cvtss2si, "iV4f", "ncV:128:", "sse")
  TARGET_BUILTIN(__builtin_ia32_cvttss2si, "iV4f", "ncV:128:", "sse")
-TARGET_BUILTIN(__builtin_ia32_storehps, "vV2i*V4f", "nV:128:", "sse")
-TARGET_BUILTIN(__builtin_ia32_storelps, "vV2i*V4f", "nV:128:", "sse")
  TARGET_BUILTIN(__builtin_ia32_movmskps, "iV4f", "nV:128:", "sse")
  TARGET_BUILTIN(__builtin_ia32_sfence, "v", "n", "sse")
  TARGET_HEADER_BUILTIN(_mm_sfence, "v", "nh", "xmmintrin.h", ALL_LANGUAGES, "sse")
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp

index f21e02d..52e2d5b 100644 (file)
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -10651,22 +10651,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
      return Builder.CreateCall(Intr, Ops);
    }
  
-  case X86::BI__builtin_ia32_storehps:
-  case X86::BI__builtin_ia32_storelps: {
-    llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
-    llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
-
-    // cast val v2i64
-    Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast");
-
-    // extract (0, 1)
-    unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
-    Ops[1] = Builder.CreateExtractElement(Ops[1], Index, "extract");
-
-    // cast pointer to i64 & store
-    Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
-    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
-  }
    case X86::BI__builtin_ia32_vextractf128_pd256:
    case X86::BI__builtin_ia32_vextractf128_ps256:
    case X86::BI__builtin_ia32_vextractf128_si256:
diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h

index 6f5517e..75ff376 100644 (file)
--- a/clang/lib/Headers/xmmintrin.h
+++ b/clang/lib/Headers/xmmintrin.h
@@ -1919,7 +1919,11 @@ _mm_setzero_ps(void)
  static __inline__ void __DEFAULT_FN_ATTRS
  _mm_storeh_pi(__m64 *__p, __m128 __a)
  {
-  __builtin_ia32_storehps((__v2si *)__p, (__v4sf)__a);
+  typedef float __mm_storeh_pi_v2f32 __attribute__((__vector_size__(8)));
+  struct __mm_storeh_pi_struct {
+    __mm_storeh_pi_v2f32 __u;
+  } __attribute__((__packed__, __may_alias__));
+  ((struct __mm_storeh_pi_struct*)__p)->__u = __builtin_shufflevector(__a, __a, 2, 3);
  }
  
  /// Stores the lower 64 bits of a 128-bit vector of [4 x float] to a
@@ -1936,7 +1940,11 @@ _mm_storeh_pi(__m64 *__p, __m128 __a)
  static __inline__ void __DEFAULT_FN_ATTRS
  _mm_storel_pi(__m64 *__p, __m128 __a)
  {
-  __builtin_ia32_storelps((__v2si *)__p, (__v4sf)__a);
+  typedef float __mm_storeh_pi_v2f32 __attribute__((__vector_size__(8)));
+  struct __mm_storeh_pi_struct {
+    __mm_storeh_pi_v2f32 __u;
+  } __attribute__((__packed__, __may_alias__));
+  ((struct __mm_storeh_pi_struct*)__p)->__u = __builtin_shufflevector(__a, __a, 0, 1);
  }
  
  /// Stores the lower 32 bits of a 128-bit vector of [4 x float] to a
diff --git a/clang/test/CodeGen/builtins-x86.c b/clang/test/CodeGen/builtins-x86.c

index 0565639..61b9d53 100644 (file)
--- a/clang/test/CodeGen/builtins-x86.c
+++ b/clang/test/CodeGen/builtins-x86.c
@@ -341,8 +341,6 @@ void f0() {
  #endif
    tmp_V2i = __builtin_ia32_cvttps2pi(tmp_V4f);
    (void) __builtin_ia32_maskmovq(tmp_V8c, tmp_V8c, tmp_cp);
-  (void) __builtin_ia32_storehps(tmp_V2ip, tmp_V4f);
-  (void) __builtin_ia32_storelps(tmp_V2ip, tmp_V4f);
    tmp_i = __builtin_ia32_movmskps(tmp_V4f);
    tmp_i = __builtin_ia32_pmovmskb(tmp_V8c);
    (void) __builtin_ia32_movntq(tmp_V1LLip, tmp_V1LLi);
diff --git a/clang/test/CodeGen/sse-builtins.c b/clang/test/CodeGen/sse-builtins.c

index eb47c19..4179341 100644 (file)
--- a/clang/test/CodeGen/sse-builtins.c
+++ b/clang/test/CodeGen/sse-builtins.c
@@ -688,17 +688,15 @@ void test_mm_store1_ps(float* x, __m128 y) {
  
  void test_mm_storeh_pi(__m64* x,  __m128 y) {
    // CHECK-LABEL: test_mm_storeh_pi
-  // CHECK: bitcast <4 x float> %{{.*}} to <2 x i64>
-  // CHECK: extractelement <2 x i64> %{{.*}}, i64 1
-  // CHECK: store i64 %{{.*}}, i64* {{.*}}
+  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <2 x i32> <i32 2, i32 3>
+  // CHECK: store <2 x float> %{{.*}}, <2 x float>* %{{.*}}, align 1{{$}}
    _mm_storeh_pi(x, y);
  }
  
  void test_mm_storel_pi(__m64* x,  __m128 y) {
    // CHECK-LABEL: test_mm_storel_pi
-  // CHECK: bitcast <4 x float> %{{.*}} to <2 x i64>
-  // CHECK: extractelement <2 x i64> %{{.*}}, i64 0
-  // CHECK: store i64 %{{.*}}, i64* {{.*}}
+  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <2 x i32> <i32 0, i32 1>
+  // CHECK: store <2 x float> %{{.*}}, <2 x float>* %{{.*}}, align 1{{$}}
    _mm_storel_pi(x, y);
  }
author	Craig Topper <craig.topper@intel.com>
	Wed, 10 Jul 2019 17:11:29 +0000 (17:11 +0000)
committer	Craig Topper <craig.topper@intel.com>
	Wed, 10 Jul 2019 17:11:29 +0000 (17:11 +0000)
clang/include/clang/Basic/BuiltinsX86.def		patch | blob | history
clang/lib/CodeGen/CGBuiltin.cpp		patch | blob | history
clang/lib/Headers/xmmintrin.h		patch | blob | history
clang/test/CodeGen/builtins-x86.c		patch | blob | history
clang/test/CodeGen/sse-builtins.c		patch | blob | history