[X86] Use vmovss to handle inserting an element into index 0 of a v8f32 vector of zeroes.

author Craig Topper <craig.topper@gmail.com>

Thu, 5 Mar 2015 06:38:42 +0000 (06:38 +0000)

committer Craig Topper <craig.topper@gmail.com>

Thu, 5 Mar 2015 06:38:42 +0000 (06:38 +0000)
author Craig Topper <craig.topper@gmail.com>
Thu, 5 Mar 2015 06:38:42 +0000 (06:38 +0000)
committer Craig Topper <craig.topper@gmail.com>
Thu, 5 Mar 2015 06:38:42 +0000 (06:38 +0000)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp

index e5af729..e511492 100644 (file)
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -5634,12 +5634,13 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
  
        if (ExtVT == MVT::i32 || ExtVT == MVT::f32 || ExtVT == MVT::f64 ||
            (ExtVT == MVT::i64 && Subtarget->is64Bit())) {
-        if (VT.is256BitVector() || VT.is512BitVector()) {
+        if (VT.is512BitVector()) {
            SDValue ZeroVec = getZeroVector(VT, Subtarget, DAG, dl);
            return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, ZeroVec,
                               Item, DAG.getIntPtrConstant(0));
          }
-        assert(VT.is128BitVector() && "Expected an SSE value type!");
+        assert((VT.is128BitVector() || VT.is256BitVector()) &&
+               "Expected an SSE value type!");
          Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
          // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
          return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
@@ -9333,6 +9334,15 @@ static SDValue lowerV8F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
    ArrayRef<int> Mask = SVOp->getMask();
    assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
  
+  // If we have a single input to the zero element, insert that into V1 if we
+  // can do so cheaply.
+  int NumV2Elements =
+      std::count_if(Mask.begin(), Mask.end(), [](int M) { return M >= 8; });
+  if (NumV2Elements == 1 && Mask[0] >= 8)
+    if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
+            DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG))
+      return Insertion;
+
    if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8f32, V1, V2, Mask,
                                                  Subtarget, DAG))
      return Blend;
diff --git a/llvm/test/CodeGen/X86/2012-1-10-buildvector.ll b/llvm/test/CodeGen/X86/2012-1-10-buildvector.ll

index a5f64c5..a9b8cc6 100644 (file)
--- a/llvm/test/CodeGen/X86/2012-1-10-buildvector.ll
+++ b/llvm/test/CodeGen/X86/2012-1-10-buildvector.ll
@@ -17,7 +17,8 @@ entry:
  ;CHECK-LABEL: bad_insert:
  define void @bad_insert(i32 %t) {
  entry:
-;CHECK: vpinsrd
+;CHECK: vxorps %ymm1, %ymm1, %ymm1
+;CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
    %v2 = insertelement <8 x i32> zeroinitializer, i32 %t, i32 0
    store <8 x i32> %v2, <8 x i32> addrspace(1)* undef, align 32
  ;CHECK: ret
diff --git a/llvm/test/CodeGen/X86/avx-basic.ll b/llvm/test/CodeGen/X86/avx-basic.ll

index 5307527..683f43b 100644 (file)
--- a/llvm/test/CodeGen/X86/avx-basic.ll
+++ b/llvm/test/CodeGen/X86/avx-basic.ll
@@ -91,3 +91,12 @@ entry:
    %vecext.i = extractelement <2 x i64> %a, i32 0
    ret i64 %vecext.i
  }
+
+; PR22685
+; CHECK: mov00
+; CHECK: vmovss
+define <8 x float> @mov00_8f32(float* %ptr) {
+  %val = load float, float* %ptr
+  %vec = insertelement <8 x float> zeroinitializer, float %val, i32 0
+  ret <8 x float> %vec
+}
diff --git a/llvm/test/CodeGen/X86/avx-load-store.ll b/llvm/test/CodeGen/X86/avx-load-store.ll

index ee5bd0e..d2f213b 100644 (file)
--- a/llvm/test/CodeGen/X86/avx-load-store.ll
+++ b/llvm/test/CodeGen/X86/avx-load-store.ll
@@ -30,8 +30,7 @@ declare void @dummy(<4 x double>, <8 x float>, <4 x i64>)
  ; CHECK: mov00
  define <8 x float> @mov00(<8 x float> %v, float * %ptr) nounwind {
    %val = load float, float* %ptr
-; CHECK: vinsertps
-; CHECK: vinsertf128
+; CHECK: vmovss (%
    %i0 = insertelement <8 x float> zeroinitializer, float %val, i32 0
    ret <8 x float> %i0
  ; CHECK: ret
@@ -40,8 +39,7 @@ define <8 x float> @mov00(<8 x float> %v, float * %ptr) nounwind {
  ; CHECK: mov01
  define <4 x double> @mov01(<4 x double> %v, double * %ptr) nounwind {
    %val = load double, double* %ptr
-; CHECK: vmovlpd
-; CHECK: vinsertf128
+; CHECK: vmovsd (%
    %i0 = insertelement <4 x double> zeroinitializer, double %val, i32 0
    ret <4 x double> %i0
  ; CHECK: ret
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll

index 624b3f2..417423a 100644 (file)
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll
@@ -131,11 +131,10 @@ define <8 x float> @shuffle_v8f32_70000000(<8 x float> %a, <8 x float> %b) {
  ;
  ; AVX2-LABEL: shuffle_v8f32_70000000:
  ; AVX2:       # BB#0:
-; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
  ; AVX2-NEXT:    movl $7, %eax
-; AVX2-NEXT:    vpinsrd $0, %eax, %xmm1, %xmm1
+; AVX2-NEXT:    vmovd %eax, %xmm1
  ; AVX2-NEXT:    vpxor %ymm2, %ymm2, %ymm2
-; AVX2-NEXT:    vinserti128 $0, %xmm1, %ymm2, %ymm1
+; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1,2,3,4,5,6,7]
  ; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
  ; AVX2-NEXT:    retq
    %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
@@ -961,11 +960,10 @@ define <8 x i32> @shuffle_v8i32_70000000(<8 x i32> %a, <8 x i32> %b) {
  ;
  ; AVX2-LABEL: shuffle_v8i32_70000000:
  ; AVX2:       # BB#0:
-; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
  ; AVX2-NEXT:    movl $7, %eax
-; AVX2-NEXT:    vpinsrd $0, %eax, %xmm1, %xmm1
+; AVX2-NEXT:    vmovd %eax, %xmm1
  ; AVX2-NEXT:    vpxor %ymm2, %ymm2, %ymm2
-; AVX2-NEXT:    vinserti128 $0, %xmm1, %ymm2, %ymm1
+; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1,2,3,4,5,6,7]
  ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
  ; AVX2-NEXT:    retq
    %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
author	Craig Topper <craig.topper@gmail.com>
	Thu, 5 Mar 2015 06:38:42 +0000 (06:38 +0000)
committer	Craig Topper <craig.topper@gmail.com>
	Thu, 5 Mar 2015 06:38:42 +0000 (06:38 +0000)
llvm/lib/Target/X86/X86ISelLowering.cpp		patch | blob | history
llvm/test/CodeGen/X86/2012-1-10-buildvector.ll		patch | blob | history
llvm/test/CodeGen/X86/avx-basic.ll		patch | blob | history
llvm/test/CodeGen/X86/avx-load-store.ll		patch | blob | history
llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll		patch | blob | history