[X86] Support folding to andnps with SSE1 only.

author Craig Topper <craig.topper@intel.com>

Sat, 25 Nov 2017 07:20:22 +0000 (07:20 +0000)

committer Craig Topper <craig.topper@intel.com>

Sat, 25 Nov 2017 07:20:22 +0000 (07:20 +0000)
author Craig Topper <craig.topper@intel.com>
Sat, 25 Nov 2017 07:20:22 +0000 (07:20 +0000)
committer Craig Topper <craig.topper@intel.com>
Sat, 25 Nov 2017 07:20:22 +0000 (07:20 +0000)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp

index 984ad37..4db9fe8 100644 (file)
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -35033,10 +35033,13 @@ static SDValue combineFAndFNotToFAndn(SDNode *N, SelectionDAG &DAG,
  
    // Vector types are handled in combineANDXORWithAllOnesIntoANDNP().
    if (!((VT == MVT::f32 && Subtarget.hasSSE1()) ||
-        (VT == MVT::f64 && Subtarget.hasSSE2())))
+        (VT == MVT::f64 && Subtarget.hasSSE2()) ||
+        (VT == MVT::v4f32 && Subtarget.hasSSE1() && !Subtarget.hasSSE2())))
      return SDValue();
  
    auto isAllOnesConstantFP = [](SDValue V) {
+    if (V.getSimpleValueType().isVector())
+      return ISD::isBuildVectorAllOnes(V.getNode());
      auto *C = dyn_cast<ConstantFPSDNode>(V);
      return C && C->getConstantFPValue()->isAllOnesValue();
    };
diff --git a/llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll

index 79696e6..9f738aa 100644 (file)
--- a/llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
@@ -55,14 +55,12 @@ define <4 x float> @test_mm_and_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
  define <4 x float> @test_mm_andnot_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
  ; X32-LABEL: test_mm_andnot_ps:
  ; X32:       # BB#0:
-; X32-NEXT:    xorps {{\.LCPI.*}}, %xmm0
-; X32-NEXT:    andps %xmm1, %xmm0
+; X32-NEXT:    andnps %xmm1, %xmm0
  ; X32-NEXT:    retl
  ;
  ; X64-LABEL: test_mm_andnot_ps:
  ; X64:       # BB#0:
-; X64-NEXT:    xorps {{.*}}(%rip), %xmm0
-; X64-NEXT:    andps %xmm1, %xmm0
+; X64-NEXT:    andnps %xmm1, %xmm0
  ; X64-NEXT:    retq
    %arg0 = bitcast <4 x float> %a0 to <4 x i32>
    %arg1 = bitcast <4 x float> %a1 to <4 x i32>
author	Craig Topper <craig.topper@intel.com>
	Sat, 25 Nov 2017 07:20:22 +0000 (07:20 +0000)
committer	Craig Topper <craig.topper@intel.com>
	Sat, 25 Nov 2017 07:20:22 +0000 (07:20 +0000)
llvm/lib/Target/X86/X86ISelLowering.cpp		patch | blob | history
llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll		patch | blob | history