[X86] Turn setne X, signedmax into setgt signedmax, X in LowerVSETCC to avoid an invert

author Craig Topper <craig.topper@intel.com>

Fri, 23 Feb 2018 00:21:39 +0000 (00:21 +0000)

committer Craig Topper <craig.topper@intel.com>

Fri, 23 Feb 2018 00:21:39 +0000 (00:21 +0000)
author Craig Topper <craig.topper@intel.com>
Fri, 23 Feb 2018 00:21:39 +0000 (00:21 +0000)
committer Craig Topper <craig.topper@intel.com>
Fri, 23 Feb 2018 00:21:39 +0000 (00:21 +0000)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp

index 5dd0b9ea189800bfa55a2b620362543dbef8166e..0f9f9bf1b833a3b8a586d39686a60b0135bf5cb5 100644 (file)
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -18100,12 +18100,16 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
    }
  
    // If this is a SETNE against the signed minimum value, change it to SETGT.
+  // If this is a SETNE against the signed maximum value, change it to SETLT,
+  // which will be swapped to SETGT.
    // Otherwise we use PCMPEQ+invert.
    APInt ConstValue;
    if (Cond == ISD::SETNE &&
-      ISD::isConstantSplatVector(Op1.getNode(), ConstValue),
-      ConstValue.isMinSignedValue()) {
-    Cond = ISD::SETGT;
+      ISD::isConstantSplatVector(Op1.getNode(), ConstValue)) {
+    if (ConstValue.isMinSignedValue())
+      Cond = ISD::SETGT;
+    else if (ConstValue.isMaxSignedValue())
+      Cond = ISD::SETLT;
    }
  
    // If both operands are known non-negative, then an unsigned compare is the
diff --git a/llvm/test/CodeGen/X86/vector-compare-simplify.ll b/llvm/test/CodeGen/X86/vector-compare-simplify.ll

index f1ac60134eb80d25544e76b168bf27394589839e..e7f1d849b2f7b7a5ac799b886511ff10f7ec4c98 100644 (file)
--- a/llvm/test/CodeGen/X86/vector-compare-simplify.ll
+++ b/llvm/test/CodeGen/X86/vector-compare-simplify.ll
@@ -345,3 +345,18 @@ define <4 x i32> @ne_smin(<4 x i32> %x) {
    ret <4 x i32> %r
  }
  
+; Make sure we can efficiently handle ne smax by turning it into sgt. We can't fold
+; the constant pool load, but the alternative is a cmpeq+invert, which is 3 instructions.
+; The PCMPGT version is two instructions given sufficient register allocation freedom
+; to avoid the last mov to %xmm0 seen here.
+define <4 x i32> @ne_smax(<4 x i32> %x) {
+; CHECK-LABEL: ne_smax:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movdqa {{.*#+}} xmm1 = [2147483647,2147483647,2147483647,2147483647]
+; CHECK-NEXT:    pcmpgtd %xmm0, %xmm1
+; CHECK-NEXT:    movdqa %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %cmp = icmp ne <4 x i32> %x, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
+  %r = sext <4 x i1> %cmp to <4 x i32>
+  ret <4 x i32> %r
+}
author	Craig Topper <craig.topper@intel.com>
	Fri, 23 Feb 2018 00:21:39 +0000 (00:21 +0000)
committer	Craig Topper <craig.topper@intel.com>
	Fri, 23 Feb 2018 00:21:39 +0000 (00:21 +0000)
llvm/lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
llvm/test/CodeGen/X86/vector-compare-simplify.ll		patch \| blob \| history