[TargetLowering] SimplifyMultipleUseDemandedBits - drop already extended ISD::SIGN_EXTEND_INREG nodes

author Simon Pilgrim <llvm-dev@redking.me.uk>

Thu, 18 Jun 2020 15:40:51 +0000 (16:40 +0100)

committer Simon Pilgrim <llvm-dev@redking.me.uk>

Thu, 18 Jun 2020 15:41:08 +0000 (16:41 +0100)
author Simon Pilgrim <llvm-dev@redking.me.uk>
Thu, 18 Jun 2020 15:40:51 +0000 (16:40 +0100)
committer Simon Pilgrim <llvm-dev@redking.me.uk>
Thu, 18 Jun 2020 15:41:08 +0000 (16:41 +0100)
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

index d231d35..ed5606c 100644 (file)
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -615,6 +615,7 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
      return DAG.getUNDEF(Op.getValueType());
  
    unsigned NumElts = DemandedElts.getBitWidth();
+  unsigned BitWidth = DemandedBits.getBitWidth();
    KnownBits LHSKnown, RHSKnown;
    switch (Op.getOpcode()) {
    case ISD::BITCAST: {
@@ -720,7 +721,6 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
              DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
        SDValue Op0 = Op.getOperand(0);
        unsigned ShAmt = MaxSA->getZExtValue();
-      unsigned BitWidth = DemandedBits.getBitWidth();
        unsigned NumSignBits =
            DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
        unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
@@ -737,7 +737,7 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
      // width as the setcc result, and (3) the result of a setcc conforms to 0 or
      // -1, we may be able to bypass the setcc.
      if (DemandedBits.isSignMask() &&
-        Op0.getScalarValueSizeInBits() == DemandedBits.getBitWidth() &&
+        Op0.getScalarValueSizeInBits() == BitWidth &&
          getBooleanContents(Op0.getValueType()) ==
              BooleanContent::ZeroOrNegativeOneBooleanContent) {
        // If we're testing X < 0, then this compare isn't needed - just use X!
@@ -752,9 +752,15 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    }
    case ISD::SIGN_EXTEND_INREG: {
      // If none of the extended bits are demanded, eliminate the sextinreg.
+    SDValue Op0 = Op.getOperand(0);
      EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
-    if (DemandedBits.getActiveBits() <= ExVT.getScalarSizeInBits())
-      return Op.getOperand(0);
+    unsigned ExBits = ExVT.getScalarSizeInBits();
+    if (DemandedBits.getActiveBits() <= ExBits)
+      return Op0;
+    // If the input is already sign extended, just drop the extension.
+    unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
+    if (NumSignBits >= (BitWidth - ExBits + 1))
+      return Op0;
      break;
    }
    case ISD::INSERT_VECTOR_ELT: {
diff --git a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll

index c83fe01..f3a1b44 100644 (file)
--- a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
@@ -1661,7 +1661,6 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>) {
  ; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm1
  ; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
  ; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpsllw $7, %xmm0, %xmm0
  ; AVX1-NEXT:    vpmovmskb %xmm0, %eax
  ; AVX1-NEXT:    cmpw $-1, %ax
  ; AVX1-NEXT:    sete %al
diff --git a/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll

index 0ac7c80..e08ec78 100644 (file)
--- a/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll
@@ -1617,7 +1617,6 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>) {
  ; SSE-NEXT:    por %xmm3, %xmm1
  ; SSE-NEXT:    por %xmm2, %xmm1
  ; SSE-NEXT:    por %xmm0, %xmm1
-; SSE-NEXT:    psllw $7, %xmm1
  ; SSE-NEXT:    pmovmskb %xmm1, %eax
  ; SSE-NEXT:    testw %ax, %ax
  ; SSE-NEXT:    setne %al
diff --git a/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll

index ee6d019..5b27147 100644 (file)
--- a/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll
@@ -1788,7 +1788,6 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) {
  ; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm0, %xmm0
  ; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
  ; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpsllw $7, %xmm0, %xmm0
  ; AVX1-NEXT:    vpmovmskb %xmm0, %eax
  ; AVX1-NEXT:    movl %eax, %ecx
  ; AVX1-NEXT:    shrl $8, %ecx
@@ -1880,7 +1879,6 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>) {
  ; SSE-NEXT:    pxor %xmm3, %xmm1
  ; SSE-NEXT:    pxor %xmm2, %xmm1
  ; SSE-NEXT:    pxor %xmm0, %xmm1
-; SSE-NEXT:    psllw $7, %xmm1
  ; SSE-NEXT:    pmovmskb %xmm1, %eax
  ; SSE-NEXT:    movl %eax, %ecx
  ; SSE-NEXT:    shrl $8, %ecx
@@ -1900,7 +1898,6 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>) {
  ; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
  ; AVX1-NEXT:    vpxor %xmm0, %xmm3, %xmm0
  ; AVX1-NEXT:    vpxor %xmm0, %xmm4, %xmm0
-; AVX1-NEXT:    vpsllw $7, %xmm0, %xmm0
  ; AVX1-NEXT:    vpmovmskb %xmm0, %eax
  ; AVX1-NEXT:    movl %eax, %ecx
  ; AVX1-NEXT:    shrl $8, %ecx
author	Simon Pilgrim <llvm-dev@redking.me.uk>
	Thu, 18 Jun 2020 15:40:51 +0000 (16:40 +0100)
committer	Simon Pilgrim <llvm-dev@redking.me.uk>
	Thu, 18 Jun 2020 15:41:08 +0000 (16:41 +0100)
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp		patch \| blob \| history
llvm/test/CodeGen/X86/vector-reduce-and-bool.ll		patch \| blob \| history
llvm/test/CodeGen/X86/vector-reduce-or-bool.ll		patch \| blob \| history
llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll		patch \| blob \| history