[DAGCombiner] try to form test+set out of shift+mask patterns

author Sanjay Patel <spatel@rotateright.com>

Mon, 2 Sep 2019 14:52:09 +0000 (14:52 +0000)

committer Sanjay Patel <spatel@rotateright.com>

Mon, 2 Sep 2019 14:52:09 +0000 (14:52 +0000)
author Sanjay Patel <spatel@rotateright.com>
Mon, 2 Sep 2019 14:52:09 +0000 (14:52 +0000)
committer Sanjay Patel <spatel@rotateright.com>
Mon, 2 Sep 2019 14:52:09 +0000 (14:52 +0000)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index aa69ea5..3ab3a57 100644 (file)
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5188,6 +5188,59 @@ SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
    return T1;
  }
  
+/// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
+/// For a target with a bit test, this is expected to become test + set and save
+/// at least 1 instruction.
+static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
+  assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
+
+  // This is probably not worthwhile without a supported type.
+  EVT VT = And->getValueType(0);
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (!TLI.isTypeLegal(VT))
+    return SDValue();
+
+  // Look through an optional extension and find a 'not'.
+  // TODO: Should we favor test+set even without the 'not' op?
+  SDValue Not = And->getOperand(0), And1 = And->getOperand(1);
+  if (Not.getOpcode() == ISD::ANY_EXTEND)
+    Not = Not.getOperand(0);
+  if (!isBitwiseNot(Not) || !Not.hasOneUse() || !isOneConstant(And1))
+    return SDValue();
+
+  // Look though an optional truncation. The source operand may not be the same
+  // type as the original 'and', but that is ok because we are masking off
+  // everything but the low bit.
+  SDValue Srl = Not.getOperand(0);
+  if (Srl.getOpcode() == ISD::TRUNCATE)
+    Srl = Srl.getOperand(0);
+
+  // Match a shift-right by constant.
+  if (Srl.getOpcode() != ISD::SRL || !Srl.hasOneUse() ||
+      !isa<ConstantSDNode>(Srl.getOperand(1)))
+    return SDValue();
+
+  // We might have looked through casts that make this transform invalid.
+  // TODO: If the source type is wider than the result type, do the mask and
+  //       compare in the source type.
+  const APInt &ShiftAmt = Srl.getConstantOperandAPInt(1);
+  unsigned VTBitWidth = VT.getSizeInBits();
+  if (ShiftAmt.uge(VTBitWidth))
+    return SDValue();
+
+  // Turn this into a bit-test pattern using mask op + setcc:
+  // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
+  SDLoc DL(And);
+  SDValue X = DAG.getZExtOrTrunc(Srl.getOperand(0), DL, VT);
+  EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+  SDValue Mask = DAG.getConstant(
+      APInt::getOneBitSet(VTBitWidth, ShiftAmt.getZExtValue()), DL, VT);
+  SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, Mask);
+  SDValue Zero = DAG.getConstant(0, DL, VT);
+  SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
+  return DAG.getZExtOrTrunc(Setcc, DL, VT);
+}
+
  SDValue DAGCombiner::visitAND(SDNode *N) {
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
@@ -5471,6 +5524,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
    if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
      return Shifts;
  
+  if (TLI.hasBitTest(N0, N1))
+    if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
+      return V;
+
    return SDValue();
  }
  
diff --git a/llvm/test/CodeGen/Hexagon/tstbit.ll b/llvm/test/CodeGen/Hexagon/tstbit.ll

index a21ad8d..7c80fcb 100644 (file)
--- a/llvm/test/CodeGen/Hexagon/tstbit.ll
+++ b/llvm/test/CodeGen/Hexagon/tstbit.ll
@@ -20,15 +20,25 @@ b0:
    ret i32 %v3
  }
  
+; TODO: Match to tstbit?
+
  define i64 @is_upper_bit_clear_i64(i64 %x) #0 {
  ; CHECK-LABEL: is_upper_bit_clear_i64:
  ; CHECK:       // %bb.0:
  ; CHECK-NEXT:    {
-; CHECK-NEXT:     p0 = tstbit(r1,#5)
-; CHECK-NEXT:     r1 = #0
+; CHECK-NEXT:     r4 = #0
+; CHECK-NEXT:     r2 = #32
+; CHECK-NEXT:     r7:6 = combine(#0,#0)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r5 = and(r1,r2)
+; CHECK-NEXT:     r1 = r4
  ; CHECK-NEXT:    }
  ; CHECK-NEXT:    {
-; CHECK-NEXT:     r0 = mux(p0,#0,#1)
+; CHECK-NEXT:     p0 = cmp.eq(r5:4,r7:6)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = mux(p0,#1,#0)
  ; CHECK-NEXT:     jumpr r31
  ; CHECK-NEXT:    }
    %sh = lshr i64 %x, 37
@@ -37,15 +47,24 @@ define i64 @is_upper_bit_clear_i64(i64 %x) #0 {
    ret i64 %r
  }
  
+; TODO: Match to tstbit?
+
  define i64 @is_lower_bit_clear_i64(i64 %x) #0 {
  ; CHECK-LABEL: is_lower_bit_clear_i64:
  ; CHECK:       // %bb.0:
  ; CHECK-NEXT:    {
-; CHECK-NEXT:     p0 = tstbit(r0,#27)
+; CHECK-NEXT:     r5:4 = combine(#0,#0)
+; CHECK-NEXT:     r2 = ##134217728
  ; CHECK-NEXT:     r1 = #0
  ; CHECK-NEXT:    }
  ; CHECK-NEXT:    {
-; CHECK-NEXT:     r0 = mux(p0,#0,#1)
+; CHECK-NEXT:     r0 = and(r0,r2)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     p0 = cmp.eq(r1:0,r5:4)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = mux(p0,#1,#0)
  ; CHECK-NEXT:     jumpr r31
  ; CHECK-NEXT:    }
    %sh = lshr i64 %x, 27
@@ -54,14 +73,16 @@ define i64 @is_lower_bit_clear_i64(i64 %x) #0 {
    ret i64 %r
  }
  
+; TODO: Match to tstbit?
+
  define i32 @is_bit_clear_i32(i32 %x) #0 {
  ; CHECK-LABEL: is_bit_clear_i32:
  ; CHECK:       // %bb.0:
  ; CHECK-NEXT:    {
-; CHECK-NEXT:     p0 = tstbit(r0,#27)
+; CHECK-NEXT:     r0 = and(r0,##134217728)
  ; CHECK-NEXT:    }
  ; CHECK-NEXT:    {
-; CHECK-NEXT:     r0 = mux(p0,#0,#1)
+; CHECK-NEXT:     r0 = cmp.eq(r0,#0)
  ; CHECK-NEXT:     jumpr r31
  ; CHECK-NEXT:    }
    %sh = lshr i32 %x, 27
@@ -70,14 +91,16 @@ define i32 @is_bit_clear_i32(i32 %x) #0 {
    ret i32 %r
  }
  
+; TODO: Match to tstbit?
+
  define i16 @is_bit_clear_i16(i16 %x) #0 {
  ; CHECK-LABEL: is_bit_clear_i16:
  ; CHECK:       // %bb.0:
  ; CHECK-NEXT:    {
-; CHECK-NEXT:     p0 = tstbit(r0,#7)
+; CHECK-NEXT:     r0 = and(r0,#128)
  ; CHECK-NEXT:    }
  ; CHECK-NEXT:    {
-; CHECK-NEXT:     r0 = mux(p0,#0,#1)
+; CHECK-NEXT:     r0 = cmp.eq(r0,#0)
  ; CHECK-NEXT:     jumpr r31
  ; CHECK-NEXT:    }
    %sh = lshr i16 %x, 7
@@ -86,14 +109,16 @@ define i16 @is_bit_clear_i16(i16 %x) #0 {
    ret i16 %r
  }
  
+; TODO: Match to tstbit?
+
  define i8 @is_bit_clear_i8(i8 %x) #0 {
  ; CHECK-LABEL: is_bit_clear_i8:
  ; CHECK:       // %bb.0:
  ; CHECK-NEXT:    {
-; CHECK-NEXT:     p0 = tstbit(r0,#3)
+; CHECK-NEXT:     r0 = and(r0,#8)
  ; CHECK-NEXT:    }
  ; CHECK-NEXT:    {
-; CHECK-NEXT:     r0 = mux(p0,#0,#1)
+; CHECK-NEXT:     r0 = cmp.eq(r0,#0)
  ; CHECK-NEXT:     jumpr r31
  ; CHECK-NEXT:    }
    %sh = lshr i8 %x, 3
diff --git a/llvm/test/CodeGen/X86/test-vs-bittest.ll b/llvm/test/CodeGen/X86/test-vs-bittest.ll

index 8873a1c..2f93ad5 100644 (file)
--- a/llvm/test/CodeGen/X86/test-vs-bittest.ll
+++ b/llvm/test/CodeGen/X86/test-vs-bittest.ll
@@ -393,10 +393,9 @@ no:
  define i64 @is_upper_bit_clear_i64(i64 %x) {
  ; CHECK-LABEL: is_upper_bit_clear_i64:
  ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movq %rdi, %rax
-; CHECK-NEXT:    shrq $37, %rax
-; CHECK-NEXT:    notl %eax
-; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    btq $37, %rdi
+; CHECK-NEXT:    setae %al
  ; CHECK-NEXT:    retq
    %sh = lshr i64 %x, 37
    %m = and i64 %sh, 1
@@ -407,10 +406,9 @@ define i64 @is_upper_bit_clear_i64(i64 %x) {
  define i64 @is_lower_bit_clear_i64(i64 %x) {
  ; CHECK-LABEL: is_lower_bit_clear_i64:
  ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movq %rdi, %rax
-; CHECK-NEXT:    shrl $27, %eax
-; CHECK-NEXT:    notl %eax
-; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    testl $134217728, %edi # imm = 0x8000000
+; CHECK-NEXT:    sete %al
  ; CHECK-NEXT:    retq
    %sh = lshr i64 %x, 27
    %m = and i64 %sh, 1
@@ -421,10 +419,9 @@ define i64 @is_lower_bit_clear_i64(i64 %x) {
  define i32 @is_bit_clear_i32(i32 %x) {
  ; CHECK-LABEL: is_bit_clear_i32:
  ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    shrl $27, %eax
-; CHECK-NEXT:    notl %eax
-; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    testl $134217728, %edi # imm = 0x8000000
+; CHECK-NEXT:    sete %al
  ; CHECK-NEXT:    retq
    %sh = lshr i32 %x, 27
    %n = xor i32 %sh, -1
@@ -435,10 +432,9 @@ define i32 @is_bit_clear_i32(i32 %x) {
  define i16 @is_bit_clear_i16(i16 %x) {
  ; CHECK-LABEL: is_bit_clear_i16:
  ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movzwl %di, %eax
-; CHECK-NEXT:    shrl $7, %eax
-; CHECK-NEXT:    notl %eax
-; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    testb $-128, %dil
+; CHECK-NEXT:    sete %al
  ; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
  ; CHECK-NEXT:    retq
    %sh = lshr i16 %x, 7
@@ -450,11 +446,8 @@ define i16 @is_bit_clear_i16(i16 %x) {
  define i8 @is_bit_clear_i8(i8 %x) {
  ; CHECK-LABEL: is_bit_clear_i8:
  ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    shrb $3, %al
-; CHECK-NEXT:    notb %al
-; CHECK-NEXT:    andb $1, %al
-; CHECK-NEXT:    # kill: def $al killed $al killed $eax
+; CHECK-NEXT:    testb $8, %dil
+; CHECK-NEXT:    sete %al
  ; CHECK-NEXT:    retq
    %sh = lshr i8 %x, 3
    %m = and i8 %sh, 1
@@ -462,6 +455,8 @@ define i8 @is_bit_clear_i8(i8 %x) {
    ret i8 %r
  }
  
+; TODO: We could use bt/test on the 64-bit value.
+
  define i8 @overshift(i64 %x) {
  ; CHECK-LABEL: overshift:
  ; CHECK:       # %bb.0:
author	Sanjay Patel <spatel@rotateright.com>
	Mon, 2 Sep 2019 14:52:09 +0000 (14:52 +0000)
committer	Sanjay Patel <spatel@rotateright.com>
	Mon, 2 Sep 2019 14:52:09 +0000 (14:52 +0000)
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch \| blob \| history
llvm/test/CodeGen/Hexagon/tstbit.ll		patch \| blob \| history
llvm/test/CodeGen/X86/test-vs-bittest.ll		patch \| blob \| history