[X86] Reuse EFLAGS and form LOCKed ops when only user is SETCC.

author Ahmed Bougacha <ahmed.bougacha@gmail.com>

Thu, 7 Apr 2016 02:07:10 +0000 (02:07 +0000)

committer Ahmed Bougacha <ahmed.bougacha@gmail.com>

Thu, 7 Apr 2016 02:07:10 +0000 (02:07 +0000)
author Ahmed Bougacha <ahmed.bougacha@gmail.com>
Thu, 7 Apr 2016 02:07:10 +0000 (02:07 +0000)
committer Ahmed Bougacha <ahmed.bougacha@gmail.com>
Thu, 7 Apr 2016 02:07:10 +0000 (02:07 +0000)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp

index 8ff0019..a93eaa5 100644 (file)
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -26137,6 +26137,73 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
    return SDValue();
  }
  
+/// Combine:
+///   (brcond/cmov/setcc .., (cmp (atomic_load_add x, 1), 0), COND_S)
+/// to:
+///   (brcond/cmov/setcc .., (LADD x, 1), COND_LE)
+/// i.e., reusing the EFLAGS produced by the LOCKed instruction.
+/// Note that this is only legal for some op/cc combinations.
+static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC,
+                                       SelectionDAG &DAG) {
+  // This combine only operates on CMP-like nodes.
+  if (!(Cmp.getOpcode() == X86ISD::CMP ||
+        (Cmp.getOpcode() == X86ISD::SUB && !Cmp->hasAnyUseOfValue(0))))
+    return SDValue();
+
+  // This only applies to variations of the common case:
+  //   (icmp slt x, 0) -> (icmp sle (add x, 1), 0)
+  //   (icmp sge x, 0) -> (icmp sgt (add x, 1), 0)
+  //   (icmp sle x, 0) -> (icmp slt (sub x, 1), 0)
+  //   (icmp sgt x, 0) -> (icmp sge (sub x, 1), 0)
+  // Using the proper condcodes (see below), overflow is checked for.
+
+  // FIXME: We can generalize both constraints:
+  // - XOR/OR/AND (if they were made to survive AtomicExpand)
+  // - LHS != 1
+  // if the result is compared.
+
+  SDValue CmpLHS = Cmp.getOperand(0);
+  SDValue CmpRHS = Cmp.getOperand(1);
+
+  if (!CmpLHS.hasOneUse())
+    return SDValue();
+
+  auto *CmpRHSC = dyn_cast<ConstantSDNode>(CmpRHS);
+  if (!CmpRHSC || CmpRHSC->getZExtValue() != 0)
+    return SDValue();
+
+  const unsigned Opc = CmpLHS.getOpcode();
+
+  if (Opc != ISD::ATOMIC_LOAD_ADD && Opc != ISD::ATOMIC_LOAD_SUB)
+    return SDValue();
+
+  SDValue OpRHS = CmpLHS.getOperand(2);
+  auto *OpRHSC = dyn_cast<ConstantSDNode>(OpRHS);
+  if (!OpRHSC)
+    return SDValue();
+
+  APInt Addend = OpRHSC->getAPIntValue();
+  if (Opc == ISD::ATOMIC_LOAD_SUB)
+    Addend = -Addend;
+
+  if (CC == X86::COND_S && Addend == 1)
+    CC = X86::COND_LE;
+  else if (CC == X86::COND_NS && Addend == 1)
+    CC = X86::COND_G;
+  else if (CC == X86::COND_G && Addend == -1)
+    CC = X86::COND_GE;
+  else if (CC == X86::COND_LE && Addend == -1)
+    CC = X86::COND_L;
+  else
+    return SDValue();
+
+  SDValue LockOp = lowerAtomicArithWithLOCK(CmpLHS, DAG);
+  DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(0),
+                                DAG.getUNDEF(CmpLHS.getValueType()));
+  DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(1), LockOp.getValue(1));
+  return LockOp;
+}
+
  // Check whether a boolean test is testing a boolean value generated by
  // X86ISD::SETCC. If so, return the operand of that SETCC and proper condition
  // code.
@@ -26308,6 +26375,16 @@ static bool checkBoolTestAndOrSetCCCombine(SDValue Cond, X86::CondCode &CC0,
    return true;
  }
  
+/// Optimize an EFLAGS definition used according to the condition code \p CC
+/// into a simpler EFLAGS value, potentially returning a new \p CC and replacing
+/// uses of chain values.
+static SDValue combineSetCCEFLAGS(SDValue EFLAGS, X86::CondCode &CC,
+                                  SelectionDAG &DAG) {
+  if (SDValue R = checkBoolTestSetCCCombine(EFLAGS, CC))
+    return R;
+  return combineSetCCAtomicArith(EFLAGS, CC, DAG);
+}
+
  /// Optimize X86ISD::CMOV [LHS, RHS, CONDCODE (e.g. X86::COND_NE), CONDVAL]
  static SDValue combineCMov(SDNode *N, SelectionDAG &DAG,
                             TargetLowering::DAGCombinerInfo &DCI,
@@ -26334,15 +26411,14 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG,
      }
    }
  
-  SDValue Flags;
-
-  Flags = checkBoolTestSetCCCombine(Cond, CC);
-  if (Flags.getNode() &&
-      // Extra check as FCMOV only supports a subset of X86 cond.
-      (FalseOp.getValueType() != MVT::f80 || hasFPCMov(CC))) {
-    SDValue Ops[] = { FalseOp, TrueOp,
-                      DAG.getConstant(CC, DL, MVT::i8), Flags };
-    return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), Ops);
+  // Try to simplify the EFLAGS and condition code operands.
+  // We can't always do this as FCMOV only supports a subset of X86 cond.
+  if (SDValue Flags = combineSetCCEFLAGS(Cond, CC, DAG)) {
+    if (FalseOp.getValueType() != MVT::f80 || hasFPCMov(CC)) {
+      SDValue Ops[] = {FalseOp, TrueOp, DAG.getConstant(CC, DL, MVT::i8),
+        Flags};
+      return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), Ops);
+    }
    }
  
    // If this is a select between two integer constants, try to do some
@@ -29268,7 +29344,8 @@ static SDValue combineX86SetCC(SDNode *N, SelectionDAG &DAG,
    if (CC == X86::COND_B)
      return MaterializeSETB(DL, EFLAGS, DAG, N->getSimpleValueType(0));
  
-  if (SDValue Flags = checkBoolTestSetCCCombine(EFLAGS, CC)) {
+  // Try to simplify the EFLAGS and condition code operands.
+  if (SDValue Flags = combineSetCCEFLAGS(EFLAGS, CC, DAG)) {
      SDValue Cond = DAG.getConstant(CC, DL, MVT::i8);
      return DAG.getNode(X86ISD::SETCC, DL, N->getVTList(), Cond, Flags);
    }
@@ -29281,15 +29358,16 @@ static SDValue combineBrCond(SDNode *N, SelectionDAG &DAG,
                               TargetLowering::DAGCombinerInfo &DCI,
                               const X86Subtarget &Subtarget) {
    SDLoc DL(N);
-  SDValue Chain = N->getOperand(0);
-  SDValue Dest = N->getOperand(1);
    SDValue EFLAGS = N->getOperand(3);
    X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(2));
  
-  if (SDValue Flags = checkBoolTestSetCCCombine(EFLAGS, CC)) {
+  // Try to simplify the EFLAGS and condition code operands.
+  // Make sure to not keep references to operands, as combineSetCCEFLAGS can
+  // RAUW them under us.
+  if (SDValue Flags = combineSetCCEFLAGS(EFLAGS, CC, DAG)) {
      SDValue Cond = DAG.getConstant(CC, DL, MVT::i8);
-    return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), Chain, Dest, Cond,
-                       Flags);
+    return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), N->getOperand(0),
+                       N->getOperand(1), Cond, Flags);
    }
  
    return SDValue();
diff --git a/llvm/test/CodeGen/X86/atomic-eflags-reuse.ll b/llvm/test/CodeGen/X86/atomic-eflags-reuse.ll

index a7463d3..fe872bd 100644 (file)
--- a/llvm/test/CodeGen/X86/atomic-eflags-reuse.ll
+++ b/llvm/test/CodeGen/X86/atomic-eflags-reuse.ll
@@ -4,10 +4,8 @@
  define i32 @test_add_1_cmov_slt(i64* %p, i32 %a0, i32 %a1) #0 {
  ; CHECK-LABEL: test_add_1_cmov_slt:
  ; CHECK:       # BB#0: # %entry
-; CHECK-NEXT:    movl $1, %eax
-; CHECK-NEXT:    lock xaddq %rax, (%rdi)
-; CHECK-NEXT:    testq %rax, %rax
-; CHECK-NEXT:    cmovnsl %edx, %esi
+; CHECK-NEXT:    lock incq (%rdi)
+; CHECK-NEXT:    cmovgl %edx, %esi
  ; CHECK-NEXT:    movl %esi, %eax
  ; CHECK-NEXT:    retq
  entry:
@@ -20,10 +18,8 @@ entry:
  define i32 @test_add_1_cmov_sge(i64* %p, i32 %a0, i32 %a1) #0 {
  ; CHECK-LABEL: test_add_1_cmov_sge:
  ; CHECK:       # BB#0: # %entry
-; CHECK-NEXT:    movl $1, %eax
-; CHECK-NEXT:    lock xaddq %rax, (%rdi)
-; CHECK-NEXT:    testq %rax, %rax
-; CHECK-NEXT:    cmovsl %edx, %esi
+; CHECK-NEXT:    lock incq (%rdi)
+; CHECK-NEXT:    cmovlel %edx, %esi
  ; CHECK-NEXT:    movl %esi, %eax
  ; CHECK-NEXT:    retq
  entry:
@@ -36,10 +32,8 @@ entry:
  define i32 @test_sub_1_cmov_sle(i64* %p, i32 %a0, i32 %a1) #0 {
  ; CHECK-LABEL: test_sub_1_cmov_sle:
  ; CHECK:       # BB#0: # %entry
-; CHECK-NEXT:    movq $-1, %rax
-; CHECK-NEXT:    lock xaddq %rax, (%rdi)
-; CHECK-NEXT:    testq %rax, %rax
-; CHECK-NEXT:    cmovgl %edx, %esi
+; CHECK-NEXT:    lock decq (%rdi)
+; CHECK-NEXT:    cmovgel %edx, %esi
  ; CHECK-NEXT:    movl %esi, %eax
  ; CHECK-NEXT:    retq
  entry:
@@ -52,10 +46,8 @@ entry:
  define i32 @test_sub_1_cmov_sgt(i64* %p, i32 %a0, i32 %a1) #0 {
  ; CHECK-LABEL: test_sub_1_cmov_sgt:
  ; CHECK:       # BB#0: # %entry
-; CHECK-NEXT:    movq $-1, %rax
-; CHECK-NEXT:    lock xaddq %rax, (%rdi)
-; CHECK-NEXT:    testq %rax, %rax
-; CHECK-NEXT:    cmovlel %edx, %esi
+; CHECK-NEXT:    lock decq (%rdi)
+; CHECK-NEXT:    cmovll %edx, %esi
  ; CHECK-NEXT:    movl %esi, %eax
  ; CHECK-NEXT:    retq
  entry:
@@ -83,10 +75,8 @@ entry:
  define i8 @test_sub_1_setcc_sgt(i64* %p) #0 {
  ; CHECK-LABEL: test_sub_1_setcc_sgt:
  ; CHECK:       # BB#0: # %entry
-; CHECK-NEXT:    movq $-1, %rax
-; CHECK-NEXT:    lock xaddq %rax, (%rdi)
-; CHECK-NEXT:    testq %rax, %rax
-; CHECK-NEXT:    setg %al
+; CHECK-NEXT:    lock decq (%rdi)
+; CHECK-NEXT:    setge %al
  ; CHECK-NEXT:    retq
  entry:
    %tmp0 = atomicrmw sub i64* %p, i64 1 seq_cst
@@ -98,10 +88,8 @@ entry:
  define i32 @test_add_1_brcond_sge(i64* %p, i32 %a0, i32 %a1) #0 {
  ; CHECK-LABEL: test_add_1_brcond_sge:
  ; CHECK:       # BB#0: # %entry
-; CHECK-NEXT:    movl $1, %eax
-; CHECK-NEXT:    lock xaddq %rax, (%rdi)
-; CHECK-NEXT:    testq %rax, %rax
-; CHECK-NEXT:    js .LBB6_2
+; CHECK-NEXT:    lock incq (%rdi)
+; CHECK-NEXT:    jle .LBB6_2
  ; CHECK-NEXT:  # BB#1: # %t
  ; CHECK-NEXT:    movl %esi, %eax
  ; CHECK-NEXT:    retq
author	Ahmed Bougacha <ahmed.bougacha@gmail.com>
	Thu, 7 Apr 2016 02:07:10 +0000 (02:07 +0000)
committer	Ahmed Bougacha <ahmed.bougacha@gmail.com>
	Thu, 7 Apr 2016 02:07:10 +0000 (02:07 +0000)
llvm/lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
llvm/test/CodeGen/X86/atomic-eflags-reuse.ll		patch \| blob \| history