return SDValue();
}
+/// Combine:
+///   (brcond/cmov/setcc .., (cmp (atomic_load_add x, 1), 0), COND_S)
+/// to:
+///   (brcond/cmov/setcc .., (LADD x, 1), COND_LE)
+/// i.e., reusing the EFLAGS produced by the LOCKed instruction.
+/// Note that this is only legal for some op/cc combinations.
+///
+/// \param Cmp the EFLAGS-producing node consumed by the brcond/cmov/setcc.
+/// \param CC  the condition code tested by that user. It is rewritten in
+///            place, and only when the combine succeeds.
+/// \param DAG the DAG in which uses of the atomic node's results are replaced.
+/// \returns the node built by lowerAtomicArithWithLOCK on success, or an empty
+///          SDValue (leaving \p CC and \p DAG untouched) when the pattern does
+///          not match.
+static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC,
+                                       SelectionDAG &DAG) {
+  // This combine only operates on CMP-like nodes: either a CMP, or a SUB whose
+  // arithmetic result (value 0) is unused.
+  if (!(Cmp.getOpcode() == X86ISD::CMP ||
+        (Cmp.getOpcode() == X86ISD::SUB && !Cmp->hasAnyUseOfValue(0))))
+    return SDValue();
+
+  // This only applies to variations of the common case:
+  //   (icmp slt x, 0) -> (icmp sle (add x, 1), 0)
+  //   (icmp sge x, 0) -> (icmp sgt (add x, 1), 0)
+  //   (icmp sle x, 0) -> (icmp slt (sub x, 1), 0)
+  //   (icmp sgt x, 0) -> (icmp sge (sub x, 1), 0)
+  // Using the proper condcodes (see below), overflow is checked for.
+
+  // FIXME: We can generalize both constraints:
+  // - XOR/OR/AND (if they were made to survive AtomicExpand)
+  // - LHS != 1
+  // if the result is compared.
+
+  SDValue CmpLHS = Cmp.getOperand(0);
+  SDValue CmpRHS = Cmp.getOperand(1);
+
+  // The compared value is replaced by undef below, so the compare must be its
+  // only user.
+  if (!CmpLHS.hasOneUse())
+    return SDValue();
+
+  // Only comparisons against the constant zero are handled.
+  auto *CmpRHSC = dyn_cast<ConstantSDNode>(CmpRHS);
+  if (!CmpRHSC || CmpRHSC->getZExtValue() != 0)
+    return SDValue();
+
+  const unsigned Opc = CmpLHS.getOpcode();
+
+  if (Opc != ISD::ATOMIC_LOAD_ADD && Opc != ISD::ATOMIC_LOAD_SUB)
+    return SDValue();
+
+  // Operand 2 of the atomic node is the value being added/subtracted; it has
+  // to be a constant.
+  SDValue OpRHS = CmpLHS.getOperand(2);
+  auto *OpRHSC = dyn_cast<ConstantSDNode>(OpRHS);
+  if (!OpRHSC)
+    return SDValue();
+
+  // Normalize "sub C" to "add -C" so that a single addend describes both.
+  APInt Addend = OpRHSC->getAPIntValue();
+  if (Opc == ISD::ATOMIC_LOAD_SUB)
+    Addend = -Addend;
+
+  // APInt's comparison against a plain integer compares the zero-extended
+  // value, so "Addend == -1" would only ever match a 64-bit addend. Test for
+  // an all-ones bit pattern instead so that narrower decrements match too.
+  const bool IsInc = Addend == 1;
+  const bool IsDec = Addend.isAllOnesValue();
+
+  if (CC == X86::COND_S && IsInc)
+    CC = X86::COND_LE;
+  else if (CC == X86::COND_NS && IsInc)
+    CC = X86::COND_G;
+  else if (CC == X86::COND_G && IsDec)
+    CC = X86::COND_GE;
+  else if (CC == X86::COND_LE && IsDec)
+    CC = X86::COND_L;
+  else
+    return SDValue();
+
+  // Past this point the combine always succeeds. The atomic node's value 0 had
+  // the compare as its single user (checked above), so it is replaced by
+  // undef; users of its value 1 are rerouted to value 1 of the LOCKed op.
+  SDValue LockOp = lowerAtomicArithWithLOCK(CmpLHS, DAG);
+  DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(0),
+                                DAG.getUNDEF(CmpLHS.getValueType()));
+  DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(1), LockOp.getValue(1));
+  return LockOp;
+}
+
// Check whether a boolean test is testing a boolean value generated by
// X86ISD::SETCC. If so, return the operand of that SETCC and proper condition
// code.
return true;
}
+/// Try to replace the EFLAGS definition \p EFLAGS, as consumed under the
+/// condition code \p CC, with a simpler EFLAGS value. \p CC may be updated to
+/// go with the returned flags, and uses of chain values may be replaced.
+/// Returns an empty SDValue when neither combine applies.
+static SDValue combineSetCCEFLAGS(SDValue EFLAGS, X86::CondCode &CC,
+                                  SelectionDAG &DAG) {
+  // The boolean-test simplification takes priority; the atomic arithmetic
+  // combine is only attempted when it produced nothing.
+  SDValue Simplified = checkBoolTestSetCCCombine(EFLAGS, CC);
+  if (!Simplified)
+    Simplified = combineSetCCAtomicArith(EFLAGS, CC, DAG);
+  return Simplified;
+}
+
/// Optimize X86ISD::CMOV [LHS, RHS, CONDCODE (e.g. X86::COND_NE), CONDVAL]
static SDValue combineCMov(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
}
}
- SDValue Flags;
-
- Flags = checkBoolTestSetCCCombine(Cond, CC);
- if (Flags.getNode() &&
- // Extra check as FCMOV only supports a subset of X86 cond.
- (FalseOp.getValueType() != MVT::f80 || hasFPCMov(CC))) {
- SDValue Ops[] = { FalseOp, TrueOp,
- DAG.getConstant(CC, DL, MVT::i8), Flags };
- return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), Ops);
+ // Try to simplify the EFLAGS and condition code operands.
+ // We can't always do this as FCMOV only supports a subset of X86 cond.
+ if (SDValue Flags = combineSetCCEFLAGS(Cond, CC, DAG)) {
+ if (FalseOp.getValueType() != MVT::f80 || hasFPCMov(CC)) {
+ SDValue Ops[] = {FalseOp, TrueOp, DAG.getConstant(CC, DL, MVT::i8),
+ Flags};
+ return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), Ops);
+ }
}
// If this is a select between two integer constants, try to do some
if (CC == X86::COND_B)
return MaterializeSETB(DL, EFLAGS, DAG, N->getSimpleValueType(0));
- if (SDValue Flags = checkBoolTestSetCCCombine(EFLAGS, CC)) {
+ // Try to simplify the EFLAGS and condition code operands.
+ if (SDValue Flags = combineSetCCEFLAGS(EFLAGS, CC, DAG)) {
SDValue Cond = DAG.getConstant(CC, DL, MVT::i8);
return DAG.getNode(X86ISD::SETCC, DL, N->getVTList(), Cond, Flags);
}
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
SDLoc DL(N);
- SDValue Chain = N->getOperand(0);
- SDValue Dest = N->getOperand(1);
SDValue EFLAGS = N->getOperand(3);
X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(2));
- if (SDValue Flags = checkBoolTestSetCCCombine(EFLAGS, CC)) {
+ // Try to simplify the EFLAGS and condition code operands.
+ // Make sure to not keep references to operands, as combineSetCCEFLAGS can
+ // RAUW them under us.
+ if (SDValue Flags = combineSetCCEFLAGS(EFLAGS, CC, DAG)) {
SDValue Cond = DAG.getConstant(CC, DL, MVT::i8);
- return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), Chain, Dest, Cond,
- Flags);
+ return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), N->getOperand(0),
+ N->getOperand(1), Cond, Flags);
}
return SDValue();
define i32 @test_add_1_cmov_slt(i64* %p, i32 %a0, i32 %a1) #0 {
; CHECK-LABEL: test_add_1_cmov_slt:
; CHECK: # BB#0: # %entry
-; CHECK-NEXT: movl $1, %eax
-; CHECK-NEXT: lock xaddq %rax, (%rdi)
-; CHECK-NEXT: testq %rax, %rax
-; CHECK-NEXT: cmovnsl %edx, %esi
+; CHECK-NEXT: lock incq (%rdi)
+; CHECK-NEXT: cmovgl %edx, %esi
; CHECK-NEXT: movl %esi, %eax
; CHECK-NEXT: retq
entry:
define i32 @test_add_1_cmov_sge(i64* %p, i32 %a0, i32 %a1) #0 {
; CHECK-LABEL: test_add_1_cmov_sge:
; CHECK: # BB#0: # %entry
-; CHECK-NEXT: movl $1, %eax
-; CHECK-NEXT: lock xaddq %rax, (%rdi)
-; CHECK-NEXT: testq %rax, %rax
-; CHECK-NEXT: cmovsl %edx, %esi
+; CHECK-NEXT: lock incq (%rdi)
+; CHECK-NEXT: cmovlel %edx, %esi
; CHECK-NEXT: movl %esi, %eax
; CHECK-NEXT: retq
entry:
define i32 @test_sub_1_cmov_sle(i64* %p, i32 %a0, i32 %a1) #0 {
; CHECK-LABEL: test_sub_1_cmov_sle:
; CHECK: # BB#0: # %entry
-; CHECK-NEXT: movq $-1, %rax
-; CHECK-NEXT: lock xaddq %rax, (%rdi)
-; CHECK-NEXT: testq %rax, %rax
-; CHECK-NEXT: cmovgl %edx, %esi
+; CHECK-NEXT: lock decq (%rdi)
+; CHECK-NEXT: cmovgel %edx, %esi
; CHECK-NEXT: movl %esi, %eax
; CHECK-NEXT: retq
entry:
define i32 @test_sub_1_cmov_sgt(i64* %p, i32 %a0, i32 %a1) #0 {
; CHECK-LABEL: test_sub_1_cmov_sgt:
; CHECK: # BB#0: # %entry
-; CHECK-NEXT: movq $-1, %rax
-; CHECK-NEXT: lock xaddq %rax, (%rdi)
-; CHECK-NEXT: testq %rax, %rax
-; CHECK-NEXT: cmovlel %edx, %esi
+; CHECK-NEXT: lock decq (%rdi)
+; CHECK-NEXT: cmovll %edx, %esi
; CHECK-NEXT: movl %esi, %eax
; CHECK-NEXT: retq
entry:
define i8 @test_sub_1_setcc_sgt(i64* %p) #0 {
; CHECK-LABEL: test_sub_1_setcc_sgt:
; CHECK: # BB#0: # %entry
-; CHECK-NEXT: movq $-1, %rax
-; CHECK-NEXT: lock xaddq %rax, (%rdi)
-; CHECK-NEXT: testq %rax, %rax
-; CHECK-NEXT: setg %al
+; CHECK-NEXT: lock decq (%rdi)
+; CHECK-NEXT: setge %al
; CHECK-NEXT: retq
entry:
%tmp0 = atomicrmw sub i64* %p, i64 1 seq_cst
define i32 @test_add_1_brcond_sge(i64* %p, i32 %a0, i32 %a1) #0 {
; CHECK-LABEL: test_add_1_brcond_sge:
; CHECK: # BB#0: # %entry
-; CHECK-NEXT: movl $1, %eax
-; CHECK-NEXT: lock xaddq %rax, (%rdi)
-; CHECK-NEXT: testq %rax, %rax
-; CHECK-NEXT: js .LBB6_2
+; CHECK-NEXT: lock incq (%rdi)
+; CHECK-NEXT: jle .LBB6_2
; CHECK-NEXT: # BB#1: # %t
; CHECK-NEXT: movl %esi, %eax
; CHECK-NEXT: retq