From 35f21cba1304f989fd755fc44992142f9cecbc65 Mon Sep 17 00:00:00 2001
From: Hans Wennborg
Date: Fri, 16 Dec 2016 16:34:59 +0000
Subject: [PATCH] [X86] Fold (setcc (cmp (atomic_load_add x, -C) C), COND) to
 (setcc (LADD x, -C), COND) (PR31367)

atomic_load_add returns the value before addition, but sets EFLAGS based on
the result of the addition. That means it's setting the flags based on
effectively subtracting C from the value at x, which is also what the outer
cmp does.

This targets a pattern that occurs frequently with reference counting
pointers:

  void decrement(long volatile *ptr) {
    if (_InterlockedDecrement(ptr) == 0)
      release();
  }

Clang would previously compile it (for 32-bit at -Os) as:

00000000 :
   0:   8b 44 24 04             mov    0x4(%esp),%eax
   4:   31 c9                   xor    %ecx,%ecx
   6:   49                      dec    %ecx
   7:   f0 0f c1 08             lock xadd %ecx,(%eax)
   b:   83 f9 01                cmp    $0x1,%ecx
   e:   0f 84 00 00 00 00       je     14
  14:   c3                      ret

and with this patch it becomes:

00000000 :
   0:   8b 44 24 04             mov    0x4(%esp),%eax
   4:   f0 ff 08                lock decl (%eax)
   7:   0f 84 00 00 00 00       je     d
   d:   c3                      ret

(Equivalent variants with _InterlockedExchangeAdd, std::atomic<>'s fetch_add
or pre-decrement operator generate the same code.)
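
For reference, a minimal sketch of the same pattern written against
std::atomic<> (the portable variant mentioned above) might look like the
snippet below. It is an illustration only: release() is a hypothetical
cleanup hook, not something defined by this patch.

  #include <atomic>

  void release(); // hypothetical cleanup hook, for illustration only

  void decrement(std::atomic<long> &refcount) {
    // fetch_sub returns the value *before* the subtraction, so comparing the
    // result against 1 asks whether the count just dropped to zero. That is
    // the (atomic_load_add x, -1) vs. 1 shape the combine rewrites so the
    // branch can reuse EFLAGS from the locked decrement.
    if (refcount.fetch_sub(1) == 1)
      release();
  }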

Differential Revision: https://reviews.llvm.org/D27781

llvm-svn: 289955
---
 llvm/lib/Target/X86/X86ISelLowering.cpp      | 29 ++++++++++++++------
 llvm/test/CodeGen/X86/atomic-eflags-reuse.ll | 41 ++++++++++++++++++++++++++++
 2 files changed, 61 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c9aa9e2..82a7f8c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -28879,11 +28879,19 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
-/// Combine:
+/// Combine brcond/cmov/setcc/.. based on comparing the result of
+/// atomic_load_add to use EFLAGS produced by the addition
+/// directly if possible. For example:
+///
+/// (setcc (cmp (atomic_load_add x, -C) C), COND_E)
+/// becomes:
+/// (setcc (LADD x, -C), COND_E)
+///
+/// and
 /// (brcond/cmov/setcc .., (cmp (atomic_load_add x, 1), 0), COND_S)
-/// to:
+/// becomes:
 /// (brcond/cmov/setcc .., (LADD x, 1), COND_LE)
-/// i.e., reusing the EFLAGS produced by the LOCKed instruction.
+///
 /// Note that this is only legal for some op/cc combinations.
 static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC,
                                        SelectionDAG &DAG) {
@@ -28892,7 +28900,7 @@ static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC,
         (Cmp.getOpcode() == X86ISD::SUB && !Cmp->hasAnyUseOfValue(0))))
     return SDValue();
 
-  // This only applies to variations of the common case:
+  // This applies to variations of the common case:
   //   (icmp slt x, 0) -> (icmp sle (add x, 1), 0)
   //   (icmp sge x, 0) -> (icmp sgt (add x, 1), 0)
   //   (icmp sle x, 0) -> (icmp slt (sub x, 1), 0)
@@ -28911,8 +28919,9 @@ static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC,
     return SDValue();
 
   auto *CmpRHSC = dyn_cast<ConstantSDNode>(CmpRHS);
-  if (!CmpRHSC || CmpRHSC->getZExtValue() != 0)
+  if (!CmpRHSC)
     return SDValue();
+  APInt Comparand = CmpRHSC->getAPIntValue();
 
   const unsigned Opc = CmpLHS.getOpcode();
 
@@ -28928,13 +28937,15 @@ static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC,
   if (Opc == ISD::ATOMIC_LOAD_SUB)
     Addend = -Addend;
 
-  if (CC == X86::COND_S && Addend == 1)
+  if (Comparand == -Addend)
+    CC = CC; // No change.
+  else if (CC == X86::COND_S && Comparand == 0 && Addend == 1)
     CC = X86::COND_LE;
-  else if (CC == X86::COND_NS && Addend == 1)
+  else if (CC == X86::COND_NS && Comparand == 0 && Addend == 1)
     CC = X86::COND_G;
-  else if (CC == X86::COND_G && Addend == -1)
+  else if (CC == X86::COND_G && Comparand == 0 && Addend == -1)
     CC = X86::COND_GE;
-  else if (CC == X86::COND_LE && Addend == -1)
+  else if (CC == X86::COND_LE && Comparand == 0 && Addend == -1)
     CC = X86::COND_L;
   else
     return SDValue();
diff --git a/llvm/test/CodeGen/X86/atomic-eflags-reuse.ll b/llvm/test/CodeGen/X86/atomic-eflags-reuse.ll
index dc1814b..8021563 100644
--- a/llvm/test/CodeGen/X86/atomic-eflags-reuse.ll
+++ b/llvm/test/CodeGen/X86/atomic-eflags-reuse.ll
@@ -176,4 +176,45 @@ entry:
   ret i8 %tmp2
 }
 
+define i8 @test_sub_1_setcc_eq(i64* %p) #0 {
+; CHECK-LABEL: test_sub_1_setcc_eq:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    lock decq (%rdi)
+; CHECK-NEXT:    sete %al
+; CHECK-NEXT:    retq
+entry:
+  %tmp0 = atomicrmw sub i64* %p, i64 1 seq_cst
+  %tmp1 = icmp eq i64 %tmp0, 1
+  %tmp2 = zext i1 %tmp1 to i8
+  ret i8 %tmp2
+}
+
+define i8 @test_add_5_setcc_ne(i64* %p) #0 {
+; CHECK-LABEL: test_add_5_setcc_ne:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    lock addq $5, (%rdi)
+; CHECK-NEXT:    setne %al
+; CHECK-NEXT:    retq
+entry:
+  %tmp0 = atomicrmw add i64* %p, i64 5 seq_cst
+  %tmp1 = icmp ne i64 %tmp0, -5
+  %tmp2 = zext i1 %tmp1 to i8
+  ret i8 %tmp2
+}
+
+define i8 @test_add_5_setcc_ne_comparand_mismatch(i64* %p) #0 {
+; CHECK-LABEL: test_add_5_setcc_ne_comparand_mismatch:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    movl $5, %eax
+; CHECK-NEXT:    lock xaddq %rax, (%rdi)
+; CHECK-NEXT:    testq %rax, %rax
+; CHECK-NEXT:    setne %al
+; CHECK-NEXT:    retq
+entry:
+  %tmp0 = atomicrmw add i64* %p, i64 5 seq_cst
+  %tmp1 = icmp ne i64 %tmp0, 0
+  %tmp2 = zext i1 %tmp1 to i8
+  ret i8 %tmp2
+}
+
 attributes #0 = { nounwind }
-- 
2.7.4