From: Phoebe Wang
Date: Sun, 20 Nov 2022 02:42:26 +0000 (+0800)
Subject: [X86] Use lock or/and/xor for cases where we only care about the EFLAGS
X-Git-Tag: upstream/17.0.6~27085
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=510e5fba16382eef577eb18f4b2c136a3ea60fc1;p=platform%2Fupstream%2Fllvm.git

[X86] Use lock or/and/xor for cases where we only care about the EFLAGS

This is a follow-up of D137711 to fix the rest of #58685.

Reviewed By: RKSimon

Differential Revision: https://reviews.llvm.org/D138294
---

diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index 34f884f..7c39f14 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -78,6 +78,12 @@ let TargetPrefix = "x86" in {
                                         [ImmArg<ArgIndex<2>>]>;
   def int_x86_atomic_sub_cc : Intrinsic<[llvm_i8_ty], [llvm_ptr_ty, llvm_anyint_ty, llvm_i32_ty],
                                         [ImmArg<ArgIndex<2>>]>;
+  def int_x86_atomic_or_cc : Intrinsic<[llvm_i8_ty], [llvm_ptr_ty, llvm_anyint_ty, llvm_i32_ty],
+                                       [ImmArg<ArgIndex<2>>]>;
+  def int_x86_atomic_and_cc : Intrinsic<[llvm_i8_ty], [llvm_ptr_ty, llvm_anyint_ty, llvm_i32_ty],
+                                        [ImmArg<ArgIndex<2>>]>;
+  def int_x86_atomic_xor_cc : Intrinsic<[llvm_i8_ty], [llvm_ptr_ty, llvm_anyint_ty, llvm_i32_ty],
+                                        [ImmArg<ArgIndex<2>>]>;
 }
 
 // Read Processor Register.
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index dc88213..89eeab5 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -5661,7 +5661,10 @@ bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
   case Intrinsic::x86_axor32:
   case Intrinsic::x86_axor64:
   case Intrinsic::x86_atomic_add_cc:
-  case Intrinsic::x86_atomic_sub_cc: {
+  case Intrinsic::x86_atomic_sub_cc:
+  case Intrinsic::x86_atomic_or_cc:
+  case Intrinsic::x86_atomic_and_cc:
+  case Intrinsic::x86_atomic_xor_cc: {
     Info.opc = ISD::INTRINSIC_W_CHAIN;
     Info.ptrVal = I.getArgOperand(0);
     unsigned Size = I.getArgOperand(1)->getType()->getScalarSizeInBits();
@@ -28385,7 +28388,10 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
                                    {Chain, Op1, Op2}, VT, MMO);
   }
   case Intrinsic::x86_atomic_add_cc:
-  case Intrinsic::x86_atomic_sub_cc: {
+  case Intrinsic::x86_atomic_sub_cc:
+  case Intrinsic::x86_atomic_or_cc:
+  case Intrinsic::x86_atomic_and_cc:
+  case Intrinsic::x86_atomic_xor_cc: {
     SDLoc DL(Op);
     SDValue Chain = Op.getOperand(0);
     SDValue Op1 = Op.getOperand(2);
@@ -28402,6 +28408,15 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
     case Intrinsic::x86_atomic_sub_cc:
       Opc = X86ISD::LSUB;
       break;
+    case Intrinsic::x86_atomic_or_cc:
+      Opc = X86ISD::LOR;
+      break;
+    case Intrinsic::x86_atomic_and_cc:
+      Opc = X86ISD::LAND;
+      break;
+    case Intrinsic::x86_atomic_xor_cc:
+      Opc = X86ISD::LXOR;
+      break;
     }
     MachineMemOperand *MMO = cast<MemIntrinsicSDNode>(Op)->getMemOperand();
     SDValue LockArith =
@@ -31417,6 +31432,23 @@ static bool shouldExpandCmpArithRMWInIR(AtomicRMWInst *AI) {
       return Pred == CmpInst::ICMP_SLT;
     return false;
   }
+  if (Opc == AtomicRMWInst::Or) {
+    if (match(I, m_OneUse(m_c_Or(m_Specific(Op), m_Value()))) &&
+        match(I->user_back(), m_ICmp(Pred, m_Value(), m_ZeroInt())))
+      return Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_SLT;
+  }
+  if (Opc == AtomicRMWInst::And) {
+    if (match(I, m_OneUse(m_c_And(m_Specific(Op), m_Value()))) &&
+        match(I->user_back(), m_ICmp(Pred, m_Value(), m_ZeroInt())))
+      return Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_SLT;
+  }
+  if (Opc == AtomicRMWInst::Xor) {
+    if (match(I, m_c_ICmp(Pred, m_Specific(Op), m_Value())))
+      return Pred == CmpInst::ICMP_EQ;
+    if (match(I, m_OneUse(m_c_Xor(m_Specific(Op), m_Value()))) &&
+        match(I->user_back(), m_ICmp(Pred, m_Value(), m_ZeroInt())))
+      return Pred == CmpInst::ICMP_SLT;
+  }
   return false;
 }
 
@@ -31446,6 +31478,15 @@ void X86TargetLowering::emitCmpArithAtomicRMWIntrinsic(
   case AtomicRMWInst::Sub:
     IID = Intrinsic::x86_atomic_sub_cc;
     break;
+  case AtomicRMWInst::Or:
+    IID = Intrinsic::x86_atomic_or_cc;
+    break;
+  case AtomicRMWInst::And:
+    IID = Intrinsic::x86_atomic_and_cc;
+    break;
+  case AtomicRMWInst::Xor:
+    IID = Intrinsic::x86_atomic_xor_cc;
+    break;
   }
   Function *CmpArith =
       Intrinsic::getDeclaration(AI->getModule(), IID, AI->getType());
@@ -31487,6 +31528,8 @@ X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
   case AtomicRMWInst::Or:
   case AtomicRMWInst::And:
   case AtomicRMWInst::Xor:
+    if (shouldExpandCmpArithRMWInIR(AI))
+      return AtomicExpansionKind::CmpArithIntrinsic;
     return shouldExpandLogicAtomicRMWInIR(AI);
   case AtomicRMWInst::Nand:
   case AtomicRMWInst::Max:
diff --git a/llvm/test/CodeGen/X86/pr58685.ll b/llvm/test/CodeGen/X86/pr58685.ll
index 2323162..0bd8785 100644
--- a/llvm/test/CodeGen/X86/pr58685.ll
+++ b/llvm/test/CodeGen/X86/pr58685.ll
@@ -51,16 +51,7 @@ define i1 @lock_sub_sets(ptr %0, i32 %1) nounwind {
 define i1 @lock_or_sete(ptr %0, i32 %1) nounwind {
 ; CHECK-LABEL: lock_or_sete:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl (%rdi), %eax
-; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB4_1: # %atomicrmw.start
-; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    movl %eax, %ecx
-; CHECK-NEXT:    orl %esi, %ecx
-; CHECK-NEXT:    lock cmpxchgl %ecx, (%rdi)
-; CHECK-NEXT:    jne .LBB4_1
-; CHECK-NEXT:  # %bb.2: # %atomicrmw.end
-; CHECK-NEXT:    orl %esi, %eax
+; CHECK-NEXT:    lock orl %esi, (%rdi)
 ; CHECK-NEXT:    sete %al
 ; CHECK-NEXT:    retq
   %3 = atomicrmw or ptr %0, i32 %1 seq_cst, align 4
   %4 = or i32 %3, %1
@@ -72,18 +63,8 @@ define i1 @lock_or_sete(ptr %0, i32 %1) nounwind {
 define i1 @lock_or_sets(ptr %0, i32 %1) nounwind {
 ; CHECK-LABEL: lock_or_sets:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl (%rdi), %eax
-; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB5_1: # %atomicrmw.start
-; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    movl %eax, %ecx
-; CHECK-NEXT:    orl %esi, %ecx
-; CHECK-NEXT:    lock cmpxchgl %ecx, (%rdi)
-; CHECK-NEXT:    jne .LBB5_1
-; CHECK-NEXT:  # %bb.2: # %atomicrmw.end
-; CHECK-NEXT:    orl %esi, %eax
-; CHECK-NEXT:    shrl $31, %eax
-; CHECK-NEXT:    # kill: def $al killed $al killed $eax
+; CHECK-NEXT:    lock orl %esi, (%rdi)
+; CHECK-NEXT:    sets %al
 ; CHECK-NEXT:    retq
   %3 = atomicrmw or ptr %0, i32 %1 seq_cst, align 4
   %4 = or i32 %3, %1
@@ -94,16 +75,7 @@ define i1 @lock_or_sets(ptr %0, i32 %1) nounwind {
 define i1 @lock_and_sete(ptr %0, i32 %1) nounwind {
 ; CHECK-LABEL: lock_and_sete:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl (%rdi), %eax
-; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB6_1: # %atomicrmw.start
-; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    movl %eax, %ecx
-; CHECK-NEXT:    andl %esi, %ecx
-; CHECK-NEXT:    lock cmpxchgl %ecx, (%rdi)
-; CHECK-NEXT:    jne .LBB6_1
-; CHECK-NEXT:  # %bb.2: # %atomicrmw.end
-; CHECK-NEXT:    testl %esi, %eax
+; CHECK-NEXT:    lock andl %esi, (%rdi)
 ; CHECK-NEXT:    sete %al
 ; CHECK-NEXT:    retq
   %3 = atomicrmw and ptr %0, i32 %1 seq_cst, align 4
   %4 = and i32 %3, %1
@@ -115,18 +87,8 @@ define i1 @lock_and_sete(ptr %0, i32 %1) nounwind {
 define i1 @lock_and_sets(ptr %0, i32 %1) nounwind {
 ; CHECK-LABEL: lock_and_sets:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl (%rdi), %eax
-; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB7_1: # %atomicrmw.start
-; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    movl %eax, %ecx
-; CHECK-NEXT:    andl %esi, %ecx
-; CHECK-NEXT:    lock cmpxchgl %ecx, (%rdi)
-; CHECK-NEXT:    jne .LBB7_1
-; CHECK-NEXT:  # %bb.2: # %atomicrmw.end
-; CHECK-NEXT:    andl %esi, %eax
-; CHECK-NEXT:    shrl $31, %eax
-; CHECK-NEXT:    # kill: def $al killed $al killed $eax
+; CHECK-NEXT:    lock andl %esi, (%rdi)
+; CHECK-NEXT:    sets %al
 ; CHECK-NEXT:    retq
   %3 = atomicrmw and ptr %0, i32 %1 seq_cst, align 4
   %4 = and i32 %3, %1
@@ -137,16 +99,7 @@ define i1 @lock_and_sets(ptr %0, i32 %1) nounwind {
 define i1 @lock_xor_sete(ptr %0, i32 %1) nounwind {
 ; CHECK-LABEL: lock_xor_sete:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl (%rdi), %eax
-; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB8_1: # %atomicrmw.start
-; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    movl %eax, %ecx
-; CHECK-NEXT:    xorl %esi, %ecx
-; CHECK-NEXT:    lock cmpxchgl %ecx, (%rdi)
-; CHECK-NEXT:    jne .LBB8_1
-; CHECK-NEXT:  # %bb.2: # %atomicrmw.end
-; CHECK-NEXT:    cmpl %esi, %eax
+; CHECK-NEXT:    lock xorl %esi, (%rdi)
 ; CHECK-NEXT:    sete %al
 ; CHECK-NEXT:    retq
   %3 = atomicrmw xor ptr %0, i32 %1 seq_cst, align 4
@@ -157,18 +110,8 @@ define i1 @lock_xor_sete(ptr %0, i32 %1) nounwind {
 define i1 @lock_xor_sets(ptr %0, i32 %1) nounwind {
 ; CHECK-LABEL: lock_xor_sets:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl (%rdi), %eax
-; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB9_1: # %atomicrmw.start
-; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    movl %eax, %ecx
-; CHECK-NEXT:    xorl %esi, %ecx
-; CHECK-NEXT:    lock cmpxchgl %ecx, (%rdi)
-; CHECK-NEXT:    jne .LBB9_1
-; CHECK-NEXT:  # %bb.2: # %atomicrmw.end
-; CHECK-NEXT:    xorl %esi, %eax
-; CHECK-NEXT:    shrl $31, %eax
-; CHECK-NEXT:    # kill: def $al killed $al killed $eax
+; CHECK-NEXT:    lock xorl %esi, (%rdi)
+; CHECK-NEXT:    sets %al
 ; CHECK-NEXT:    retq
   %3 = atomicrmw xor ptr %0, i32 %1 seq_cst, align 4
   %4 = xor i32 %3, %1
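
For context only (this sketch is not part of the upstream patch; the helper name and the use of
std::atomic are illustrative assumptions), here is a minimal C++ example of the source-level
pattern the new lowering targets, mirroring the lock_or_sete test above. The fetched value is
only used to recompute the stored result and compare it against zero, so only the EFLAGS of a
locked OR are really needed:

    #include <atomic>

    // Illustrative sketch: compiles to `atomicrmw or` followed by an `or` and an
    // `icmp eq ..., 0`. shouldExpandCmpArithRMWInIR now recognizes that shape and
    // rewrites it to the new llvm.x86.atomic.or.cc intrinsic, so the backend can
    // emit `lock orl %esi, (%rdi)` + `sete %al` instead of a cmpxchg loop, as the
    // lock_or_sete test checks.
    bool or_result_is_zero(std::atomic<int> &a, int v) {
      int old = a.fetch_or(v, std::memory_order_seq_cst);
      return (old | v) == 0; // new value == 0  <=>  ZF set by the locked OR
    }

The and/xor variants follow the same shape; the sets tests cover the sign-flag (ICMP_SLT) case.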