setSubclassData<FailureOrderingField>(Ordering);
}
+ /// Returns a single ordering which is at least as strong as both the
+ /// success and failure orderings for this cmpxchg.
+ AtomicOrdering getMergedOrdering() const {
+   if (getFailureOrdering() == AtomicOrdering::SequentiallyConsistent)
+     return AtomicOrdering::SequentiallyConsistent;
+   if (getFailureOrdering() == AtomicOrdering::Acquire) {
+     if (getSuccessOrdering() == AtomicOrdering::Monotonic)
+       return AtomicOrdering::Acquire;
+     if (getSuccessOrdering() == AtomicOrdering::Release)
+       return AtomicOrdering::AcquireRelease;
+   }
+   return getSuccessOrdering();
+ }
+
/// Returns the synchronization scope ID of this cmpxchg instruction.
SyncScope::ID getSyncScopeID() const {
return SSID;
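
The merge rules above are easiest to see on concrete ordering pairs. Below is a small standalone sketch (my illustration, not part of the patch; mergeCmpXchgOrderings is a made-up free function mirroring getMergedOrdering) that checks the combinations exercised by the tests further down, assuming only llvm/Support/AtomicOrdering.h is available:

  // Standalone illustration: a hypothetical helper mirroring
  // AtomicCmpXchgInst::getMergedOrdering(), plus a few spot checks.
  #include "llvm/Support/AtomicOrdering.h"
  #include <cassert>

  using llvm::AtomicOrdering;

  static AtomicOrdering mergeCmpXchgOrderings(AtomicOrdering Success,
                                              AtomicOrdering Failure) {
    // A seq_cst failure ordering dominates any success ordering.
    if (Failure == AtomicOrdering::SequentiallyConsistent)
      return AtomicOrdering::SequentiallyConsistent;
    // An acquire failure ordering can strengthen a weaker success ordering.
    if (Failure == AtomicOrdering::Acquire) {
      if (Success == AtomicOrdering::Monotonic)
        return AtomicOrdering::Acquire;
      if (Success == AtomicOrdering::Release)
        return AtomicOrdering::AcquireRelease;
    }
    // Otherwise the success ordering is already at least as strong as both.
    return Success;
  }

  int main() {
    // "cmpxchg ... monotonic seq_cst": the failure side forces seq_cst overall.
    assert(mergeCmpXchgOrderings(AtomicOrdering::Monotonic,
                                 AtomicOrdering::SequentiallyConsistent) ==
           AtomicOrdering::SequentiallyConsistent);
    // "cmpxchg ... release acquire": neither side subsumes the other, so the
    // merged ordering must provide both, i.e. acq_rel.
    assert(mergeCmpXchgOrderings(AtomicOrdering::Release,
                                 AtomicOrdering::Acquire) ==
           AtomicOrdering::AcquireRelease);
    // "cmpxchg ... acq_rel acquire": the success ordering already covers acquire.
    assert(mergeCmpXchgOrderings(AtomicOrdering::AcquireRelease,
                                 AtomicOrdering::Acquire) ==
           AtomicOrdering::AcquireRelease);
    return 0;
  }
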
TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
TargetLoweringBase::AtomicExpansionKind::None &&
(isReleaseOrStronger(CASI->getSuccessOrdering()) ||
- isAcquireOrStronger(CASI->getSuccessOrdering()))) {
+ isAcquireOrStronger(CASI->getSuccessOrdering()) ||
+ isAcquireOrStronger(CASI->getFailureOrdering()))) {
// If a compare and swap is lowered to LL/SC, we can do smarter fence
// insertion, with a stronger one on the success path than on the
// failure path. As a result, fence insertion is directly done by
// expandAtomicCmpXchg in that case.
- FenceOrdering = CASI->getSuccessOrdering();
+ FenceOrdering = CASI->getMergedOrdering();
CASI->setSuccessOrdering(AtomicOrdering::Monotonic);
CASI->setFailureOrdering(AtomicOrdering::Monotonic);
}
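
Two things change in this hunk: the branch now also fires when only the failure ordering needs a fence, and the fence it requests uses the merged ordering rather than just the success ordering (consistent with the PPC64LE diffs below, where a trailing lwsync now covers the acquire failure ordering of a release/acquire cmpxchg). Here is a standalone sketch of the old versus new predicate, assuming only llvm/Support/AtomicOrdering.h; the monotonic/acquire pair is just one example of a combination the old check missed:

  // Standalone illustration: the old predicate consulted only the success
  // ordering, so a cmpxchg whose *failure* ordering needs acquire semantics
  // was not routed through this fence-insertion path.
  #include "llvm/Support/AtomicOrdering.h"
  #include <cassert>

  using namespace llvm;

  int main() {
    AtomicOrdering Success = AtomicOrdering::Monotonic;
    AtomicOrdering Failure = AtomicOrdering::Acquire;

    // Old check: success ordering only.
    bool OldNeedsFences =
        isReleaseOrStronger(Success) || isAcquireOrStronger(Success);
    // New check: the failure ordering participates as well.
    bool NewNeedsFences = OldNeedsFences || isAcquireOrStronger(Failure);

    assert(!OldNeedsFences); // previously: no fences requested here
    assert(NewNeedsFences);  // now: fences based on the merged ordering
    return 0;
  }
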
"NewVal_Shifted");
Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic(
Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
- CI->getSuccessOrdering());
+ CI->getMergedOrdering());
Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
Value *Res = UndefValue::get(CI->getType());
Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
// care of everything. Otherwise, emitLeading/TrailingFence are no-op and we
// should preserve the ordering.
bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
- AtomicOrdering MemOpOrder =
- ShouldInsertFencesForAtomic ? AtomicOrdering::Monotonic : SuccessOrder;
+ AtomicOrdering MemOpOrder = ShouldInsertFencesForAtomic
+ ? AtomicOrdering::Monotonic
+ : CI->getMergedOrdering();
// In implementations which use a barrier to achieve release semantics, we can
// delay emitting this barrier until we know a store is actually going to be
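
The effect of this choice shows up directly in the tests that follow: targets that insert explicit fences keep the LL/SC memory operations monotonic (the PPC64LE sequences below use plain lbarx/stbcx. plus lwsync), while targets that do not, such as AArch64 here, now carry the merged ordering on the operations themselves, which is why ldxp/ldxr become ldaxp/ldaxr and stxp/stxr become stlxp/stlxr. A trivial sketch of the selection for a release/acquire cmpxchg (illustration only; the boolean stands in for TLI->shouldInsertFencesForAtomic(CI)):

  // Illustration only: how the ordering for the expanded LL/SC loop's
  // load/store is picked, mirroring the MemOpOrder selection above.
  #include "llvm/Support/AtomicOrdering.h"
  #include <cassert>

  using llvm::AtomicOrdering;

  static AtomicOrdering selectMemOpOrder(bool ShouldInsertFencesForAtomic,
                                         AtomicOrdering MergedOrdering) {
    return ShouldInsertFencesForAtomic ? AtomicOrdering::Monotonic
                                       : MergedOrdering;
  }

  int main() {
    // "cmpxchg ... release acquire" merges to acq_rel (see getMergedOrdering).
    AtomicOrdering Merged = AtomicOrdering::AcquireRelease;
    // Fence-inserting target: the loop body stays monotonic, fences do the work.
    assert(selectMemOpOrder(true, Merged) == AtomicOrdering::Monotonic);
    // No fence insertion: the ordering rides on the LL/SC instructions.
    assert(selectMemOpOrder(false, Merged) == AtomicOrdering::AcquireRelease);
    return 0;
  }
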
define void @val_compare_and_swap_monotonic_seqcst(i128* %p, i128 %oldval, i128 %newval) {
; CHECK-LLSC-O1-LABEL: val_compare_and_swap_monotonic_seqcst:
-; CHECK-LLSC-O1: ldxp {{x[0-9]+}}, {{x[0-9]+}}, [x0]
+; CHECK-LLSC-O1: ldaxp {{x[0-9]+}}, {{x[0-9]+}}, [x0]
; [... LOTS of stuff that is generic IR unrelated to atomic operations ...]
-; CHECK-LLSC-O1: stxp {{w[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, [x0]
+; CHECK-LLSC-O1: stlxp {{w[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, [x0]
;
; CHECK-CAS-O1-LABEL: val_compare_and_swap_monotonic_seqcst:
; CHECK-CAS-O1: caspal x2, x3, x4, x5, [x0]
define void @val_compare_and_swap_release_acquire(i128* %p, i128 %oldval, i128 %newval) {
; CHECK-LLSC-O1-LABEL: val_compare_and_swap_release_acquire:
-; CHECK-LLSC-O1: ldxp {{x[0-9]+}}, {{x[0-9]+}}, [x0]
+; CHECK-LLSC-O1: ldaxp {{x[0-9]+}}, {{x[0-9]+}}, [x0]
; [... LOTS of stuff that is generic IR unrelated to atomic operations ...]
; CHECK-LLSC-O1: stlxp {{w[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, [x0]
;
; CHECK-NOLSE-O1: ; %bb.0:
; CHECK-NOLSE-O1-NEXT: LBB4_1: ; %cmpxchg.start
; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NOLSE-O1-NEXT: ldxr x8, [x0]
+; CHECK-NOLSE-O1-NEXT: ldaxr x8, [x0]
; CHECK-NOLSE-O1-NEXT: cmp x8, x1
; CHECK-NOLSE-O1-NEXT: b.ne LBB4_4
; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %cmpxchg.trystore
; CHECK-NOLSE-O1-NEXT: ; in Loop: Header=BB4_1 Depth=1
-; CHECK-NOLSE-O1-NEXT: stxr w9, x2, [x0]
+; CHECK-NOLSE-O1-NEXT: stlxr w9, x2, [x0]
; CHECK-NOLSE-O1-NEXT: cbnz w9, LBB4_1
; CHECK-NOLSE-O1-NEXT: ; %bb.3: ; %cmpxchg.end
; CHECK-NOLSE-O1-NEXT: mov x0, x8
; CHECK-NOLSE-O1: ; %bb.0:
; CHECK-NOLSE-O1-NEXT: LBB5_1: ; %cmpxchg.start
; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NOLSE-O1-NEXT: ldxr x8, [x0]
+; CHECK-NOLSE-O1-NEXT: ldaxr x8, [x0]
; CHECK-NOLSE-O1-NEXT: cmp x8, x1
; CHECK-NOLSE-O1-NEXT: b.ne LBB5_4
; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %cmpxchg.trystore
; PPC64LE-NEXT: .LBB44_1:
; PPC64LE-NEXT: lbarx 6, 0, 3
; PPC64LE-NEXT: cmpw 4, 6
-; PPC64LE-NEXT: bne 0, .LBB44_3
+; PPC64LE-NEXT: bne 0, .LBB44_4
; PPC64LE-NEXT: # %bb.2:
; PPC64LE-NEXT: stbcx. 5, 0, 3
-; PPC64LE-NEXT: beqlr 0
-; PPC64LE-NEXT: b .LBB44_1
-; PPC64LE-NEXT: .LBB44_3:
+; PPC64LE-NEXT: bne 0, .LBB44_1
+; PPC64LE-NEXT: # %bb.3:
+; PPC64LE-NEXT: lwsync
+; PPC64LE-NEXT: blr
+; PPC64LE-NEXT: .LBB44_4:
; PPC64LE-NEXT: stbcx. 6, 0, 3
+; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg i8* %ptr, i8 %cmp, i8 %val release acquire
ret void
; PPC64LE-NEXT: .LBB54_1:
; PPC64LE-NEXT: lharx 6, 0, 3
; PPC64LE-NEXT: cmpw 4, 6
-; PPC64LE-NEXT: bne 0, .LBB54_3
+; PPC64LE-NEXT: bne 0, .LBB54_4
; PPC64LE-NEXT: # %bb.2:
; PPC64LE-NEXT: sthcx. 5, 0, 3
-; PPC64LE-NEXT: beqlr 0
-; PPC64LE-NEXT: b .LBB54_1
-; PPC64LE-NEXT: .LBB54_3:
+; PPC64LE-NEXT: bne 0, .LBB54_1
+; PPC64LE-NEXT: # %bb.3:
+; PPC64LE-NEXT: lwsync
+; PPC64LE-NEXT: blr
+; PPC64LE-NEXT: .LBB54_4:
; PPC64LE-NEXT: sthcx. 6, 0, 3
+; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg i16* %ptr, i16 %cmp, i16 %val release acquire
ret void
; PPC64LE-NEXT: .LBB64_1:
; PPC64LE-NEXT: lwarx 6, 0, 3
; PPC64LE-NEXT: cmpw 4, 6
-; PPC64LE-NEXT: bne 0, .LBB64_3
+; PPC64LE-NEXT: bne 0, .LBB64_4
; PPC64LE-NEXT: # %bb.2:
; PPC64LE-NEXT: stwcx. 5, 0, 3
-; PPC64LE-NEXT: beqlr 0
-; PPC64LE-NEXT: b .LBB64_1
-; PPC64LE-NEXT: .LBB64_3:
+; PPC64LE-NEXT: bne 0, .LBB64_1
+; PPC64LE-NEXT: # %bb.3:
+; PPC64LE-NEXT: lwsync
+; PPC64LE-NEXT: blr
+; PPC64LE-NEXT: .LBB64_4:
; PPC64LE-NEXT: stwcx. 6, 0, 3
+; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg i32* %ptr, i32 %cmp, i32 %val release acquire
ret void
; PPC64LE-NEXT: .LBB74_1:
; PPC64LE-NEXT: ldarx 6, 0, 3
; PPC64LE-NEXT: cmpd 4, 6
-; PPC64LE-NEXT: bne 0, .LBB74_3
+; PPC64LE-NEXT: bne 0, .LBB74_4
; PPC64LE-NEXT: # %bb.2:
; PPC64LE-NEXT: stdcx. 5, 0, 3
-; PPC64LE-NEXT: beqlr 0
-; PPC64LE-NEXT: b .LBB74_1
-; PPC64LE-NEXT: .LBB74_3:
+; PPC64LE-NEXT: bne 0, .LBB74_1
+; PPC64LE-NEXT: # %bb.3:
+; PPC64LE-NEXT: lwsync
+; PPC64LE-NEXT: blr
+; PPC64LE-NEXT: .LBB74_4:
; PPC64LE-NEXT: stdcx. 6, 0, 3
+; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg i64* %ptr, i64 %cmp, i64 %val release acquire
ret void
; PPC64LE-NEXT: .LBB84_1:
; PPC64LE-NEXT: lbarx 6, 0, 3
; PPC64LE-NEXT: cmpw 4, 6
-; PPC64LE-NEXT: bne 0, .LBB84_3
+; PPC64LE-NEXT: bne 0, .LBB84_4
; PPC64LE-NEXT: # %bb.2:
; PPC64LE-NEXT: stbcx. 5, 0, 3
-; PPC64LE-NEXT: beqlr 0
-; PPC64LE-NEXT: b .LBB84_1
-; PPC64LE-NEXT: .LBB84_3:
+; PPC64LE-NEXT: bne 0, .LBB84_1
+; PPC64LE-NEXT: # %bb.3:
+; PPC64LE-NEXT: lwsync
+; PPC64LE-NEXT: blr
+; PPC64LE-NEXT: .LBB84_4:
; PPC64LE-NEXT: stbcx. 6, 0, 3
+; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg i8* %ptr, i8 %cmp, i8 %val syncscope("singlethread") release acquire
ret void
; PPC64LE-NEXT: .LBB94_1:
; PPC64LE-NEXT: lharx 6, 0, 3
; PPC64LE-NEXT: cmpw 4, 6
-; PPC64LE-NEXT: bne 0, .LBB94_3
+; PPC64LE-NEXT: bne 0, .LBB94_4
; PPC64LE-NEXT: # %bb.2:
; PPC64LE-NEXT: sthcx. 5, 0, 3
-; PPC64LE-NEXT: beqlr 0
-; PPC64LE-NEXT: b .LBB94_1
-; PPC64LE-NEXT: .LBB94_3:
+; PPC64LE-NEXT: bne 0, .LBB94_1
+; PPC64LE-NEXT: # %bb.3:
+; PPC64LE-NEXT: lwsync
+; PPC64LE-NEXT: blr
+; PPC64LE-NEXT: .LBB94_4:
; PPC64LE-NEXT: sthcx. 6, 0, 3
+; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg i16* %ptr, i16 %cmp, i16 %val syncscope("singlethread") release acquire
ret void
; PPC64LE-NEXT: .LBB104_1:
; PPC64LE-NEXT: lwarx 6, 0, 3
; PPC64LE-NEXT: cmpw 4, 6
-; PPC64LE-NEXT: bne 0, .LBB104_3
+; PPC64LE-NEXT: bne 0, .LBB104_4
; PPC64LE-NEXT: # %bb.2:
; PPC64LE-NEXT: stwcx. 5, 0, 3
-; PPC64LE-NEXT: beqlr 0
-; PPC64LE-NEXT: b .LBB104_1
-; PPC64LE-NEXT: .LBB104_3:
+; PPC64LE-NEXT: bne 0, .LBB104_1
+; PPC64LE-NEXT: # %bb.3:
+; PPC64LE-NEXT: lwsync
+; PPC64LE-NEXT: blr
+; PPC64LE-NEXT: .LBB104_4:
; PPC64LE-NEXT: stwcx. 6, 0, 3
+; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg i32* %ptr, i32 %cmp, i32 %val syncscope("singlethread") release acquire
ret void
; PPC64LE-NEXT: .LBB114_1:
; PPC64LE-NEXT: ldarx 6, 0, 3
; PPC64LE-NEXT: cmpd 4, 6
-; PPC64LE-NEXT: bne 0, .LBB114_3
+; PPC64LE-NEXT: bne 0, .LBB114_4
; PPC64LE-NEXT: # %bb.2:
; PPC64LE-NEXT: stdcx. 5, 0, 3
-; PPC64LE-NEXT: beqlr 0
-; PPC64LE-NEXT: b .LBB114_1
-; PPC64LE-NEXT: .LBB114_3:
+; PPC64LE-NEXT: bne 0, .LBB114_1
+; PPC64LE-NEXT: # %bb.3:
+; PPC64LE-NEXT: lwsync
+; PPC64LE-NEXT: blr
+; PPC64LE-NEXT: .LBB114_4:
; PPC64LE-NEXT: stdcx. 6, 0, 3
+; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg i64* %ptr, i64 %cmp, i64 %val syncscope("singlethread") release acquire
ret void
; RV32IA-NEXT: andi a2, a2, 255
; RV32IA-NEXT: sll a0, a2, a0
; RV32IA-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w a2, (a3)
+; RV32IA-NEXT: lr.w.aq a2, (a3)
; RV32IA-NEXT: and a5, a2, a4
; RV32IA-NEXT: bne a5, a1, .LBB4_3
; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1
; RV64IA-NEXT: andi a2, a2, 255
; RV64IA-NEXT: sllw a0, a2, a0
; RV64IA-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT: lr.w a2, (a3)
+; RV64IA-NEXT: lr.w.aq a2, (a3)
; RV64IA-NEXT: and a5, a2, a4
; RV64IA-NEXT: bne a5, a1, .LBB4_3
; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1
; RV32IA-NEXT: and a2, a2, a4
; RV32IA-NEXT: sll a0, a2, a0
; RV32IA-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w a2, (a3)
+; RV32IA-NEXT: lr.w.aq a2, (a3)
; RV32IA-NEXT: and a4, a2, a5
; RV32IA-NEXT: bne a4, a1, .LBB14_3
; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1
; RV64IA-NEXT: and a2, a2, a4
; RV64IA-NEXT: sllw a0, a2, a0
; RV64IA-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT: lr.w a2, (a3)
+; RV64IA-NEXT: lr.w.aq a2, (a3)
; RV64IA-NEXT: and a4, a2, a5
; RV64IA-NEXT: bne a4, a1, .LBB14_3
; RV64IA-NEXT: # %bb.2: # in Loop: Header=BB14_1 Depth=1