AtomicOrdering MemOpOrder =
TLI->getInsertFencesForAtomic() ? Monotonic : SuccessOrder;
+ // In implementations which use a barrier to achieve release semantics, we can
+ // delay emitting this barrier until we know a store is actually going to be
+ // attempted. The cost of this delay is that we need 2 copies of the block
+ // emitting the load-linked, affecting code size.
+ //
+ // Ideally, this logic would be unconditional except for the minsize check
+ // since in other cases the extra blocks naturally collapse down to the
+ // minimal loop. Unfortunately, this puts too much stress on later
+ // optimisations so we avoid emitting the extra logic in those cases too.
+ bool HasReleasedLoadBB = !CI->isWeak() && TLI->getInsertFencesForAtomic() &&
+ SuccessOrder != Monotonic &&
+ SuccessOrder != Acquire && !F->optForMinSize();
+
+ // There's no overhead for sinking the release barrier in a weak cmpxchg, so
+ // do it even on minsize.
+ bool UseUnconditionalReleaseBarrier = F->optForMinSize() && !CI->isWeak();
+
// Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
//
// The full expansion we produce is:
// [...]
- // fence?
// cmpxchg.start:
- // %loaded = @load.linked(%addr)
- // %should_store = icmp eq %loaded, %desired
- // br i1 %should_store, label %cmpxchg.trystore,
+ // %unreleasedload = @load.linked(%addr)
+ // %should_store = icmp eq %unreleasedload, %desired
+ // br i1 %should_store, label %cmpxchg.fencedstore,
// label %cmpxchg.nostore
+ // cmpxchg.fencedstore:
+ // fence?
+ // br label %cmpxchg.trystore
// cmpxchg.trystore:
+ // %loaded.trystore = phi [%unreleasedload, %cmpxchg.fencedstore],
+ // [%releasedload, %cmpxchg.releasedload]
// %stored = @store_conditional(%new, %addr)
// %success = icmp eq i32 %stored, 0
- // br i1 %success, label %cmpxchg.success, label %loop/%cmpxchg.failure
+ // br i1 %success, label %cmpxchg.success,
+ // label %cmpxchg.releasedload/%cmpxchg.failure
+ // cmpxchg.releasedload:
+ // %releasedload = @load.linked(%addr)
+ // %should_store = icmp eq %releasedload, %desired
+ // br i1 %should_store, label %cmpxchg.trystore,
+ // label %cmpxchg.nostore
// cmpxchg.success:
// fence?
// br label %cmpxchg.end
// cmpxchg.nostore:
+ // %loaded.nostore = phi [%unreleasedload, %cmpxchg.start],
+ // [%releasedload,
+ // %cmpxchg.releasedload]
// @load_linked_fail_balance()?
// br label %cmpxchg.failure
// cmpxchg.failure:
// fence?
// br label %cmpxchg.end
// cmpxchg.end:
+ // %loaded = phi [%loaded.nostore, %cmpxchg.failure],
+ // [%loaded.trystore, %cmpxchg.success]
// %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
// %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
// %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
- auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, SuccessBB);
- auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB);
+ auto ReleasedLoadBB =
+ BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB);
+ auto TryStoreBB =
+ BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB);
+ auto ReleasingStoreBB =
+ BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB);
+ auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);
// This grabs the DebugLoc from CI
IRBuilder<> Builder(CI);
// the branch entirely.
std::prev(BB->end())->eraseFromParent();
Builder.SetInsertPoint(BB);
- TLI->emitLeadingFence(Builder, SuccessOrder, /*IsStore=*/true,
- /*IsLoad=*/true);
- Builder.CreateBr(LoopBB);
+ if (UseUnconditionalReleaseBarrier)
+ TLI->emitLeadingFence(Builder, SuccessOrder, /*IsStore=*/true,
+ /*IsLoad=*/true);
+ Builder.CreateBr(StartBB);
// Start the main loop block now that we've taken care of the preliminaries.
- Builder.SetInsertPoint(LoopBB);
- Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
- Value *ShouldStore =
- Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store");
+ Builder.SetInsertPoint(StartBB);
+ Value *UnreleasedLoad = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
+ Value *ShouldStore = Builder.CreateICmpEQ(
+ UnreleasedLoad, CI->getCompareOperand(), "should_store");
// If the cmpxchg doesn't actually need any ordering when it fails, we can
// jump straight past that fence instruction (if it exists).
- Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
+ Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB);
+
+ Builder.SetInsertPoint(ReleasingStoreBB);
+ if (!UseUnconditionalReleaseBarrier)
+ TLI->emitLeadingFence(Builder, SuccessOrder, /*IsStore=*/true,
+ /*IsLoad=*/true);
+ Builder.CreateBr(TryStoreBB);
Builder.SetInsertPoint(TryStoreBB);
Value *StoreSuccess = TLI->emitStoreConditional(
Builder, CI->getNewValOperand(), Addr, MemOpOrder);
StoreSuccess = Builder.CreateICmpEQ(
StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
+ BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
Builder.CreateCondBr(StoreSuccess, SuccessBB,
- CI->isWeak() ? FailureBB : LoopBB);
-
- // Make sure later instructions don't get reordered with a fence if necessary.
+ CI->isWeak() ? FailureBB : RetryBB);
+
+ Builder.SetInsertPoint(ReleasedLoadBB);
+ Value *SecondLoad;
+ if (HasReleasedLoadBB) {
+ SecondLoad = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
+ ShouldStore = Builder.CreateICmpEQ(SecondLoad, CI->getCompareOperand(),
+ "should_store");
+
+ // If the cmpxchg doesn't actually need any ordering when it fails, we can
+ // jump straight past that fence instruction (if it exists).
+ Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
+ } else
+ Builder.CreateUnreachable();
+
+ // Make sure later instructions don't get reordered with a fence if
+ // necessary.
Builder.SetInsertPoint(SuccessBB);
TLI->emitTrailingFence(Builder, SuccessOrder, /*IsStore=*/true,
/*IsLoad=*/true);
// Finally, we have control-flow based knowledge of whether the cmpxchg
// succeeded or not. We expose this to later passes by converting any
- // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate PHI.
-
- // Setup the builder so we can create any PHIs we need.
+ // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
+ // PHI.
Builder.SetInsertPoint(ExitBB, ExitBB->begin());
PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2);
Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);
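+ // Pick the value that represents the loaded result. If the load-linked was
+ // duplicated, the two copies have to be merged with PHIs in each join block.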
+ Value *Loaded;
+ if (!HasReleasedLoadBB)
+ Loaded = UnreleasedLoad;
+ else {
+ Builder.SetInsertPoint(TryStoreBB, TryStoreBB->begin());
+ PHINode *TryStoreLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
+ TryStoreLoaded->addIncoming(UnreleasedLoad, ReleasingStoreBB);
+ TryStoreLoaded->addIncoming(SecondLoad, ReleasedLoadBB);
+
+ Builder.SetInsertPoint(NoStoreBB, NoStoreBB->begin());
+ PHINode *NoStoreLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
+ NoStoreLoaded->addIncoming(UnreleasedLoad, StartBB);
+ NoStoreLoaded->addIncoming(SecondLoad, ReleasedLoadBB);
+
+ Builder.SetInsertPoint(ExitBB, ++ExitBB->begin());
+ PHINode *ExitLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
+ ExitLoaded->addIncoming(TryStoreLoaded, SuccessBB);
+ ExitLoaded->addIncoming(NoStoreLoaded, FailureBB);
+
+ Loaded = ExitLoaded;
+ }
+
// Look for any users of the cmpxchg that are just comparing the loaded value
// against the desired one, and replace them with the CFG-derived version.
SmallVector<ExtractValueInst *, 2> PrunedInsts;
define i64 @test7(i64* %ptr, i64 %val1, i64 %val2) {
; CHECK-LABEL: test7:
; CHECK-DAG: mov [[VAL1LO:r[0-9]+]], r1
-; CHECK-DAG: dmb {{ish$}}
; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
-; CHECK-LE-DAG: eor [[MISMATCH_LO:r[0-9]+]], [[REG1]], [[VAL1LO]]
-; CHECK-LE-DAG: eor [[MISMATCH_HI:r[0-9]+]], [[REG2]], r2
-; CHECK-BE-DAG: eor [[MISMATCH_LO:r[0-9]+]], [[REG2]], r2
-; CHECK-BE-DAG: eor [[MISMATCH_HI:r[0-9]+]], [[REG1]], r1
+; CHECK-LE-DAG: eor [[MISMATCH_LO:.*]], [[REG1]], [[VAL1LO]]
+; CHECK-LE-DAG: eor [[MISMATCH_HI:.*]], [[REG2]], r2
+; CHECK-BE-DAG: eor [[MISMATCH_LO:.*]], [[REG2]], r2
+; CHECK-BE-DAG: eor [[MISMATCH_HI:.*]], [[REG1]], r1
; CHECK: orrs {{r[0-9]+}}, [[MISMATCH_LO]], [[MISMATCH_HI]]
; CHECK: bne
+; CHECK-DAG: dmb {{ish$}}
; CHECK: strexd {{[a-z0-9]+}}, {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}
; CHECK: cmp
-; CHECK: bne
+; CHECK: beq
; CHECK: dmb {{ish$}}
; CHECK-THUMB-LABEL: test7:
-; CHECK-THUMB: dmb {{ish$}}
; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
; CHECK-THUMB-LE-DAG: eor.w [[MISMATCH_LO:[a-z0-9]+]], [[REG1]], r2
; CHECK-THUMB-LE-DAG: eor.w [[MISMATCH_HI:[a-z0-9]+]], [[REG2]], r3
; CHECK-THUMB-BE-DAG: eor.w [[MISMATCH_HI:[a-z0-9]+]], [[REG1]], r2
; CHECK-THUMB-BE-DAG: eor.w [[MISMATCH_LO:[a-z0-9]+]], [[REG2]], r3
-; CHECK-THUMB-LE: orrs [[MISMATCH_HI]], [[MISMATCH_LO]]
+; CHECK-THUMB-LE: orrs.w {{.*}}, [[MISMATCH_LO]], [[MISMATCH_HI]]
; CHECK-THUMB: bne
+; CHECK-THUMB: dmb {{ish$}}
; CHECK-THUMB: strexd {{[a-z0-9]+}}, {{[a-z0-9]+}}, {{[a-z0-9]+}}
; CHECK-THUMB: cmp
-; CHECK-THUMB: bne
+; CHECK-THUMB: beq
; CHECK-THUMB: dmb {{ish$}}
%pair = cmpxchg i64* %ptr, i64 %val1, i64 %val2 seq_cst seq_cst
%pair = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst monotonic
%oldval = extractvalue { i32, i1 } %pair, 0
-; CHECK-ARMV7: dmb ish
-; CHECK-ARMV7: [[LOOP_BB:\.?LBB[0-9]+_1]]:
; CHECK-ARMV7: ldrex [[OLDVAL:r[0-9]+]], [r[[ADDR:[0-9]+]]]
; CHECK-ARMV7: cmp [[OLDVAL]], r1
; CHECK-ARMV7: bne [[FAIL_BB:\.?LBB[0-9]+_[0-9]+]]
+; CHECK-ARMV7: dmb ish
+; CHECK-ARMV7: [[LOOP_BB:\.?LBB.*]]:
; CHECK-ARMV7: strex [[SUCCESS:r[0-9]+]], r2, [r[[ADDR]]]
; CHECK-ARMV7: cmp [[SUCCESS]], #0
-; CHECK-ARMV7: bne [[LOOP_BB]]
-; CHECK-ARMV7: dmb ish
-; CHECK-ARMV7: bx lr
+; CHECK-ARMV7: beq [[SUCCESS_BB:\.?LBB.*]]
+; CHECK-ARMV7: ldrex [[OLDVAL]], [r[[ADDR]]]
+; CHECK-ARMV7: cmp [[OLDVAL]], r1
+; CHECK-ARMV7: beq [[LOOP_BB]]
; CHECK-ARMV7: [[FAIL_BB]]:
; CHECK-ARMV7: clrex
; CHECK-ARMV7: bx lr
+; CHECK-ARMV7: [[SUCCESS_BB]]:
+; CHECK-ARMV7: dmb ish
+; CHECK-ARMV7: bx lr
-; CHECK-T2: dmb ish
-; CHECK-T2: [[LOOP_BB:\.?LBB[0-9]+_1]]:
; CHECK-T2: ldrex [[OLDVAL:r[0-9]+]], [r[[ADDR:[0-9]+]]]
; CHECK-T2: cmp [[OLDVAL]], r1
-; CHECK-T2: clrexne
-; CHECK-T2: bxne lr
+; CHECK-T2: bne [[FAIL_BB:\.?LBB.*]]
+; CHECK-T2: dmb ish
+; CHECK-T2: [[LOOP_BB:\.?LBB.*]]:
; CHECK-T2: strex [[SUCCESS:r[0-9]+]], r2, [r[[ADDR]]]
; CHECK-T2: cmp [[SUCCESS]], #0
; CHECK-T2: dmbeq ish
; CHECK-T2: bxeq lr
-; CHECK-T2: b [[LOOP_BB]]
+; CHECK-T2: ldrex [[OLDVAL]], [r[[ADDR]]]
+; CHECK-T2: cmp [[OLDVAL]], r1
+; CHECK-T2: beq [[LOOP_BB]]
+; CHECK-T2: clrex
ret i32 %oldval
}
define i32 @test_return(i32* %p, i32 %oldval, i32 %newval) {
; CHECK-LABEL: test_return:
-; CHECK: dmb ishst
-
-; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]:
; CHECK: ldrex [[LOADED:r[0-9]+]], [r0]
; CHECK: cmp [[LOADED]], r1
; CHECK: bne [[FAILED:LBB[0-9]+_[0-9]+]]
+; CHECK: dmb ishst
+
+; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]:
; CHECK: strex [[STATUS:r[0-9]+]], {{r[0-9]+}}, [r0]
-; CHECK: cmp [[STATUS]], #0
-; CHECK: bne [[LOOP]]
+; CHECK: cbz [[STATUS]], [[SUCCESS:LBB[0-9]+_[0-9]+]]
+; CHECK: ldrex [[LOADED]], [r0]
+; CHECK: cmp [[LOADED]], r1
+; CHECK: beq [[LOOP]]
+
+; CHECK: [[FAILED]]:
; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}}
+; CHECK: clrex
; CHECK: dmb ish
-; CHECK: movs r0, #1
+; CHECK: movs r0, #0
; CHECK: bx lr
-; CHECK: [[FAILED]]:
+; CHECK: [[SUCCESS]]:
; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}}
; CHECK: dmb ish
-; CHECK: movs r0, #0
+; CHECK: movs r0, #1
; CHECK: bx lr
%pair = cmpxchg i32* %p, i32 %oldval, i32 %newval seq_cst seq_cst
define i1 @test_return_bool(i8* %value, i8 %oldValue, i8 %newValue) {
; CHECK-LABEL: test_return_bool:
-; CHECK: dmb ishst
; CHECK: uxtb [[OLDBYTE:r[0-9]+]], r1
-; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]:
; CHECK: ldrexb [[LOADED:r[0-9]+]], [r0]
; CHECK: cmp [[LOADED]], [[OLDBYTE]]
; CHECK: bne [[FAIL:LBB[0-9]+_[0-9]+]]
+; CHECK: dmb ishst
+
+; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]:
; CHECK: strexb [[STATUS:r[0-9]+]], {{r[0-9]+}}, [r0]
-; CHECK: cmp [[STATUS]], #0
-; CHECK: bne [[LOOP]]
+; CHECK: cbz [[STATUS]], [[SUCCESS:LBB[0-9]+_[0-9]+]]
+
+; CHECK: ldrexb [[LOADED]], [r0]
+; CHECK: cmp [[LOADED]], [[OLDBYTE]]
+; CHECK: beq [[LOOP]]
+
; FIXME: this eor is redundant. Need to teach DAG combine that.
-; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}}
-; CHECK: movs [[TMP:r[0-9]+]], #1
+; CHECK: [[FAIL]]:
+; CHECK: clrex
+; CHECK: movs [[TMP:r[0-9]+]], #0
; CHECK: eor r0, [[TMP]], #1
; CHECK: bx lr
-; CHECK: [[FAIL]]:
-; CHECK: movs [[TMP:r[0-9]+]], #0
+; CHECK: [[SUCCESS]]:
+; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}}
+; CHECK: movs [[TMP:r[0-9]+]], #1
; CHECK: eor r0, [[TMP]], #1
; CHECK: bx lr
define void @test_conditional(i32* %p, i32 %oldval, i32 %newval) {
; CHECK-LABEL: test_conditional:
-; CHECK: dmb ishst
-
-; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]:
; CHECK: ldrex [[LOADED:r[0-9]+]], [r0]
; CHECK: cmp [[LOADED]], r1
; CHECK: bne [[FAILED:LBB[0-9]+_[0-9]+]]
+; CHECK: dmb ishst
+
+; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]:
; CHECK: strex [[STATUS:r[0-9]+]], r2, [r0]
-; CHECK: cmp [[STATUS]], #0
-; CHECK: bne [[LOOP]]
+; CHECK: cbz [[STATUS]], [[SUCCESS:LBB[0-9]+_[0-9]+]]
-; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}}
-; CHECK: dmb ish
-; CHECK: b.w _bar
+; CHECK: ldrex [[LOADED]], [r0]
+; CHECK: cmp [[LOADED]], r1
+; CHECK: beq [[LOOP]]
; CHECK: [[FAILED]]:
; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}}
+; CHECK: clrex
; CHECK: dmb ish
; CHECK: b.w _baz
+; CHECK: [[SUCCESS]]:
+; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}}
+; CHECK: dmb ish
+; CHECK: b.w _bar
+
%pair = cmpxchg i32* %p, i32 %oldval, i32 %newval seq_cst seq_cst
%success = extractvalue { i32, i1 } %pair, 1
br i1 %success, label %true, label %false
%pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new seq_cst monotonic
%oldval = extractvalue { i32, i1 } %pair, 0
; CHECK-NEXT: BB#0:
-; CHECK-NEXT: dmb ish
; CHECK-NEXT: ldrex [[LOADED:r[0-9]+]], [r0]
; CHECK-NEXT: cmp [[LOADED]], r1
; CHECK-NEXT: bne [[LDFAILBB:LBB[0-9]+_[0-9]+]]
; CHECK-NEXT: BB#1:
+; CHECK-NEXT: dmb ish
; CHECK-NEXT: strex [[SUCCESS:r[0-9]+]], r2, [r0]
; CHECK-NEXT: cmp [[SUCCESS]], #0
; CHECK-NEXT: bne [[FAILBB:LBB[0-9]+_[0-9]+]]
%success = extractvalue { i32, i1 } %pair, 1
; CHECK-NEXT: BB#0:
-; CHECK-NEXT: dmb ish
; CHECK-NEXT: ldrex [[LOADED:r[0-9]+]], [r1]
; CHECK-NEXT: cmp [[LOADED]], r2
; CHECK-NEXT: bne [[LDFAILBB:LBB[0-9]+_[0-9]+]]
; CHECK-NEXT: BB#1:
-; CHECK-NEXT: strex [[SUCCESS:r[0-9]+]], r3, [r1]
+; CHECK-NEXT: dmb ish
; CHECK-NEXT: mov r0, #0
+; CHECK-NEXT: strex [[SUCCESS:r[0-9]+]], r3, [r1]
; CHECK-NEXT: cmp [[SUCCESS]], #0
; CHECK-NEXT: bxne lr
; CHECK-NEXT: dmb ish
define i8 @test_cmpxchg_i8_seqcst_seqcst(i8* %ptr, i8 %desired, i8 %newval) {
; CHECK-LABEL: @test_cmpxchg_i8_seqcst_seqcst
-; CHECK: call void @llvm.arm.dmb(i32 11)
-; CHECK: br label %[[LOOP:.*]]
+; CHECK: br label %[[START:.*]]
-; CHECK: [[LOOP]]:
+; CHECK: [[START]]:
; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
-; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i8
+; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i8 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]]
-; CHECK: [[TRY_STORE]]:
+; CHECK: [[FENCED_STORE]]:
+; CHECK: call void @llvm.arm.dmb(i32 11)
+; CHECK: br label %[[LOOP:.*]]
+
+; CHECK: [[LOOP]]:
+; CHECK: [[LOADED_LOOP:%.*]] = phi i8 [ [[OLDVAL]], %[[FENCED_STORE]] ], [ [[OLDVAL_LOOP:%.*]], %[[RELEASED_LOAD:.*]] ]
; CHECK: [[NEWVAL32:%.*]] = zext i8 %newval to i32
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
-; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
+; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[RELEASED_LOAD]]
+
+; CHECK: [[RELEASED_LOAD]]:
+; CHECK: [[OLDVAL32_LOOP:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
+; CHECK: [[OLDVAL_LOOP]] = trunc i32 [[OLDVAL32_LOOP]] to i8
+; CHECK: [[SHOULD_STORE_LOOP:%.*]] = icmp eq i8 [[OLDVAL_LOOP]], %desired
+; CHECK: br i1 [[SHOULD_STORE_LOOP]], label %[[LOOP]], label %[[NO_STORE_BB]]
; CHECK: [[SUCCESS_BB]]:
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[DONE:.*]]
; CHECK: [[NO_STORE_BB]]:
+; CHECK-NEXT: [[LOADED_NO_STORE:%.*]] = phi i8 [ [[OLDVAL]], %[[START]] ], [ [[OLDVAL_LOOP]], %[[RELEASED_LOAD]] ]
; CHECK-NEXT: call void @llvm.arm.clrex()
; CHECK-NEXT: br label %[[FAILURE_BB:.*]]
; CHECK: [[DONE]]:
; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
-; CHECK: ret i8 [[OLDVAL]]
+; CHECK: [[LOADED:%.*]] = phi i8 [ [[LOADED_LOOP]], %[[SUCCESS_BB]] ], [ [[LOADED_NO_STORE]], %[[FAILURE_BB]] ]
+; CHECK: ret i8 [[LOADED]]
%pairold = cmpxchg i8* %ptr, i8 %desired, i8 %newval seq_cst seq_cst
%old = extractvalue { i8, i1 } %pairold, 0
define i16 @test_cmpxchg_i16_seqcst_monotonic(i16* %ptr, i16 %desired, i16 %newval) {
; CHECK-LABEL: @test_cmpxchg_i16_seqcst_monotonic
-; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i16(i16* %ptr)
; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i16
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i16 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]]
-; CHECK: [[TRY_STORE]]:
+; CHECK: [[FENCED_STORE]]:
+; CHECK: call void @llvm.arm.dmb(i32 11)
+; CHECK: br label %[[LOOP:.*]]
+
+; CHECK: [[LOOP]]:
+; CHECK: [[LOADED_LOOP:%.*]] = phi i16 [ [[OLDVAL]], %[[FENCED_STORE]] ], [ [[OLDVAL_LOOP:%.*]], %[[RELEASED_LOAD:.*]] ]
; CHECK: [[NEWVAL32:%.*]] = zext i16 %newval to i32
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i16(i32 [[NEWVAL32]], i16* %ptr)
; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
-; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
+; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[RELEASED_LOAD:.*]]
+
+; CHECK: [[RELEASED_LOAD]]:
+; CHECK: [[OLDVAL32_LOOP:%.*]] = call i32 @llvm.arm.ldrex.p0i16(i16* %ptr)
+; CHECK: [[OLDVAL_LOOP]] = trunc i32 [[OLDVAL32_LOOP]] to i16
+; CHECK: [[SHOULD_STORE_LOOP:%.*]] = icmp eq i16 [[OLDVAL_LOOP]], %desired
+; CHECK: br i1 [[SHOULD_STORE_LOOP]], label %[[LOOP]], label %[[NO_STORE_BB]]
; CHECK: [[SUCCESS_BB]]:
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[DONE:.*]]
; CHECK: [[NO_STORE_BB]]:
+; CHECK-NEXT: [[LOADED_NO_STORE:%.*]] = phi i16 [ [[OLDVAL]], %[[START]] ], [ [[OLDVAL_LOOP]], %[[RELEASED_LOAD]] ]
; CHECK-NEXT: call void @llvm.arm.clrex()
; CHECK-NEXT: br label %[[FAILURE_BB:.*]]
; CHECK: [[DONE]]:
; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
-; CHECK: ret i16 [[OLDVAL]]
+; CHECK: [[LOADED:%.*]] = phi i16 [ [[LOADED_LOOP]], %[[SUCCESS_BB]] ], [ [[LOADED_NO_STORE]], %[[FAILURE_BB]] ]
+; CHECK: ret i16 [[LOADED]]
%pairold = cmpxchg i16* %ptr, i16 %desired, i16 %newval seq_cst monotonic
%old = extractvalue { i16, i1 } %pairold, 0
%old = extractvalue { i64, i1 } %pairold, 0
ret i64 %old
}
+
+; Strong seq_cst cmpxchg in a minsize function. The checks expect the leading
+; dmb before the branch into the start block, and a failed strex branching
+; back to that start block.
+define i32 @test_cmpxchg_minsize(i32* %addr, i32 %desired, i32 %new) minsize {
+; CHECK-LABEL: @test_cmpxchg_minsize
+; CHECK: call void @llvm.arm.dmb(i32 11)
+; CHECK: br label %[[START:.*]]
+
+; CHECK: [[START]]:
+; CHECK: [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr)
+; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]]
+
+; CHECK: [[TRY_STORE]]:
+; CHECK: [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr)
+; CHECK: [[SUCCESS:%.*]] = icmp eq i32 [[STREX]], 0
+; CHECK: br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[START]]
+
+; CHECK: [[SUCCESS_BB]]:
+; CHECK: call void @llvm.arm.dmb(i32 11)
+; CHECK: br label %[[END:.*]]
+
+; CHECK: [[NO_STORE_BB]]:
+; CHECK: call void @llvm.arm.clrex()
+; CHECK: br label %[[FAILURE_BB]]
+
+; CHECK: [[FAILURE_BB]]:
+; CHECK: call void @llvm.arm.dmb(i32 11)
+; CHECK: br label %[[END]]
+
+; CHECK: [[END]]:
+; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
+; CHECK: ret i32 [[LOADED]]
+
+ %pair = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst seq_cst
+ %oldval = extractvalue { i32, i1 } %pair, 0
+ ret i32 %oldval
+}
define i32 @test_cmpxchg_seq_cst(i32* %addr, i32 %desired, i32 %new) {
; CHECK-LABEL: @test_cmpxchg_seq_cst
; Intrinsic for "dmb ishst" is then expected
-; CHECK: call void @llvm.arm.dmb(i32 10)
; CHECK: br label %[[START:.*]]
; CHECK: [[START]]:
; CHECK: [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr)
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]]
+
+; CHECK: [[FENCED_STORE]]:
+; CHECK: call void @llvm.arm.dmb(i32 10)
+; CHECK: br label %[[TRY_STORE:.*]]
; CHECK: [[TRY_STORE]]:
; CHECK: [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr)
define i1 @test_cmpxchg_weak_fail(i32* %addr, i32 %desired, i32 %new) {
; CHECK-LABEL: @test_cmpxchg_weak_fail
-; CHECK: call void @llvm.arm.dmb(i32 10)
; CHECK: br label %[[START:.*]]
; CHECK: [[START]]:
; CHECK: [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr)
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]]
+
+; CHECK: [[FENCED_STORE]]:
+; CHECK: call void @llvm.arm.dmb(i32 10)
+; CHECK: br label %[[TRY_STORE:.*]]
; CHECK: [[TRY_STORE]]:
; CHECK: [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr)
%oldval = extractvalue { i32, i1 } %pair, 0
ret i32 %oldval
}
+
+; Weak seq_cst cmpxchg in a minsize function. The checks expect the leading
+; dmb (i32 10) in the block reached when the compare succeeds, ahead of the
+; strex, and a failed strex branching to the failure block rather than
+; retrying.
+define i32 @test_cmpxchg_seq_cst_minsize(i32* %addr, i32 %desired, i32 %new) minsize {
+; CHECK-LABEL: @test_cmpxchg_seq_cst_minsize
+; CHECK: br label %[[START:.*]]
+
+; CHECK: [[START]]:
+; CHECK: [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr)
+; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired
+; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]]
+
+; CHECK: [[FENCED_STORE]]:
+; CHECK: call void @llvm.arm.dmb(i32 10)
+; CHECK: br label %[[TRY_STORE:.*]]
+
+; CHECK: [[TRY_STORE]]:
+; CHECK: [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr)
+; CHECK: [[SUCCESS:%.*]] = icmp eq i32 [[STREX]], 0
+; CHECK: br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[FAILURE_BB:.*]]
+
+; CHECK: [[SUCCESS_BB]]:
+; CHECK: call void @llvm.arm.dmb(i32 11)
+; CHECK: br label %[[END:.*]]
+
+; CHECK: [[NO_STORE_BB]]:
+; CHECK: call void @llvm.arm.clrex()
+; CHECK: br label %[[FAILURE_BB]]
+
+; CHECK: [[FAILURE_BB]]:
+; CHECK: call void @llvm.arm.dmb(i32 11)
+; CHECK: br label %[[END]]
+
+; CHECK: [[END]]:
+; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
+; CHECK: ret i32 [[LOADED]]
+
+ %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new seq_cst seq_cst
+ %oldval = extractvalue { i32, i1 } %pair, 0
+ ret i32 %oldval
+}