From 34c098b780a27a90b5614ea3b949b9269835f2a5 Mon Sep 17 00:00:00 2001
From: Tomas Matheson
Date: Tue, 11 May 2021 17:15:07 +0100
Subject: [PATCH] [ARM] Prevent spilling between ldrex/strex pairs

Based on the same change for AArch64:
4751cadcca45984d7671e594ce95aed8fe030bf1

At -O0, the fast register allocator may insert spills between the ldrex
and strex instructions inserted by AtomicExpandPass when expanding
atomicrmw instructions in LL/SC loops. To avoid this, expand to cmpxchg
loops and therefore expand the cmpxchg pseudos after register
allocation.

This required a tweak to ARMExpandPseudo::ExpandCMP_SWAP to use the
4-byte encoding of UXT, since the pseudo instruction can be allocated a
high register (R8-R15) which the 2-byte encoding doesn't support.
However, the 4-byte encodings are not present for ARM v8-M Baseline. To
enable this, two new pseudos, tCMP_SWAP_8 and tCMP_SWAP_16, are added
for Thumb; these are only valid for v8mbase.

The previously committed attempt, D101164, had to be reverted due to
runtime failures in the test suites. Rather than spending time fixing
that implementation (adding another implementation of atomic operations
and more divergence between backends), I have chosen to follow the
approach taken in D101163.

Differential Revision: https://reviews.llvm.org/D101898

Depends on D101912
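
As a rough illustration of the new -O0 flow (a schematic sketch, not a
test from this patch; the global and function names below are
hypothetical): AtomicExpandPass now rewrites the atomicrmw into a
cmpxchg loop in IR, the cmpxchg is selected to a CMP_SWAP pseudo, and
that pseudo is only expanded into its ldrex/strex loop after register
allocation, so the fast register allocator never sees the exclusive
pair and cannot spill between them.

    ; Input IR (hypothetical):
    @val = global i8 0

    define i8 @rmw_add() {
    entry:
      %old = atomicrmw add i8* @val, i8 1 monotonic
      ret i8 %old
    }

    ; After AtomicExpandPass at -O0 (schematic):
    ;   %init = load i8, i8* @val          ; initial load in %entry
    ; loop:
    ;   %loaded = phi i8 [ %init, %entry ], [ %value, %loop ]
    ;   %new = add i8 %loaded, 1
    ;   %pair = cmpxchg i8* @val, i8 %loaded, i8 %new monotonic monotonic
    ;   %value = extractvalue { i8, i1 } %pair, 0
    ;   %ok = extractvalue { i8, i1 } %pair, 1
    ;   br i1 %ok, label %done, label %loop

On Thumb the cmpxchg selects to the new tCMP_SWAP_8/tCMP_SWAP_16
pseudos; their tGPR constraint on $desired keeps the value fed to the
zero-extend in a low register, so the 2-byte tUXTB/tUXTH encodings can
be used on v8-M Baseline, which lacks t2UXTB/t2UXTH.
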
---
 llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp  |  36 ++++---
 llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp       |   4 +-
 llvm/lib/Target/ARM/ARMISelLowering.cpp       |   8 ++
 llvm/lib/Target/ARM/ARMInstrThumb.td          |  18 ++++
 .../ARM/atomicrmw_exclusive_monitor_ints.ll   | 110 ++++++++++-----------
 5 files changed, 107 insertions(+), 69 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 5fe8e96..5764ddd 100644
--- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -1566,6 +1566,15 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB,
   Register DesiredReg = MI.getOperand(3).getReg();
   Register NewReg = MI.getOperand(4).getReg();
 
+  if (IsThumb) {
+    assert(STI->hasV8MBaselineOps() &&
+           "CMP_SWAP not expected to be custom expanded for Thumb1");
+    assert((UxtOp == 0 || UxtOp == ARM::tUXTB || UxtOp == ARM::tUXTH) &&
+           "ARMv8-M.baseline does not have t2UXTB/t2UXTH");
+    assert(ARM::tGPRRegClass.contains(DesiredReg) &&
+           "DesiredReg used for UXT op must be tGPR");
+  }
+
   MachineFunction *MF = MBB.getParent();
   auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
   auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
@@ -2779,20 +2788,23 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
   case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true); return true;
   case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true); return true;
+
+  case ARM::tCMP_SWAP_8:
+    assert(STI->isThumb());
+    return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREXB, ARM::t2STREXB, ARM::tUXTB,
+                          NextMBBI);
+  case ARM::tCMP_SWAP_16:
+    assert(STI->isThumb());
+    return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREXH, ARM::t2STREXH, ARM::tUXTH,
+                          NextMBBI);
+
   case ARM::CMP_SWAP_8:
-    if (STI->isThumb())
-      return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREXB, ARM::t2STREXB,
-                            ARM::tUXTB, NextMBBI);
-    else
-      return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREXB, ARM::STREXB,
-                            ARM::UXTB, NextMBBI);
+    assert(!STI->isThumb());
+    return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREXB, ARM::STREXB, ARM::UXTB,
+                          NextMBBI);
   case ARM::CMP_SWAP_16:
-    if (STI->isThumb())
-      return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREXH, ARM::t2STREXH,
-                            ARM::tUXTH, NextMBBI);
-    else
-      return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREXH, ARM::STREXH,
-                            ARM::UXTH, NextMBBI);
+    assert(!STI->isThumb());
+    return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREXH, ARM::STREXH, ARM::UXTH,
+                          NextMBBI);
   case ARM::CMP_SWAP_32:
     if (STI->isThumb())
       return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREX, ARM::t2STREX, 0,
diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 6149aec..5024f47 100644
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -3299,9 +3299,9 @@ void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
   unsigned Opcode;
   EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
   if (MemTy == MVT::i8)
-    Opcode = ARM::CMP_SWAP_8;
+    Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_8 : ARM::CMP_SWAP_8;
   else if (MemTy == MVT::i16)
-    Opcode = ARM::CMP_SWAP_16;
+    Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_16 : ARM::CMP_SWAP_16;
   else if (MemTy == MVT::i32)
     Opcode = ARM::CMP_SWAP_32;
   else
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index cae4b9b..f9f0b57 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -19359,6 +19359,14 @@ ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
   if (AI->isFloatingPointOperation())
     return AtomicExpansionKind::CmpXChg;
 
+  // At -O0, fast-regalloc cannot cope with the live vregs necessary to
+  // implement atomicrmw without spilling. If the target address is also on the
+  // stack and close enough to the spill slot, this can lead to a situation
+  // where the monitor always gets cleared and the atomic operation can never
+  // succeed. So at -O0 lower this operation to a CAS loop.
+  if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
+    return AtomicExpansionKind::CmpXChg;
+
   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
   bool hasAtomicRMW = !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps();
   return (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW)
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb.td b/llvm/lib/Target/ARM/ARMInstrThumb.td
index 64d4dc0..ef07b28 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb.td
@@ -1766,3 +1766,21 @@ def : tInstAlias<"asr${s}${p} $Rdm, $imm",
 def tLDRConstPool
   : tAsmPseudo<"ldr${p} $Rt, $immediate",
                (ins tGPR:$Rt, const_pool_asm_imm:$immediate, pred:$p)>;
+
+//===----------------------------------
+// Atomic cmpxchg for -O0
+//===----------------------------------
+
+// See ARMInstrInfo.td. These two thumb specific pseudos are required to
+// restrict the register class for the UXTB/UXTH ops used in the expansion.
+
+let Constraints = "@earlyclobber $Rd,@earlyclobber $temp",
+    mayLoad = 1, mayStore = 1 in {
+def tCMP_SWAP_8 : PseudoInst<(outs GPR:$Rd, GPR:$temp),
+                             (ins GPR:$addr, tGPR:$desired, GPR:$new),
+                             NoItinerary, []>, Sched<[]>;
+
+def tCMP_SWAP_16 : PseudoInst<(outs GPR:$Rd, GPR:$temp),
+                              (ins GPR:$addr, tGPR:$desired, GPR:$new),
+                              NoItinerary, []>, Sched<[]>;
+}
diff --git a/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll b/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll
index 12b270e..277843c 100644
--- a/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll
+++ b/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll
@@ -16,7 +16,7 @@
 define i8 @test_xchg_i8() {
 ; COMMON-LABEL: test_xchg_i8:
 ; EXPAND32: ldrexb
-; EXPAND32: str
+; EXPAND32-NOT: str
 ; EXPAND32: strexb
 ; THUMB1: bl __sync_lock_test_and_set_1
 entry:
@@ -26,7 +26,7 @@ entry:
 define i8 @test_add_i8() {
 ; COMMON-LABEL: test_add_i8:
 ; EXPAND32: ldrexb
-; EXPAND32: str
+; EXPAND32-NOT: str
 ; EXPAND32: strexb
 ; THUMB1: bl __sync_fetch_and_add_1
 entry:
@@ -36,7 +36,7 @@ entry:
 define i8 @test_sub_i8() {
 ; COMMON-LABEL: test_sub_i8:
 ; EXPAND32: ldrexb
-; EXPAND32: str
+; EXPAND32-NOT: str
 ; EXPAND32: strexb
 ; THUMB1: bl __sync_fetch_and_sub_1
 entry:
@@ -46,7 +46,7 @@ entry:
 define i8 @test_and_i8() {
 ; COMMON-LABEL: test_and_i8:
 ; EXPAND32: ldrexb
-; EXPAND32: str
+; EXPAND32-NOT: str
 ; EXPAND32: strexb
 ; THUMB1: bl __sync_fetch_and_and_1
 entry:
@@ -56,7 +56,7 @@ entry:
 define i8 @test_nand_i8() {
 ; COMMON-LABEL: test_nand_i8:
 ; EXPAND32: ldrexb
-; EXPAND32: str
+; EXPAND32-NOT: str
 ; EXPAND32: strexb
 ; THUMB1: bl __sync_fetch_and_nand_1
 entry:
@@ -66,7 +66,7 @@ entry:
 define i8 @test_or_i8() {
 ; COMMON-LABEL: test_or_i8:
 ; EXPAND32: ldrexb
-; EXPAND32: str
+; EXPAND32-NOT: str
 ; EXPAND32: strexb
 ; THUMB1: bl __sync_fetch_and_or_1
 entry:
@@ -76,7 +76,7 @@ entry:
 define i8 @test_xor_i8() {
 ; COMMON-LABEL: test_xor_i8:
 ; EXPAND32: ldrexb
-; EXPAND32: str
+; EXPAND32-NOT: str
 ; EXPAND32: strexb
 ; THUMB1: bl __sync_fetch_and_xor_1
 entry:
@@ -86,7 +86,7 @@ entry:
 define i8 @test_max_i8() {
 ; COMMON-LABEL: test_max_i8:
 ; EXPAND32: ldrexb
-; EXPAND32: str
+; EXPAND32-NOT: str
 ; EXPAND32: strexb
 ; THUMB1: bl __sync_fetch_and_max_1
 entry:
@@ -96,7 +96,7 @@ entry:
 define i8 @test_min_i8() {
 ; COMMON-LABEL: test_min_i8:
 ; EXPAND32: ldrexb
-; EXPAND32: str
+; EXPAND32-NOT: str
 ; EXPAND32: strexb
 ; THUMB1: bl __sync_fetch_and_min_1
 entry:
@@ -106,7 +106,7 @@ entry:
 define i8 @test_umax_i8() {
 ; COMMON-LABEL: test_umax_i8:
 ; EXPAND32: ldrexb
-; EXPAND32: str
+; EXPAND32-NOT: str
 ; EXPAND32: strexb
 ; THUMB1: bl __sync_fetch_and_umax_1
 entry:
@@ -116,7 +116,7 @@ entry:
 define i8 @test_umin_i8() {
 ; COMMON-LABEL: test_umin_i8:
 ; EXPAND32: ldrexb
-; EXPAND32: str
+; EXPAND32-NOT: str
 ; EXPAND32: strexb
 ; THUMB1: bl __sync_fetch_and_umin_1
 entry:
@@ -128,7 +128,7 @@ entry:
 define i16 @test_xchg_i16() {
 ; COMMON-LABEL: test_xchg_i16:
 ; EXPAND32: ldrexh
-; EXPAND32: str
+; EXPAND32-NOT: str
 ; EXPAND32: strexh
 ; THUMB1: bl __sync_lock_test_and_set_2
 entry:
@@ -138,7 +138,7 @@ entry:
 define i16 @test_add_i16() {
 ; COMMON-LABEL: test_add_i16:
 ; EXPAND32: ldrexh
-; EXPAND32: str
+; EXPAND32-NOT: str
 ; EXPAND32: strexh
 ; THUMB1: bl __sync_fetch_and_add_2
 entry:
@@ -148,7 +148,7 @@ entry:
 define i16 @test_sub_i16() {
 ; COMMON-LABEL: test_sub_i16:
 ; EXPAND32: ldrexh
-; EXPAND32: str
+; EXPAND32-NOT: str
 ; EXPAND32: strexh
 ; THUMB1: bl __sync_fetch_and_sub_2
 entry:
@@ -158,7 +158,7 @@ entry:
 define i16 @test_and_i16() {
 ; COMMON-LABEL: test_and_i16:
 ; EXPAND32: ldrexh
-; EXPAND32: str
+; EXPAND32-NOT: str
 ; EXPAND32: strexh
 ; THUMB1: bl __sync_fetch_and_and_2
 entry:
@@ -168,7 +168,7 @@ entry:
 define i16 @test_nand_i16() {
 ; COMMON-LABEL: test_nand_i16:
 ; EXPAND32: ldrexh
-; EXPAND32: str
+; EXPAND32-NOT: str
 ; EXPAND32: strexh
 ; THUMB1: bl __sync_fetch_and_nand_2
 entry:
@@ -178,7 +178,7 @@ entry:
 define i16 @test_or_i16() {
 ; COMMON-LABEL: test_or_i16:
 ; EXPAND32: ldrexh
-; EXPAND32: str
+; EXPAND32-NOT: str
 ; EXPAND32: strexh
 ; THUMB1: bl __sync_fetch_and_or_2
 entry:
@@ -188,7 +188,7 @@ entry:
 define i16 @test_xor_i16() {
 ; COMMON-LABEL: test_xor_i16:
 ; EXPAND32: ldrexh
-; EXPAND32: str
+; EXPAND32-NOT: str
 ; EXPAND32: strexh
 ; THUMB1: bl __sync_fetch_and_xor_2
 entry:
@@ -198,7 +198,7 @@ entry:
 define i16 @test_max_i16() {
 ; COMMON-LABEL: test_max_i16:
 ; EXPAND32: ldrexh
-; EXPAND32: str
+; EXPAND32-NOT: str
 ; EXPAND32: strexh
 ; THUMB1: bl __sync_fetch_and_max_2
 entry:
@@ -208,7 +208,7 @@ entry:
 define i16 @test_min_i16() {
 ; COMMON-LABEL: test_min_i16:
 ; EXPAND32: ldrexh
-; EXPAND32: str
+; EXPAND32-NOT: str
 ; EXPAND32: strexh
 ; THUMB1: bl __sync_fetch_and_min_2
 entry:
@@ -218,7 +218,7 @@ entry:
 define i16 @test_umax_i16() {
 ; COMMON-LABEL: test_umax_i16:
 ; EXPAND32: ldrexh
-; EXPAND32: str
+; EXPAND32-NOT: str
 ; EXPAND32: strexh
 ; THUMB1: bl __sync_fetch_and_umax_2
 entry:
@@ -228,7 +228,7 @@ entry:
 define i16 @test_umin_i16() {
 ; COMMON-LABEL: test_umin_i16:
 ; EXPAND32: ldrexh
-; EXPAND32: str
+; EXPAND32-NOT: str
 ; EXPAND32: strexh
 ; THUMB1: bl __sync_fetch_and_umin_2
 entry:
@@ -240,7 +240,7 @@ entry:
 define i32 @test_xchg_i32() {
 ; COMMON-LABEL: test_xchg_i32:
 ; EXPAND32: ldrex
-; EXPAND32: str
+; EXPAND32-NOT: str
 ; EXPAND32: strex
 ; THUMB1: bl __sync_lock_test_and_set_4
 entry:
@@ -250,7 +250,7 @@ entry:
 define i32 @test_add_i32() {
 ; COMMON-LABEL: test_add_i32:
 ; EXPAND32: ldrex
-; EXPAND32: str
+; EXPAND32-NOT: str
 ; EXPAND32: strex
 ; THUMB1: bl __sync_fetch_and_add_4
 entry:
@@ -260,7 +260,7 @@ entry:
 define i32 @test_sub_i32() {
 ; COMMON-LABEL: test_sub_i32:
 ; EXPAND32: ldrex
-; EXPAND32: str
+; EXPAND32-NOT: str
 ; EXPAND32: strex
 ; THUMB1: bl __sync_fetch_and_sub_4
 entry:
@@ -270,7 +270,7 @@ entry:
 define i32 @test_and_i32() {
 ; COMMON-LABEL: test_and_i32:
 ; EXPAND32: ldrex
-; EXPAND32: str
+; EXPAND32-NOT: str
 ; EXPAND32: strex
 ; THUMB1: bl __sync_fetch_and_and_4
 entry:
@@ -280,7 +280,7 @@ entry:
 define i32 @test_nand_i32() {
 ; COMMON-LABEL: test_nand_i32:
 ; EXPAND32: ldrex
-; EXPAND32: str
+; EXPAND32-NOT: str
 ; EXPAND32: strex
 ; THUMB1: bl __sync_fetch_and_nand_4
 entry:
@@ -290,7 +290,7 @@ entry:
 define i32 @test_or_i32() {
 ; COMMON-LABEL: test_or_i32:
 ; EXPAND32: ldrex
-; EXPAND32: str
+; EXPAND32-NOT: str
 ; EXPAND32: strex
 ; THUMB1: bl __sync_fetch_and_or_4
 entry:
@@ -300,7 +300,7 @@ entry:
 define i32 @test_xor_i32() {
 ; COMMON-LABEL: test_xor_i32:
 ; EXPAND32: ldrex
-; EXPAND32: str
+; EXPAND32-NOT: str
 ; EXPAND32: strex
 ; THUMB1: bl __sync_fetch_and_xor_4
 entry:
@@ -310,7 +310,7 @@ entry:
 define i32 @test_max_i32() {
 ; COMMON-LABEL: test_max_i32:
 ; EXPAND32: ldrex
-; EXPAND32: str
+; EXPAND32-NOT: str
 ; EXPAND32: strex
 ; THUMB1: bl __sync_fetch_and_max_4
 entry:
@@ -320,7 +320,7 @@ entry:
 define i32 @test_min_i32() {
 ; COMMON-LABEL: test_min_i32:
 ; EXPAND32: ldrex
-; EXPAND32: str
+; EXPAND32-NOT: str
 ; EXPAND32: strex
 ; THUMB1: bl __sync_fetch_and_min_4
@@ -331,7 +331,7 @@ entry:
 define i32 @test_umax_i32() {
 ; COMMON-LABEL: test_umax_i32:
 ; EXPAND32: ldrex
-; EXPAND32: str
+; EXPAND32-NOT: str
 ; EXPAND32: strex
 ; THUMB1: bl __sync_fetch_and_umax_4
 entry:
@@ -341,7 +341,7 @@ entry:
 define i32 @test_umin_i32() {
 ; COMMON-LABEL: test_umin_i32:
 ; EXPAND32: ldrex
-; EXPAND32: str
+; EXPAND32-NOT: str
 ; EXPAND32: strex
 ; THUMB1: bl __sync_fetch_and_umin_4
 entry:
@@ -352,10 +352,10 @@ entry:
 define i64 @test_xchg_i64() {
 ; COMMON-LABEL: test_xchg_i64:
 ; EXPAND64: ldrexd
-; EXPAND64: str
+; EXPAND64-NOT: str
 ; EXPAND64: strexd
 ; THUMB1: bl __sync_lock_test_and_set_8
-; BASELINE64: bl __sync_lock_test_and_set_8
+; BASELINE64: bl __sync_val_compare_and_swap_8
 entry:
   %0 = atomicrmw xchg i64* @atomic_i64, i64 1 monotonic
   ret i64 %0
@@ -363,10 +363,10 @@ entry:
 define i64 @test_add_i64() {
 ; COMMON-LABEL: test_add_i64:
 ; EXPAND64: ldrexd
-; EXPAND64: str
+; EXPAND64-NOT: str
 ; EXPAND64: strexd
 ; THUMB1: bl __sync_fetch_and_add_8
-; BASELINE64: bl __sync_fetch_and_add_8
+; BASELINE64: bl __sync_val_compare_and_swap_8
 entry:
   %0 = atomicrmw add i64* @atomic_i64, i64 1 monotonic
   ret i64 %0
@@ -374,10 +374,10 @@ entry:
 define i64 @test_sub_i64() {
 ; COMMON-LABEL: test_sub_i64:
 ; EXPAND64: ldrexd
-; EXPAND64: str
+; EXPAND64-NOT: str
 ; EXPAND64: strexd
 ; THUMB1: bl __sync_fetch_and_sub_8
-; BASELINE64: bl __sync_fetch_and_sub_8
+; BASELINE64: bl __sync_val_compare_and_swap_8
 entry:
   %0 = atomicrmw sub i64* @atomic_i64, i64 1 monotonic
   ret i64 %0
@@ -385,10 +385,10 @@ entry:
 define i64 @test_and_i64() {
 ; COMMON-LABEL: test_and_i64:
 ; EXPAND64: ldrexd
-; EXPAND64: str
+; EXPAND64-NOT: str
 ; EXPAND64: strexd
 ; THUMB1: bl __sync_fetch_and_and_8
-; BASELINE64: bl __sync_fetch_and_and_8
+; BASELINE64: bl __sync_val_compare_and_swap_8
 entry:
   %0 = atomicrmw and i64* @atomic_i64, i64 1 monotonic
   ret i64 %0
@@ -396,10 +396,10 @@ entry:
 define i64 @test_nand_i64() {
 ; COMMON-LABEL: test_nand_i64:
 ; EXPAND64: ldrexd
-; EXPAND64: str
+; EXPAND64-NOT: str
 ; EXPAND64: strexd
 ; THUMB1: bl __sync_fetch_and_nand_8
-; BASELINE64: bl __sync_fetch_and_nand_8
+; BASELINE64: bl __sync_val_compare_and_swap_8
 entry:
   %0 = atomicrmw nand i64* @atomic_i64, i64 1 monotonic
   ret i64 %0
@@ -407,10 +407,10 @@ entry:
 define i64 @test_or_i64() {
 ; COMMON-LABEL: test_or_i64:
 ; EXPAND64: ldrexd
-; EXPAND64: str
+; EXPAND64-NOT: str
 ; EXPAND64: strexd
 ; THUMB1: bl __sync_fetch_and_or_8
-; BASELINE64: bl __sync_fetch_and_or_8
+; BASELINE64: bl __sync_val_compare_and_swap_8
 entry:
   %0 = atomicrmw or i64* @atomic_i64, i64 1 monotonic
   ret i64 %0
@@ -418,10 +418,10 @@ entry:
 define i64 @test_xor_i64() {
 ; COMMON-LABEL: test_xor_i64:
 ; EXPAND64: ldrexd
-; EXPAND64: str
+; EXPAND64-NOT: str
 ; EXPAND64: strexd
 ; THUMB1: bl __sync_fetch_and_xor_8
-; BASELINE64: bl __sync_fetch_and_xor_8
+; BASELINE64: bl __sync_val_compare_and_swap_8
 entry:
   %0 = atomicrmw xor i64* @atomic_i64, i64 1 monotonic
   ret i64 %0
@@ -430,10 +430,10 @@ entry:
 define i64 @test_max_i64() {
 ; COMMON-LABEL: test_max_i64:
 ; EXPAND64: ldrexd
-; EXPAND64: str
+; EXPAND64-NOT: str
 ; EXPAND64: strexd
 ; THUMB1: bl __sync_fetch_and_max_8
-; BASELINE64: bl __sync_fetch_and_max_8
+; BASELINE64: bl __sync_val_compare_and_swap_8
 entry:
   %0 = atomicrmw max i64* @atomic_i64, i64 1 monotonic
   ret i64 %0
@@ -441,10 +441,10 @@ entry:
 define i64 @test_min_i64() {
 ; COMMON-LABEL: test_min_i64:
 ; EXPAND64: ldrexd
-; EXPAND64: str
+; EXPAND64-NOT: str
 ; EXPAND64: strexd
 ; THUMB1: bl __sync_fetch_and_min_8
-; BASELINE64: bl __sync_fetch_and_min_8
+; BASELINE64: bl __sync_val_compare_and_swap_8
 entry:
   %0 = atomicrmw min i64* @atomic_i64, i64 1 monotonic
   ret i64 %0
@@ -452,10 +452,10 @@ entry:
 define i64 @test_umax_i64() {
 ; COMMON-LABEL: test_umax_i64:
 ; EXPAND64: ldrexd
-; EXPAND64: str
+; EXPAND64-NOT: str
 ; EXPAND64: strexd
 ; THUMB1: bl __sync_fetch_and_umax_8
-; BASELINE64: bl __sync_fetch_and_umax_8
+; BASELINE64: bl __sync_val_compare_and_swap_8
 entry:
   %0 = atomicrmw umax i64* @atomic_i64, i64 1 monotonic
   ret i64 %0
@@ -463,10 +463,10 @@ entry:
 define i64 @test_umin_i64() {
 ; COMMON-LABEL: test_umin_i64:
 ; EXPAND64: ldrexd
-; EXPAND64: str
+; EXPAND64-NOT: str
 ; EXPAND64: strexd
 ; THUMB1: bl __sync_fetch_and_umin_8
-; BASELINE64: bl __sync_fetch_and_umin_8
+; BASELINE64: bl __sync_val_compare_and_swap_8
 entry:
   %0 = atomicrmw umin i64* @atomic_i64, i64 1 monotonic
   ret i64 %0
-- 
2.7.4