From 61f69153e8dd7956d03ce46e30257c5bb3e41873 Mon Sep 17 00:00:00 2001 From: Nemanja Ivanovic Date: Mon, 18 Jan 2021 21:19:11 -0600 Subject: [PATCH] [PowerPC] Sign extend comparison operand for signed atomic comparisons As of 8dacca943af8a53a23b1caf3142d10fb4a77b645, we sign extend the atomic loaded operand for signed subword comparisons. However, the assumption that the other operand is correctly sign extended doesn't always hold. This patch sign extends the other operand if it needs to be sign extended. This is a second fix for https://bugs.llvm.org/show_bug.cgi?id=30451 Differential revision: https://reviews.llvm.org/D94058 --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 81 ++++- llvm/test/CodeGen/PowerPC/atomics-regression.ll | 440 +++++++++++++----------- llvm/test/CodeGen/PowerPC/sign-ext-atomics.ll | 105 ++++++ 3 files changed, 420 insertions(+), 206 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/sign-ext-atomics.ll diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index c0dca4a..d6dd70f 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -10593,17 +10593,90 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB, return BB; } +static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII) { + switch(MI.getOpcode()) { + default: + return false; + case PPC::COPY: + return TII->isSignExtended(MI); + case PPC::LHA: + case PPC::LHA8: + case PPC::LHAU: + case PPC::LHAU8: + case PPC::LHAUX: + case PPC::LHAUX8: + case PPC::LHAX: + case PPC::LHAX8: + case PPC::LWA: + case PPC::LWAUX: + case PPC::LWAX: + case PPC::LWAX_32: + case PPC::LWA_32: + case PPC::PLHA: + case PPC::PLHA8: + case PPC::PLHA8pc: + case PPC::PLHApc: + case PPC::PLWA: + case PPC::PLWA8: + case PPC::PLWA8pc: + case PPC::PLWApc: + case PPC::EXTSB: + case PPC::EXTSB8: + case PPC::EXTSB8_32_64: + case PPC::EXTSB8_rec: + case PPC::EXTSB_rec: + case 
PPC::EXTSH: + case PPC::EXTSH8: + case PPC::EXTSH8_32_64: + case PPC::EXTSH8_rec: + case PPC::EXTSH_rec: + case PPC::EXTSW: + case PPC::EXTSWSLI: + case PPC::EXTSWSLI_32_64: + case PPC::EXTSWSLI_32_64_rec: + case PPC::EXTSWSLI_rec: + case PPC::EXTSW_32: + case PPC::EXTSW_32_64: + case PPC::EXTSW_32_64_rec: + case PPC::EXTSW_rec: + case PPC::SRAW: + case PPC::SRAWI: + case PPC::SRAWI_rec: + case PPC::SRAW_rec: + return true; + } + return false; +} + MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary( MachineInstr &MI, MachineBasicBlock *BB, bool is8bit, // operation unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const { + // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. + const PPCInstrInfo *TII = Subtarget.getInstrInfo(); + + // If this is a signed comparison and the value being compared is not known + // to be sign extended, sign extend it here. + DebugLoc dl = MI.getDebugLoc(); + MachineFunction *F = BB->getParent(); + MachineRegisterInfo &RegInfo = F->getRegInfo(); + Register incr = MI.getOperand(3).getReg(); + bool IsSignExtended = Register::isVirtualRegister(incr) && + isSignExtended(*RegInfo.getVRegDef(incr), TII); + + if (CmpOpcode == PPC::CMPW && !IsSignExtended) { + Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); + BuildMI(*BB, MI, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg) + .addReg(MI.getOperand(3).getReg()); + MI.getOperand(3).setReg(ValueReg); + // Keep `incr` in sync: later code reads it instead of operand 3. + incr = ValueReg; + } // If we support part-word atomic mnemonics, just use them if (Subtarget.hasPartwordAtomics()) return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode, CmpPred); - // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. - const TargetInstrInfo *TII = Subtarget.getInstrInfo(); // In 64 bit mode we have to use 64 bits for addresses, even though the // lwarx/stwcx are 32 bits. 
With the 32-bit atomics we can use address // registers without caring whether they're 32 or 64, but here we're @@ -10613,14 +10686,11 @@ MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary( unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO; const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction *F = BB->getParent(); MachineFunction::iterator It = ++BB->getIterator(); Register dest = MI.getOperand(0).getReg(); Register ptrA = MI.getOperand(1).getReg(); Register ptrB = MI.getOperand(2).getReg(); - Register incr = MI.getOperand(3).getReg(); - DebugLoc dl = MI.getDebugLoc(); MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *loop2MBB = @@ -10634,7 +10704,6 @@ MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary( std::next(MachineBasicBlock::iterator(MI)), BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); - MachineRegisterInfo &RegInfo = F->getRegInfo(); const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; diff --git a/llvm/test/CodeGen/PowerPC/atomics-regression.ll b/llvm/test/CodeGen/PowerPC/atomics-regression.ll index ae79f82..3b7caee 100644 --- a/llvm/test/CodeGen/PowerPC/atomics-regression.ll +++ b/llvm/test/CodeGen/PowerPC/atomics-regression.ll @@ -4352,16 +4352,17 @@ define i64 @test259(i64* %ptr, i64 %val) { define i8 @test260(i8* %ptr, i8 %val) { ; PPC64LE-LABEL: test260: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: extsb 5, 4 ; PPC64LE-NEXT: .LBB260_1: -; PPC64LE-NEXT: lbarx 5, 0, 3 -; PPC64LE-NEXT: extsb 6, 5 -; PPC64LE-NEXT: cmpw 4, 6 +; PPC64LE-NEXT: lbarx 4, 0, 3 +; PPC64LE-NEXT: extsb 6, 4 +; PPC64LE-NEXT: cmpw 5, 6 ; PPC64LE-NEXT: ble 0, .LBB260_3 ; PPC64LE-NEXT: # %bb.2: -; PPC64LE-NEXT: stbcx. 4, 0, 3 +; PPC64LE-NEXT: stbcx. 
5, 0, 3 ; PPC64LE-NEXT: bne 0, .LBB260_1 ; PPC64LE-NEXT: .LBB260_3: -; PPC64LE-NEXT: mr 3, 5 +; PPC64LE-NEXT: mr 3, 4 ; PPC64LE-NEXT: blr %ret = atomicrmw max i8* %ptr, i8 %val monotonic ret i8 %ret @@ -4370,16 +4371,17 @@ define i8 @test260(i8* %ptr, i8 %val) { define i8 @test261(i8* %ptr, i8 %val) { ; PPC64LE-LABEL: test261: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: mr 5, 3 +; PPC64LE-NEXT: extsb 5, 4 ; PPC64LE-NEXT: .LBB261_1: -; PPC64LE-NEXT: lbarx 3, 0, 5 -; PPC64LE-NEXT: extsb 6, 3 -; PPC64LE-NEXT: cmpw 4, 6 +; PPC64LE-NEXT: lbarx 4, 0, 3 +; PPC64LE-NEXT: extsb 6, 4 +; PPC64LE-NEXT: cmpw 5, 6 ; PPC64LE-NEXT: ble 0, .LBB261_3 ; PPC64LE-NEXT: # %bb.2: -; PPC64LE-NEXT: stbcx. 4, 0, 5 +; PPC64LE-NEXT: stbcx. 5, 0, 3 ; PPC64LE-NEXT: bne 0, .LBB261_1 ; PPC64LE-NEXT: .LBB261_3: +; PPC64LE-NEXT: mr 3, 4 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr %ret = atomicrmw max i8* %ptr, i8 %val acquire @@ -4389,17 +4391,18 @@ define i8 @test261(i8* %ptr, i8 %val) { define i8 @test262(i8* %ptr, i8 %val) { ; PPC64LE-LABEL: test262: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: extsb 5, 4 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: .LBB262_1: -; PPC64LE-NEXT: lbarx 5, 0, 3 -; PPC64LE-NEXT: extsb 6, 5 -; PPC64LE-NEXT: cmpw 4, 6 +; PPC64LE-NEXT: lbarx 4, 0, 3 +; PPC64LE-NEXT: extsb 6, 4 +; PPC64LE-NEXT: cmpw 5, 6 ; PPC64LE-NEXT: ble 0, .LBB262_3 ; PPC64LE-NEXT: # %bb.2: -; PPC64LE-NEXT: stbcx. 4, 0, 3 +; PPC64LE-NEXT: stbcx. 
5, 0, 3 ; PPC64LE-NEXT: bne 0, .LBB262_1 ; PPC64LE-NEXT: .LBB262_3: -; PPC64LE-NEXT: mr 3, 5 +; PPC64LE-NEXT: mr 3, 4 ; PPC64LE-NEXT: blr %ret = atomicrmw max i8* %ptr, i8 %val release ret i8 %ret @@ -4408,17 +4411,18 @@ define i8 @test262(i8* %ptr, i8 %val) { define i8 @test263(i8* %ptr, i8 %val) { ; PPC64LE-LABEL: test263: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: extsb 5, 4 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: .LBB263_1: -; PPC64LE-NEXT: lbarx 5, 0, 3 -; PPC64LE-NEXT: extsb 6, 5 -; PPC64LE-NEXT: cmpw 4, 6 +; PPC64LE-NEXT: lbarx 4, 0, 3 +; PPC64LE-NEXT: extsb 6, 4 +; PPC64LE-NEXT: cmpw 5, 6 ; PPC64LE-NEXT: ble 0, .LBB263_3 ; PPC64LE-NEXT: # %bb.2: -; PPC64LE-NEXT: stbcx. 4, 0, 3 +; PPC64LE-NEXT: stbcx. 5, 0, 3 ; PPC64LE-NEXT: bne 0, .LBB263_1 ; PPC64LE-NEXT: .LBB263_3: -; PPC64LE-NEXT: mr 3, 5 +; PPC64LE-NEXT: mr 3, 4 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr %ret = atomicrmw max i8* %ptr, i8 %val acq_rel @@ -4428,17 +4432,18 @@ define i8 @test263(i8* %ptr, i8 %val) { define i8 @test264(i8* %ptr, i8 %val) { ; PPC64LE-LABEL: test264: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: extsb 5, 4 ; PPC64LE-NEXT: sync ; PPC64LE-NEXT: .LBB264_1: -; PPC64LE-NEXT: lbarx 5, 0, 3 -; PPC64LE-NEXT: extsb 6, 5 -; PPC64LE-NEXT: cmpw 4, 6 +; PPC64LE-NEXT: lbarx 4, 0, 3 +; PPC64LE-NEXT: extsb 6, 4 +; PPC64LE-NEXT: cmpw 5, 6 ; PPC64LE-NEXT: ble 0, .LBB264_3 ; PPC64LE-NEXT: # %bb.2: -; PPC64LE-NEXT: stbcx. 4, 0, 3 +; PPC64LE-NEXT: stbcx. 
5, 0, 3 ; PPC64LE-NEXT: bne 0, .LBB264_1 ; PPC64LE-NEXT: .LBB264_3: -; PPC64LE-NEXT: mr 3, 5 +; PPC64LE-NEXT: mr 3, 4 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr %ret = atomicrmw max i8* %ptr, i8 %val seq_cst @@ -4448,16 +4453,17 @@ define i8 @test264(i8* %ptr, i8 %val) { define i16 @test265(i16* %ptr, i16 %val) { ; PPC64LE-LABEL: test265: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: extsh 5, 4 ; PPC64LE-NEXT: .LBB265_1: -; PPC64LE-NEXT: lharx 5, 0, 3 -; PPC64LE-NEXT: extsh 6, 5 -; PPC64LE-NEXT: cmpw 4, 6 +; PPC64LE-NEXT: lharx 4, 0, 3 +; PPC64LE-NEXT: extsh 6, 4 +; PPC64LE-NEXT: cmpw 5, 6 ; PPC64LE-NEXT: ble 0, .LBB265_3 ; PPC64LE-NEXT: # %bb.2: -; PPC64LE-NEXT: sthcx. 4, 0, 3 +; PPC64LE-NEXT: sthcx. 5, 0, 3 ; PPC64LE-NEXT: bne 0, .LBB265_1 ; PPC64LE-NEXT: .LBB265_3: -; PPC64LE-NEXT: mr 3, 5 +; PPC64LE-NEXT: mr 3, 4 ; PPC64LE-NEXT: blr %ret = atomicrmw max i16* %ptr, i16 %val monotonic ret i16 %ret @@ -4466,16 +4472,17 @@ define i16 @test265(i16* %ptr, i16 %val) { define i16 @test266(i16* %ptr, i16 %val) { ; PPC64LE-LABEL: test266: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: mr 5, 3 +; PPC64LE-NEXT: extsh 5, 4 ; PPC64LE-NEXT: .LBB266_1: -; PPC64LE-NEXT: lharx 3, 0, 5 -; PPC64LE-NEXT: extsh 6, 3 -; PPC64LE-NEXT: cmpw 4, 6 +; PPC64LE-NEXT: lharx 4, 0, 3 +; PPC64LE-NEXT: extsh 6, 4 +; PPC64LE-NEXT: cmpw 5, 6 ; PPC64LE-NEXT: ble 0, .LBB266_3 ; PPC64LE-NEXT: # %bb.2: -; PPC64LE-NEXT: sthcx. 4, 0, 5 +; PPC64LE-NEXT: sthcx. 
5, 0, 3 ; PPC64LE-NEXT: bne 0, .LBB266_1 ; PPC64LE-NEXT: .LBB266_3: +; PPC64LE-NEXT: mr 3, 4 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr %ret = atomicrmw max i16* %ptr, i16 %val acquire @@ -4485,17 +4492,18 @@ define i16 @test266(i16* %ptr, i16 %val) { define i16 @test267(i16* %ptr, i16 %val) { ; PPC64LE-LABEL: test267: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: extsh 5, 4 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: .LBB267_1: -; PPC64LE-NEXT: lharx 5, 0, 3 -; PPC64LE-NEXT: extsh 6, 5 -; PPC64LE-NEXT: cmpw 4, 6 +; PPC64LE-NEXT: lharx 4, 0, 3 +; PPC64LE-NEXT: extsh 6, 4 +; PPC64LE-NEXT: cmpw 5, 6 ; PPC64LE-NEXT: ble 0, .LBB267_3 ; PPC64LE-NEXT: # %bb.2: -; PPC64LE-NEXT: sthcx. 4, 0, 3 +; PPC64LE-NEXT: sthcx. 5, 0, 3 ; PPC64LE-NEXT: bne 0, .LBB267_1 ; PPC64LE-NEXT: .LBB267_3: -; PPC64LE-NEXT: mr 3, 5 +; PPC64LE-NEXT: mr 3, 4 ; PPC64LE-NEXT: blr %ret = atomicrmw max i16* %ptr, i16 %val release ret i16 %ret @@ -4504,17 +4512,18 @@ define i16 @test267(i16* %ptr, i16 %val) { define i16 @test268(i16* %ptr, i16 %val) { ; PPC64LE-LABEL: test268: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: extsh 5, 4 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: .LBB268_1: -; PPC64LE-NEXT: lharx 5, 0, 3 -; PPC64LE-NEXT: extsh 6, 5 -; PPC64LE-NEXT: cmpw 4, 6 +; PPC64LE-NEXT: lharx 4, 0, 3 +; PPC64LE-NEXT: extsh 6, 4 +; PPC64LE-NEXT: cmpw 5, 6 ; PPC64LE-NEXT: ble 0, .LBB268_3 ; PPC64LE-NEXT: # %bb.2: -; PPC64LE-NEXT: sthcx. 4, 0, 3 +; PPC64LE-NEXT: sthcx. 
5, 0, 3 ; PPC64LE-NEXT: bne 0, .LBB268_1 ; PPC64LE-NEXT: .LBB268_3: -; PPC64LE-NEXT: mr 3, 5 +; PPC64LE-NEXT: mr 3, 4 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr %ret = atomicrmw max i16* %ptr, i16 %val acq_rel @@ -4524,17 +4533,18 @@ define i16 @test268(i16* %ptr, i16 %val) { define i16 @test269(i16* %ptr, i16 %val) { ; PPC64LE-LABEL: test269: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: extsh 5, 4 ; PPC64LE-NEXT: sync ; PPC64LE-NEXT: .LBB269_1: -; PPC64LE-NEXT: lharx 5, 0, 3 -; PPC64LE-NEXT: extsh 6, 5 -; PPC64LE-NEXT: cmpw 4, 6 +; PPC64LE-NEXT: lharx 4, 0, 3 +; PPC64LE-NEXT: extsh 6, 4 +; PPC64LE-NEXT: cmpw 5, 6 ; PPC64LE-NEXT: ble 0, .LBB269_3 ; PPC64LE-NEXT: # %bb.2: -; PPC64LE-NEXT: sthcx. 4, 0, 3 +; PPC64LE-NEXT: sthcx. 5, 0, 3 ; PPC64LE-NEXT: bne 0, .LBB269_1 ; PPC64LE-NEXT: .LBB269_3: -; PPC64LE-NEXT: mr 3, 5 +; PPC64LE-NEXT: mr 3, 4 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr %ret = atomicrmw max i16* %ptr, i16 %val seq_cst @@ -4726,16 +4736,17 @@ define i64 @test279(i64* %ptr, i64 %val) { define i8 @test280(i8* %ptr, i8 %val) { ; PPC64LE-LABEL: test280: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: extsb 5, 4 ; PPC64LE-NEXT: .LBB280_1: -; PPC64LE-NEXT: lbarx 5, 0, 3 -; PPC64LE-NEXT: extsb 6, 5 -; PPC64LE-NEXT: cmpw 4, 6 +; PPC64LE-NEXT: lbarx 4, 0, 3 +; PPC64LE-NEXT: extsb 6, 4 +; PPC64LE-NEXT: cmpw 5, 6 ; PPC64LE-NEXT: bge 0, .LBB280_3 ; PPC64LE-NEXT: # %bb.2: -; PPC64LE-NEXT: stbcx. 4, 0, 3 +; PPC64LE-NEXT: stbcx. 
5, 0, 3 ; PPC64LE-NEXT: bne 0, .LBB280_1 ; PPC64LE-NEXT: .LBB280_3: -; PPC64LE-NEXT: mr 3, 5 +; PPC64LE-NEXT: mr 3, 4 ; PPC64LE-NEXT: blr %ret = atomicrmw min i8* %ptr, i8 %val monotonic ret i8 %ret @@ -4744,16 +4755,17 @@ define i8 @test280(i8* %ptr, i8 %val) { define i8 @test281(i8* %ptr, i8 %val) { ; PPC64LE-LABEL: test281: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: mr 5, 3 +; PPC64LE-NEXT: extsb 5, 4 ; PPC64LE-NEXT: .LBB281_1: -; PPC64LE-NEXT: lbarx 3, 0, 5 -; PPC64LE-NEXT: extsb 6, 3 -; PPC64LE-NEXT: cmpw 4, 6 +; PPC64LE-NEXT: lbarx 4, 0, 3 +; PPC64LE-NEXT: extsb 6, 4 +; PPC64LE-NEXT: cmpw 5, 6 ; PPC64LE-NEXT: bge 0, .LBB281_3 ; PPC64LE-NEXT: # %bb.2: -; PPC64LE-NEXT: stbcx. 4, 0, 5 +; PPC64LE-NEXT: stbcx. 5, 0, 3 ; PPC64LE-NEXT: bne 0, .LBB281_1 ; PPC64LE-NEXT: .LBB281_3: +; PPC64LE-NEXT: mr 3, 4 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr %ret = atomicrmw min i8* %ptr, i8 %val acquire @@ -4763,17 +4775,18 @@ define i8 @test281(i8* %ptr, i8 %val) { define i8 @test282(i8* %ptr, i8 %val) { ; PPC64LE-LABEL: test282: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: extsb 5, 4 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: .LBB282_1: -; PPC64LE-NEXT: lbarx 5, 0, 3 -; PPC64LE-NEXT: extsb 6, 5 -; PPC64LE-NEXT: cmpw 4, 6 +; PPC64LE-NEXT: lbarx 4, 0, 3 +; PPC64LE-NEXT: extsb 6, 4 +; PPC64LE-NEXT: cmpw 5, 6 ; PPC64LE-NEXT: bge 0, .LBB282_3 ; PPC64LE-NEXT: # %bb.2: -; PPC64LE-NEXT: stbcx. 4, 0, 3 +; PPC64LE-NEXT: stbcx. 
5, 0, 3 ; PPC64LE-NEXT: bne 0, .LBB282_1 ; PPC64LE-NEXT: .LBB282_3: -; PPC64LE-NEXT: mr 3, 5 +; PPC64LE-NEXT: mr 3, 4 ; PPC64LE-NEXT: blr %ret = atomicrmw min i8* %ptr, i8 %val release ret i8 %ret @@ -4782,17 +4795,18 @@ define i8 @test282(i8* %ptr, i8 %val) { define i8 @test283(i8* %ptr, i8 %val) { ; PPC64LE-LABEL: test283: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: extsb 5, 4 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: .LBB283_1: -; PPC64LE-NEXT: lbarx 5, 0, 3 -; PPC64LE-NEXT: extsb 6, 5 -; PPC64LE-NEXT: cmpw 4, 6 +; PPC64LE-NEXT: lbarx 4, 0, 3 +; PPC64LE-NEXT: extsb 6, 4 +; PPC64LE-NEXT: cmpw 5, 6 ; PPC64LE-NEXT: bge 0, .LBB283_3 ; PPC64LE-NEXT: # %bb.2: -; PPC64LE-NEXT: stbcx. 4, 0, 3 +; PPC64LE-NEXT: stbcx. 5, 0, 3 ; PPC64LE-NEXT: bne 0, .LBB283_1 ; PPC64LE-NEXT: .LBB283_3: -; PPC64LE-NEXT: mr 3, 5 +; PPC64LE-NEXT: mr 3, 4 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr %ret = atomicrmw min i8* %ptr, i8 %val acq_rel @@ -4802,17 +4816,18 @@ define i8 @test283(i8* %ptr, i8 %val) { define i8 @test284(i8* %ptr, i8 %val) { ; PPC64LE-LABEL: test284: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: extsb 5, 4 ; PPC64LE-NEXT: sync ; PPC64LE-NEXT: .LBB284_1: -; PPC64LE-NEXT: lbarx 5, 0, 3 -; PPC64LE-NEXT: extsb 6, 5 -; PPC64LE-NEXT: cmpw 4, 6 +; PPC64LE-NEXT: lbarx 4, 0, 3 +; PPC64LE-NEXT: extsb 6, 4 +; PPC64LE-NEXT: cmpw 5, 6 ; PPC64LE-NEXT: bge 0, .LBB284_3 ; PPC64LE-NEXT: # %bb.2: -; PPC64LE-NEXT: stbcx. 4, 0, 3 +; PPC64LE-NEXT: stbcx. 
5, 0, 3 ; PPC64LE-NEXT: bne 0, .LBB284_1 ; PPC64LE-NEXT: .LBB284_3: -; PPC64LE-NEXT: mr 3, 5 +; PPC64LE-NEXT: mr 3, 4 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr %ret = atomicrmw min i8* %ptr, i8 %val seq_cst @@ -4822,16 +4837,17 @@ define i8 @test284(i8* %ptr, i8 %val) { define i16 @test285(i16* %ptr, i16 %val) { ; PPC64LE-LABEL: test285: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: extsh 5, 4 ; PPC64LE-NEXT: .LBB285_1: -; PPC64LE-NEXT: lharx 5, 0, 3 -; PPC64LE-NEXT: extsh 6, 5 -; PPC64LE-NEXT: cmpw 4, 6 +; PPC64LE-NEXT: lharx 4, 0, 3 +; PPC64LE-NEXT: extsh 6, 4 +; PPC64LE-NEXT: cmpw 5, 6 ; PPC64LE-NEXT: bge 0, .LBB285_3 ; PPC64LE-NEXT: # %bb.2: -; PPC64LE-NEXT: sthcx. 4, 0, 3 +; PPC64LE-NEXT: sthcx. 5, 0, 3 ; PPC64LE-NEXT: bne 0, .LBB285_1 ; PPC64LE-NEXT: .LBB285_3: -; PPC64LE-NEXT: mr 3, 5 +; PPC64LE-NEXT: mr 3, 4 ; PPC64LE-NEXT: blr %ret = atomicrmw min i16* %ptr, i16 %val monotonic ret i16 %ret @@ -4840,16 +4856,17 @@ define i16 @test285(i16* %ptr, i16 %val) { define i16 @test286(i16* %ptr, i16 %val) { ; PPC64LE-LABEL: test286: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: mr 5, 3 +; PPC64LE-NEXT: extsh 5, 4 ; PPC64LE-NEXT: .LBB286_1: -; PPC64LE-NEXT: lharx 3, 0, 5 -; PPC64LE-NEXT: extsh 6, 3 -; PPC64LE-NEXT: cmpw 4, 6 +; PPC64LE-NEXT: lharx 4, 0, 3 +; PPC64LE-NEXT: extsh 6, 4 +; PPC64LE-NEXT: cmpw 5, 6 ; PPC64LE-NEXT: bge 0, .LBB286_3 ; PPC64LE-NEXT: # %bb.2: -; PPC64LE-NEXT: sthcx. 4, 0, 5 +; PPC64LE-NEXT: sthcx. 
5, 0, 3 ; PPC64LE-NEXT: bne 0, .LBB286_1 ; PPC64LE-NEXT: .LBB286_3: +; PPC64LE-NEXT: mr 3, 4 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr %ret = atomicrmw min i16* %ptr, i16 %val acquire @@ -4859,17 +4876,18 @@ define i16 @test286(i16* %ptr, i16 %val) { define i16 @test287(i16* %ptr, i16 %val) { ; PPC64LE-LABEL: test287: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: extsh 5, 4 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: .LBB287_1: -; PPC64LE-NEXT: lharx 5, 0, 3 -; PPC64LE-NEXT: extsh 6, 5 -; PPC64LE-NEXT: cmpw 4, 6 +; PPC64LE-NEXT: lharx 4, 0, 3 +; PPC64LE-NEXT: extsh 6, 4 +; PPC64LE-NEXT: cmpw 5, 6 ; PPC64LE-NEXT: bge 0, .LBB287_3 ; PPC64LE-NEXT: # %bb.2: -; PPC64LE-NEXT: sthcx. 4, 0, 3 +; PPC64LE-NEXT: sthcx. 5, 0, 3 ; PPC64LE-NEXT: bne 0, .LBB287_1 ; PPC64LE-NEXT: .LBB287_3: -; PPC64LE-NEXT: mr 3, 5 +; PPC64LE-NEXT: mr 3, 4 ; PPC64LE-NEXT: blr %ret = atomicrmw min i16* %ptr, i16 %val release ret i16 %ret @@ -4878,17 +4896,18 @@ define i16 @test287(i16* %ptr, i16 %val) { define i16 @test288(i16* %ptr, i16 %val) { ; PPC64LE-LABEL: test288: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: extsh 5, 4 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: .LBB288_1: -; PPC64LE-NEXT: lharx 5, 0, 3 -; PPC64LE-NEXT: extsh 6, 5 -; PPC64LE-NEXT: cmpw 4, 6 +; PPC64LE-NEXT: lharx 4, 0, 3 +; PPC64LE-NEXT: extsh 6, 4 +; PPC64LE-NEXT: cmpw 5, 6 ; PPC64LE-NEXT: bge 0, .LBB288_3 ; PPC64LE-NEXT: # %bb.2: -; PPC64LE-NEXT: sthcx. 4, 0, 3 +; PPC64LE-NEXT: sthcx. 
5, 0, 3 ; PPC64LE-NEXT: bne 0, .LBB288_1 ; PPC64LE-NEXT: .LBB288_3: -; PPC64LE-NEXT: mr 3, 5 +; PPC64LE-NEXT: mr 3, 4 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr %ret = atomicrmw min i16* %ptr, i16 %val acq_rel @@ -4898,17 +4917,18 @@ define i16 @test288(i16* %ptr, i16 %val) { define i16 @test289(i16* %ptr, i16 %val) { ; PPC64LE-LABEL: test289: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: extsh 5, 4 ; PPC64LE-NEXT: sync ; PPC64LE-NEXT: .LBB289_1: -; PPC64LE-NEXT: lharx 5, 0, 3 -; PPC64LE-NEXT: extsh 6, 5 -; PPC64LE-NEXT: cmpw 4, 6 +; PPC64LE-NEXT: lharx 4, 0, 3 +; PPC64LE-NEXT: extsh 6, 4 +; PPC64LE-NEXT: cmpw 5, 6 ; PPC64LE-NEXT: bge 0, .LBB289_3 ; PPC64LE-NEXT: # %bb.2: -; PPC64LE-NEXT: sthcx. 4, 0, 3 +; PPC64LE-NEXT: sthcx. 5, 0, 3 ; PPC64LE-NEXT: bne 0, .LBB289_1 ; PPC64LE-NEXT: .LBB289_3: -; PPC64LE-NEXT: mr 3, 5 +; PPC64LE-NEXT: mr 3, 4 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr %ret = atomicrmw min i16* %ptr, i16 %val seq_cst @@ -8076,16 +8096,17 @@ define i64 @test479(i64* %ptr, i64 %val) { define i8 @test480(i8* %ptr, i8 %val) { ; PPC64LE-LABEL: test480: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: extsb 5, 4 ; PPC64LE-NEXT: .LBB480_1: -; PPC64LE-NEXT: lbarx 5, 0, 3 -; PPC64LE-NEXT: extsb 6, 5 -; PPC64LE-NEXT: cmpw 4, 6 +; PPC64LE-NEXT: lbarx 4, 0, 3 +; PPC64LE-NEXT: extsb 6, 4 +; PPC64LE-NEXT: cmpw 5, 6 ; PPC64LE-NEXT: ble 0, .LBB480_3 ; PPC64LE-NEXT: # %bb.2: -; PPC64LE-NEXT: stbcx. 4, 0, 3 +; PPC64LE-NEXT: stbcx. 
5, 0, 3 ; PPC64LE-NEXT: bne 0, .LBB480_1 ; PPC64LE-NEXT: .LBB480_3: -; PPC64LE-NEXT: mr 3, 5 +; PPC64LE-NEXT: mr 3, 4 ; PPC64LE-NEXT: blr %ret = atomicrmw max i8* %ptr, i8 %val syncscope("singlethread") monotonic ret i8 %ret @@ -8094,16 +8115,17 @@ define i8 @test480(i8* %ptr, i8 %val) { define i8 @test481(i8* %ptr, i8 %val) { ; PPC64LE-LABEL: test481: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: mr 5, 3 +; PPC64LE-NEXT: extsb 5, 4 ; PPC64LE-NEXT: .LBB481_1: -; PPC64LE-NEXT: lbarx 3, 0, 5 -; PPC64LE-NEXT: extsb 6, 3 -; PPC64LE-NEXT: cmpw 4, 6 +; PPC64LE-NEXT: lbarx 4, 0, 3 +; PPC64LE-NEXT: extsb 6, 4 +; PPC64LE-NEXT: cmpw 5, 6 ; PPC64LE-NEXT: ble 0, .LBB481_3 ; PPC64LE-NEXT: # %bb.2: -; PPC64LE-NEXT: stbcx. 4, 0, 5 +; PPC64LE-NEXT: stbcx. 5, 0, 3 ; PPC64LE-NEXT: bne 0, .LBB481_1 ; PPC64LE-NEXT: .LBB481_3: +; PPC64LE-NEXT: mr 3, 4 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr %ret = atomicrmw max i8* %ptr, i8 %val syncscope("singlethread") acquire @@ -8113,17 +8135,18 @@ define i8 @test481(i8* %ptr, i8 %val) { define i8 @test482(i8* %ptr, i8 %val) { ; PPC64LE-LABEL: test482: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: extsb 5, 4 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: .LBB482_1: -; PPC64LE-NEXT: lbarx 5, 0, 3 -; PPC64LE-NEXT: extsb 6, 5 -; PPC64LE-NEXT: cmpw 4, 6 +; PPC64LE-NEXT: lbarx 4, 0, 3 +; PPC64LE-NEXT: extsb 6, 4 +; PPC64LE-NEXT: cmpw 5, 6 ; PPC64LE-NEXT: ble 0, .LBB482_3 ; PPC64LE-NEXT: # %bb.2: -; PPC64LE-NEXT: stbcx. 4, 0, 3 +; PPC64LE-NEXT: stbcx. 
5, 0, 3 ; PPC64LE-NEXT: bne 0, .LBB482_1 ; PPC64LE-NEXT: .LBB482_3: -; PPC64LE-NEXT: mr 3, 5 +; PPC64LE-NEXT: mr 3, 4 ; PPC64LE-NEXT: blr %ret = atomicrmw max i8* %ptr, i8 %val syncscope("singlethread") release ret i8 %ret @@ -8132,17 +8155,18 @@ define i8 @test482(i8* %ptr, i8 %val) { define i8 @test483(i8* %ptr, i8 %val) { ; PPC64LE-LABEL: test483: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: extsb 5, 4 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: .LBB483_1: -; PPC64LE-NEXT: lbarx 5, 0, 3 -; PPC64LE-NEXT: extsb 6, 5 -; PPC64LE-NEXT: cmpw 4, 6 +; PPC64LE-NEXT: lbarx 4, 0, 3 +; PPC64LE-NEXT: extsb 6, 4 +; PPC64LE-NEXT: cmpw 5, 6 ; PPC64LE-NEXT: ble 0, .LBB483_3 ; PPC64LE-NEXT: # %bb.2: -; PPC64LE-NEXT: stbcx. 4, 0, 3 +; PPC64LE-NEXT: stbcx. 5, 0, 3 ; PPC64LE-NEXT: bne 0, .LBB483_1 ; PPC64LE-NEXT: .LBB483_3: -; PPC64LE-NEXT: mr 3, 5 +; PPC64LE-NEXT: mr 3, 4 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr %ret = atomicrmw max i8* %ptr, i8 %val syncscope("singlethread") acq_rel @@ -8152,17 +8176,18 @@ define i8 @test483(i8* %ptr, i8 %val) { define i8 @test484(i8* %ptr, i8 %val) { ; PPC64LE-LABEL: test484: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: extsb 5, 4 ; PPC64LE-NEXT: sync ; PPC64LE-NEXT: .LBB484_1: -; PPC64LE-NEXT: lbarx 5, 0, 3 -; PPC64LE-NEXT: extsb 6, 5 -; PPC64LE-NEXT: cmpw 4, 6 +; PPC64LE-NEXT: lbarx 4, 0, 3 +; PPC64LE-NEXT: extsb 6, 4 +; PPC64LE-NEXT: cmpw 5, 6 ; PPC64LE-NEXT: ble 0, .LBB484_3 ; PPC64LE-NEXT: # %bb.2: -; PPC64LE-NEXT: stbcx. 4, 0, 3 +; PPC64LE-NEXT: stbcx. 
5, 0, 3 ; PPC64LE-NEXT: bne 0, .LBB484_1 ; PPC64LE-NEXT: .LBB484_3: -; PPC64LE-NEXT: mr 3, 5 +; PPC64LE-NEXT: mr 3, 4 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr %ret = atomicrmw max i8* %ptr, i8 %val syncscope("singlethread") seq_cst @@ -8172,16 +8197,17 @@ define i8 @test484(i8* %ptr, i8 %val) { define i16 @test485(i16* %ptr, i16 %val) { ; PPC64LE-LABEL: test485: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: extsh 5, 4 ; PPC64LE-NEXT: .LBB485_1: -; PPC64LE-NEXT: lharx 5, 0, 3 -; PPC64LE-NEXT: extsh 6, 5 -; PPC64LE-NEXT: cmpw 4, 6 +; PPC64LE-NEXT: lharx 4, 0, 3 +; PPC64LE-NEXT: extsh 6, 4 +; PPC64LE-NEXT: cmpw 5, 6 ; PPC64LE-NEXT: ble 0, .LBB485_3 ; PPC64LE-NEXT: # %bb.2: -; PPC64LE-NEXT: sthcx. 4, 0, 3 +; PPC64LE-NEXT: sthcx. 5, 0, 3 ; PPC64LE-NEXT: bne 0, .LBB485_1 ; PPC64LE-NEXT: .LBB485_3: -; PPC64LE-NEXT: mr 3, 5 +; PPC64LE-NEXT: mr 3, 4 ; PPC64LE-NEXT: blr %ret = atomicrmw max i16* %ptr, i16 %val syncscope("singlethread") monotonic ret i16 %ret @@ -8190,16 +8216,17 @@ define i16 @test485(i16* %ptr, i16 %val) { define i16 @test486(i16* %ptr, i16 %val) { ; PPC64LE-LABEL: test486: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: mr 5, 3 +; PPC64LE-NEXT: extsh 5, 4 ; PPC64LE-NEXT: .LBB486_1: -; PPC64LE-NEXT: lharx 3, 0, 5 -; PPC64LE-NEXT: extsh 6, 3 -; PPC64LE-NEXT: cmpw 4, 6 +; PPC64LE-NEXT: lharx 4, 0, 3 +; PPC64LE-NEXT: extsh 6, 4 +; PPC64LE-NEXT: cmpw 5, 6 ; PPC64LE-NEXT: ble 0, .LBB486_3 ; PPC64LE-NEXT: # %bb.2: -; PPC64LE-NEXT: sthcx. 4, 0, 5 +; PPC64LE-NEXT: sthcx. 
5, 0, 3 ; PPC64LE-NEXT: bne 0, .LBB486_1 ; PPC64LE-NEXT: .LBB486_3: +; PPC64LE-NEXT: mr 3, 4 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr %ret = atomicrmw max i16* %ptr, i16 %val syncscope("singlethread") acquire @@ -8209,17 +8236,18 @@ define i16 @test486(i16* %ptr, i16 %val) { define i16 @test487(i16* %ptr, i16 %val) { ; PPC64LE-LABEL: test487: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: extsh 5, 4 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: .LBB487_1: -; PPC64LE-NEXT: lharx 5, 0, 3 -; PPC64LE-NEXT: extsh 6, 5 -; PPC64LE-NEXT: cmpw 4, 6 +; PPC64LE-NEXT: lharx 4, 0, 3 +; PPC64LE-NEXT: extsh 6, 4 +; PPC64LE-NEXT: cmpw 5, 6 ; PPC64LE-NEXT: ble 0, .LBB487_3 ; PPC64LE-NEXT: # %bb.2: -; PPC64LE-NEXT: sthcx. 4, 0, 3 +; PPC64LE-NEXT: sthcx. 5, 0, 3 ; PPC64LE-NEXT: bne 0, .LBB487_1 ; PPC64LE-NEXT: .LBB487_3: -; PPC64LE-NEXT: mr 3, 5 +; PPC64LE-NEXT: mr 3, 4 ; PPC64LE-NEXT: blr %ret = atomicrmw max i16* %ptr, i16 %val syncscope("singlethread") release ret i16 %ret @@ -8228,17 +8256,18 @@ define i16 @test487(i16* %ptr, i16 %val) { define i16 @test488(i16* %ptr, i16 %val) { ; PPC64LE-LABEL: test488: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: extsh 5, 4 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: .LBB488_1: -; PPC64LE-NEXT: lharx 5, 0, 3 -; PPC64LE-NEXT: extsh 6, 5 -; PPC64LE-NEXT: cmpw 4, 6 +; PPC64LE-NEXT: lharx 4, 0, 3 +; PPC64LE-NEXT: extsh 6, 4 +; PPC64LE-NEXT: cmpw 5, 6 ; PPC64LE-NEXT: ble 0, .LBB488_3 ; PPC64LE-NEXT: # %bb.2: -; PPC64LE-NEXT: sthcx. 4, 0, 3 +; PPC64LE-NEXT: sthcx. 
5, 0, 3 ; PPC64LE-NEXT: bne 0, .LBB488_1 ; PPC64LE-NEXT: .LBB488_3: -; PPC64LE-NEXT: mr 3, 5 +; PPC64LE-NEXT: mr 3, 4 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr %ret = atomicrmw max i16* %ptr, i16 %val syncscope("singlethread") acq_rel @@ -8248,17 +8277,18 @@ define i16 @test488(i16* %ptr, i16 %val) { define i16 @test489(i16* %ptr, i16 %val) { ; PPC64LE-LABEL: test489: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: extsh 5, 4 ; PPC64LE-NEXT: sync ; PPC64LE-NEXT: .LBB489_1: -; PPC64LE-NEXT: lharx 5, 0, 3 -; PPC64LE-NEXT: extsh 6, 5 -; PPC64LE-NEXT: cmpw 4, 6 +; PPC64LE-NEXT: lharx 4, 0, 3 +; PPC64LE-NEXT: extsh 6, 4 +; PPC64LE-NEXT: cmpw 5, 6 ; PPC64LE-NEXT: ble 0, .LBB489_3 ; PPC64LE-NEXT: # %bb.2: -; PPC64LE-NEXT: sthcx. 4, 0, 3 +; PPC64LE-NEXT: sthcx. 5, 0, 3 ; PPC64LE-NEXT: bne 0, .LBB489_1 ; PPC64LE-NEXT: .LBB489_3: -; PPC64LE-NEXT: mr 3, 5 +; PPC64LE-NEXT: mr 3, 4 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr %ret = atomicrmw max i16* %ptr, i16 %val syncscope("singlethread") seq_cst @@ -8450,16 +8480,17 @@ define i64 @test499(i64* %ptr, i64 %val) { define i8 @test500(i8* %ptr, i8 %val) { ; PPC64LE-LABEL: test500: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: extsb 5, 4 ; PPC64LE-NEXT: .LBB500_1: -; PPC64LE-NEXT: lbarx 5, 0, 3 -; PPC64LE-NEXT: extsb 6, 5 -; PPC64LE-NEXT: cmpw 4, 6 +; PPC64LE-NEXT: lbarx 4, 0, 3 +; PPC64LE-NEXT: extsb 6, 4 +; PPC64LE-NEXT: cmpw 5, 6 ; PPC64LE-NEXT: bge 0, .LBB500_3 ; PPC64LE-NEXT: # %bb.2: -; PPC64LE-NEXT: stbcx. 4, 0, 3 +; PPC64LE-NEXT: stbcx. 
5, 0, 3 ; PPC64LE-NEXT: bne 0, .LBB500_1 ; PPC64LE-NEXT: .LBB500_3: -; PPC64LE-NEXT: mr 3, 5 +; PPC64LE-NEXT: mr 3, 4 ; PPC64LE-NEXT: blr %ret = atomicrmw min i8* %ptr, i8 %val syncscope("singlethread") monotonic ret i8 %ret @@ -8468,16 +8499,17 @@ define i8 @test500(i8* %ptr, i8 %val) { define i8 @test501(i8* %ptr, i8 %val) { ; PPC64LE-LABEL: test501: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: mr 5, 3 +; PPC64LE-NEXT: extsb 5, 4 ; PPC64LE-NEXT: .LBB501_1: -; PPC64LE-NEXT: lbarx 3, 0, 5 -; PPC64LE-NEXT: extsb 6, 3 -; PPC64LE-NEXT: cmpw 4, 6 +; PPC64LE-NEXT: lbarx 4, 0, 3 +; PPC64LE-NEXT: extsb 6, 4 +; PPC64LE-NEXT: cmpw 5, 6 ; PPC64LE-NEXT: bge 0, .LBB501_3 ; PPC64LE-NEXT: # %bb.2: -; PPC64LE-NEXT: stbcx. 4, 0, 5 +; PPC64LE-NEXT: stbcx. 5, 0, 3 ; PPC64LE-NEXT: bne 0, .LBB501_1 ; PPC64LE-NEXT: .LBB501_3: +; PPC64LE-NEXT: mr 3, 4 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr %ret = atomicrmw min i8* %ptr, i8 %val syncscope("singlethread") acquire @@ -8487,17 +8519,18 @@ define i8 @test501(i8* %ptr, i8 %val) { define i8 @test502(i8* %ptr, i8 %val) { ; PPC64LE-LABEL: test502: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: extsb 5, 4 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: .LBB502_1: -; PPC64LE-NEXT: lbarx 5, 0, 3 -; PPC64LE-NEXT: extsb 6, 5 -; PPC64LE-NEXT: cmpw 4, 6 +; PPC64LE-NEXT: lbarx 4, 0, 3 +; PPC64LE-NEXT: extsb 6, 4 +; PPC64LE-NEXT: cmpw 5, 6 ; PPC64LE-NEXT: bge 0, .LBB502_3 ; PPC64LE-NEXT: # %bb.2: -; PPC64LE-NEXT: stbcx. 4, 0, 3 +; PPC64LE-NEXT: stbcx. 
5, 0, 3 ; PPC64LE-NEXT: bne 0, .LBB502_1 ; PPC64LE-NEXT: .LBB502_3: -; PPC64LE-NEXT: mr 3, 5 +; PPC64LE-NEXT: mr 3, 4 ; PPC64LE-NEXT: blr %ret = atomicrmw min i8* %ptr, i8 %val syncscope("singlethread") release ret i8 %ret @@ -8506,17 +8539,18 @@ define i8 @test502(i8* %ptr, i8 %val) { define i8 @test503(i8* %ptr, i8 %val) { ; PPC64LE-LABEL: test503: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: extsb 5, 4 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: .LBB503_1: -; PPC64LE-NEXT: lbarx 5, 0, 3 -; PPC64LE-NEXT: extsb 6, 5 -; PPC64LE-NEXT: cmpw 4, 6 +; PPC64LE-NEXT: lbarx 4, 0, 3 +; PPC64LE-NEXT: extsb 6, 4 +; PPC64LE-NEXT: cmpw 5, 6 ; PPC64LE-NEXT: bge 0, .LBB503_3 ; PPC64LE-NEXT: # %bb.2: -; PPC64LE-NEXT: stbcx. 4, 0, 3 +; PPC64LE-NEXT: stbcx. 5, 0, 3 ; PPC64LE-NEXT: bne 0, .LBB503_1 ; PPC64LE-NEXT: .LBB503_3: -; PPC64LE-NEXT: mr 3, 5 +; PPC64LE-NEXT: mr 3, 4 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr %ret = atomicrmw min i8* %ptr, i8 %val syncscope("singlethread") acq_rel @@ -8526,17 +8560,18 @@ define i8 @test503(i8* %ptr, i8 %val) { define i8 @test504(i8* %ptr, i8 %val) { ; PPC64LE-LABEL: test504: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: extsb 5, 4 ; PPC64LE-NEXT: sync ; PPC64LE-NEXT: .LBB504_1: -; PPC64LE-NEXT: lbarx 5, 0, 3 -; PPC64LE-NEXT: extsb 6, 5 -; PPC64LE-NEXT: cmpw 4, 6 +; PPC64LE-NEXT: lbarx 4, 0, 3 +; PPC64LE-NEXT: extsb 6, 4 +; PPC64LE-NEXT: cmpw 5, 6 ; PPC64LE-NEXT: bge 0, .LBB504_3 ; PPC64LE-NEXT: # %bb.2: -; PPC64LE-NEXT: stbcx. 4, 0, 3 +; PPC64LE-NEXT: stbcx. 
5, 0, 3 ; PPC64LE-NEXT: bne 0, .LBB504_1 ; PPC64LE-NEXT: .LBB504_3: -; PPC64LE-NEXT: mr 3, 5 +; PPC64LE-NEXT: mr 3, 4 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr %ret = atomicrmw min i8* %ptr, i8 %val syncscope("singlethread") seq_cst @@ -8546,16 +8581,17 @@ define i8 @test504(i8* %ptr, i8 %val) { define i16 @test505(i16* %ptr, i16 %val) { ; PPC64LE-LABEL: test505: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: extsh 5, 4 ; PPC64LE-NEXT: .LBB505_1: -; PPC64LE-NEXT: lharx 5, 0, 3 -; PPC64LE-NEXT: extsh 6, 5 -; PPC64LE-NEXT: cmpw 4, 6 +; PPC64LE-NEXT: lharx 4, 0, 3 +; PPC64LE-NEXT: extsh 6, 4 +; PPC64LE-NEXT: cmpw 5, 6 ; PPC64LE-NEXT: bge 0, .LBB505_3 ; PPC64LE-NEXT: # %bb.2: -; PPC64LE-NEXT: sthcx. 4, 0, 3 +; PPC64LE-NEXT: sthcx. 5, 0, 3 ; PPC64LE-NEXT: bne 0, .LBB505_1 ; PPC64LE-NEXT: .LBB505_3: -; PPC64LE-NEXT: mr 3, 5 +; PPC64LE-NEXT: mr 3, 4 ; PPC64LE-NEXT: blr %ret = atomicrmw min i16* %ptr, i16 %val syncscope("singlethread") monotonic ret i16 %ret @@ -8564,16 +8600,17 @@ define i16 @test505(i16* %ptr, i16 %val) { define i16 @test506(i16* %ptr, i16 %val) { ; PPC64LE-LABEL: test506: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: mr 5, 3 +; PPC64LE-NEXT: extsh 5, 4 ; PPC64LE-NEXT: .LBB506_1: -; PPC64LE-NEXT: lharx 3, 0, 5 -; PPC64LE-NEXT: extsh 6, 3 -; PPC64LE-NEXT: cmpw 4, 6 +; PPC64LE-NEXT: lharx 4, 0, 3 +; PPC64LE-NEXT: extsh 6, 4 +; PPC64LE-NEXT: cmpw 5, 6 ; PPC64LE-NEXT: bge 0, .LBB506_3 ; PPC64LE-NEXT: # %bb.2: -; PPC64LE-NEXT: sthcx. 4, 0, 5 +; PPC64LE-NEXT: sthcx. 
5, 0, 3 ; PPC64LE-NEXT: bne 0, .LBB506_1 ; PPC64LE-NEXT: .LBB506_3: +; PPC64LE-NEXT: mr 3, 4 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr %ret = atomicrmw min i16* %ptr, i16 %val syncscope("singlethread") acquire @@ -8583,17 +8620,18 @@ define i16 @test506(i16* %ptr, i16 %val) { define i16 @test507(i16* %ptr, i16 %val) { ; PPC64LE-LABEL: test507: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: extsh 5, 4 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: .LBB507_1: -; PPC64LE-NEXT: lharx 5, 0, 3 -; PPC64LE-NEXT: extsh 6, 5 -; PPC64LE-NEXT: cmpw 4, 6 +; PPC64LE-NEXT: lharx 4, 0, 3 +; PPC64LE-NEXT: extsh 6, 4 +; PPC64LE-NEXT: cmpw 5, 6 ; PPC64LE-NEXT: bge 0, .LBB507_3 ; PPC64LE-NEXT: # %bb.2: -; PPC64LE-NEXT: sthcx. 4, 0, 3 +; PPC64LE-NEXT: sthcx. 5, 0, 3 ; PPC64LE-NEXT: bne 0, .LBB507_1 ; PPC64LE-NEXT: .LBB507_3: -; PPC64LE-NEXT: mr 3, 5 +; PPC64LE-NEXT: mr 3, 4 ; PPC64LE-NEXT: blr %ret = atomicrmw min i16* %ptr, i16 %val syncscope("singlethread") release ret i16 %ret @@ -8602,17 +8640,18 @@ define i16 @test507(i16* %ptr, i16 %val) { define i16 @test508(i16* %ptr, i16 %val) { ; PPC64LE-LABEL: test508: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: extsh 5, 4 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: .LBB508_1: -; PPC64LE-NEXT: lharx 5, 0, 3 -; PPC64LE-NEXT: extsh 6, 5 -; PPC64LE-NEXT: cmpw 4, 6 +; PPC64LE-NEXT: lharx 4, 0, 3 +; PPC64LE-NEXT: extsh 6, 4 +; PPC64LE-NEXT: cmpw 5, 6 ; PPC64LE-NEXT: bge 0, .LBB508_3 ; PPC64LE-NEXT: # %bb.2: -; PPC64LE-NEXT: sthcx. 4, 0, 3 +; PPC64LE-NEXT: sthcx. 
5, 0, 3 ; PPC64LE-NEXT: bne 0, .LBB508_1 ; PPC64LE-NEXT: .LBB508_3: -; PPC64LE-NEXT: mr 3, 5 +; PPC64LE-NEXT: mr 3, 4 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr %ret = atomicrmw min i16* %ptr, i16 %val syncscope("singlethread") acq_rel @@ -8622,17 +8661,18 @@ define i16 @test508(i16* %ptr, i16 %val) { define i16 @test509(i16* %ptr, i16 %val) { ; PPC64LE-LABEL: test509: ; PPC64LE: # %bb.0: +; PPC64LE-NEXT: extsh 5, 4 ; PPC64LE-NEXT: sync ; PPC64LE-NEXT: .LBB509_1: -; PPC64LE-NEXT: lharx 5, 0, 3 -; PPC64LE-NEXT: extsh 6, 5 -; PPC64LE-NEXT: cmpw 4, 6 +; PPC64LE-NEXT: lharx 4, 0, 3 +; PPC64LE-NEXT: extsh 6, 4 +; PPC64LE-NEXT: cmpw 5, 6 ; PPC64LE-NEXT: bge 0, .LBB509_3 ; PPC64LE-NEXT: # %bb.2: -; PPC64LE-NEXT: sthcx. 4, 0, 3 +; PPC64LE-NEXT: sthcx. 5, 0, 3 ; PPC64LE-NEXT: bne 0, .LBB509_1 ; PPC64LE-NEXT: .LBB509_3: -; PPC64LE-NEXT: mr 3, 5 +; PPC64LE-NEXT: mr 3, 4 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: blr %ret = atomicrmw min i16* %ptr, i16 %val syncscope("singlethread") seq_cst diff --git a/llvm/test/CodeGen/PowerPC/sign-ext-atomics.ll b/llvm/test/CodeGen/PowerPC/sign-ext-atomics.ll new file mode 100644 index 0000000..7716dc0 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/sign-ext-atomics.ll @@ -0,0 +1,105 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=powerpc64le-linux-gnu < %s | FileCheck %s +define i16 @SEXTParam(i16 signext %0) #0 { +; CHECK-LABEL: SEXTParam: +; CHECK: # %bb.0: # %top +; CHECK-NEXT: li 4, 0 +; CHECK-NEXT: sth 4, -4(1) +; CHECK-NEXT: addi 4, 1, -4 +; CHECK-NEXT: lwsync +; CHECK-NEXT: .LBB0_1: # %top +; CHECK-NEXT: # +; CHECK-NEXT: lharx 5, 0, 4 +; CHECK-NEXT: extsh 5, 5 +; CHECK-NEXT: cmpw 3, 5 +; CHECK-NEXT: bge 0, .LBB0_3 +; CHECK-NEXT: # %bb.2: # %top +; CHECK-NEXT: # +; CHECK-NEXT: sthcx. 
3, 0, 4 +; CHECK-NEXT: bne 0, .LBB0_1 +; CHECK-NEXT: .LBB0_3: # %top +; CHECK-NEXT: lwsync +; CHECK-NEXT: lhz 3, -4(1) +; CHECK-NEXT: cmpd 7, 3, 3 +; CHECK-NEXT: bne- 7, .+4 +; CHECK-NEXT: isync +; CHECK-NEXT: blr +top: + %1 = alloca i16, align 4 + %2 = bitcast i16* %1 to i8* + store i16 0, i16* %1, align 4 + %rv.i = atomicrmw min i16* %1, i16 %0 acq_rel + %rv.i2 = load atomic i16, i16* %1 acquire, align 16 + ret i16 %rv.i2 +} + +define i16 @noSEXTParam(i16 %0) #0 { +; CHECK-LABEL: noSEXTParam: +; CHECK: # %bb.0: # %top +; CHECK-NEXT: li 4, 0 +; CHECK-NEXT: extsh 3, 3 +; CHECK-NEXT: sth 4, -4(1) +; CHECK-NEXT: addi 4, 1, -4 +; CHECK-NEXT: lwsync +; CHECK-NEXT: .LBB1_1: # %top +; CHECK-NEXT: # +; CHECK-NEXT: lharx 5, 0, 4 +; CHECK-NEXT: extsh 5, 5 +; CHECK-NEXT: cmpw 3, 5 +; CHECK-NEXT: bge 0, .LBB1_3 +; CHECK-NEXT: # %bb.2: # %top +; CHECK-NEXT: # +; CHECK-NEXT: sthcx. 3, 0, 4 +; CHECK-NEXT: bne 0, .LBB1_1 +; CHECK-NEXT: .LBB1_3: # %top +; CHECK-NEXT: lwsync +; CHECK-NEXT: lhz 3, -4(1) +; CHECK-NEXT: cmpd 7, 3, 3 +; CHECK-NEXT: bne- 7, .+4 +; CHECK-NEXT: isync +; CHECK-NEXT: blr +top: + %1 = alloca i16, align 4 + %2 = bitcast i16* %1 to i8* + store i16 0, i16* %1, align 4 + %rv.i = atomicrmw min i16* %1, i16 %0 acq_rel + %rv.i2 = load atomic i16, i16* %1 acquire, align 16 + ret i16 %rv.i2 +} + +define i16 @noSEXTLoad(i16 *%p) #0 { +; CHECK-LABEL: noSEXTLoad: +; CHECK: # %bb.0: # %top +; CHECK-NEXT: lhz 5, 0(3) +; CHECK-NEXT: li 4, 0 +; CHECK-NEXT: addi 3, 1, -4 +; CHECK-NEXT: sth 4, -4(1) +; CHECK-NEXT: extsh 4, 5 +; CHECK-NEXT: lwsync +; CHECK-NEXT: .LBB2_1: # %top +; CHECK-NEXT: # +; CHECK-NEXT: lharx 5, 0, 3 +; CHECK-NEXT: extsh 5, 5 +; CHECK-NEXT: cmpw 4, 5 +; CHECK-NEXT: bge 0, .LBB2_3 +; CHECK-NEXT: # %bb.2: # %top +; CHECK-NEXT: # +; CHECK-NEXT: sthcx. 
4, 0, 3 +; CHECK-NEXT: bne 0, .LBB2_1 +; CHECK-NEXT: .LBB2_3: # %top +; CHECK-NEXT: lwsync +; CHECK-NEXT: lhz 3, -4(1) +; CHECK-NEXT: cmpd 7, 3, 3 +; CHECK-NEXT: bne- 7, .+4 +; CHECK-NEXT: isync +; CHECK-NEXT: blr +top: + %0 = load i16, i16* %p, align 2 + %1 = alloca i16, align 4 + %2 = bitcast i16* %1 to i8* + store i16 0, i16* %1, align 4 + %rv.i = atomicrmw min i16* %1, i16 %0 acq_rel + %rv.i2 = load atomic i16, i16* %1 acquire, align 16 + ret i16 %rv.i2 +} +attributes #0 = { nounwind } -- 2.7.4