From 4789fc75d3501f14cfbd5b102f173721d498ff58 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Wed, 21 Apr 2021 12:12:28 +0100 Subject: [PATCH] AArch64: support i128 cmpxchg in GlobalISel. There are three essentially different cases to handle: * -O1, no LSE. The IR is expanded to ldxp/stxp and we need patterns to select them. * -O0, no LSE. We get G_ATOMIC_CMPXCHG, and need to produce CMP_SWAP_N pseudos. The registers are all 64-bit so this is easy. * LSE. We get G_ATOMIC_CMPXCHG and need to produce a CASP instruction with XSeqPair registers. The last case is by far the hardest, and adds 128-bit GPR support as a byproduct. --- .../Target/AArch64/AArch64GenRegisterBankInfo.def | 36 ++++---- llvm/lib/Target/AArch64/AArch64InstrGISel.td | 5 ++ llvm/lib/Target/AArch64/AArch64RegisterBanks.td | 2 +- .../AArch64/GISel/AArch64InstructionSelector.cpp | 34 ++++++-- .../Target/AArch64/GISel/AArch64LegalizerInfo.cpp | 95 +++++++++++++++++++++- .../Target/AArch64/GISel/AArch64LegalizerInfo.h | 2 + .../AArch64/GISel/AArch64RegisterBankInfo.cpp | 21 +++-- .../Target/AArch64/GISel/AArch64RegisterBankInfo.h | 19 +++-- .../CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll | 52 ++++++++++++ .../AArch64/GlobalISel/legalize-cmpxchg-128.mir | 73 +++++++++++++++++ .../GlobalISel/legalizer-info-validation.mir | 1 - .../CodeGen/AArch64/GlobalISel/regbank-extract.mir | 21 +++++ 12 files changed, 317 insertions(+), 44 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmpxchg-128.mir diff --git a/llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def b/llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def index 528756b..87aef1d 100644 --- a/llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def +++ b/llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def @@ -29,6 +29,8 @@ RegisterBankInfo::PartialMapping AArch64GenRegisterBankInfo::PartMappings[]{ {0, 32, AArch64::GPRRegBank}, // 7:
GPR 64-bit value. {0, 64, AArch64::GPRRegBank}, + // 8: GPR 128-bit value. + {0, 128, AArch64::GPRRegBank}, }; // ValueMappings. @@ -66,51 +68,55 @@ RegisterBankInfo::ValueMapping AArch64GenRegisterBankInfo::ValMappings[]{ {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1}, - // 22: GPR 64-bit value. <-- This must match Last3OpsIdx. + // 22: GPR 64-bit value. {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1}, + // 25: GPR 128-bit value. <-- This must match Last3OpsIdx. + {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR128 - PMI_Min], 1}, + {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR128 - PMI_Min], 1}, + {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR128 - PMI_Min], 1}, // Cross register bank copies. - // 25: FPR 16-bit value to GPR 16-bit. <-- This must match + // 28: FPR 16-bit value to GPR 16-bit. <-- This must match // FirstCrossRegCpyIdx. // Note: This is the kind of copy we see with physical registers. {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR16 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1}, - // 27: FPR 32-bit value to GPR 32-bit value. + // 30: FPR 32-bit value to GPR 32-bit value. {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1}, - // 29: FPR 64-bit value to GPR 64-bit value. + // 32: FPR 64-bit value to GPR 64-bit value. 
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1}, - // 31: FPR 128-bit value to GPR 128-bit value (invalid) + // 34: FPR 128-bit value to GPR 128-bit value (invalid) {nullptr, 1}, {nullptr, 1}, - // 33: FPR 256-bit value to GPR 256-bit value (invalid) + // 36: FPR 256-bit value to GPR 256-bit value (invalid) {nullptr, 1}, {nullptr, 1}, - // 35: FPR 512-bit value to GPR 512-bit value (invalid) + // 38: FPR 512-bit value to GPR 512-bit value (invalid) {nullptr, 1}, {nullptr, 1}, - // 37: GPR 32-bit value to FPR 32-bit value. + // 40: GPR 32-bit value to FPR 32-bit value. {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1}, - // 39: GPR 64-bit value to FPR 64-bit value. <-- This must match + // 42: GPR 64-bit value to FPR 64-bit value. <-- This must match // LastCrossRegCpyIdx. {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1}, - // 41: FPExt: 16 to 32. <-- This must match FPExt16To32Idx. + // 44: FPExt: 16 to 32. <-- This must match FPExt16To32Idx. {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR16 - PMI_Min], 1}, - // 43: FPExt: 16 to 32. <-- This must match FPExt16To64Idx. + // 46: FPExt: 16 to 32. <-- This must match FPExt16To64Idx. {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR16 - PMI_Min], 1}, - // 45: FPExt: 32 to 64. <-- This must match FPExt32To64Idx. + // 48: FPExt: 32 to 64. <-- This must match FPExt32To64Idx. {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1}, - // 47: FPExt vector: 64 to 128. <-- This must match FPExt64To128Idx. + // 50: FPExt vector: 64 to 128. 
<-- This must match FPExt64To128Idx. {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR128 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1}, - // 49: Shift scalar with 64 bit shift imm + // 52: Shift scalar with 64 bit shift imm {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1}, @@ -167,6 +173,8 @@ unsigned AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(unsigned RBIdx, return 0; if (Size <= 64) return 1; + if (Size <= 128) + return 2; return -1; } if (RBIdx == PMI_FirstFPR) { diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td index 2b39ceb..58b6dca 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td +++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td @@ -280,3 +280,8 @@ def : Pat<(atomic_cmp_swap_32 GPR64:$addr, GPR32:$desired, GPR32:$new), def : Pat<(atomic_cmp_swap_64 GPR64:$addr, GPR64:$desired, GPR64:$new), (CMP_SWAP_64 GPR64:$addr, GPR64:$desired, GPR64:$new)>; } + +def : Pat<(int_aarch64_stlxp GPR64:$lo, GPR64:$hi, GPR64:$addr), + (STLXPX GPR64:$lo, GPR64:$hi, GPR64:$addr)>; +def : Pat<(int_aarch64_stxp GPR64:$lo, GPR64:$hi, GPR64:$addr), + (STXPX GPR64:$lo, GPR64:$hi, GPR64:$addr)>; diff --git a/llvm/lib/Target/AArch64/AArch64RegisterBanks.td b/llvm/lib/Target/AArch64/AArch64RegisterBanks.td index 7bbd992..615ce7d 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterBanks.td +++ b/llvm/lib/Target/AArch64/AArch64RegisterBanks.td @@ -10,7 +10,7 @@ //===----------------------------------------------------------------------===// /// General Purpose Registers: W, X. -def GPRRegBank : RegisterBank<"GPR", [GPR64all]>; +def GPRRegBank : RegisterBank<"GPR", [XSeqPairsClass]>; /// Floating Point/Vector Registers: B, H, S, D, Q. 
def FPRRegBank : RegisterBank<"FPR", [QQQQ]>; diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index c763b7b..3fd1f04 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -490,6 +490,8 @@ getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB, if (Ty.getSizeInBits() == 64) return GetAllRegSet ? &AArch64::GPR64allRegClass : &AArch64::GPR64RegClass; + if (Ty.getSizeInBits() == 128) + return &AArch64::XSeqPairsClassRegClass; return nullptr; } @@ -522,6 +524,8 @@ getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits, if (SizeInBits == 64) return GetAllRegSet ? &AArch64::GPR64allRegClass : &AArch64::GPR64RegClass; + if (SizeInBits == 128) + return &AArch64::XSeqPairsClassRegClass; } if (RegBankID == AArch64::FPRRegBankID) { @@ -2465,19 +2469,24 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { if (DstTy.getSizeInBits() != 64) return false; + unsigned Offset = I.getOperand(2).getImm(); + if (Offset % 64 != 0) + return false; + + // Check we have the right regbank always. const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI); const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); - // Check we have the right regbank always. - assert(SrcRB.getID() == AArch64::FPRRegBankID && - DstRB.getID() == AArch64::FPRRegBankID && - "Wrong extract regbank!"); - (void)SrcRB; + assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!"); + + if (SrcRB.getID() == AArch64::GPRRegBankID) { + MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}) + .addUse(SrcReg, 0, Offset == 0 ? AArch64::sube64 : AArch64::subo64); + I.eraseFromParent(); + return true; + } // Emit the same code as a vector extract. // Offset must be a multiple of 64. 
- unsigned Offset = I.getOperand(2).getImm(); - if (Offset % 64 != 0) - return false; unsigned LaneIdx = Offset / 64; MachineInstr *Extract = emitExtractVectorElt( DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB); @@ -4900,6 +4909,15 @@ bool AArch64InstructionSelector::selectIntrinsicWithSideEffects( switch (IntrinID) { default: return false; + case Intrinsic::aarch64_ldxp: + case Intrinsic::aarch64_ldaxp: { + auto NewI = MIB.buildInstr( + IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX, + {I.getOperand(0).getReg(), I.getOperand(1).getReg()}, + {I.getOperand(3)}); + NewI.cloneMemRefs(I); + break; + } case Intrinsic::trap: MIB.buildInstr(AArch64::BRK, {}, {}).addImm(1); break; diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index edaf127..398f1ca 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "AArch64LegalizerInfo.h" +#include "AArch64RegisterBankInfo.h" #include "AArch64Subtarget.h" #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" @@ -504,14 +505,19 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS) .lowerIf( - all(typeInSet(0, {s8, s16, s32, s64}), typeIs(1, s1), typeIs(2, p0))); + all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(1, s1), typeIs(2, p0))); + + getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG) + .legalIf(all(typeInSet(0, {s8, s16, s32, s64}), typeIs(1, p0))) + .customIf([](const LegalityQuery &Query) { + return Query.Types[0].getSizeInBits() == 128; + }); getActionDefinitionsBuilder( {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_AND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR, G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, - 
G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX, G_ATOMIC_CMPXCHG}) - .legalIf(all( - typeInSet(0, {s8, s16, s32, s64}), typeIs(1, p0))); + G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX}) + .legalIf(all(typeInSet(0, {s8, s16, s32, s64}), typeIs(1, p0))); getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0}); @@ -768,6 +774,8 @@ bool AArch64LegalizerInfo::legalizeCustom(LegalizerHelper &Helper, return legalizeRotate(MI, MRI, Helper); case TargetOpcode::G_CTPOP: return legalizeCTPOP(MI, MRI, Helper); + case TargetOpcode::G_ATOMIC_CMPXCHG: + return legalizeAtomicCmpxchg128(MI, MRI, Helper); } llvm_unreachable("expected switch to return"); @@ -1056,3 +1064,82 @@ bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI, MI.eraseFromParent(); return true; } + +bool AArch64LegalizerInfo::legalizeAtomicCmpxchg128( + MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const { + MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; + LLT s64 = LLT::scalar(64); + auto Addr = MI.getOperand(1).getReg(); + auto DesiredI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(2)); + auto NewI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(3)); + auto DstLo = MRI.createGenericVirtualRegister(s64); + auto DstHi = MRI.createGenericVirtualRegister(s64); + + MachineInstrBuilder CAS; + if (ST->hasLSE()) { + // We have 128-bit CASP instructions taking XSeqPair registers, which are + // s128. We need the merge/unmerge to bracket the expansion and pair up with + // the rest of the MIR so we must reassemble the extracted registers into a + // 128-bit known-regclass one with code like this: + // + // %in1 = REG_SEQUENCE Lo, Hi ; One for each input + // %out = CASP %in1, ... 
+ // %OldLo = G_EXTRACT %out, 0 + // %OldHi = G_EXTRACT %out, 64 + auto Ordering = (*MI.memoperands_begin())->getOrdering(); + unsigned Opcode; + switch (Ordering) { + case AtomicOrdering::Acquire: + Opcode = AArch64::CASPAX; + break; + case AtomicOrdering::Release: + Opcode = AArch64::CASPLX; + break; + case AtomicOrdering::AcquireRelease: + case AtomicOrdering::SequentiallyConsistent: + Opcode = AArch64::CASPALX; + break; + default: + Opcode = AArch64::CASPX; + break; + } + + LLT s128 = LLT::scalar(128); + auto CASDst = MRI.createGenericVirtualRegister(s128); + auto CASDesired = MRI.createGenericVirtualRegister(s128); + auto CASNew = MRI.createGenericVirtualRegister(s128); + MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASDesired}, {}) + .addUse(DesiredI->getOperand(0).getReg()) + .addImm(AArch64::sube64) + .addUse(DesiredI->getOperand(1).getReg()) + .addImm(AArch64::subo64); + MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASNew}, {}) + .addUse(NewI->getOperand(0).getReg()) + .addImm(AArch64::sube64) + .addUse(NewI->getOperand(1).getReg()) + .addImm(AArch64::subo64); + + CAS = MIRBuilder.buildInstr(Opcode, {CASDst}, {CASDesired, CASNew, Addr}); + + MIRBuilder.buildExtract({DstLo}, {CASDst}, 0); + MIRBuilder.buildExtract({DstHi}, {CASDst}, 64); + } else { + // The -O0 CMP_SWAP_128 is friendlier to generate code for because LDXP/STXP + // can take arbitrary registers so it just has the normal GPR64 operands the + // rest of AArch64 is expecting. 
+ auto Scratch = MRI.createVirtualRegister(&AArch64::GPR64RegClass); + CAS = MIRBuilder.buildInstr(AArch64::CMP_SWAP_128, {DstLo, DstHi, Scratch}, + {Addr, DesiredI->getOperand(0), + DesiredI->getOperand(1), NewI->getOperand(0), + NewI->getOperand(1)}); + } + + CAS.cloneMemRefs(MI); + constrainSelectedInstRegOperands(*CAS, *ST->getInstrInfo(), + *MRI.getTargetRegisterInfo(), + *ST->getRegBankInfo()); + + MIRBuilder.buildMerge(MI.getOperand(0), {DstLo, DstHi}); + MI.eraseFromParent(); + return true; +} diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h index 51ae105..727aa31 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h @@ -54,6 +54,8 @@ private: LegalizerHelper &Helper) const; bool legalizeCTPOP(MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const; + bool legalizeAtomicCmpxchg128(MachineInstr &MI, MachineRegisterInfo &MRI, + LegalizerHelper &Helper) const; const AArch64Subtarget *ST; }; } // End llvm namespace. diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp index 84e2a00..705654d 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp @@ -69,7 +69,7 @@ AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI) // GR64all + its subclasses. assert(RBGPR.covers(*TRI.getRegClass(AArch64::GPR32RegClassID)) && "Subclass not added?"); - assert(RBGPR.getSize() == 64 && "GPRs should hold up to 64-bit"); + assert(RBGPR.getSize() == 128 && "GPRs should hold up to 128-bit"); // The FPR register bank is fully defined by all the registers in // GR64all + its subclasses. 
@@ -87,7 +87,7 @@ AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI) // Check that the TableGen'ed like file is in sync we our expectations. // First, the Idx. assert(checkPartialMappingIdx(PMI_FirstGPR, PMI_LastGPR, - {PMI_GPR32, PMI_GPR64}) && + {PMI_GPR32, PMI_GPR64, PMI_GPR128}) && "PartialMappingIdx's are incorrectly ordered"); assert(checkPartialMappingIdx(PMI_FirstFPR, PMI_LastFPR, {PMI_FPR16, PMI_FPR32, PMI_FPR64, PMI_FPR128, @@ -104,6 +104,7 @@ AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI) CHECK_PARTIALMAP(PMI_GPR32, 0, 32, RBGPR); CHECK_PARTIALMAP(PMI_GPR64, 0, 64, RBGPR); + CHECK_PARTIALMAP(PMI_GPR128, 0, 128, RBGPR); CHECK_PARTIALMAP(PMI_FPR16, 0, 16, RBFPR); CHECK_PARTIALMAP(PMI_FPR32, 0, 32, RBFPR); CHECK_PARTIALMAP(PMI_FPR64, 0, 64, RBFPR); @@ -124,6 +125,7 @@ AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI) CHECK_VALUEMAP(GPR, 32); CHECK_VALUEMAP(GPR, 64); + CHECK_VALUEMAP(GPR, 128); CHECK_VALUEMAP(FPR, 16); CHECK_VALUEMAP(FPR, 32); CHECK_VALUEMAP(FPR, 64); @@ -142,6 +144,7 @@ AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI) CHECK_VALUEMAP_3OPS(GPR, 32); CHECK_VALUEMAP_3OPS(GPR, 64); + CHECK_VALUEMAP_3OPS(GPR, 128); CHECK_VALUEMAP_3OPS(FPR, 32); CHECK_VALUEMAP_3OPS(FPR, 64); CHECK_VALUEMAP_3OPS(FPR, 128); @@ -871,12 +874,16 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OpRegBankIdx[3] = PMI_FirstGPR; break; case TargetOpcode::G_EXTRACT: { - // For s128 sources we have to use fpr. + // For s128 sources we have to use fpr unless we know otherwise. + auto Src = MI.getOperand(1).getReg(); LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); - if (SrcTy.getSizeInBits() == 128) { - OpRegBankIdx[0] = PMI_FirstFPR; - OpRegBankIdx[1] = PMI_FirstFPR; - } + if (SrcTy.getSizeInBits() != 128) + break; + auto Idx = MRI.getRegClassOrNull(Src) == &AArch64::XSeqPairsClassRegClass + ? 
PMI_FirstGPR + : PMI_FirstFPR; + OpRegBankIdx[0] = Idx; + OpRegBankIdx[1] = Idx; break; } case TargetOpcode::G_BUILD_VECTOR: { diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h index 019017b..2d76e48 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h +++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h @@ -34,8 +34,9 @@ protected: PMI_FPR512, PMI_GPR32, PMI_GPR64, + PMI_GPR128, PMI_FirstGPR = PMI_GPR32, - PMI_LastGPR = PMI_GPR64, + PMI_LastGPR = PMI_GPR128, PMI_FirstFPR = PMI_FPR16, PMI_LastFPR = PMI_FPR512, PMI_Min = PMI_FirstFPR, @@ -48,16 +49,16 @@ protected: enum ValueMappingIdx { InvalidIdx = 0, First3OpsIdx = 1, - Last3OpsIdx = 22, + Last3OpsIdx = 25, DistanceBetweenRegBanks = 3, - FirstCrossRegCpyIdx = 25, - LastCrossRegCpyIdx = 39, + FirstCrossRegCpyIdx = 28, + LastCrossRegCpyIdx = 42, DistanceBetweenCrossRegCpy = 2, - FPExt16To32Idx = 41, - FPExt16To64Idx = 43, - FPExt32To64Idx = 45, - FPExt64To128Idx = 47, - Shift64Imm = 49 + FPExt16To32Idx = 44, + FPExt16To64Idx = 46, + FPExt32To64Idx = 48, + FPExt64To128Idx = 50, + Shift64Imm = 52, }; static bool checkPartialMap(unsigned Idx, unsigned ValStartIdx, diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll new file mode 100644 index 0000000..43d3a1e --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll @@ -0,0 +1,52 @@ +; RUN: llc < %s -mtriple=arm64-linux-gnu -verify-machineinstrs -global-isel -global-isel-abort=1 | FileCheck %s --check-prefix=CHECK-LLSC-O1 +; RUN: llc < %s -mtriple=arm64-linux-gnu -verify-machineinstrs -mcpu=apple-a13 -global-isel -global-isel-abort=1 | FileCheck %s --check-prefix=CHECK-CAS-O1 +; RUN: llc < %s -mtriple=arm64-linux-gnu -verify-machineinstrs -O0 -global-isel -global-isel-abort=1 | FileCheck %s --check-prefix=CHECK-LLSC-O0 +; RUN: llc < %s -mtriple=arm64-linux-gnu 
-verify-machineinstrs -O0 -mcpu=apple-a13 -global-isel -global-isel-abort=1 | FileCheck %s --check-prefix=CHECK-CAS-O0 +@var = global i128 0 + +define void @val_compare_and_swap(i128* %p, i128 %oldval, i128 %newval) { +; CHECK-LLSC-O1-LABEL: val_compare_and_swap: +; CHECK-LLSC-O1: ldaxp {{x[0-9]+}}, {{x[0-9]+}}, [x0] +; [... LOTS of stuff that is generic IR unrelated to atomic operations ...] +; CHECK-LLSC-O1: stxp {{w[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, [x0] +; +; CHECK-CAS-O1-LABEL: val_compare_and_swap: +; CHECK-CAS-O1: caspa x2, x3, x4, x5, [x0] +; CHECK-CAS-O1: mov v[[OLD:[0-9]+]].d[0], x2 +; CHECK-CAS-O1: mov v[[OLD]].d[1], x3 +; CHECK-CAS-O1: str q[[OLD]], [x0] + +; CHECK-LLSC-O0-LABEL: val_compare_and_swap: +; CHECK-LLSC-O0: .LBB0_1: +; CHECK-LLSC-O0: ldaxp [[OLD_LO:x[0-9]+]], [[OLD_HI:x[0-9]+]], [x0] +; CHECK-LLSC-O0: cmp [[OLD_LO]], x2 +; CHECK-LLSC-O0: cset [[EQUAL_TMP:w[0-9]+]], ne +; CHECK-LLSC-O0: cmp [[OLD_HI]], x3 +; CHECK-LLSC-O0: cinc [[EQUAL:w[0-9]+]], [[EQUAL_TMP]], ne +; CHECK-LLSC-O0: cbnz [[EQUAL]], .LBB0_3 +; CHECK-LLSC-O0: stlxp [[STATUS:w[0-9]+]], x4, x5, [x0] +; CHECK-LLSC-O0: cbnz [[STATUS]], .LBB0_1 +; CHECK-LLSC-O0: .LBB0_3: +; CHECK-LLSC-O0: mov v[[OLD:[0-9]+]].d[0], [[OLD_LO]] +; CHECK-LLSC-O0: mov v[[OLD]].d[1], [[OLD_HI]] +; CHECK-LLSC-O0: str q[[OLD]], [x0] + + +; CHECK-CAS-O0-LABEL: val_compare_and_swap: +; CHECK-CAS-O0: str x3, [sp, #[[SLOT:[0-9]+]]] +; CHECK-CAS-O0: mov [[NEW_HI_TMP:x[0-9]+]], x5 +; CHECK-CAS-O0: ldr [[DESIRED_HI_TMP:x[0-9]+]], [sp, #[[SLOT]]] +; CHECK-CAS-O0: mov [[DESIRED_HI:x[0-9]+]], [[DESIRED_HI_TMP]] +; CHECK-CAS-O0: mov [[NEW_HI:x[0-9]+]], [[NEW_HI_TMP]] +; CHECK-CAS-O0: caspa x2, [[DESIRED_HI]], x4, [[NEW_HI]], [x0] +; CHECK-CAS-O0: mov [[OLD_LO:x[0-9]+]], x2 +; CHECK-CAS-O0: mov [[OLD_HI:x[0-9]+]], x3 +; CHECK-CAS-O0: mov v[[OLD:[0-9]+]].d[0], [[OLD_LO]] +; CHECK-CAS-O0: mov v[[OLD]].d[1], [[OLD_HI]] +; CHECK-CAS-O0: str q[[OLD]], [x0] + +%pair = cmpxchg i128* %p, i128 %oldval, i128 %newval acquire 
acquire + %val = extractvalue { i128, i1 } %pair, 0 + store i128 %val, i128* %p + ret void +} diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmpxchg-128.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmpxchg-128.mir new file mode 100644 index 0000000..7497563 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmpxchg-128.mir @@ -0,0 +1,73 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=arm64-apple-ios -global-isel -run-pass=legalizer %s -o - | FileCheck %s --check-prefix=CHECK-NOLSE +# RUN: llc -mtriple=arm64-apple-ios -mcpu=apple-a13 -global-isel -run-pass=legalizer %s -o - | FileCheck %s --check-prefix=CHECK-LSE + +--- +name: compare_swap_128 +alignment: 4 +tracksRegLiveness: true +body: | + bb.1: + liveins: $x0_x1, $x1 + + liveins: $x0, $x1, $x2, $x3, $x4 + + ; CHECK-LABEL: name: compare_swap_128 + ; CHECK: liveins: $x0_x1, $x1, $x0, $x1, $x2, $x3, $x4 + ; CHECK: [[COPY:%[0-9]+]]:gpr64(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3 + ; CHECK: [[COPY4:%[0-9]+]]:_(s64) = COPY $x4 + ; CHECK: [[COPY5:%[0-9]+]]:gpr64(s64) = COPY [[COPY1]](s64) + ; CHECK: [[COPY6:%[0-9]+]]:gpr64(s64) = COPY [[COPY2]](s64) + ; CHECK: [[COPY7:%[0-9]+]]:gpr64(s64) = COPY [[COPY3]](s64) + ; CHECK: [[COPY8:%[0-9]+]]:gpr64(s64) = COPY [[COPY4]](s64) + ; CHECK: early-clobber %13:gpr64(s64), early-clobber %14:gpr64(s64), early-clobber %16:gpr32 = CMP_SWAP_128 [[COPY]](p0), [[COPY5]](s64), [[COPY6]](s64), [[COPY7]](s64), [[COPY8]](s64) :: (load store acquire acquire 16) + ; CHECK: [[COPY9:%[0-9]+]]:gpr64 = COPY %16 + ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES %13(s64), %14(s64) + ; CHECK: G_STORE [[MV]](s128), [[COPY]](p0) :: (store 16) + ; CHECK: RET_ReallyLR + ; CHECK-NOLSE-LABEL: name: compare_swap_128 + ; CHECK-NOLSE: liveins: $x0_x1, $x1, $x0, $x1, $x2, $x3, $x4 + ; CHECK-NOLSE: 
[[COPY:%[0-9]+]]:gpr64(p0) = COPY $x0 + ; CHECK-NOLSE: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 + ; CHECK-NOLSE: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 + ; CHECK-NOLSE: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3 + ; CHECK-NOLSE: [[COPY4:%[0-9]+]]:_(s64) = COPY $x4 + ; CHECK-NOLSE: [[COPY5:%[0-9]+]]:gpr64(s64) = COPY [[COPY1]](s64) + ; CHECK-NOLSE: [[COPY6:%[0-9]+]]:gpr64(s64) = COPY [[COPY2]](s64) + ; CHECK-NOLSE: [[COPY7:%[0-9]+]]:gpr64(s64) = COPY [[COPY3]](s64) + ; CHECK-NOLSE: [[COPY8:%[0-9]+]]:gpr64(s64) = COPY [[COPY4]](s64) + ; CHECK-NOLSE: early-clobber %13:gpr64(s64), early-clobber %14:gpr64(s64), early-clobber %16:gpr32 = CMP_SWAP_128 [[COPY]](p0), [[COPY5]](s64), [[COPY6]](s64), [[COPY7]](s64), [[COPY8]](s64) :: (load store acquire acquire 16) + ; CHECK-NOLSE: [[COPY9:%[0-9]+]]:gpr64 = COPY %16 + ; CHECK-NOLSE: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES %13(s64), %14(s64) + ; CHECK-NOLSE: G_STORE [[MV]](s128), [[COPY]](p0) :: (store 16) + ; CHECK-NOLSE: RET_ReallyLR + ; CHECK-LSE-LABEL: name: compare_swap_128 + ; CHECK-LSE: liveins: $x0_x1, $x1, $x0, $x1, $x2, $x3, $x4 + ; CHECK-LSE: [[COPY:%[0-9]+]]:gpr64sp(p0) = COPY $x0 + ; CHECK-LSE: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 + ; CHECK-LSE: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 + ; CHECK-LSE: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3 + ; CHECK-LSE: [[COPY4:%[0-9]+]]:_(s64) = COPY $x4 + ; CHECK-LSE: [[REG_SEQUENCE:%[0-9]+]]:xseqpairsclass(s128) = REG_SEQUENCE [[COPY1]](s64), %subreg.sube64, [[COPY2]](s64), %subreg.subo64 + ; CHECK-LSE: [[REG_SEQUENCE1:%[0-9]+]]:xseqpairsclass(s128) = REG_SEQUENCE [[COPY3]](s64), %subreg.sube64, [[COPY4]](s64), %subreg.subo64 + ; CHECK-LSE: [[CASPAX:%[0-9]+]]:xseqpairsclass(s128) = CASPAX [[REG_SEQUENCE]](s128), [[REG_SEQUENCE1]](s128), [[COPY]](p0) :: (load store acquire acquire 16) + ; CHECK-LSE: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[CASPAX]](s128), 0 + ; CHECK-LSE: [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT [[CASPAX]](s128), 64 + ; CHECK-LSE: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES 
[[EXTRACT]](s64), [[EXTRACT1]](s64) + ; CHECK-LSE: G_STORE [[MV]](s128), [[COPY]](p0) :: (store 16) + ; CHECK-LSE: RET_ReallyLR + %0:_(p0) = COPY $x0 + %3:_(s64) = COPY $x1 + %4:_(s64) = COPY $x2 + %1:_(s128) = G_MERGE_VALUES %3(s64), %4(s64) + %5:_(s64) = COPY $x3 + %6:_(s64) = COPY $x4 + %2:_(s128) = G_MERGE_VALUES %5(s64), %6(s64) + %7:_(s128), %8:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS %0(p0), %1, %2 :: (load store acquire acquire 16) + G_STORE %7(s128), %0(p0) :: (store 16) + RET_ReallyLR + +... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir index c888ac2..8388ab5 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -180,7 +180,6 @@ # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: G_ATOMIC_CMPXCHG (opcode {{[0-9]+}}): 2 type indices, 0 imm indices -# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: G_ATOMICRMW_XCHG (opcode {{[0-9]+}}): 2 type indices, 0 imm indices diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-extract.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-extract.mir index 867e6a0..f9699d3 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-extract.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-extract.mir @@ -18,6 +18,27 @@ body: | %0:_(s128) = COPY $q0 %1:_(s64) = G_EXTRACT %0(s128), 0 $d2 = COPY %1(s64) + RET_ReallyLR implicit $d2 ... 
+--- +name: extract_s64_s128_gpr +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $x0_x1, $x1 + + ; CHECK-LABEL: name: extract_s64_s128_gpr + ; CHECK: liveins: $x0_x1, $x1 + ; CHECK: [[CASPX:%[0-9]+]]:xseqpairsclass(s128) = CASPX $x0_x1, $x0_x1, $x0 + ; CHECK: [[EXTRACT:%[0-9]+]]:gpr(s64) = G_EXTRACT [[CASPX]](s128), 0 + ; CHECK: RET_ReallyLR + %0:xseqpairsclass = CASPX $x0_x1, $x0_x1, $x0 + %1:_(s64) = G_EXTRACT %0:xseqpairsclass(s128), 0 + + RET_ReallyLR + +... -- 2.7.4