From f44fc5219f9e16f51de8eaa50a93fc6e7afcacba Mon Sep 17 00:00:00 2001 From: James Y Knight Date: Wed, 16 Mar 2016 22:12:04 +0000 Subject: [PATCH] Tweak some atomics functions in preparation for larger changes; NFC. - Rename getATOMIC to getSYNC, as llvm will soon be able to emit both '__sync' libcalls and '__atomic' libcalls, and this function is for the '__sync' ones. - getInsertFencesForAtomic() has been replaced with shouldInsertFencesForAtomic(Instruction), so that the decision can be made per-instruction. This functionality will be used soon. - emitLeadingFence/emitTrailingFence are no longer called if shouldInsertFencesForAtomic returns false, and thus don't need to check the condition themselves. llvm-svn: 263665 --- llvm/include/llvm/CodeGen/RuntimeLibcalls.h | 2 +- llvm/include/llvm/Target/TargetLowering.h | 33 ++++++---------------- llvm/lib/CodeGen/AtomicExpandPass.cpp | 27 ++++++++++-------- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 2 +- .../CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 2 +- llvm/lib/CodeGen/TargetLoweringBase.cpp | 3 +- llvm/lib/Target/ARM/ARMISelLowering.cpp | 14 ++++----- llvm/lib/Target/ARM/ARMISelLowering.h | 5 ++++ llvm/lib/Target/Hexagon/HexagonISelLowering.cpp | 1 - llvm/lib/Target/Mips/MipsISelLowering.cpp | 1 - llvm/lib/Target/Mips/MipsISelLowering.h | 4 +++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 1 - llvm/lib/Target/PowerPC/PPCISelLowering.h | 4 +++ llvm/lib/Target/Sparc/SparcISelLowering.cpp | 4 --- llvm/lib/Target/Sparc/SparcISelLowering.h | 7 +++++ llvm/lib/Target/XCore/XCoreISelLowering.cpp | 1 - llvm/lib/Target/XCore/XCoreISelLowering.h | 3 ++ 17 files changed, 58 insertions(+), 56 deletions(-) diff --git a/llvm/include/llvm/CodeGen/RuntimeLibcalls.h b/llvm/include/llvm/CodeGen/RuntimeLibcalls.h index 312c0a4..8beb55d 100644 --- a/llvm/include/llvm/CodeGen/RuntimeLibcalls.h +++ b/llvm/include/llvm/CodeGen/RuntimeLibcalls.h @@ -430,7 +430,7 @@ namespace RTLIB { /// Return the SYNC_FETCH_AND_* value for the given opcode and type, or /// UNKNOWN_LIBCALL if there is none. - Libcall getATOMIC(unsigned Opc, MVT VT); + Libcall getSYNC(unsigned Opc, MVT VT); } } diff --git a/llvm/include/llvm/Target/TargetLowering.h b/llvm/include/llvm/Target/TargetLowering.h index 649a8b6..099b22f 100644 --- a/llvm/include/llvm/Target/TargetLowering.h +++ b/llvm/include/llvm/Target/TargetLowering.h @@ -1003,12 +1003,6 @@ public: return PrefLoopAlignment; } - /// Return whether the DAG builder should automatically insert fences and - /// reduce ordering for atomics. - bool getInsertFencesForAtomic() const { - return InsertFencesForAtomic; - } - /// Return true if the target stores stack protector cookies at a fixed offset /// in some non-standard address space, and populates the address space and /// offset as appropriate. @@ -1052,6 +1046,13 @@ public: /// \name Helpers for atomic expansion. /// @{ + /// Whether AtomicExpandPass should automatically insert fences and reduce + /// ordering for this atomic. This should be true for most architectures with + /// weak memory ordering. Defaults to false. + virtual bool shouldInsertFencesForAtomic(const Instruction *I) const { + return false; + } + /// Perform a load-linked operation on Addr, returning a "Value *" with the /// corresponding pointee type. This may entail some non-trivial operations to /// truncate or reconstruct types that will be illegal in the backend. See @@ -1070,12 +1071,12 @@ public: /// Inserts in the IR a target-specific intrinsic specifying a fence. 
/// It is called by AtomicExpandPass before expanding an - /// AtomicRMW/AtomicCmpXchg/AtomicStore/AtomicLoad. + /// AtomicRMW/AtomicCmpXchg/AtomicStore/AtomicLoad + /// if shouldInsertFencesForAtomic returns true. /// RMW and CmpXchg set both IsStore and IsLoad to true. /// This function should either return a nullptr, or a pointer to an IR-level /// Instruction*. Even complex fence sequences can be represented by a /// single Instruction* through an intrinsic to be lowered later. - /// Backends with !getInsertFencesForAtomic() should keep a no-op here. /// Backends should override this method to produce target-specific intrinsic /// for their fences. /// FIXME: Please note that the default implementation here in terms of @@ -1101,9 +1102,6 @@ public: virtual Instruction *emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord, bool IsStore, bool IsLoad) const { - if (!getInsertFencesForAtomic()) - return nullptr; - if (isAtLeastRelease(Ord) && IsStore) return Builder.CreateFence(Ord); else @@ -1113,9 +1111,6 @@ public: virtual Instruction *emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord, bool IsStore, bool IsLoad) const { - if (!getInsertFencesForAtomic()) - return nullptr; - if (isAtLeastAcquire(Ord)) return Builder.CreateFence(Ord); else @@ -1441,12 +1436,6 @@ protected: MinStackArgumentAlignment = Align; } - /// Set if the DAG builder should automatically insert fences and reduce the - /// order of atomic memory operations to Monotonic. - void setInsertFencesForAtomic(bool fence) { - InsertFencesForAtomic = fence; - } - public: //===--------------------------------------------------------------------===// // Addressing mode description hooks (used by LSR etc). @@ -1856,10 +1845,6 @@ private: /// The preferred loop alignment. unsigned PrefLoopAlignment; - /// Whether the DAG builder should automatically insert fences and reduce - /// ordering for atomics. (This will be set for for most architectures with - /// weak memory ordering.) - bool InsertFencesForAtomic; /// If set to a physical register, this specifies the register that /// llvm.savestack/llvm.restorestack should save and restore. diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index 6ac8a83..c7e7efd 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -100,7 +100,7 @@ bool AtomicExpand::runOnFunction(Function &F) { assert((LI || SI || RMWI || CASI || isa(I)) && "Unknown atomic instruction"); - if (TLI->getInsertFencesForAtomic()) { + if (TLI->shouldInsertFencesForAtomic(I)) { auto FenceOrdering = Monotonic; bool IsStore, IsLoad; if (LI && isAtLeastAcquire(LI->getOrdering())) { @@ -514,12 +514,13 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { BasicBlock *BB = CI->getParent(); Function *F = BB->getParent(); LLVMContext &Ctx = F->getContext(); - // If getInsertFencesForAtomic() returns true, then the target does not want - // to deal with memory orders, and emitLeading/TrailingFence should take care - // of everything. Otherwise, emitLeading/TrailingFence are no-op and we + // If shouldInsertFencesForAtomic() returns true, then the target does not + // want to deal with memory orders, and emitLeading/TrailingFence should take + // care of everything. Otherwise, emitLeading/TrailingFence are no-op and we // should preserve the ordering. + bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI); AtomicOrdering MemOpOrder = - TLI->getInsertFencesForAtomic() ? 
Monotonic : SuccessOrder; + ShouldInsertFencesForAtomic ? Monotonic : SuccessOrder; // In implementations which use a barrier to achieve release semantics, we can // delay emitting this barrier until we know a store is actually going to be @@ -530,7 +531,7 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { // since in other cases the extra blocks naturally collapse down to the // minimal loop. Unfortunately, this puts too much stress on later // optimisations so we avoid emitting the extra logic in those cases too. - bool HasReleasedLoadBB = !CI->isWeak() && TLI->getInsertFencesForAtomic() && + bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic && SuccessOrder != Monotonic && SuccessOrder != Acquire && !F->optForMinSize(); @@ -601,7 +602,7 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { // the branch entirely. std::prev(BB->end())->eraseFromParent(); Builder.SetInsertPoint(BB); - if (UseUnconditionalReleaseBarrier) + if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier) TLI->emitLeadingFence(Builder, SuccessOrder, /*IsStore=*/true, /*IsLoad=*/true); Builder.CreateBr(StartBB); @@ -617,7 +618,7 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB); Builder.SetInsertPoint(ReleasingStoreBB); - if (!UseUnconditionalReleaseBarrier) + if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier) TLI->emitLeadingFence(Builder, SuccessOrder, /*IsStore=*/true, /*IsLoad=*/true); Builder.CreateBr(TryStoreBB); @@ -647,8 +648,9 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { // Make sure later instructions don't get reordered with a fence if // necessary. Builder.SetInsertPoint(SuccessBB); - TLI->emitTrailingFence(Builder, SuccessOrder, /*IsStore=*/true, - /*IsLoad=*/true); + if (ShouldInsertFencesForAtomic) + TLI->emitTrailingFence(Builder, SuccessOrder, /*IsStore=*/true, + /*IsLoad=*/true); Builder.CreateBr(ExitBB); Builder.SetInsertPoint(NoStoreBB); @@ -659,8 +661,9 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { Builder.CreateBr(FailureBB); Builder.SetInsertPoint(FailureBB); - TLI->emitTrailingFence(Builder, FailureOrder, /*IsStore=*/true, - /*IsLoad=*/true); + if (ShouldInsertFencesForAtomic) + TLI->emitTrailingFence(Builder, FailureOrder, /*IsStore=*/true, + /*IsLoad=*/true); Builder.CreateBr(ExitBB); // Finally, we have control-flow based knowledge of whether the cmpxchg diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 788528f..e7ab36a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -4035,7 +4035,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { case ISD::ATOMIC_LOAD_UMAX: case ISD::ATOMIC_CMP_SWAP: { MVT VT = cast(Node)->getMemoryVT().getSimpleVT(); - RTLIB::Libcall LC = RTLIB::getATOMIC(Opc, VT); + RTLIB::Libcall LC = RTLIB::getSYNC(Opc, VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!"); std::pair Tmp = ExpandChainLibCall(LC, Node, false); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index abbfb1f..5804dfe 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -1404,7 +1404,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { std::pair 
DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { unsigned Opc = Node->getOpcode(); MVT VT = cast(Node)->getMemoryVT().getSimpleVT(); - RTLIB::Libcall LC = RTLIB::getATOMIC(Opc, VT); + RTLIB::Libcall LC = RTLIB::getSYNC(Opc, VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!"); return ExpandChainLibCall(LC, Node, false); diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index ca1ce05..d714ad1 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -667,7 +667,7 @@ RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) { return UNKNOWN_LIBCALL; } -RTLIB::Libcall RTLIB::getATOMIC(unsigned Opc, MVT VT) { +RTLIB::Libcall RTLIB::getSYNC(unsigned Opc, MVT VT) { #define OP_TO_LIBCALL(Name, Enum) \ case Name: \ switch (VT.SimpleTy) { \ @@ -774,7 +774,6 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) { PrefLoopAlignment = 0; GatherAllAliasesMaxDepth = 6; MinStackArgumentAlignment = 1; - InsertFencesForAtomic = false; MinimumJumpTableEntries = 4; InitLibcallNames(LibcallRoutineNames, TM.getTargetTriple()); diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 7df5519..de18baf 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -840,6 +840,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, // the default expansion. If we are targeting a single threaded system, // then set them all for expand so we can lower them later into their // non-atomic form. + InsertFencesForAtomic = false; if (TM.Options.ThreadModel == ThreadModel::Single) setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); else if (Subtarget->hasAnyDataBarrier() && (!Subtarget->isThumb() || @@ -852,7 +853,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, // if they can be combined with nearby atomic loads and stores. if (!Subtarget->hasV8Ops()) { // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc. - setInsertFencesForAtomic(true); + InsertFencesForAtomic = true; } } else { // If there's anything we can use as a barrier, go through custom lowering @@ -11997,9 +11998,6 @@ Instruction* ARMTargetLowering::makeDMB(IRBuilder<> &Builder, Instruction* ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord, bool IsStore, bool IsLoad) const { - if (!getInsertFencesForAtomic()) - return nullptr; - switch (Ord) { case NotAtomic: case Unordered: @@ -12025,9 +12023,6 @@ Instruction* ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder, Instruction* ARMTargetLowering::emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord, bool IsStore, bool IsLoad) const { - if (!getInsertFencesForAtomic()) - return nullptr; - switch (Ord) { case NotAtomic: case Unordered: @@ -12081,6 +12076,11 @@ bool ARMTargetLowering::shouldExpandAtomicCmpXchgInIR( return true; } +bool ARMTargetLowering::shouldInsertFencesForAtomic( + const Instruction *I) const { + return InsertFencesForAtomic; +} + // This has so far only been implemented for MachO. 
bool ARMTargetLowering::useLoadStackGuardNode() const { return Subtarget->isTargetMachO(); diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index a502ca1..205efb2 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -453,6 +453,7 @@ namespace llvm { bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const override; + bool shouldInsertFencesForAtomic(const Instruction *I) const override; TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override; bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override; @@ -486,6 +487,10 @@ namespace llvm { /// unsigned ARMPCLabelIndex; + // TODO: remove this, and have shouldInsertFencesForAtomic do the proper + // check. + bool InsertFencesForAtomic; + void addTypeForNEON(MVT VT, MVT PromotedLdStVT, MVT PromotedBitwiseVT); void addDRTypeForNEON(MVT VT); void addQRTypeForNEON(MVT VT); diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp index 3740e7f..7524439 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -1724,7 +1724,6 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setPrefLoopAlignment(4); setPrefFunctionAlignment(4); setMinFunctionAlignment(2); - setInsertFencesForAtomic(false); setStackPointerRegisterToSaveRestore(HRI.getStackRegister()); if (EnableHexSDNodeSched) diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp index 2b3912f..2d96671 100644 --- a/llvm/lib/Target/Mips/MipsISelLowering.cpp +++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp @@ -396,7 +396,6 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM, setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand); } - setInsertFencesForAtomic(true); if (!Subtarget.hasMips32r2()) { setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); diff --git a/llvm/lib/Target/Mips/MipsISelLowering.h b/llvm/lib/Target/Mips/MipsISelLowering.h index 0dc683e..f1b3124 100644 --- a/llvm/lib/Target/Mips/MipsISelLowering.h +++ b/llvm/lib/Target/Mips/MipsISelLowering.h @@ -561,6 +561,10 @@ namespace llvm { unsigned getJumpTableEncoding() const override; bool useSoftFloat() const override; + bool shouldInsertFencesForAtomic(const Instruction *I) const override { + return true; + } + /// Emit a sign-extension using sll/sra, seb, or seh appropriately. 
MachineBasicBlock *emitSignExtendToI32InReg(MachineInstr *MI, MachineBasicBlock *BB, diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 7eab758..9d1eee0 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -916,7 +916,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, break; } - setInsertFencesForAtomic(true); if (Subtarget.enableMachineScheduler()) setSchedulingPreference(Sched::Source); diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 44bcb89..c83c86f 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -508,6 +508,10 @@ namespace llvm { unsigned getPrefLoopAlignment(MachineLoop *ML) const override; + bool shouldInsertFencesForAtomic(const Instruction *I) const override { + return true; + } + Instruction* emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord, bool IsStore, bool IsLoad) const override; Instruction* emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord, diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp index 3761312..d08d30a 100644 --- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp +++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp @@ -1603,10 +1603,6 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM, } // ATOMICs. - // FIXME: We insert fences for each atomics and generate sub-optimal code - // for PSO/TSO. Also, implement other atomicrmw operations. - - setInsertFencesForAtomic(true); setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Legal); setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.h b/llvm/lib/Target/Sparc/SparcISelLowering.h index 4e46709..c470fa8 100644 --- a/llvm/lib/Target/Sparc/SparcISelLowering.h +++ b/llvm/lib/Target/Sparc/SparcISelLowering.h @@ -180,6 +180,13 @@ namespace llvm { return VT != MVT::f128; } + bool shouldInsertFencesForAtomic(const Instruction *I) const override { + // FIXME: We insert fences for each atomics and generate + // sub-optimal code for PSO/TSO. (Approximately nobody uses any + // mode but TSO, which makes this even more silly) + return true; + } + void ReplaceNodeResults(SDNode *N, SmallVectorImpl& Results, SelectionDAG &DAG) const override; diff --git a/llvm/lib/Target/XCore/XCoreISelLowering.cpp b/llvm/lib/Target/XCore/XCoreISelLowering.cpp index ef572b7..90da23b 100644 --- a/llvm/lib/Target/XCore/XCoreISelLowering.cpp +++ b/llvm/lib/Target/XCore/XCoreISelLowering.cpp @@ -156,7 +156,6 @@ XCoreTargetLowering::XCoreTargetLowering(const TargetMachine &TM, // Atomic operations // We request a fence for ATOMIC_* instructions, to reduce them to Monotonic. // As we are always Sequential Consistent, an ATOMIC_FENCE becomes a no OP. 
-  setInsertFencesForAtomic(true);
   setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
   setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
   setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
diff --git a/llvm/lib/Target/XCore/XCoreISelLowering.h b/llvm/lib/Target/XCore/XCoreISelLowering.h
index b6f09ff..287a77e 100644
--- a/llvm/lib/Target/XCore/XCoreISelLowering.h
+++ b/llvm/lib/Target/XCore/XCoreISelLowering.h
@@ -229,6 +229,9 @@ namespace llvm {
                         bool isVarArg,
                         const SmallVectorImpl<ISD::OutputArg> &ArgsFlags,
                         LLVMContext &Context) const override;
+    bool shouldInsertFencesForAtomic(const Instruction *I) const override {
+      return true;
+    }
   };
 }

-- 
2.7.4
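
Example of how a backend adapts to this interface: a minimal sketch, not part of the
patch itself. "MyTargetLowering" and its constructor are hypothetical; only the two
overridden hooks correspond to the API introduced above, and their bodies mirror the
default implementations shown in TargetLowering.h.

  // Hypothetical backend, shown only to illustrate the new hooks.
  #include "llvm/IR/IRBuilder.h"
  #include "llvm/Target/TargetLowering.h"
  using namespace llvm;

  class MyTargetLowering : public TargetLowering {
  public:
    explicit MyTargetLowering(const TargetMachine &TM) : TargetLowering(TM) {}

    // Queried by AtomicExpandPass once per atomic instruction; returning true
    // requests the Monotonic-plus-explicit-fences expansion (what the old
    // constructor-time setInsertFencesForAtomic(true) used to enable globally,
    // now decidable per instruction).
    bool shouldInsertFencesForAtomic(const Instruction *I) const override {
      return true;
    }

    // No early-out is needed any more: AtomicExpandPass only calls the fence
    // emitters when shouldInsertFencesForAtomic(I) returned true.
    Instruction *emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
                                  bool IsStore, bool IsLoad) const override {
      if (isAtLeastRelease(Ord) && IsStore)
        return Builder.CreateFence(Ord);
      return nullptr;
    }
  };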