From fde00fc25201d1ab44f31e30c417a0a2af386f0f Mon Sep 17 00:00:00 2001 From: Matthias Braun Date: Tue, 13 Dec 2016 19:08:17 +0000 Subject: [PATCH] Revert "AArch64CollectLOH: Rewrite as block-local analysis." This is not always behaving as expected as it turns out block live-in lists are only correct most of the time. Still waiting for reviews on https://reviews.llvm.org/D27559 to have them correct all of the time. See also http://llvm.org/PR31361, rdar://25117107 This reverts commit r288567. This reverts commit r288561. llvm-svn: 289570 --- llvm/lib/Target/AArch64/AArch64CollectLOH.cpp | 1120 +++++++++++++++----- .../AArch64/arm64-collect-loh-garbage-crash.ll | 2 +- llvm/test/CodeGen/AArch64/arm64-collect-loh-str.ll | 2 +- llvm/test/CodeGen/AArch64/arm64-collect-loh.ll | 17 +- llvm/test/CodeGen/AArch64/loh.mir | 180 ---- 5 files changed, 850 insertions(+), 471 deletions(-) delete mode 100644 llvm/test/CodeGen/AArch64/loh.mir diff --git a/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp b/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp index 142b19e..7666011 100644 --- a/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp +++ b/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp @@ -110,34 +110,72 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; #define DEBUG_TYPE "aarch64-collect-loh" +static cl::opt +PreCollectRegister("aarch64-collect-loh-pre-collect-register", cl::Hidden, + cl::desc("Restrict analysis to registers invovled" + " in LOHs"), + cl::init(true)); + +static cl::opt +BasicBlockScopeOnly("aarch64-collect-loh-bb-only", cl::Hidden, + cl::desc("Restrict analysis at basic block scope"), + cl::init(true)); + STATISTIC(NumADRPSimpleCandidate, "Number of simplifiable ADRP dominate by another"); +#ifndef NDEBUG +STATISTIC(NumADRPComplexCandidate2, + "Number of simplifiable ADRP reachable by 2 defs"); +STATISTIC(NumADRPComplexCandidate3, + "Number of simplifiable ADRP reachable by 3 defs"); +STATISTIC(NumADRPComplexCandidateOther, + "Number of simplifiable ADRP reachable by 4 or more defs"); +STATISTIC(NumADDToSTRWithImm, + "Number of simplifiable STR with imm reachable by ADD"); +STATISTIC(NumLDRToSTRWithImm, + "Number of simplifiable STR with imm reachable by LDR"); STATISTIC(NumADDToSTR, "Number of simplifiable STR reachable by ADD"); STATISTIC(NumLDRToSTR, "Number of simplifiable STR reachable by LDR"); +STATISTIC(NumADDToLDRWithImm, + "Number of simplifiable LDR with imm reachable by ADD"); +STATISTIC(NumLDRToLDRWithImm, + "Number of simplifiable LDR with imm reachable by LDR"); STATISTIC(NumADDToLDR, "Number of simplifiable LDR reachable by ADD"); STATISTIC(NumLDRToLDR, "Number of simplifiable LDR reachable by LDR"); +#endif // NDEBUG STATISTIC(NumADRPToLDR, "Number of simplifiable LDR reachable by ADRP"); +#ifndef NDEBUG +STATISTIC(NumCplxLvl1, "Number of complex case of level 1"); +STATISTIC(NumTooCplxLvl1, "Number of too complex case of level 1"); +STATISTIC(NumCplxLvl2, "Number of complex case of level 2"); +STATISTIC(NumTooCplxLvl2, "Number of too complex case of 
level 2"); +#endif // NDEBUG STATISTIC(NumADRSimpleCandidate, "Number of simplifiable ADRP + ADD"); +STATISTIC(NumADRComplexCandidate, "Number of too complex ADRP + ADD"); #define AARCH64_COLLECT_LOH_NAME "AArch64 Collect Linker Optimization Hint (LOH)" namespace { - struct AArch64CollectLOH : public MachineFunctionPass { static char ID; - AArch64CollectLOH() : MachineFunctionPass(ID) {} + AArch64CollectLOH() : MachineFunctionPass(ID) { + initializeAArch64CollectLOHPass(*PassRegistry::getPassRegistry()); + } bool runOnMachineFunction(MachineFunction &MF) override; @@ -149,57 +187,351 @@ struct AArch64CollectLOH : public MachineFunctionPass { StringRef getPassName() const override { return AARCH64_COLLECT_LOH_NAME; } void getAnalysisUsage(AnalysisUsage &AU) const override { - MachineFunctionPass::getAnalysisUsage(AU); AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + AU.addRequired(); } + +private: }; -char AArch64CollectLOH::ID = 0; +/// A set of MachineInstruction. +typedef SetVector SetOfMachineInstr; +/// Map a basic block to a set of instructions per register. +/// This is used to represent the exposed uses of a basic block +/// per register. +typedef MapVector> +BlockToSetOfInstrsPerColor; +/// Map a basic block to an instruction per register. +/// This is used to represent the live-out definitions of a basic block +/// per register. +typedef MapVector> +BlockToInstrPerColor; +/// Map an instruction to a set of instructions. Used to represent the +/// mapping def to reachable uses or use to definitions. +typedef MapVector InstrToInstrs; +/// Map a basic block to a BitVector. +/// This is used to record the kill registers per basic block. +typedef MapVector BlockToRegSet; +/// Map a register to a dense id. +typedef DenseMap MapRegToId; +/// Map a dense id to a register. Used for debug purposes. +typedef SmallVector MapIdToReg; } // end anonymous namespace. -INITIALIZE_PASS(AArch64CollectLOH, "aarch64-collect-loh", - AARCH64_COLLECT_LOH_NAME, false, false) +char AArch64CollectLOH::ID = 0; -static bool canAddBePartOfLOH(const MachineInstr &MI) { - // Check immediate to see if the immediate is an address. - switch (MI.getOperand(2).getType()) { - default: - return false; - case MachineOperand::MO_GlobalAddress: - case MachineOperand::MO_JumpTableIndex: - case MachineOperand::MO_ConstantPoolIndex: - case MachineOperand::MO_BlockAddress: - return true; +INITIALIZE_PASS_BEGIN(AArch64CollectLOH, "aarch64-collect-loh", + AARCH64_COLLECT_LOH_NAME, false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_END(AArch64CollectLOH, "aarch64-collect-loh", + AARCH64_COLLECT_LOH_NAME, false, false) + +/// Given a couple (MBB, reg) get the corresponding set of instruction from +/// the given "sets". +/// If this couple does not reference any set, an empty set is added to "sets" +/// for this couple and returned. +/// \param nbRegs is used internally allocate some memory. It must be consistent +/// with the way sets is used. +static SetOfMachineInstr &getSet(BlockToSetOfInstrsPerColor &sets, + const MachineBasicBlock &MBB, unsigned reg, + unsigned nbRegs) { + SetOfMachineInstr *result; + BlockToSetOfInstrsPerColor::iterator it = sets.find(&MBB); + if (it != sets.end()) + result = it->second.get(); + else + result = (sets[&MBB] = make_unique(nbRegs)).get(); + + return result[reg]; +} + +/// Given a couple (reg, MI) get the corresponding set of instructions from the +/// the given "sets". 
+/// This is used to get the uses record in sets of a definition identified by +/// MI and reg, i.e., MI defines reg. +/// If the couple does not reference anything, an empty set is added to +/// "sets[reg]". +/// \pre set[reg] is valid. +static SetOfMachineInstr &getUses(InstrToInstrs *sets, unsigned reg, + const MachineInstr &MI) { + return sets[reg][&MI]; +} + +/// Same as getUses but does not modify the input map: sets. +/// \return NULL if the couple (reg, MI) is not in sets. +static const SetOfMachineInstr *getUses(const InstrToInstrs *sets, unsigned reg, + const MachineInstr &MI) { + InstrToInstrs::const_iterator Res = sets[reg].find(&MI); + if (Res != sets[reg].end()) + return &(Res->second); + return nullptr; +} + +/// Initialize the reaching definition algorithm: +/// For each basic block BB in MF, record: +/// - its kill set. +/// - its reachable uses (uses that are exposed to BB's predecessors). +/// - its the generated definitions. +/// \param DummyOp if not NULL, specifies a Dummy Operation to be added to +/// the list of uses of exposed defintions. +/// \param ADRPMode specifies to only consider ADRP instructions for generated +/// definition. It also consider definitions of ADRP instructions as uses and +/// ignore other uses. The ADRPMode is used to collect the information for LHO +/// that involve ADRP operation only. +static void initReachingDef(const MachineFunction &MF, + InstrToInstrs *ColorOpToReachedUses, + BlockToInstrPerColor &Gen, BlockToRegSet &Kill, + BlockToSetOfInstrsPerColor &ReachableUses, + const MapRegToId &RegToId, + const MachineInstr *DummyOp, bool ADRPMode) { + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + unsigned NbReg = RegToId.size(); + + for (const MachineBasicBlock &MBB : MF) { + auto &BBGen = Gen[&MBB]; + BBGen = make_unique(NbReg); + std::fill(BBGen.get(), BBGen.get() + NbReg, nullptr); + + BitVector &BBKillSet = Kill[&MBB]; + BBKillSet.resize(NbReg); + for (const MachineInstr &MI : MBB) { + bool IsADRP = MI.getOpcode() == AArch64::ADRP; + + // Process uses first. + if (IsADRP || !ADRPMode) + for (const MachineOperand &MO : MI.operands()) { + // Treat ADRP def as use, as the goal of the analysis is to find + // ADRP defs reached by other ADRP defs. + if (!MO.isReg() || (!ADRPMode && !MO.isUse()) || + (ADRPMode && (!IsADRP || !MO.isDef()))) + continue; + unsigned CurReg = MO.getReg(); + MapRegToId::const_iterator ItCurRegId = RegToId.find(CurReg); + if (ItCurRegId == RegToId.end()) + continue; + CurReg = ItCurRegId->second; + + // if CurReg has not been defined, this use is reachable. + if (!BBGen[CurReg] && !BBKillSet.test(CurReg)) + getSet(ReachableUses, MBB, CurReg, NbReg).insert(&MI); + // current basic block definition for this color, if any, is in Gen. + if (BBGen[CurReg]) + getUses(ColorOpToReachedUses, CurReg, *BBGen[CurReg]).insert(&MI); + } + + // Process clobbers. + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isRegMask()) + continue; + // Clobbers kill the related colors. + const uint32_t *PreservedRegs = MO.getRegMask(); + + // Set generated regs. + for (const auto &Entry : RegToId) { + unsigned Reg = Entry.second; + // Use the global register ID when querying APIs external to this + // pass. + if (MachineOperand::clobbersPhysReg(PreservedRegs, Entry.first)) { + // Do not register clobbered definition for no ADRP. + // This definition is not used anyway (otherwise register + // allocation is wrong). + BBGen[Reg] = ADRPMode ? 
&MI : nullptr; + BBKillSet.set(Reg); + } + } + } + + // Process register defs. + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned CurReg = MO.getReg(); + MapRegToId::const_iterator ItCurRegId = RegToId.find(CurReg); + if (ItCurRegId == RegToId.end()) + continue; + + for (MCRegAliasIterator AI(CurReg, TRI, true); AI.isValid(); ++AI) { + MapRegToId::const_iterator ItRegId = RegToId.find(*AI); + // If this alias has not been recorded, then it is not interesting + // for the current analysis. + // We can end up in this situation because of tuple registers. + // E.g., Let say we are interested in S1. When we register + // S1, we will also register its aliases and in particular + // the tuple Q1_Q2. + // Now, when we encounter Q1_Q2, we will look through its aliases + // and will find that S2 is not registered. + if (ItRegId == RegToId.end()) + continue; + + BBKillSet.set(ItRegId->second); + BBGen[ItRegId->second] = &MI; + } + BBGen[ItCurRegId->second] = &MI; + } + } + + // If we restrict our analysis to basic block scope, conservatively add a + // dummy + // use for each generated value. + if (!ADRPMode && DummyOp && !MBB.succ_empty()) + for (unsigned CurReg = 0; CurReg < NbReg; ++CurReg) + if (BBGen[CurReg]) + getUses(ColorOpToReachedUses, CurReg, *BBGen[CurReg]).insert(DummyOp); } } +/// Reaching def core algorithm: +/// while an Out has changed +/// for each bb +/// for each color +/// In[bb][color] = U Out[bb.predecessors][color] +/// insert reachableUses[bb][color] in each in[bb][color] +/// op.reachedUses +/// +/// Out[bb] = Gen[bb] U (In[bb] - Kill[bb]) +static void reachingDefAlgorithm(const MachineFunction &MF, + InstrToInstrs *ColorOpToReachedUses, + BlockToSetOfInstrsPerColor &In, + BlockToSetOfInstrsPerColor &Out, + BlockToInstrPerColor &Gen, BlockToRegSet &Kill, + BlockToSetOfInstrsPerColor &ReachableUses, + unsigned NbReg) { + bool HasChanged; + do { + HasChanged = false; + for (const MachineBasicBlock &MBB : MF) { + unsigned CurReg; + for (CurReg = 0; CurReg < NbReg; ++CurReg) { + SetOfMachineInstr &BBInSet = getSet(In, MBB, CurReg, NbReg); + SetOfMachineInstr &BBReachableUses = + getSet(ReachableUses, MBB, CurReg, NbReg); + SetOfMachineInstr &BBOutSet = getSet(Out, MBB, CurReg, NbReg); + unsigned Size = BBOutSet.size(); + // In[bb][color] = U Out[bb.predecessors][color] + for (const MachineBasicBlock *PredMBB : MBB.predecessors()) { + SetOfMachineInstr &PredOutSet = getSet(Out, *PredMBB, CurReg, NbReg); + BBInSet.insert(PredOutSet.begin(), PredOutSet.end()); + } + // insert reachableUses[bb][color] in each in[bb][color] op.reachedses + for (const MachineInstr *MI : BBInSet) { + SetOfMachineInstr &OpReachedUses = + getUses(ColorOpToReachedUses, CurReg, *MI); + OpReachedUses.insert(BBReachableUses.begin(), BBReachableUses.end()); + } + // Out[bb] = Gen[bb] U (In[bb] - Kill[bb]) + if (!Kill[&MBB].test(CurReg)) + BBOutSet.insert(BBInSet.begin(), BBInSet.end()); + if (Gen[&MBB][CurReg]) + BBOutSet.insert(Gen[&MBB][CurReg]); + HasChanged |= BBOutSet.size() != Size; + } + } + } while (HasChanged); +} + +/// Reaching definition algorithm. +/// \param MF function on which the algorithm will operate. +/// \param[out] ColorOpToReachedUses will contain the result of the reaching +/// def algorithm. +/// \param ADRPMode specify whether the reaching def algorithm should be tuned +/// for ADRP optimization. \see initReachingDef for more details. 
+/// \param DummyOp if not NULL, the algorithm will work at +/// basic block scope and will set for every exposed definition a use to +/// @p DummyOp. +/// \pre ColorOpToReachedUses is an array of at least number of registers of +/// InstrToInstrs. +static void reachingDef(const MachineFunction &MF, + InstrToInstrs *ColorOpToReachedUses, + const MapRegToId &RegToId, bool ADRPMode = false, + const MachineInstr *DummyOp = nullptr) { + // structures: + // For each basic block. + // Out: a set per color of definitions that reach the + // out boundary of this block. + // In: Same as Out but for in boundary. + // Gen: generated color in this block (one operation per color). + // Kill: register set of killed color in this block. + // ReachableUses: a set per color of uses (operation) reachable + // for "In" definitions. + BlockToSetOfInstrsPerColor Out, In, ReachableUses; + BlockToInstrPerColor Gen; + BlockToRegSet Kill; + + // Initialize Gen, kill and reachableUses. + initReachingDef(MF, ColorOpToReachedUses, Gen, Kill, ReachableUses, RegToId, + DummyOp, ADRPMode); + + // Algo. + if (!DummyOp) + reachingDefAlgorithm(MF, ColorOpToReachedUses, In, Out, Gen, Kill, + ReachableUses, RegToId.size()); +} + +#ifndef NDEBUG +/// print the result of the reaching definition algorithm. +static void printReachingDef(const InstrToInstrs *ColorOpToReachedUses, + unsigned NbReg, const TargetRegisterInfo *TRI, + const MapIdToReg &IdToReg) { + unsigned CurReg; + for (CurReg = 0; CurReg < NbReg; ++CurReg) { + if (ColorOpToReachedUses[CurReg].empty()) + continue; + DEBUG(dbgs() << "*** Reg " << PrintReg(IdToReg[CurReg], TRI) << " ***\n"); + + for (const auto &DefsIt : ColorOpToReachedUses[CurReg]) { + DEBUG(dbgs() << "Def:\n"); + DEBUG(DefsIt.first->print(dbgs())); + DEBUG(dbgs() << "Reachable uses:\n"); + for (const MachineInstr *MI : DefsIt.second) { + DEBUG(MI->print(dbgs())); + } + } + } +} +#endif // NDEBUG + /// Answer the following question: Can Def be one of the definition /// involved in a part of a LOH? -static bool canDefBePartOfLOH(const MachineInstr &MI) { +static bool canDefBePartOfLOH(const MachineInstr *Def) { + unsigned Opc = Def->getOpcode(); // Accept ADRP, ADDLow and LOADGot. - switch (MI.getOpcode()) { + switch (Opc) { default: return false; case AArch64::ADRP: return true; case AArch64::ADDXri: - return canAddBePartOfLOH(MI); + // Check immediate to see if the immediate is an address. + switch (Def->getOperand(2).getType()) { + default: + return false; + case MachineOperand::MO_GlobalAddress: + case MachineOperand::MO_JumpTableIndex: + case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_BlockAddress: + return true; + } case AArch64::LDRXui: // Check immediate to see if the immediate is an address. - switch (MI.getOperand(2).getType()) { + switch (Def->getOperand(2).getType()) { default: return false; case MachineOperand::MO_GlobalAddress: - return MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT; + return true; } } + // Unreachable. + return false; } /// Check whether the given instruction can the end of a LOH chain involving a /// store. 
-static bool isCandidateStore(const MachineInstr &MI, const MachineOperand &MO) { - switch (MI.getOpcode()) { +static bool isCandidateStore(const MachineInstr *Instr) { + switch (Instr->getOpcode()) { default: return false; case AArch64::STRBBui: @@ -211,19 +543,109 @@ static bool isCandidateStore(const MachineInstr &MI, const MachineOperand &MO) { case AArch64::STRSui: case AArch64::STRDui: case AArch64::STRQui: - // We can only optimize the index operand. // In case we have str xA, [xA, #imm], this is two different uses // of xA and we cannot fold, otherwise the xA stored may be wrong, // even if #imm == 0. - return MI.getOperandNo(&MO) == 1 && - MI.getOperand(0).getReg() != MI.getOperand(1).getReg(); + if (Instr->getOperand(0).getReg() != Instr->getOperand(1).getReg()) + return true; + } + return false; +} + +/// Given the result of a reaching definition algorithm in ColorOpToReachedUses, +/// Build the Use to Defs information and filter out obvious non-LOH candidates. +/// In ADRPMode, non-LOH candidates are "uses" with non-ADRP definitions. +/// In non-ADRPMode, non-LOH candidates are "uses" with several definition, +/// i.e., no simple chain. +/// \param ADRPMode -- \see initReachingDef. +static void reachedUsesToDefs(InstrToInstrs &UseToReachingDefs, + const InstrToInstrs *ColorOpToReachedUses, + const MapRegToId &RegToId, + bool ADRPMode = false) { + + SetOfMachineInstr NotCandidate; + unsigned NbReg = RegToId.size(); + MapRegToId::const_iterator EndIt = RegToId.end(); + for (unsigned CurReg = 0; CurReg < NbReg; ++CurReg) { + // If this color is never defined, continue. + if (ColorOpToReachedUses[CurReg].empty()) + continue; + + for (const auto &DefsIt : ColorOpToReachedUses[CurReg]) { + for (const MachineInstr *MI : DefsIt.second) { + const MachineInstr *Def = DefsIt.first; + MapRegToId::const_iterator It; + // if all the reaching defs are not adrp, this use will not be + // simplifiable. + if ((ADRPMode && Def->getOpcode() != AArch64::ADRP) || + (!ADRPMode && !canDefBePartOfLOH(Def)) || + (!ADRPMode && isCandidateStore(MI) && + // store are LOH candidate iff the end of the chain is used as + // base. + ((It = RegToId.find((MI)->getOperand(1).getReg())) == EndIt || + It->second != CurReg))) { + NotCandidate.insert(MI); + continue; + } + // Do not consider self reaching as a simplifiable case for ADRP. + if (!ADRPMode || MI != DefsIt.first) { + UseToReachingDefs[MI].insert(DefsIt.first); + // If UsesIt has several reaching definitions, it is not + // candidate for simplificaton in non-ADRPMode. + if (!ADRPMode && UseToReachingDefs[MI].size() > 1) + NotCandidate.insert(MI); + } + } + } + } + for (const MachineInstr *Elem : NotCandidate) { + DEBUG(dbgs() << "Too many reaching defs: " << *Elem << "\n"); + // It would have been better if we could just remove the entry + // from the map. Because of that, we have to filter the garbage + // (second.empty) in the subsequence analysis. + UseToReachingDefs[Elem].clear(); + } +} + +/// Based on the use to defs information (in ADRPMode), compute the +/// opportunities of LOH ADRP-related. 
+static void computeADRP(const InstrToInstrs &UseToDefs, + AArch64FunctionInfo &AArch64FI, + const MachineDominatorTree *MDT) { + DEBUG(dbgs() << "*** Compute LOH for ADRP\n"); + for (const auto &Entry : UseToDefs) { + unsigned Size = Entry.second.size(); + if (Size == 0) + continue; + if (Size == 1) { + const MachineInstr *L2 = *Entry.second.begin(); + const MachineInstr *L1 = Entry.first; + if (!MDT->dominates(L2, L1)) { + DEBUG(dbgs() << "Dominance check failed:\n" << *L2 << '\n' << *L1 + << '\n'); + continue; + } + DEBUG(dbgs() << "Record AdrpAdrp:\n" << *L2 << '\n' << *L1 << '\n'); + AArch64FI.addLOHDirective(MCLOH_AdrpAdrp, {L2, L1}); + ++NumADRPSimpleCandidate; + } +#ifndef NDEBUG + else if (Size == 2) + ++NumADRPComplexCandidate2; + else if (Size == 3) + ++NumADRPComplexCandidate3; + else + ++NumADRPComplexCandidateOther; +#endif + // if Size < 1, the use should have been removed from the candidates + assert(Size >= 1 && "No reaching defs for that use!"); } } /// Check whether the given instruction can be the end of a LOH chain /// involving a load. -static bool isCandidateLoad(const MachineInstr &MI) { - switch (MI.getOpcode()) { +static bool isCandidateLoad(const MachineInstr *Instr) { + switch (Instr->getOpcode()) { default: return false; case AArch64::LDRSBWui: @@ -238,13 +660,17 @@ static bool isCandidateLoad(const MachineInstr &MI) { case AArch64::LDRSui: case AArch64::LDRDui: case AArch64::LDRQui: - return !(MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT); + if (Instr->getOperand(2).getTargetFlags() & AArch64II::MO_GOT) + return false; + return true; } + // Unreachable. + return false; } /// Check whether the given instruction can load a litteral. -static bool supportLoadFromLiteral(const MachineInstr &MI) { - switch (MI.getOpcode()) { +static bool supportLoadFromLiteral(const MachineInstr *Instr) { + switch (Instr->getOpcode()) { default: return false; case AArch64::LDRSWui: @@ -255,232 +681,353 @@ static bool supportLoadFromLiteral(const MachineInstr &MI) { case AArch64::LDRQui: return true; } + // Unreachable. + return false; } -/// Number of GPR registers traked by mapRegToGPRIndex() -static const unsigned N_GPR_REGS = 31; -/// Map register number to index from 0-30. -static int mapRegToGPRIndex(MCPhysReg Reg) { - static_assert(AArch64::X28 - AArch64::X0 + 3 == N_GPR_REGS, "Number of GPRs"); - static_assert(AArch64::W30 - AArch64::W0 + 1 == N_GPR_REGS, "Number of GPRs"); - if (AArch64::X0 <= Reg && Reg <= AArch64::X28) - return Reg - AArch64::X0; - if (AArch64::W0 <= Reg && Reg <= AArch64::W30) - return Reg - AArch64::W0; - // TableGen gives "FP" and "LR" an index not adjacent to X28 so we have to - // handle them as special cases. - if (Reg == AArch64::FP) - return 29; - if (Reg == AArch64::LR) - return 30; - return -1; -} +/// Check whether the given instruction is a LOH candidate. +/// \param UseToDefs is used to check that Instr is at the end of LOH supported +/// chain. +/// \pre UseToDefs contains only on def per use, i.e., obvious non candidate are +/// already been filtered out. +static bool isCandidate(const MachineInstr *Instr, + const InstrToInstrs &UseToDefs, + const MachineDominatorTree *MDT) { + if (!isCandidateLoad(Instr) && !isCandidateStore(Instr)) + return false; -/// State tracked per register. -/// The main algorithm walks backwards over a basic block maintaining this -/// datastructure for each tracked general purpose register. -struct LOHInfo { - MCLOHType Type : 8; ///< "Best" type of LOH possible. 
- bool IsCandidate : 1; ///< Possible LOH candidate. - bool OneUser : 1; ///< Found exactly one user (yet). - bool MultiUsers : 1; ///< Found multiple users. - const MachineInstr *MI0; ///< First instruction involved in the LOH. - const MachineInstr *MI1; ///< Second instruction involved in the LOH - /// (if any). - const MachineInstr *LastADRP; ///< Last ADRP in same register. -}; + const MachineInstr *Def = *UseToDefs.find(Instr)->second.begin(); + if (Def->getOpcode() != AArch64::ADRP) { + // At this point, Def is ADDXri or LDRXui of the right type of + // symbol, because we filtered out the uses that were not defined + // by these kind of instructions (+ ADRP). -/// Update state \p Info given \p MI uses the tracked register. -static void handleUse(const MachineInstr &MI, const MachineOperand &MO, - LOHInfo &Info) { - // We have multiple uses if we already found one before. - if (Info.MultiUsers || Info.OneUser) { - Info.IsCandidate = false; - Info.MultiUsers = true; - return; - } - Info.OneUser = true; - - // Start new LOHInfo if applicable. - if (isCandidateLoad(MI)) { - Info.Type = MCLOH_AdrpLdr; - Info.IsCandidate = true; - Info.MI0 = &MI; - // Note that even this is AdrpLdr now, we can switch to a Ldr variant - // later. - } else if (isCandidateStore(MI, MO)) { - Info.Type = MCLOH_AdrpAddStr; - Info.IsCandidate = true; - Info.MI0 = &MI; - Info.MI1 = nullptr; - } else if (MI.getOpcode() == AArch64::ADDXri) { - Info.Type = MCLOH_AdrpAdd; - Info.IsCandidate = true; - Info.MI0 = &MI; - } else if (MI.getOpcode() == AArch64::LDRXui && - MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT) { - Info.Type = MCLOH_AdrpLdrGot; - Info.IsCandidate = true; - Info.MI0 = &MI; + // Check if this forms a simple chain: each intermediate node must + // dominates the next one. + if (!MDT->dominates(Def, Instr)) + return false; + // Move one node up in the simple chain. + if (UseToDefs.find(Def) == + UseToDefs.end() + // The map may contain garbage we have to ignore. + || + UseToDefs.find(Def)->second.empty()) + return false; + Instr = Def; + Def = *UseToDefs.find(Def)->second.begin(); } + // Check if we reached the top of the simple chain: + // - top is ADRP. + // - check the simple chain property: each intermediate node must + // dominates the next one. + if (Def->getOpcode() == AArch64::ADRP) + return MDT->dominates(Def, Instr); + return false; } -/// Update state \p Info given the tracked register is clobbered. -static void handleClobber(LOHInfo &Info) { - Info.IsCandidate = false; - Info.OneUser = false; - Info.MultiUsers = false; - Info.LastADRP = nullptr; -} - -/// Update state \p Info given that \p MI is possibly the middle instruction -/// of an LOH involving 3 instructions. -static bool handleMiddleInst(const MachineInstr &MI, LOHInfo &DefInfo, - LOHInfo &OpInfo) { - if (!DefInfo.IsCandidate || (&DefInfo != &OpInfo && OpInfo.OneUser)) +static bool registerADRCandidate(const MachineInstr &Use, + const InstrToInstrs &UseToDefs, + const InstrToInstrs *DefsPerColorToUses, + AArch64FunctionInfo &AArch64FI, + SetOfMachineInstr *InvolvedInLOHs, + const MapRegToId &RegToId) { + // Look for opportunities to turn ADRP -> ADD or + // ADRP -> LDR GOTPAGEOFF into ADR. + // If ADRP has more than one use. Give up. + if (Use.getOpcode() != AArch64::ADDXri && + (Use.getOpcode() != AArch64::LDRXui || + !(Use.getOperand(2).getTargetFlags() & AArch64II::MO_GOT))) + return false; + InstrToInstrs::const_iterator It = UseToDefs.find(&Use); + // The map may contain garbage that we need to ignore. 
+ if (It == UseToDefs.end() || It->second.empty()) + return false; + const MachineInstr &Def = **It->second.begin(); + if (Def.getOpcode() != AArch64::ADRP) + return false; + // Check the number of users of ADRP. + const SetOfMachineInstr *Users = + getUses(DefsPerColorToUses, + RegToId.find(Def.getOperand(0).getReg())->second, Def); + if (Users->size() > 1) { + ++NumADRComplexCandidate; return false; - // Copy LOHInfo for dest register to LOHInfo for source register. - if (&DefInfo != &OpInfo) { - OpInfo = DefInfo; - // Invalidate \p DefInfo because we track it in \p OpInfo now. - handleClobber(DefInfo); } + ++NumADRSimpleCandidate; + assert((!InvolvedInLOHs || InvolvedInLOHs->insert(&Def)) && + "ADRP already involved in LOH."); + assert((!InvolvedInLOHs || InvolvedInLOHs->insert(&Use)) && + "ADD already involved in LOH."); + DEBUG(dbgs() << "Record AdrpAdd\n" << Def << '\n' << Use << '\n'); - // Advance state machine. - assert(OpInfo.IsCandidate && "Expect valid state"); - if (MI.getOpcode() == AArch64::ADDXri && canAddBePartOfLOH(MI)) { - if (OpInfo.Type == MCLOH_AdrpLdr) { - OpInfo.Type = MCLOH_AdrpAddLdr; - OpInfo.IsCandidate = true; - OpInfo.MI1 = &MI; - return true; - } else if (OpInfo.Type == MCLOH_AdrpAddStr && OpInfo.MI1 == nullptr) { - OpInfo.Type = MCLOH_AdrpAddStr; - OpInfo.IsCandidate = true; - OpInfo.MI1 = &MI; - return true; - } - } else { - assert(MI.getOpcode() == AArch64::LDRXui && "Expect LDRXui"); - assert((MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT) && - "Expected GOT relocation"); - if (OpInfo.Type == MCLOH_AdrpAddStr && OpInfo.MI1 == nullptr) { - OpInfo.Type = MCLOH_AdrpLdrGotStr; - OpInfo.IsCandidate = true; - OpInfo.MI1 = &MI; - return true; - } else if (OpInfo.Type == MCLOH_AdrpLdr) { - OpInfo.Type = MCLOH_AdrpLdrGotLdr; - OpInfo.IsCandidate = true; - OpInfo.MI1 = &MI; - return true; - } - } - return false; + AArch64FI.addLOHDirective( + Use.getOpcode() == AArch64::ADDXri ? MCLOH_AdrpAdd : MCLOH_AdrpLdrGot, + {&Def, &Use}); + return true; } -/// Update state when seeing and ADRP instruction. -static void handleADRP(const MachineInstr &MI, AArch64FunctionInfo &AFI, - LOHInfo &Info) { - if (Info.LastADRP != nullptr) { - DEBUG(dbgs() << "Adding MCLOH_AdrpAdrp:\n" << '\t' << MI << '\t' - << *Info.LastADRP); - AFI.addLOHDirective(MCLOH_AdrpAdrp, {&MI, Info.LastADRP}); - ++NumADRPSimpleCandidate; +/// Based on the use to defs information (in non-ADRPMode), compute the +/// opportunities of LOH non-ADRP-related +static void computeOthers(const InstrToInstrs &UseToDefs, + const InstrToInstrs *DefsPerColorToUses, + AArch64FunctionInfo &AArch64FI, const MapRegToId &RegToId, + const MachineDominatorTree *MDT) { + SetOfMachineInstr *InvolvedInLOHs = nullptr; +#ifndef NDEBUG + SetOfMachineInstr InvolvedInLOHsStorage; + InvolvedInLOHs = &InvolvedInLOHsStorage; +#endif // NDEBUG + DEBUG(dbgs() << "*** Compute LOH for Others\n"); + // ADRP -> ADD/LDR -> LDR/STR pattern. + // Fall back to ADRP -> ADD pattern if we fail to catch the bigger pattern. + + // FIXME: When the statistics are not important, + // This initial filtering loop can be merged into the next loop. + // Currently, we didn't do it to have the same code for both DEBUG and + // NDEBUG builds. Indeed, the iterator of the second loop would need + // to be changed. + SetOfMachineInstr PotentialCandidates; + SetOfMachineInstr PotentialADROpportunities; + for (auto &Use : UseToDefs) { + // If no definition is available, this is a non candidate. 
+ if (Use.second.empty()) + continue; + // Keep only instructions that are load or store and at the end of + // a ADRP -> ADD/LDR/Nothing chain. + // We already filtered out the no-chain cases. + if (!isCandidate(Use.first, UseToDefs, MDT)) { + PotentialADROpportunities.insert(Use.first); + continue; + } + PotentialCandidates.insert(Use.first); } - // Produce LOH directive if possible. - if (Info.IsCandidate) { - switch (Info.Type) { - case MCLOH_AdrpAdd: - DEBUG(dbgs() << "Adding MCLOH_AdrpAdd:\n" << '\t' << MI << '\t' - << *Info.MI0); - AFI.addLOHDirective(MCLOH_AdrpAdd, {&MI, Info.MI0}); - ++NumADRSimpleCandidate; - break; - case MCLOH_AdrpLdr: - if (supportLoadFromLiteral(*Info.MI0)) { - DEBUG(dbgs() << "Adding MCLOH_AdrpLdr:\n" << '\t' << MI << '\t' - << *Info.MI0); - AFI.addLOHDirective(MCLOH_AdrpLdr, {&MI, Info.MI0}); + // Make the following distinctions for statistics as the linker does + // know how to decode instructions: + // - ADD/LDR/Nothing make there different patterns. + // - LDR/STR make two different patterns. + // Hence, 6 - 1 base patterns. + // (because ADRP-> Nothing -> STR is not simplifiable) + + // The linker is only able to have a simple semantic, i.e., if pattern A + // do B. + // However, we want to see the opportunity we may miss if we were able to + // catch more complex cases. + + // PotentialCandidates are result of a chain ADRP -> ADD/LDR -> + // A potential candidate becomes a candidate, if its current immediate + // operand is zero and all nodes of the chain have respectively only one user +#ifndef NDEBUG + SetOfMachineInstr DefsOfPotentialCandidates; +#endif + for (const MachineInstr *Candidate : PotentialCandidates) { + // Get the definition of the candidate i.e., ADD or LDR. + const MachineInstr *Def = *UseToDefs.find(Candidate)->second.begin(); + // Record the elements of the chain. + const MachineInstr *L1 = Def; + const MachineInstr *L2 = nullptr; + unsigned ImmediateDefOpc = Def->getOpcode(); + if (Def->getOpcode() != AArch64::ADRP) { + // Check the number of users of this node. + const SetOfMachineInstr *Users = + getUses(DefsPerColorToUses, + RegToId.find(Def->getOperand(0).getReg())->second, *Def); + if (Users->size() > 1) { +#ifndef NDEBUG + // if all the uses of this def are in potential candidate, this is + // a complex candidate of level 2. + bool IsLevel2 = true; + for (const MachineInstr *MI : *Users) { + if (!PotentialCandidates.count(MI)) { + ++NumTooCplxLvl2; + IsLevel2 = false; + break; + } + } + if (IsLevel2) + ++NumCplxLvl2; +#endif // NDEBUG + PotentialADROpportunities.insert(Def); + continue; + } + L2 = Def; + Def = *UseToDefs.find(Def)->second.begin(); + L1 = Def; + } // else the element in the middle of the chain is nothing, thus + // Def already contains the first element of the chain. 
+ + // Check the number of users of the first node in the chain, i.e., ADRP + const SetOfMachineInstr *Users = + getUses(DefsPerColorToUses, + RegToId.find(Def->getOperand(0).getReg())->second, *Def); + if (Users->size() > 1) { +#ifndef NDEBUG + // if all the uses of this def are in the defs of the potential candidate, + // this is a complex candidate of level 1 + if (DefsOfPotentialCandidates.empty()) { + // lazy init + DefsOfPotentialCandidates = PotentialCandidates; + for (const MachineInstr *Candidate : PotentialCandidates) { + if (!UseToDefs.find(Candidate)->second.empty()) + DefsOfPotentialCandidates.insert( + *UseToDefs.find(Candidate)->second.begin()); + } + } + bool Found = false; + for (auto &Use : *Users) { + if (!DefsOfPotentialCandidates.count(Use)) { + ++NumTooCplxLvl1; + Found = true; + break; + } + } + if (!Found) + ++NumCplxLvl1; +#endif // NDEBUG + continue; + } + + bool IsL2Add = (ImmediateDefOpc == AArch64::ADDXri); + // If the chain is three instructions long and ldr is the second element, + // then this ldr must load form GOT, otherwise this is not a correct chain. + if (L2 && !IsL2Add && + !(L2->getOperand(2).getTargetFlags() & AArch64II::MO_GOT)) + continue; + SmallVector Args; + MCLOHType Kind; + if (isCandidateLoad(Candidate)) { + if (!L2) { + // At this point, the candidate LOH indicates that the ldr instruction + // may use a direct access to the symbol. There is not such encoding + // for loads of byte and half. + if (!supportLoadFromLiteral(Candidate)) + continue; + + DEBUG(dbgs() << "Record AdrpLdr:\n" << *L1 << '\n' << *Candidate + << '\n'); + Kind = MCLOH_AdrpLdr; + Args.push_back(L1); + Args.push_back(Candidate); + assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) && + "L1 already involved in LOH."); + assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) && + "Candidate already involved in LOH."); ++NumADRPToLDR; + } else { + DEBUG(dbgs() << "Record Adrp" << (IsL2Add ? "Add" : "LdrGot") + << "Ldr:\n" << *L1 << '\n' << *L2 << '\n' << *Candidate + << '\n'); + + Kind = IsL2Add ? MCLOH_AdrpAddLdr : MCLOH_AdrpLdrGotLdr; + Args.push_back(L1); + Args.push_back(L2); + Args.push_back(Candidate); + + PotentialADROpportunities.remove(L2); + assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) && + "L1 already involved in LOH."); + assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L2)) && + "L2 already involved in LOH."); + assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) && + "Candidate already involved in LOH."); +#ifndef NDEBUG + // get the immediate of the load + if (Candidate->getOperand(2).getImm() == 0) + if (ImmediateDefOpc == AArch64::ADDXri) + ++NumADDToLDR; + else + ++NumLDRToLDR; + else if (ImmediateDefOpc == AArch64::ADDXri) + ++NumADDToLDRWithImm; + else + ++NumLDRToLDRWithImm; +#endif // NDEBUG } - break; - case MCLOH_AdrpAddLdr: - DEBUG(dbgs() << "Adding MCLOH_AdrpAddLdr:\n" << '\t' << MI << '\t' - << *Info.MI1 << '\t' << *Info.MI0); - AFI.addLOHDirective(MCLOH_AdrpAddLdr, {&MI, Info.MI1, Info.MI0}); - ++NumADDToLDR; - break; - case MCLOH_AdrpAddStr: - if (Info.MI1 != nullptr) { - DEBUG(dbgs() << "Adding MCLOH_AdrpAddStr:\n" << '\t' << MI << '\t' - << *Info.MI1 << '\t' << *Info.MI0); - AFI.addLOHDirective(MCLOH_AdrpAddStr, {&MI, Info.MI1, Info.MI0}); - ++NumADDToSTR; + } else { + if (ImmediateDefOpc == AArch64::ADRP) + continue; + else { + + DEBUG(dbgs() << "Record Adrp" << (IsL2Add ? "Add" : "LdrGot") + << "Str:\n" << *L1 << '\n' << *L2 << '\n' << *Candidate + << '\n'); + + Kind = IsL2Add ? 
MCLOH_AdrpAddStr : MCLOH_AdrpLdrGotStr; + Args.push_back(L1); + Args.push_back(L2); + Args.push_back(Candidate); + + PotentialADROpportunities.remove(L2); + assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) && + "L1 already involved in LOH."); + assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L2)) && + "L2 already involved in LOH."); + assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) && + "Candidate already involved in LOH."); +#ifndef NDEBUG + // get the immediate of the store + if (Candidate->getOperand(2).getImm() == 0) + if (ImmediateDefOpc == AArch64::ADDXri) + ++NumADDToSTR; + else + ++NumLDRToSTR; + else if (ImmediateDefOpc == AArch64::ADDXri) + ++NumADDToSTRWithImm; + else + ++NumLDRToSTRWithImm; +#endif // DEBUG } - break; - case MCLOH_AdrpLdrGotLdr: - DEBUG(dbgs() << "Adding MCLOH_AdrpLdrGotLdr:\n" << '\t' << MI << '\t' - << *Info.MI1 << '\t' << *Info.MI0); - AFI.addLOHDirective(MCLOH_AdrpLdrGotLdr, {&MI, Info.MI1, Info.MI0}); - ++NumLDRToLDR; - break; - case MCLOH_AdrpLdrGotStr: - DEBUG(dbgs() << "Adding MCLOH_AdrpLdrGotStr:\n" << '\t' << MI << '\t' - << *Info.MI1 << '\t' << *Info.MI0); - AFI.addLOHDirective(MCLOH_AdrpLdrGotStr, {&MI, Info.MI1, Info.MI0}); - ++NumLDRToSTR; - break; - case MCLOH_AdrpLdrGot: - DEBUG(dbgs() << "Adding MCLOH_AdrpLdrGot:\n" << '\t' << MI << '\t' - << *Info.MI0); - AFI.addLOHDirective(MCLOH_AdrpLdrGot, {&MI, Info.MI0}); - break; - case MCLOH_AdrpAdrp: - llvm_unreachable("MCLOH_AdrpAdrp not used in state machine"); } + AArch64FI.addLOHDirective(Kind, Args); } - handleClobber(Info); - Info.LastADRP = &MI; + // Now, we grabbed all the big patterns, check ADR opportunities. + for (const MachineInstr *Candidate : PotentialADROpportunities) + registerADRCandidate(*Candidate, UseToDefs, DefsPerColorToUses, AArch64FI, + InvolvedInLOHs, RegToId); } -static void handleRegMaskClobber(const uint32_t *RegMask, MCPhysReg Reg, - LOHInfo *LOHInfos) { - if (!MachineOperand::clobbersPhysReg(RegMask, Reg)) +/// Look for every register defined by potential LOHs candidates. +/// Map these registers with dense id in @p RegToId and vice-versa in +/// @p IdToReg. @p IdToReg is populated only in DEBUG mode. +static void collectInvolvedReg(const MachineFunction &MF, MapRegToId &RegToId, + MapIdToReg &IdToReg, + const TargetRegisterInfo *TRI) { + unsigned CurRegId = 0; + if (!PreCollectRegister) { + unsigned NbReg = TRI->getNumRegs(); + for (; CurRegId < NbReg; ++CurRegId) { + RegToId[CurRegId] = CurRegId; + DEBUG(IdToReg.push_back(CurRegId)); + DEBUG(assert(IdToReg[CurRegId] == CurRegId && "Reg index mismatches")); + } return; - int Idx = mapRegToGPRIndex(Reg); - if (Idx >= 0) - handleClobber(LOHInfos[Idx]); -} + } -static void handleNormalInst(const MachineInstr &MI, LOHInfo *LOHInfos) { - // Handle defs and regmasks. 
- for (const MachineOperand &MO : MI.operands()) { - if (MO.isRegMask()) { - const uint32_t *RegMask = MO.getRegMask(); - for (MCPhysReg Reg : AArch64::GPR32RegClass) - handleRegMaskClobber(RegMask, Reg, LOHInfos); - for (MCPhysReg Reg : AArch64::GPR64RegClass) - handleRegMaskClobber(RegMask, Reg, LOHInfos); - continue; + DEBUG(dbgs() << "** Collect Involved Register\n"); + for (const auto &MBB : MF) { + for (const MachineInstr &MI : MBB) { + if (!canDefBePartOfLOH(&MI) && + !isCandidateLoad(&MI) && !isCandidateStore(&MI)) + continue; + + // Process defs + for (MachineInstr::const_mop_iterator IO = MI.operands_begin(), + IOEnd = MI.operands_end(); + IO != IOEnd; ++IO) { + if (!IO->isReg() || !IO->isDef()) + continue; + unsigned CurReg = IO->getReg(); + for (MCRegAliasIterator AI(CurReg, TRI, true); AI.isValid(); ++AI) + if (RegToId.find(*AI) == RegToId.end()) { + DEBUG(IdToReg.push_back(*AI); + assert(IdToReg[CurRegId] == *AI && + "Reg index mismatches insertion index.")); + RegToId[*AI] = CurRegId++; + DEBUG(dbgs() << "Register: " << PrintReg(*AI, TRI) << '\n'); + } + } } - if (!MO.isReg() || !MO.isDef()) - continue; - int Idx = mapRegToGPRIndex(MO.getReg()); - if (Idx < 0) - continue; - handleClobber(LOHInfos[Idx]); - } - // Handle uses. - for (const MachineOperand &MO : MI.uses()) { - if (!MO.isReg() || !MO.readsReg()) - continue; - int Idx = mapRegToGPRIndex(MO.getReg()); - if (Idx < 0) - continue; - handleUse(MI, MO, LOHInfos[Idx]); } } @@ -488,59 +1035,74 @@ bool AArch64CollectLOH::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(*MF.getFunction())) return false; - DEBUG(dbgs() << "********** AArch64 Collect LOH **********\n" - << "Looking in function " << MF.getName() << '\n'); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + const MachineDominatorTree *MDT = &getAnalysis(); - LOHInfo LOHInfos[N_GPR_REGS]; - AArch64FunctionInfo &AFI = *MF.getInfo(); - for (const MachineBasicBlock &MBB : MF) { - // Reset register tracking state. - memset(LOHInfos, 0, sizeof(LOHInfos)); - // Live-out registers are used. - for (const MachineBasicBlock *Succ : MBB.successors()) { - for (const auto &LI : Succ->liveins()) { - int RegIdx = mapRegToGPRIndex(LI.PhysReg); - if (RegIdx >= 0) - LOHInfos[RegIdx].OneUser = true; - } - } + MapRegToId RegToId; + MapIdToReg IdToReg; + AArch64FunctionInfo *AArch64FI = MF.getInfo(); + assert(AArch64FI && "No MachineFunctionInfo for this function!"); - // Walk the basic block backwards and update the per register state machine - // in the process. 
- for (const MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) { - unsigned Opcode = MI.getOpcode(); - switch (Opcode) { - case AArch64::ADDXri: - case AArch64::LDRXui: - if (canDefBePartOfLOH(MI)) { - const MachineOperand &Def = MI.getOperand(0); - const MachineOperand &Op = MI.getOperand(1); - assert(Def.isReg() && Def.isDef() && "Expected reg def"); - assert(Op.isReg() && Op.isUse() && "Expected reg use"); - int DefIdx = mapRegToGPRIndex(Def.getReg()); - int OpIdx = mapRegToGPRIndex(Op.getReg()); - if (DefIdx >= 0 && OpIdx >= 0 && - handleMiddleInst(MI, LOHInfos[DefIdx], LOHInfos[OpIdx])) - continue; - } - break; - case AArch64::ADRP: - const MachineOperand &Op0 = MI.getOperand(0); - int Idx = mapRegToGPRIndex(Op0.getReg()); - if (Idx >= 0) { - handleADRP(MI, AFI, LOHInfos[Idx]); - continue; - } - break; - } - handleNormalInst(MI, LOHInfos); - } + DEBUG(dbgs() << "Looking for LOH in " << MF.getName() << '\n'); + + collectInvolvedReg(MF, RegToId, IdToReg, TRI); + if (RegToId.empty()) + return false; + + MachineInstr *DummyOp = nullptr; + if (BasicBlockScopeOnly) { + const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + // For local analysis, create a dummy operation to record uses that are not + // local. + DummyOp = MF.CreateMachineInstr(TII->get(AArch64::COPY), DebugLoc()); } - // Return "no change": The pass only collects information. - return false; + unsigned NbReg = RegToId.size(); + bool Modified = false; + + // Start with ADRP. + InstrToInstrs *ColorOpToReachedUses = new InstrToInstrs[NbReg]; + + // Compute the reaching def in ADRP mode, meaning ADRP definitions + // are first considered as uses. + reachingDef(MF, ColorOpToReachedUses, RegToId, true, DummyOp); + DEBUG(dbgs() << "ADRP reaching defs\n"); + DEBUG(printReachingDef(ColorOpToReachedUses, NbReg, TRI, IdToReg)); + + // Translate the definition to uses map into a use to definitions map to ease + // statistic computation. + InstrToInstrs ADRPToReachingDefs; + reachedUsesToDefs(ADRPToReachingDefs, ColorOpToReachedUses, RegToId, true); + + // Compute LOH for ADRP. + computeADRP(ADRPToReachingDefs, *AArch64FI, MDT); + delete[] ColorOpToReachedUses; + + // Continue with general ADRP -> ADD/LDR -> LDR/STR pattern. + ColorOpToReachedUses = new InstrToInstrs[NbReg]; + + // first perform a regular reaching def analysis. + reachingDef(MF, ColorOpToReachedUses, RegToId, false, DummyOp); + DEBUG(dbgs() << "All reaching defs\n"); + DEBUG(printReachingDef(ColorOpToReachedUses, NbReg, TRI, IdToReg)); + + // Turn that into a use to defs to ease statistic computation. + InstrToInstrs UsesToReachingDefs; + reachedUsesToDefs(UsesToReachingDefs, ColorOpToReachedUses, RegToId, false); + + // Compute other than AdrpAdrp LOH. + computeOthers(UsesToReachingDefs, ColorOpToReachedUses, *AArch64FI, RegToId, + MDT); + delete[] ColorOpToReachedUses; + + if (BasicBlockScopeOnly) + MF.DeleteMachineInstr(DummyOp); + + return Modified; } +/// createAArch64CollectLOHPass - returns an instance of the Statistic for +/// linker optimization pass. 
FunctionPass *llvm::createAArch64CollectLOHPass() { return new AArch64CollectLOH(); } diff --git a/llvm/test/CodeGen/AArch64/arm64-collect-loh-garbage-crash.ll b/llvm/test/CodeGen/AArch64/arm64-collect-loh-garbage-crash.ll index 727c189..4a36965 100644 --- a/llvm/test/CodeGen/AArch64/arm64-collect-loh-garbage-crash.ll +++ b/llvm/test/CodeGen/AArch64/arm64-collect-loh-garbage-crash.ll @@ -1,4 +1,4 @@ -; RUN: llc -o - %s -mtriple=arm64-apple-ios -O3 -aarch64-enable-collect-loh | FileCheck %s +; RUN: llc -mtriple=arm64-apple-ios -O3 -aarch64-enable-collect-loh -aarch64-collect-loh-bb-only=true -aarch64-collect-loh-pre-collect-register=false < %s -o - | FileCheck %s ; Check that the LOH analysis does not crash when the analysed chained ; contains instructions that are filtered out. ; diff --git a/llvm/test/CodeGen/AArch64/arm64-collect-loh-str.ll b/llvm/test/CodeGen/AArch64/arm64-collect-loh-str.ll index 773286e..e3df418 100644 --- a/llvm/test/CodeGen/AArch64/arm64-collect-loh-str.ll +++ b/llvm/test/CodeGen/AArch64/arm64-collect-loh-str.ll @@ -1,4 +1,4 @@ -; RUN: llc -o - %s -mtriple=arm64-apple-ios -O2 | FileCheck %s +; RUN: llc -mtriple=arm64-apple-ios -O2 -aarch64-enable-collect-loh -aarch64-collect-loh-bb-only=false < %s -o - | FileCheck %s ; Test case for . ; AdrpAddStr cannot be used when the store uses same ; register as address and value. Indeed, the related diff --git a/llvm/test/CodeGen/AArch64/arm64-collect-loh.ll b/llvm/test/CodeGen/AArch64/arm64-collect-loh.ll index c7ba989..b697b6e 100644 --- a/llvm/test/CodeGen/AArch64/arm64-collect-loh.ll +++ b/llvm/test/CodeGen/AArch64/arm64-collect-loh.ll @@ -1,5 +1,5 @@ -; RUN: llc -o - %s -mtriple=arm64-apple-ios -O2 | FileCheck %s -; RUN: llc -o - %s -mtriple=arm64-linux-gnu -O2 | FileCheck %s --check-prefix=CHECK-ELF +; RUN: llc -mtriple=arm64-apple-ios -O2 -aarch64-enable-collect-loh -aarch64-collect-loh-bb-only=false < %s -o - | FileCheck %s +; RUN: llc -mtriple=arm64-linux-gnu -O2 -aarch64-enable-collect-loh -aarch64-collect-loh-bb-only=false < %s -o - | FileCheck %s --check-prefix=CHECK-ELF ; CHECK-ELF-NOT: .loh ; CHECK-ELF-NOT: AdrpAdrp @@ -633,14 +633,11 @@ define void @setL(<1 x i8> %t) { ; a tuple register to appear in the lowering. Thus, the target ; cpu is required to have the problem reproduced. ; CHECK-LABEL: _uninterestingSub -; CHECK: [[LOH_LABEL0:Lloh[0-9]+]]: ; CHECK: adrp [[ADRP_REG:x[0-9]+]], [[CONSTPOOL:lCPI[0-9]+_[0-9]+]]@PAGE -; CHECK: [[LOH_LABEL1:Lloh[0-9]+]]: -; CHECK: ldr q[[IDX:[0-9]+]], {{\[}}[[ADRP_REG]], [[CONSTPOOL]]@PAGEOFF] +; CHECK-NEXT: ldr q[[IDX:[0-9]+]], {{\[}}[[ADRP_REG]], [[CONSTPOOL]]@PAGEOFF] ; The tuple comes from the next instruction. 
; CHECK-NEXT: tbl.16b v{{[0-9]+}}, { v{{[0-9]+}}, v{{[0-9]+}} }, v[[IDX]] ; CHECK: ret -; CHECK: .loh AdrpLdr [[LOH_LABEL0]], [[LOH_LABEL1]] define void @uninterestingSub(i8* nocapture %row) #0 { %tmp = bitcast i8* %row to <16 x i8>* %tmp1 = load <16 x i8>, <16 x i8>* %tmp, align 16 @@ -667,10 +664,10 @@ entry: if.then.i: ret void if.end.i: -; CHECK: .loh AdrpLdrGot -; CHECK: .loh AdrpLdrGot -; CHECK: .loh AdrpAdrp -; CHECK: .loh AdrpLdr +; CHECK: .loh AdrpAdrp Lloh91, Lloh93 +; CHECK: .loh AdrpLdr Lloh91, Lloh92 +; CHECK: .loh AdrpLdrGot Lloh93, Lloh95 +; CHECK: .loh AdrpLdrGot Lloh94, Lloh96 %mul.i.i.i = fmul double undef, 1.000000e-06 %add.i.i.i = fadd double undef, %mul.i.i.i %sub.i.i = fsub double %add.i.i.i, undef diff --git a/llvm/test/CodeGen/AArch64/loh.mir b/llvm/test/CodeGen/AArch64/loh.mir deleted file mode 100644 index 4c0689d..0000000 --- a/llvm/test/CodeGen/AArch64/loh.mir +++ /dev/null @@ -1,180 +0,0 @@ -# RUN: llc -o /dev/null %s -mtriple=aarch64-apple-ios -run-pass=aarch64-collect-loh -debug-only=aarch64-collect-loh 2>&1 | FileCheck %s -# REQUIRES: asserts ---- | - define void @func0() { ret void } - - declare void @extfunc() - - @g0 = external global i32 - @g1 = external global i32 - @g2 = external global i32 - @g3 = external global i32 - @g4 = external global i32 - @g5 = external global i32 -... ---- -# Check various LOH variants. Remember that the algorithms walks the basic -# blocks backwards. -# CHECK-LABEL: ********** AArch64 Collect LOH ********** -# CHECK-LABEL: Looking in function func0 -name: func0 -body: | - bb.0: - ; CHECK: Adding MCLOH_AdrpAdrp: - ; CHECK-NEXT: %X1 = ADRP - ; CHECK-NEXT: %X1 = ADRP - ; CHECK-NEXT: Adding MCLOH_AdrpAdrp: - ; CHECK-NEXT: %X1 = ADRP - ; CHECK-NEXT: %X1 = ADRP - ; CHECK-NEXT: Adding MCLOH_AdrpAdrp: - ; CHECK-NEXT: %X0 = ADRP - ; CHECK-NEXT: %X0 = ADRP - %x0 = ADRP target-flags(aarch64-page) @g0 - %x0 = ADRP target-flags(aarch64-page) @g1 - %x1 = ADRP target-flags(aarch64-page) @g2 - %x1 = ADRP target-flags(aarch64-page) @g3 - %x1 = ADRP target-flags(aarch64-page) @g4 - - bb.1: - ; CHECK-NEXT: Adding MCLOH_AdrpAdd: - ; CHECK-NEXT: %X20 = ADRP - ; CHECK-NEXT: %X3 = ADDXri %X20, - ; CHECK-NEXT: Adding MCLOH_AdrpAdd: - ; CHECK-NEXT: %X1 = ADRP - ; CHECK-NEXT: %X1 = ADDXri %X1, - %x1 = ADRP target-flags(aarch64-page) @g0 - %x9 = SUBXri %x11, 5, 0 ; should not affect MCLOH formation - %x1 = ADDXri %x1, target-flags(aarch64-pageoff) @g0, 0 - %x20 = ADRP target-flags(aarch64-page) @g0 - BL @extfunc, csr_aarch64_aapcs ; should not clobber X20 - %x3 = ADDXri %x20, target-flags(aarch64-pageoff) @g0, 0 - - bb.2: - ; CHECK-NOT: MCLOH_AdrpAdd - %x9 = ADRP target-flags(aarch64-page) @g0 - BL @extfunc, csr_aarch64_aapcs ; clobbers x9 - %x9 = ADDXri %x9, target-flags(aarch64-pageoff) @g0, 0 - - bb.3: - ; CHECK-NOT: MCLOH_AdrpAdd - %x10 = ADRP target-flags(aarch64-page) @g0 - HINT 0, implicit def dead %x10 ; clobbers x10 - %x10 = ADDXri %x10, target-flags(aarch64-pageoff) @g0, 0 - - bb.4: - ; Cannot produce a LOH for multiple users - ; CHECK-NOT: MCLOH_AdrpAdd - %x10 = ADRP target-flags(aarch64-page) @g0 - HINT 0, implicit def dead %x10 ; clobbers x10 - %x11 = ADDXri %x10, target-flags(aarch64-pageoff) @g0, 0 - %x12 = ADDXri %x10, target-flags(aarch64-pageoff) @g0, 0 - - bb.5: - ; CHECK-NEXT: Adding MCLOH_AdrpLdr: - ; CHECK-NEXT: %X5 = ADRP - ; CHECK-NEXT: %S6 = LDRSui %X5, - ; CHECK-NEXT: Adding MCLOH_AdrpLdr: - ; CHECK-NEXT: %X4 = ADRP - ; CHECK-NEXT: %X4 = LDRXui %X4, - %x4 = ADRP target-flags(aarch64-page) @g2 - %x4 = LDRXui %x4, 
target-flags(aarch64-pageoff) @g2 - %x5 = ADRP target-flags(aarch64-page) @g2 - %s6 = LDRSui %x5, target-flags(aarch64-pageoff) @g2 - - bb.6: - ; CHECK-NEXT: Adding MCLOH_AdrpLdrGot: - ; CHECK-NEXT: %X5 = ADRP - ; CHECK-NEXT: %X6 = LDRXui %X5, - ; CHECK-NEXT: Adding MCLOH_AdrpLdrGot: - ; CHECK-NEXT: %X4 = ADRP - ; CHECK-NEXT: %X4 = LDRXui %X4, - %x4 = ADRP target-flags(aarch64-page, aarch64-got) @g2 - %x4 = LDRXui %x4, target-flags(aarch64-pageoff, aarch64-got) @g2 - %x5 = ADRP target-flags(aarch64-page, aarch64-got) @g2 - %x6 = LDRXui %x5, target-flags(aarch64-pageoff, aarch64-got) @g2 - - bb.7: - ; CHECK-NOT: Adding MCLOH_AdrpLdrGot: - ; This sequence makes no sense and should not produce a LdrGot - %x11 = ADRP target-flags(aarch64-page, aarch64-got) @g5 - %s11 = LDRSui %x4, target-flags(aarch64-pageoff, aarch64-got) @g5 - - bb.8: - ; CHECK-NEXT: Adding MCLOH_AdrpAddLdr: - ; CHECK-NEXT: %X7 = ADRP [TF=1] - ; CHECK-NEXT: %X8 = ADDXri %X7, - ; CHECK-NEXT: %D1 = LDRDui %X8, 8 - %x7 = ADRP target-flags(aarch64-page) @g3 - %x8 = ADDXri %x7, target-flags(aarch64-pageoff) @g3, 0 - %d1 = LDRDui %x8, 8 - - bb.9: - ; CHECK-NEXT: Adding MCLOH_AdrpAdd: - ; CHECK-NEXT: %X3 = ADRP - ; CHECK-NEXT: %X3 = ADDXri %X3, - ; CHECK-NEXT: Adding MCLOH_AdrpAdd: - ; CHECK-NEXT: %X5 = ADRP - ; CHECK-NEXT: %X2 = ADDXri %X5, - ; CHECK-NEXT: Adding MCLOH_AdrpAddStr: - ; CHECK-NEXT: %X1 = ADRP - ; CHECK-NEXT: %X1 = ADDXri %X1, - ; CHECK-NEXT: STRXui %X2, %X1, 16 - %x1 = ADRP target-flags(aarch64-page) @g3 - %x1 = ADDXri %x1, target-flags(aarch64-pageoff) @g3, 0 - STRXui %x2, %x1, 16 - - ; This sequence should just produce an AdrpAdd (not AdrpAddStr) - %x5 = ADRP target-flags(aarch64-page) @g3 - %x2 = ADDXri %x5, target-flags(aarch64-pageoff) @g3, 0 - STRXui %x2, %x11, 16 - - ; This sequence should just produce an AdrpAdd (not AdrpAddStr) - %x3 = ADRP target-flags(aarch64-page) @g3 - %x3 = ADDXri %x3, target-flags(aarch64-pageoff) @g3, 0 - STRXui %x3, %x3, 16 - - bb.10: - ; CHECK-NEXT: Adding MCLOH_AdrpLdr: - ; CHECK-NEXT: %X2 = ADRP - ; CHECK-NEXT: %X2 = LDRXui %X2, - ; CHECK-NEXT: Adding MCLOH_AdrpLdrGotLdr: - ; CHECK-NEXT: %X1 = ADRP - ; CHECK-NEXT: %X1 = LDRXui %X1, - ; CHECK-NEXT: %X1 = LDRXui %X1, 24 - %x1 = ADRP target-flags(aarch64-page, aarch64-got) @g4 - %x1 = LDRXui %x1, target-flags(aarch64-pageoff, aarch64-got) @g4 - %x1 = LDRXui %x1, 24 - ; Should just produce a MCLOH_AdrpLdr (not MCLOH_AdrpLdrGotLdr) - %x2 = ADRP target-flags(aarch64-page) @g3 - %x2 = LDRXui %x2, target-flags(aarch64-pageoff) @g3 - %x2 = LDRXui %x2, 24 - - bb.11: - ; CHECK-NEXT: Adding MCLOH_AdrpLdr - ; CHECK-NEXT: %X5 = ADRP - ; CHECK-NEXT: %X5 = LDRXui %X5, - ; CHECK-NEXT: Adding MCLOH_AdrpLdrGotStr: - ; CHECK-NEXT: %X1 = ADRP - ; CHECK-NEXT: %X1 = LDRXui %X1, - ; CHECK-NEXT: STRXui %X4, %X1, 32 - %x1 = ADRP target-flags(aarch64-page, aarch64-got) @g4 - %x1 = LDRXui %x1, target-flags(aarch64-pageoff, aarch64-got) @g4 - STRXui %x4, %x1, 32 - ; Should just produce a MCLOH_AdrpLdr (not MCLOH_AdrpLdrGotStr) - %x5 = ADRP target-flags(aarch64-page) @g1 - %x5 = LDRXui %x5, target-flags(aarch64-pageoff) @g1 - STRXui %x11, %x5, 32 - - bb.12: - successors: %bb.13 - ; Cannot produce a LOH for multiple users - ; CHECK-NOT: MCLOH_AdrpAdd - %x10 = ADRP target-flags(aarch64-page) @g0 - HINT 0, implicit def dead %x10 ; clobbers x10 - %x11 = ADDXri %x10, target-flags(aarch64-pageoff) @g0, 0 - B %bb.13 - - bb.13: - liveins: %x10 - %x12 = ADDXri %x10, target-flags(aarch64-pageoff) @g0, 0 -... -- 2.7.4
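The comment block above reachingDefAlgorithm() states the dataflow equations the restored pass iterates to a fixed point: In[bb][color] is the union of Out[pred][color] over all predecessors, and Out[bb] = Gen[bb] U (In[bb] - Kill[bb]). As a minimal standalone sketch of that iteration, assuming a toy CFG with integer definition ids and a single tracked register, the code below uses plain standard-library containers; Block, reachingDefs and the example graph are illustrative stand-ins, not types or functions from this patch or from LLVM.

#include <cstdio>
#include <set>
#include <utility>
#include <vector>

// Illustrative stand-in for a basic block with per-block dataflow sets; this
// is a sketch of the equations quoted above, not the pass's own data layout.
struct Block {
  std::vector<int> Preds; // Indices of predecessor blocks in the CFG.
  std::set<int> Gen;      // Definition ids generated in this block.
  std::set<int> Kill;     // Definition ids killed in this block.
  std::set<int> In, Out;  // Computed reaching-definition sets.
};

// Iterate In[b] = union of Out[p] over predecessors p and
// Out[b] = Gen[b] U (In[b] - Kill[b]) until no Out set changes.
static void reachingDefs(std::vector<Block> &Blocks) {
  bool Changed;
  do {
    Changed = false;
    for (Block &B : Blocks) {
      B.In.clear();
      for (int P : B.Preds)
        B.In.insert(Blocks[P].Out.begin(), Blocks[P].Out.end());
      std::set<int> NewOut = B.Gen;
      for (int D : B.In)
        if (!B.Kill.count(D))
          NewOut.insert(D);
      if (NewOut != B.Out) {
        B.Out = std::move(NewOut);
        Changed = true;
      }
    }
  } while (Changed);
}

int main() {
  // Diamond CFG: 0 -> {1, 2} -> 3. Definition 0 (from block 0) is killed on
  // the path through block 1 but still reaches block 3 via block 2, so the
  // fixed point gives In[3] = {0, 1, 2}.
  std::vector<Block> Blocks(4);
  Blocks[1].Preds = {0};
  Blocks[2].Preds = {0};
  Blocks[3].Preds = {1, 2};
  Blocks[0].Gen = {0};
  Blocks[1].Gen = {1};
  Blocks[1].Kill = {0};
  Blocks[2].Gen = {2};
  reachingDefs(Blocks);
  for (int D : Blocks[3].In)
    std::printf("def %d reaches the entry of block 3\n", D);
  return 0;
}

In the pass itself the In/Out sets only ever grow (elements are inserted, never removed), which is what guarantees the do/while loop terminates; this sketch recomputes In on each round but converges to the same fixed point, and the real implementation keeps one such set per tracked register "color" rather than a single global set.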