From 5ba804bc11ec0b01d959e22cb009f5e03cf8ab06 Mon Sep 17 00:00:00 2001
From: Evandro Menezes
Date: Wed, 15 Nov 2017 21:06:22 +0000
Subject: [PATCH] [AArch64] Refactor the loads and stores optimizer

Move the remaining inline instruction matching in some optimizations into
separate functions, as in the other optimizations. Otherwise, NFC.

Differential revision: https://reviews.llvm.org/D40090

llvm-svn: 318335
---
 .../Target/AArch64/AArch64LoadStoreOptimizer.cpp   | 286 ++++++++++-----------
 1 file changed, 143 insertions(+), 143 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 3458490..95ed522 100644
--- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -168,6 +168,9 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
   // Find and promote load instructions which read directly from store.
   bool tryToPromoteLoadFromStore(MachineBasicBlock::iterator &MBBI);
 
+  // Find and merge base register updates before or after a ld/st instruction.
+  bool tryToMergeLdStUpdate(MachineBasicBlock::iterator &MBBI);
+
   bool optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt);
 
   bool runOnMachineFunction(MachineFunction &Fn) override;
@@ -578,6 +581,75 @@ static bool isPromotableZeroStoreInst(MachineInstr &MI) {
          getLdStRegOp(MI).getReg() == AArch64::WZR;
 }
 
+static bool isPromotableLoadFromStore(MachineInstr &MI) {
+  switch (MI.getOpcode()) {
+  default:
+    return false;
+  // Scaled instructions.
+  case AArch64::LDRBBui:
+  case AArch64::LDRHHui:
+  case AArch64::LDRWui:
+  case AArch64::LDRXui:
+  // Unscaled instructions.
+  case AArch64::LDURBBi:
+  case AArch64::LDURHHi:
+  case AArch64::LDURWi:
+  case AArch64::LDURXi:
+    return true;
+  }
+}
+
+static bool isMergeableLdStUpdate(MachineInstr &MI) {
+  unsigned Opc = MI.getOpcode();
+  switch (Opc) {
+  default:
+    return false;
+  // Scaled instructions.
+  case AArch64::STRSui:
+  case AArch64::STRDui:
+  case AArch64::STRQui:
+  case AArch64::STRXui:
+  case AArch64::STRWui:
+  case AArch64::STRHHui:
+  case AArch64::STRBBui:
+  case AArch64::LDRSui:
+  case AArch64::LDRDui:
+  case AArch64::LDRQui:
+  case AArch64::LDRXui:
+  case AArch64::LDRWui:
+  case AArch64::LDRHHui:
+  case AArch64::LDRBBui:
+  // Unscaled instructions.
+  case AArch64::STURSi:
+  case AArch64::STURDi:
+  case AArch64::STURQi:
+  case AArch64::STURWi:
+  case AArch64::STURXi:
+  case AArch64::LDURSi:
+  case AArch64::LDURDi:
+  case AArch64::LDURQi:
+  case AArch64::LDURWi:
+  case AArch64::LDURXi:
+  // Paired instructions.
+  case AArch64::LDPSi:
+  case AArch64::LDPSWi:
+  case AArch64::LDPDi:
+  case AArch64::LDPQi:
+  case AArch64::LDPWi:
+  case AArch64::LDPXi:
+  case AArch64::STPSi:
+  case AArch64::STPDi:
+  case AArch64::STPQi:
+  case AArch64::STPWi:
+  case AArch64::STPXi:
+    // Make sure this is a reg+imm (as opposed to an address reloc).
+    if (!getLdStOffsetOp(MI).isImm())
+      return false;
+
+    return true;
+  }
+}
+
 MachineBasicBlock::iterator
 AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
                                            MachineBasicBlock::iterator MergeMI,
@@ -1294,10 +1366,13 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
   }
   (void)MIB;
 
-  if (IsPreIdx)
+  if (IsPreIdx) {
+    ++NumPreFolded;
     DEBUG(dbgs() << "Creating pre-indexed load/store.");
-  else
+  } else {
+    ++NumPostFolded;
     DEBUG(dbgs() << "Creating post-indexed load/store.");
+  }
   DEBUG(dbgs() << "    Replacing instructions:\n    ");
   DEBUG(I->print(dbgs()));
   DEBUG(dbgs() << "    ");
@@ -1558,6 +1633,60 @@ bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
   return false;
 }
 
+bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
+    (MachineBasicBlock::iterator &MBBI) {
+  MachineInstr &MI = *MBBI;
+  MachineBasicBlock::iterator E = MI.getParent()->end();
+  MachineBasicBlock::iterator Update;
+
+  // Look forward to try to form a post-index instruction. For example,
+  // ldr x0, [x20]
+  // add x20, x20, #32
+  //   merged into:
+  // ldr x0, [x20], #32
+  Update = findMatchingUpdateInsnForward(MBBI, 0, UpdateLimit);
+  if (Update != E) {
+    // Merge the update into the ld/st.
+    MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/false);
+    return true;
+  }
+
+  // Don't know how to handle unscaled pre/post-index versions below, so bail.
+  if (TII->isUnscaledLdSt(MI.getOpcode()))
+    return false;
+
+  // Look back to try to find a pre-index instruction. For example,
+  // add x0, x0, #8
+  // ldr x1, [x0]
+  //   merged into:
+  // ldr x1, [x0, #8]!
+  Update = findMatchingUpdateInsnBackward(MBBI, UpdateLimit);
+  if (Update != E) {
+    // Merge the update into the ld/st.
+    MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
+    return true;
+  }
+
+  // The immediate in the load/store is scaled by the size of the memory
+  // operation. The immediate in the add we're looking for,
+  // however, is not, so adjust here.
+  int UnscaledOffset = getLdStOffsetOp(MI).getImm() * getMemScale(MI);
+
+  // Look forward to try to find a post-index instruction. For example,
+  // ldr x1, [x0, #64]
+  // add x0, x0, #64
+  //   merged into:
+  // ldr x1, [x0, #64]!
+  Update = findMatchingUpdateInsnForward(MBBI, UnscaledOffset, UpdateLimit);
+  if (Update != E) {
+    // Merge the update into the ld/st.
+    MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
+    return true;
+  }
+
+  return false;
+}
+
 bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
                                         bool EnableNarrowZeroStOpt) {
   bool Modified = false;
@@ -1573,29 +1702,10 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
   //        lsr w2, w1, #16
   for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
        MBBI != E;) {
-    MachineInstr &MI = *MBBI;
-    switch (MI.getOpcode()) {
-    default:
-      // Just move on to the next instruction.
-      ++MBBI;
-      break;
-    // Scaled instructions.
-    case AArch64::LDRBBui:
-    case AArch64::LDRHHui:
-    case AArch64::LDRWui:
-    case AArch64::LDRXui:
-    // Unscaled instructions.
-    case AArch64::LDURBBi:
-    case AArch64::LDURHHi:
-    case AArch64::LDURWi:
-    case AArch64::LDURXi:
-      if (tryToPromoteLoadFromStore(MBBI)) {
-        Modified = true;
-        break;
-      }
+    if (isPromotableLoadFromStore(*MBBI) && tryToPromoteLoadFromStore(MBBI))
+      Modified = true;
+    else
       ++MBBI;
-      break;
-    }
   }
   // 2) Merge adjacent zero stores into a wider store.
   // e.g.,
@@ -1608,17 +1718,14 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
   //   str wzr, [x0, #4]
   // ; becomes
   //   str xzr, [x0]
-  for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
-       EnableNarrowZeroStOpt && MBBI != E;) {
-    if (isPromotableZeroStoreInst(*MBBI)) {
-      if (tryToMergeZeroStInst(MBBI)) {
+  if (EnableNarrowZeroStOpt)
+    for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+         MBBI != E;) {
+      if (isPromotableZeroStoreInst(*MBBI) && tryToMergeZeroStInst(MBBI))
         Modified = true;
-      } else
+      else
         ++MBBI;
-    } else
-      ++MBBI;
-  }
-
+    }
   // 3) Find loads and stores that can be merged into a single load or store
   //    pair instruction.
   // e.g.,
@@ -1642,117 +1749,10 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
   //        ldr x0, [x2], #4
   for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
        MBBI != E;) {
-    MachineInstr &MI = *MBBI;
-    // Do update merging. It's simpler to keep this separate from the above
-    // switchs, though not strictly necessary.
-    unsigned Opc = MI.getOpcode();
-    switch (Opc) {
-    default:
-      // Just move on to the next instruction.
-      ++MBBI;
-      break;
-    // Scaled instructions.
-    case AArch64::STRSui:
-    case AArch64::STRDui:
-    case AArch64::STRQui:
-    case AArch64::STRXui:
-    case AArch64::STRWui:
-    case AArch64::STRHHui:
-    case AArch64::STRBBui:
-    case AArch64::LDRSui:
-    case AArch64::LDRDui:
-    case AArch64::LDRQui:
-    case AArch64::LDRXui:
-    case AArch64::LDRWui:
-    case AArch64::LDRHHui:
-    case AArch64::LDRBBui:
-    // Unscaled instructions.
-    case AArch64::STURSi:
-    case AArch64::STURDi:
-    case AArch64::STURQi:
-    case AArch64::STURWi:
-    case AArch64::STURXi:
-    case AArch64::LDURSi:
-    case AArch64::LDURDi:
-    case AArch64::LDURQi:
-    case AArch64::LDURWi:
-    case AArch64::LDURXi:
-    // Paired instructions.
-    case AArch64::LDPSi:
-    case AArch64::LDPSWi:
-    case AArch64::LDPDi:
-    case AArch64::LDPQi:
-    case AArch64::LDPWi:
-    case AArch64::LDPXi:
-    case AArch64::STPSi:
-    case AArch64::STPDi:
-    case AArch64::STPQi:
-    case AArch64::STPWi:
-    case AArch64::STPXi: {
-      // Make sure this is a reg+imm (as opposed to an address reloc).
-      if (!getLdStOffsetOp(MI).isImm()) {
-        ++MBBI;
-        break;
-      }
-      // Look forward to try to form a post-index instruction. For example,
-      // ldr x0, [x20]
-      // add x20, x20, #32
-      //   merged into:
-      // ldr x0, [x20], #32
-      MachineBasicBlock::iterator Update =
-          findMatchingUpdateInsnForward(MBBI, 0, UpdateLimit);
-      if (Update != E) {
-        // Merge the update into the ld/st.
-        MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/false);
-        Modified = true;
-        ++NumPostFolded;
-        break;
-      }
-
-      // Don't know how to handle unscaled pre/post-index versions below, so
-      // move to the next instruction.
-      if (TII->isUnscaledLdSt(Opc)) {
-        ++MBBI;
-        break;
-      }
-
-      // Look back to try to find a pre-index instruction. For example,
-      // add x0, x0, #8
-      // ldr x1, [x0]
-      //   merged into:
-      // ldr x1, [x0, #8]!
-      Update = findMatchingUpdateInsnBackward(MBBI, UpdateLimit);
-      if (Update != E) {
-        // Merge the update into the ld/st.
-        MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
-        Modified = true;
-        ++NumPreFolded;
-        break;
-      }
-      // The immediate in the load/store is scaled by the size of the memory
-      // operation. The immediate in the add we're looking for,
-      // however, is not, so adjust here.
-      int UnscaledOffset = getLdStOffsetOp(MI).getImm() * getMemScale(MI);
-
-      // Look forward to try to find a post-index instruction. For example,
-      // ldr x1, [x0, #64]
-      // add x0, x0, #64
-      //   merged into:
-      // ldr x1, [x0, #64]!
-      Update = findMatchingUpdateInsnForward(MBBI, UnscaledOffset, UpdateLimit);
-      if (Update != E) {
-        // Merge the update into the ld/st.
-        MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
-        Modified = true;
-        ++NumPreFolded;
-        break;
-      }
-
-      // Nothing found. Just move to the next instruction.
+    if (isMergeableLdStUpdate(*MBBI) && tryToMergeLdStUpdate(MBBI))
+      Modified = true;
+    else
       ++MBBI;
-      break;
-    }
-    }
   }
 
   return Modified;
-- 
2.7.4
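
Note for review, not part of the patch: after this change every pass in
optimizeBlock shares one driver shape, a cheap side-effect-free opcode
predicate (isPromotableLoadFromStore, isPromotableZeroStoreInst,
isMergeableLdStUpdate) guarding a transform helper (the tryTo* functions)
that repositions the iterator itself when it fires. Below is a minimal,
compilable C++ sketch of that shape; Insn, Block, isCandidate, and
tryToTransform are hypothetical stand-ins, not LLVM APIs.

    #include <cstdio>
    #include <list>

    // Hypothetical stand-ins for MachineInstr and MachineBasicBlock.
    struct Insn { int Opcode; };
    using Block = std::list<Insn>;

    // Cheap, side-effect-free predicate, like the is*() opcode switches.
    static bool isCandidate(const Insn &I) {
      return I.Opcode == 42; // placeholder for the opcode switch
    }

    // Transform helper, like the tryTo*() functions: on success it leaves
    // It positioned past whatever it rewrote and returns true.
    static bool tryToTransform(Block &B, Block::iterator &It) {
      It = B.erase(It); // placeholder for the real rewrite
      return true;
    }

    // The common driver shape used by each pass in optimizeBlock().
    static bool optimizeBlock(Block &B) {
      bool Modified = false;
      for (Block::iterator It = B.begin(), E = B.end(); It != E;) {
        if (isCandidate(*It) && tryToTransform(B, It))
          Modified = true; // the transform already advanced It
        else
          ++It; // not a candidate, or the transform bailed out
      }
      return Modified;
    }

    int main() {
      Block B = {{42}, {7}, {42}};
      bool Modified = optimizeBlock(B);
      std::printf("modified: %d, remaining: %zu\n", Modified, B.size());
      return 0;
    }

Keeping the predicate free of side effects is what lets the loop advance
the iterator uniformly in the else arm, which is why the patch can delete
the per-opcode ++MBBI/break bookkeeping from the old switch statements.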