From 4379a400889c88b2da76d12170df0965ee16c78c Mon Sep 17 00:00:00 2001 From: Sam Parker Date: Mon, 22 Jul 2019 14:16:40 +0000 Subject: [PATCH] [ARM][LowOverheadLoops] Revert remaining pseudos ARMLowOverheadLoops would assert a failure if it did not find all the pseudo instructions that comprise the hardware loop. Instead of doing this, iterate through all the instructions of the function and revert any remaining pseudo instructions that haven't been converted. Differential Revision: https://reviews.llvm.org/D65080 llvm-svn: 366691 --- llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp | 68 +++++++-- .../Thumb2/LowOverheadLoops/revert-non-loop.mir | 170 +++++++++++++++++++++ 2 files changed, 226 insertions(+), 12 deletions(-) create mode 100644 llvm/test/CodeGen/Thumb2/LowOverheadLoops/revert-non-loop.mir diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp index cedf3bd..787da75 100644 --- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp +++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp @@ -54,6 +54,8 @@ namespace { bool ProcessLoop(MachineLoop *ML); + bool RevertNonLoops(MachineFunction &MF); + void RevertWhile(MachineInstr *MI) const; void RevertLoopDec(MachineInstr *MI) const; @@ -98,9 +100,15 @@ bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &MF) { if (!ML->getParentLoop()) Changed |= ProcessLoop(ML); } + Changed |= RevertNonLoops(MF); return Changed; } +static bool IsLoopStart(MachineInstr &MI) { + return MI.getOpcode() == ARM::t2DoLoopStart || + MI.getOpcode() == ARM::t2WhileLoopStart; +} + bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) { bool Changed = false; @@ -111,15 +119,10 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) { LLVM_DEBUG(dbgs() << "ARM Loops: Processing " << *ML); - auto IsLoopStart = [](MachineInstr &MI) { - return MI.getOpcode() == ARM::t2DoLoopStart || - MI.getOpcode() == ARM::t2WhileLoopStart; - }; - // Search the given block for a loop start instruction. 
If one isn't found, // and there's only one predecessor block, search that one too. std::function<MachineInstr*(MachineBasicBlock*)> SearchForStart = - [&IsLoopStart, &SearchForStart](MachineBasicBlock *MBB) -> MachineInstr* { + [&SearchForStart](MachineBasicBlock *MBB) -> MachineInstr* { for (auto &MI : *MBB) { if (IsLoopStart(MI)) return &MI; @@ -165,6 +168,8 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) { Dec = &MI; else if (MI.getOpcode() == ARM::t2LoopEnd) End = &MI; + else if (IsLoopStart(MI)) + Start = &MI; else if (MI.getDesc().isCall()) // TODO: Though the call will require LE to execute again, does this // mean we should revert? Always executing LE hopefully should be @@ -190,11 +195,16 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) { break; } + LLVM_DEBUG(if (Start) dbgs() << "ARM Loops: Found Loop Start: " << *Start; + if (Dec) dbgs() << "ARM Loops: Found Loop Dec: " << *Dec; + if (End) dbgs() << "ARM Loops: Found Loop End: " << *End;); + if (!Start && !Dec && !End) { LLVM_DEBUG(dbgs() << "ARM Loops: Not a low-overhead loop.\n"); return Changed; - } if (!(Start && Dec && End)) { - report_fatal_error("Failed to find all loop components"); + } else if (!(Start && Dec && End)) { + LLVM_DEBUG(dbgs() << "ARM Loops: Failed to find all loop components.\n"); + return false; } if (!End->getOperand(1).isMBB() || @@ -216,10 +226,6 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) { Revert = true; } - LLVM_DEBUG(dbgs() << "ARM Loops:\n - Found Loop Start: " << *Start - << " - Found Loop Dec: " << *Dec - << " - Found Loop End: " << *End); - Expand(ML, Start, Dec, End, Revert); return true; } @@ -379,6 +385,44 @@ void ARMLowOverheadLoops::Expand(MachineLoop *ML, MachineInstr *Start, } } +bool ARMLowOverheadLoops::RevertNonLoops(MachineFunction &MF) { + LLVM_DEBUG(dbgs() << "ARM Loops: Reverting any remaining pseudos...\n"); + bool Changed = false; + + for (auto &MBB : MF) { + SmallVector<MachineInstr*, 4> Starts; + SmallVector<MachineInstr*, 4> Decs; + SmallVector<MachineInstr*, 4> Ends; + + for (auto &I : MBB) { + if 
(IsLoopStart(I)) + Starts.push_back(&I); + else if (I.getOpcode() == ARM::t2LoopDec) + Decs.push_back(&I); + else if (I.getOpcode() == ARM::t2LoopEnd) + Ends.push_back(&I); + } + + if (Starts.empty() && Decs.empty() && Ends.empty()) + continue; + + Changed = true; + + for (auto *Start : Starts) { + if (Start->getOpcode() == ARM::t2WhileLoopStart) + RevertWhile(Start); + else + Start->eraseFromParent(); + } + for (auto *Dec : Decs) + RevertLoopDec(Dec); + + for (auto *End : Ends) + RevertLoopEnd(End); + } + return Changed; +} + FunctionPass *llvm::createARMLowOverheadLoopsPass() { return new ARMLowOverheadLoops(); } diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/revert-non-loop.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/revert-non-loop.mir new file mode 100644 index 0000000..4dad8f7 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/revert-non-loop.mir @@ -0,0 +1,170 @@ +# RUN: llc -mtriple=thumbv8.1m.main -run-pass=arm-low-overhead-loops %s --verify-machineinstrs -o - | FileCheck %s + +# CHECK: name: non_loop +# CHECK: bb.0.entry: +# CHECK: tBcc %bb.2, 3 +# CHECK: tB %bb.1, 14 +# CHECK: bb.1.not.preheader: +# CHECK: t2CMPri $lr, 0, 14 +# CHECK: t2Bcc %bb.3, 0 +# CHECK: tB %bb.2 +# CHECK: bb.2.while.body: +# CHECK: t2CMPri $lr, 0, 14 +# CHECK: t2Bcc %bb.2, 1 +# CHECK: tB %bb.3 +# CHECK: bb.3.while.end: + +--- | + define void @non_loop(i16* nocapture %a, i16* nocapture readonly %b, i32 %N) { + entry: + %cmp = icmp ugt i32 %N, 2 + br i1 %cmp, label %not.preheader, label %while.body + + not.preheader: ; preds = %entry + %test = call i1 @llvm.test.set.loop.iterations.i32(i32 %N) + br i1 %test, label %while.body, label %while.end + + while.body: ; preds = %while.body, %not.preheader, %entry + %a.addr.06 = phi i16* [ %incdec.ptr1, %while.body ], [ %a, %entry ], [ %a, %not.preheader ] + %b.addr.05 = phi i16* [ %incdec.ptr, %while.body ], [ %b, %entry ], [ %b, %not.preheader ] + %count = phi i32 [ %count.next, %while.body ], [ %N, %entry ], [ %N, 
%not.preheader ] + %incdec.ptr = getelementptr inbounds i16, i16* %b.addr.05, i32 1 + %load = load i16, i16* %b.addr.05, align 2 + %incdec.ptr1 = getelementptr inbounds i16, i16* %a.addr.06, i32 1 + store i16 %load, i16* %a.addr.06, align 2 + %count.next = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %count, i32 1) + %cmp1 = icmp ne i32 %count.next, 0 + br i1 %cmp1, label %while.body, label %while.end + + while.end: ; preds = %while.body, %not.preheader + ret void + } + + declare i1 @llvm.test.set.loop.iterations.i32(i32) #0 + declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0 + + attributes #0 = { noduplicate nounwind } + attributes #1 = { nounwind } + +... +--- +name: non_loop +alignment: 1 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: false +hasWinCFI: false +registers: [] +liveins: + - { reg: '$r0', virtual-reg: '' } + - { reg: '$r1', virtual-reg: '' } + - { reg: '$r2', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 32 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -16, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 2, name: '', type: spill-slot, offset: -20, size: 4, alignment: 4, + 
stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 3, name: '', type: spill-slot, offset: -24, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 4, name: '', type: spill-slot, offset: -28, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 5, name: '', type: spill-slot, offset: -32, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 6, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 7, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + bb.0.entry: + successors: %bb.1(0x40000000), %bb.2(0x40000000) + + frame-setup tPUSH 14, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + frame-setup CFI_INSTRUCTION def_cfa_offset 8 + frame-setup CFI_INSTRUCTION offset $lr, -4 + frame-setup CFI_INSTRUCTION offset $r7, -8 + $sp = frame-setup tSUBspi $sp, 6, 14, $noreg + frame-setup CFI_INSTRUCTION def_cfa_offset 32 + tCMPi8 renamable $r2, 3, 14, $noreg, implicit-def $cpsr + $r3 = tMOVr $r0, 14, $noreg + $r12 = tMOVr $r1, 14, $noreg + $lr = tMOVr $r2, 14, $noreg + tSTRspi killed $r2, $sp, 5, 14, $noreg :: (store 4 into %stack.0) + 
tSTRspi killed $r1, $sp, 4, 14, $noreg :: (store 4 into %stack.1) + tSTRspi killed $r0, $sp, 3, 14, $noreg :: (store 4 into %stack.2) + tSTRspi killed $r3, $sp, 2, 14, $noreg :: (store 4 into %stack.3) + t2STRi12 killed $r12, $sp, 4, 14, $noreg :: (store 4 into %stack.4) + t2STRi12 killed $lr, $sp, 0, 14, $noreg :: (store 4 into %stack.5) + tBcc %bb.2, 3, $cpsr + tB %bb.1, 14, $noreg + + bb.1.not.preheader: + successors: %bb.2(0x40000000), %bb.3(0x40000000) + + $r0 = tLDRspi $sp, 3, 14, $noreg :: (load 4 from %stack.2) + $r1 = tLDRspi $sp, 4, 14, $noreg :: (load 4 from %stack.1) + $r2 = tLDRspi $sp, 5, 14, $noreg :: (load 4 from %stack.0) + $r3 = tLDRspi $sp, 5, 14, $noreg :: (load 4 from %stack.0) + tSTRspi killed $r0, $sp, 2, 14, $noreg :: (store 4 into %stack.3) + tSTRspi killed $r1, $sp, 1, 14, $noreg :: (store 4 into %stack.4) + tSTRspi killed $r2, $sp, 0, 14, $noreg :: (store 4 into %stack.5) + t2WhileLoopStart killed renamable $r3, %bb.3 + tB %bb.2, 14, $noreg + + bb.2.while.body: + successors: %bb.2(0x40000000), %bb.3(0x40000000) + + $r0 = tLDRspi $sp, 0, 14, $noreg :: (load 4 from %stack.5) + $r1 = tLDRspi $sp, 1, 14, $noreg :: (load 4 from %stack.4) + $r2 = tLDRspi $sp, 2, 14, $noreg :: (load 4 from %stack.3) + renamable $r3, renamable $r1 = t2LDRH_POST renamable $r1, 2, 14, $noreg :: (load 2 from %ir.b.addr.05) + early-clobber renamable $r2 = t2STRH_POST killed renamable $r3, renamable $r2, 2, 14, $noreg :: (store 2 into %ir.a.addr.06) + $lr = tMOVr killed $r0, 14, $noreg + renamable $lr = t2LoopDec killed renamable $lr, 1 + $r0 = tMOVr $lr, 14, $noreg + tSTRspi killed $r0, $sp, 0, 14, $noreg :: (store 4 into %stack.5) + tSTRspi killed $r1, $sp, 1, 14, $noreg :: (store 4 into %stack.4) + tSTRspi killed $r2, $sp, 2, 14, $noreg :: (store 4 into %stack.3) + t2LoopEnd killed renamable $lr, %bb.2 + tB %bb.3, 14, $noreg + + bb.3.while.end: + $sp = tADDspi $sp, 6, 14, $noreg + tPOP_RET 14, $noreg, def $r7, def $pc + +... -- 2.7.4