From e36d7716c38ba1ec44d79b668d15584ecc725d5f Mon Sep 17 00:00:00 2001 From: Michael Kuperstein Date: Thu, 11 Aug 2016 17:38:33 +0000 Subject: [PATCH] Make TwoAddressInstructionPass::rescheduleMIBelowKill subreg-aware This fixes PR28824. Differential Revision: https://reviews.llvm.org/D23220 llvm-svn: 278370 --- llvm/lib/CodeGen/TwoAddressInstructionPass.cpp | 39 +++++++++++++++++--------- llvm/test/CodeGen/X86/pr28824.ll | 23 +++++++++++++++ 2 files changed, 48 insertions(+), 14 deletions(-) create mode 100644 llvm/test/CodeGen/X86/pr28824.ll diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp index 3d9a518..8feb18b 100644 --- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -29,7 +29,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" @@ -539,6 +539,16 @@ regsAreCompatible(unsigned RegA, unsigned RegB, const TargetRegisterInfo *TRI) { return TRI->regsOverlap(RegA, RegB); } +// Returns true if Reg is equal or aliased to at least one register in Set. +static bool regOverlapsSet(const SmallVectorImpl &Set, unsigned Reg, + const TargetRegisterInfo *TRI) { + for (unsigned R : Set) + if (TRI->regsOverlap(R, Reg)) + return true; + + return false; +} + /// Return true if it's potentially profitable to commute the two-address /// instruction that's being processed. bool @@ -864,9 +874,9 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, // FIXME: Needs more sophisticated heuristics. return false; - SmallSet Uses; - SmallSet Kills; - SmallSet Defs; + SmallVector Uses; + SmallVector Kills; + SmallVector Defs; for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg()) continue; @@ -874,12 +884,12 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, if (!MOReg) continue; if (MO.isDef()) - Defs.insert(MOReg); + Defs.push_back(MOReg); else { - Uses.insert(MOReg); + Uses.push_back(MOReg); if (MOReg != Reg && (MO.isKill() || (LIS && isPlainlyKilled(MI, MOReg, LIS)))) - Kills.insert(MOReg); + Kills.push_back(MOReg); } } @@ -888,8 +898,9 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator AfterMI = std::next(Begin); MachineBasicBlock::iterator End = AfterMI; - while (End->isCopy() && Defs.count(End->getOperand(1).getReg())) { - Defs.insert(End->getOperand(0).getReg()); + while (End->isCopy() && + regOverlapsSet(Defs, End->getOperand(1).getReg(), TRI)) { + Defs.push_back(End->getOperand(0).getReg()); ++End; } @@ -915,21 +926,21 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, if (!MOReg) continue; if (MO.isDef()) { - if (Uses.count(MOReg)) + if (regOverlapsSet(Uses, MOReg, TRI)) // Physical register use would be clobbered. return false; - if (!MO.isDead() && Defs.count(MOReg)) + if (!MO.isDead() && regOverlapsSet(Defs, MOReg, TRI)) // May clobber a physical register def. // FIXME: This may be too conservative. It's ok if the instruction // is sunken completely below the use. return false; } else { - if (Defs.count(MOReg)) + if (regOverlapsSet(Defs, MOReg, TRI)) return false; bool isKill = MO.isKill() || (LIS && isPlainlyKilled(&OtherMI, MOReg, LIS)); - if (MOReg != Reg && - ((isKill && Uses.count(MOReg)) || Kills.count(MOReg))) + if (MOReg != Reg && ((isKill && regOverlapsSet(Uses, MOReg, TRI)) || + regOverlapsSet(Kills, MOReg, TRI))) // Don't want to extend other live ranges and update kills. return false; if (MOReg == Reg && !isKill) diff --git a/llvm/test/CodeGen/X86/pr28824.ll b/llvm/test/CodeGen/X86/pr28824.ll new file mode 100644 index 0000000..ced1f00 --- /dev/null +++ b/llvm/test/CodeGen/X86/pr28824.ll @@ -0,0 +1,23 @@ +; RUN: llc < %s -mtriple=i386-unknown-linux-gnu | FileCheck %s + +@d = global i32 0, align 4 + +; Verify the sar happens before ecx is clobbered with the parameter being +; passed to fn3 +; CHECK-LABEL: fn4 +; CHECK: movb d, %cl +; CHECK: sarl %cl +; CHECK: movl $2, %ecx +define i32 @fn4(i32 %i) #0 { +entry: + %0 = load i32, i32* @d, align 4 + %shr = ashr i32 %i, %0 + tail call fastcc void @fn3(i32 2, i32 5, i32 %shr, i32 %i) + %cmp = icmp slt i32 %shr, 1 + %. = zext i1 %cmp to i32 + ret i32 %. +} + +declare void @fn3(i32 %p1, i32 %p2, i32 %p3, i32 %p4) #0 + +attributes #0 = { nounwind } -- 2.7.4