From 8d6ff4c0af843e1a61b76d89812aed91e358de34 Mon Sep 17 00:00:00 2001 From: Roman Tereshin Date: Sat, 20 Oct 2018 00:06:15 +0000 Subject: [PATCH] [MachineCSE][GlobalISel] Making sure MachineCSE works mid-GlobalISel (again) Change of approach, it looks like it's a much better idea to deal with the vregs that have LLTs and reg classes both properly, than trying to avoid creating those across all GlobalISel passes and all targets. The change mostly touches MachineRegisterInfo::constrainRegClass, which is apparently only used by MachineCSE. The changes are NFC for any pipeline but one that contains MachineCSE mid-GlobalISel. NOTE on isCallerPreservedOrConstPhysReg change in MachineCSE: There is no test covering it as the only way to insert a new pass (MachineCSE) from a command line I know of is llc's -run-pass option, which only works with MIR, but MIRParser freezes reserved registers upon MachineFunctions creation, making it impossible to reproduce the state that exposes the issue. Reviwed By: aditya_nandakumar Differential Revision: https://reviews.llvm.org/D53144 llvm-svn: 344822 --- llvm/include/llvm/CodeGen/MachineRegisterInfo.h | 15 +++--- llvm/lib/CodeGen/MachineCSE.cpp | 17 ++++++- llvm/lib/CodeGen/MachineRegisterInfo.cpp | 55 +++++++++------------- .../GlobalISel/machine-cse-mid-pipeline.mir | 38 ++++++++++++--- 4 files changed, 77 insertions(+), 48 deletions(-) diff --git a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h index a6836a5..fef010a 100644 --- a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h +++ b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h @@ -689,15 +689,14 @@ public: unsigned MinNumRegs = 0); /// Constrain the register class or the register bank of the virtual register - /// \p Reg to be a common subclass and a common bank of both registers - /// provided respectively. Do nothing if any of the attributes (classes, - /// banks, or low-level types) of the registers are deemed incompatible, or if - /// the resulting register will have a class smaller than before and of size - /// less than \p MinNumRegs. Return true if such register attributes exist, - /// false otherwise. + /// \p Reg (and low-level type) to be a common subclass or a common bank of + /// both registers provided respectively (and a common low-level type). Do + /// nothing if any of the attributes (classes, banks, or low-level types) of + /// the registers are deemed incompatible, or if the resulting register will + /// have a class smaller than before and of size less than \p MinNumRegs. + /// Return true if such register attributes exist, false otherwise. /// - /// \note Assumes that each register has either a low-level type or a class - /// assigned, but not both. Use this method instead of constrainRegClass and + /// \note Use this method instead of constrainRegClass and /// RegisterBankInfo::constrainGenericRegister everywhere but SelectionDAG /// ISel / FastISel and GlobalISel's InstructionSelect pass respectively. bool constrainRegAttrs(unsigned Reg, unsigned ConstrainingReg, diff --git a/llvm/lib/CodeGen/MachineCSE.cpp b/llvm/lib/CodeGen/MachineCSE.cpp index dcb6f7c..6ee8571 100644 --- a/llvm/lib/CodeGen/MachineCSE.cpp +++ b/llvm/lib/CodeGen/MachineCSE.cpp @@ -235,6 +235,21 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg, return false; } +static bool isCallerPreservedOrConstPhysReg(unsigned Reg, + const MachineFunction &MF, + const TargetRegisterInfo &TRI) { + // MachineRegisterInfo::isConstantPhysReg directly called by + // MachineRegisterInfo::isCallerPreservedOrConstPhysReg expects the + // reserved registers to be frozen. That doesn't cause a problem post-ISel as + // most (if not all) targets freeze reserved registers right after ISel. + // + // It does cause issues mid-GlobalISel, however, hence the additional + // reservedRegsFrozen check. + const MachineRegisterInfo &MRI = MF.getRegInfo(); + return TRI.isCallerPreservedPhysReg(Reg, MF) || + (MRI.reservedRegsFrozen() && MRI.isConstantPhysReg(Reg)); +} + /// hasLivePhysRegDefUses - Return true if the specified instruction read/write /// physical registers (except for dead defs of physical registers). It also /// returns the physical register def by reference if it's the only one and the @@ -254,7 +269,7 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, if (TargetRegisterInfo::isVirtualRegister(Reg)) continue; // Reading either caller preserved or constant physregs is ok. - if (!MRI->isCallerPreservedOrConstPhysReg(Reg)) + if (!isCallerPreservedOrConstPhysReg(Reg, *MI->getMF(), *TRI)) for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) PhysRefs.insert(*AI); } diff --git a/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/llvm/lib/CodeGen/MachineRegisterInfo.cpp index 1da99d9..6e5ca45 100644 --- a/llvm/lib/CodeGen/MachineRegisterInfo.cpp +++ b/llvm/lib/CodeGen/MachineRegisterInfo.cpp @@ -93,36 +93,29 @@ bool MachineRegisterInfo::constrainRegAttrs(unsigned Reg, unsigned ConstrainingReg, unsigned MinNumRegs) { - auto const *OldRC = getRegClassOrNull(Reg); - auto const *RC = getRegClassOrNull(ConstrainingReg); - // A virtual register at any point must have either a low-level type - // or a class assigned, but not both. The only exception is the internals of - // GlobalISel's instruction selection pass, which is allowed to temporarily - // introduce registers with types and classes both. - assert((OldRC || getType(Reg).isValid()) && "Reg has neither class nor type"); - assert((!OldRC || !getType(Reg).isValid()) && "Reg has class and type both"); - assert((RC || getType(ConstrainingReg).isValid()) && - "ConstrainingReg has neither class nor type"); - assert((!RC || !getType(ConstrainingReg).isValid()) && - "ConstrainingReg has class and type both"); - if (OldRC && RC) - return ::constrainRegClass(*this, Reg, OldRC, RC, MinNumRegs); - // If one of the virtual registers is generic (used in generic machine - // instructions, has a low-level type, doesn't have a class), and the other is - // concrete (used in target specific instructions, doesn't have a low-level - // type, has a class), we can not unify them. - if (OldRC || RC) + const LLT RegTy = getType(Reg); + const LLT ConstrainingRegTy = getType(ConstrainingReg); + if (RegTy.isValid() && ConstrainingRegTy.isValid() && + RegTy != ConstrainingRegTy) return false; - // At this point, both registers are guaranteed to have a valid low-level - // type, and they must agree. - if (getType(Reg) != getType(ConstrainingReg)) - return false; - auto const *OldRB = getRegBankOrNull(Reg); - auto const *RB = getRegBankOrNull(ConstrainingReg); - if (OldRB) - return !RB || RB == OldRB; - if (RB) - setRegBank(Reg, *RB); + const auto ConstrainingRegCB = getRegClassOrRegBank(ConstrainingReg); + if (!ConstrainingRegCB.isNull()) { + const auto RegCB = getRegClassOrRegBank(Reg); + if (RegCB.isNull()) + setRegClassOrRegBank(Reg, ConstrainingRegCB); + else if (RegCB.is() != + ConstrainingRegCB.is()) + return false; + else if (RegCB.is()) { + if (!::constrainRegClass( + *this, Reg, RegCB.get(), + ConstrainingRegCB.get(), MinNumRegs)) + return false; + } else if (RegCB != ConstrainingRegCB) + return false; + } + if (ConstrainingRegTy.isValid()) + setType(Reg, ConstrainingRegTy); return true; } @@ -188,10 +181,6 @@ unsigned MachineRegisterInfo::cloneVirtualRegister(unsigned VReg, } void MachineRegisterInfo::setType(unsigned VReg, LLT Ty) { - // Check that VReg doesn't have a class. - assert((getRegClassOrRegBank(VReg).isNull() || - !getRegClassOrRegBank(VReg).is()) && - "Can't set the size of a non-generic virtual register"); VRegToType.grow(VReg); VRegToType[VReg] = Ty; } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/machine-cse-mid-pipeline.mir b/llvm/test/CodeGen/AArch64/GlobalISel/machine-cse-mid-pipeline.mir index 8ca81a3..667a769 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/machine-cse-mid-pipeline.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/machine-cse-mid-pipeline.mir @@ -130,9 +130,8 @@ regBankSelected: false selected: false body: | ; CHECK-LABEL: name: generic_to_concrete_copy - ; CHECK: %[[S1:[0-9]+]]:_(s32) = G_ADD %{{[0-9]+}}, %{{[0-9]+}} - ; CHECK-NEXT: %[[S2:[0-9]+]]:gpr32 = COPY %[[S1]](s32) - ; CHECK-NEXT: %{{[0-9]+}}:gpr32 = ADDWrr %[[S2]], %[[S2]] + ; CHECK: %[[S1:[0-9]+]]:gpr32(s32) = G_ADD %{{[0-9]+}}, %{{[0-9]+}} + ; CHECK-NEXT: %{{[0-9]+}}:gpr32 = ADDWrr %[[S1]](s32), %[[S1]](s32) bb.0: %0:_(s32) = COPY $w0 %1:_(s32) = COPY $w1 @@ -149,9 +148,8 @@ regBankSelected: false selected: false body: | ; CHECK-LABEL: name: concrete_to_generic_copy - ; CHECK: %[[S1:[0-9]+]]:gpr32 = ADDWrr %{{[0-9]+}}, %{{[0-9]+}} - ; CHECK-NEXT: %[[S2:[0-9]+]]:_(s32) = COPY %[[S1]] - ; CHECK-NEXT: %{{[0-9]+}}:_(s32) = G_ADD %[[S2]], %[[S2]] + ; CHECK: %[[S1:[0-9]+]]:gpr32(s32) = ADDWrr %{{[0-9]+}}, %{{[0-9]+}} + ; CHECK-NEXT: %{{[0-9]+}}:_(s32) = G_ADD %[[S1]], %[[S1]] bb.0: %0:gpr32 = COPY $w0 %1:gpr32 = COPY $w1 @@ -278,3 +276,31 @@ body: | $w0 = COPY %23(s32) RET_ReallyLR implicit $w0 ... +--- +name: variadic_defs_unmerge_vector_constraints_mix +legalized: true +regBankSelected: false +selected: false +body: | + ; CHECK-LABEL: name: variadic_defs_unmerge_vector_constraints_mix + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 + ; CHECK-NEXT: [[UV0:%[0-9]+]]:gpr(s32), [[UV1:%[0-9]+]]:gpr(s32), [[UV2:%[0-9]+]]:gpr32(s32), [[UV3:%[0-9]+]]:gpr32(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[ADD0:%[0-9]+]]:_(s32) = G_ADD [[UV0]], [[UV1]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:gpr32(s32) = ADDWrr [[UV2]](s32), [[UV3]](s32) + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD0]], [[ADD1]] + ; CHECK-NEXT: $w0 = COPY [[ADD2]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + bb.0: + %0 :_(<4 x s32>) = COPY $q0 + %1 :_(s32), %2 : _ (s32), %3 :_(s32), %4 : _ (s32) = G_UNMERGE_VALUES %0(<4 x s32>) + %5 :_(s32), %6 :gpr(s32), %7 :_(s32), %8 : _ (s32) = G_UNMERGE_VALUES %0(<4 x s32>) + %9 :_(s32), %10: _ (s32), %11:_(s32), %12: _ (s32) = G_UNMERGE_VALUES %0(<4 x s32>) + %13:_(s32), %14: _ (s32), %15:_(s32), %16:gpr32(s32) = G_UNMERGE_VALUES %0(<4 x s32>) + %21:gpr(s32) = COPY %1(s32) + %17:_(s32) = G_ADD %21, %6 + %18:gpr32 = COPY %11(s32) + %19:gpr32(s32) = ADDWrr %18, %16 + %20:_(s32) = G_ADD %17, %19 + $w0 = COPY %20(s32) + RET_ReallyLR implicit $w0 +... -- 2.7.4