From 3f148eabe0977228410090951cdb236d881eebcd Mon Sep 17 00:00:00 2001 From: Vedant Kumar Date: Mon, 17 Feb 2020 13:53:59 -0800 Subject: [PATCH] [LiveDebugValues] Visit open var locs just once in transferRegisterDef, NFC For a file in WebKit, this brings the time spent in LiveDebugValues down from 16 minutes to 2 minutes. The reduction comes from iterating the set of open variable locations just once in transferRegisterDef. Post-patch, the most expensive item inside of transferRegisterDef is a call to VarLoc::isDescribedByReg, which we have to do. Testing: I built LNT using the Os-g cmake cache with & without this patch, then diffed the object files to verify there was no binary diff. rdar://59446577 Differential Revision: https://reviews.llvm.org/D74633 --- llvm/lib/CodeGen/LiveDebugValues.cpp | 44 ++++++++++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/llvm/lib/CodeGen/LiveDebugValues.cpp b/llvm/lib/CodeGen/LiveDebugValues.cpp index 6acaf409..259604a 100644 --- a/llvm/lib/CodeGen/LiveDebugValues.cpp +++ b/llvm/lib/CodeGen/LiveDebugValues.cpp @@ -932,6 +932,11 @@ void LiveDebugValues::transferRegisterDef( const TargetLowering *TLI = MF->getSubtarget().getTargetLowering(); unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); SparseBitVector<> KillSet; + // Max out the number of statically allocated elements in `DeadRegs`, as this + // prevents fallback to std::set::count() operations. As often as possible, we + // want a linear scan here. + SmallSet DeadRegs; + SmallVector DeadRegMasks; for (const MachineOperand &MO : MI.operands()) { // Determine whether the operand is a register def. Assume that call // instructions never clobber SP, because some backends (e.g., AArch64) @@ -939,22 +944,41 @@ void LiveDebugValues::transferRegisterDef( if (MO.isReg() && MO.isDef() && MO.getReg() && Register::isPhysicalRegister(MO.getReg()) && !(MI.isCall() && MO.getReg() == SP)) { - // Remove ranges of all aliased registers. + // Remove ranges of all aliased registers. Note: the actual removal is + // done after we finish visiting MachineOperands, for performance reasons. for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI) - for (unsigned ID : OpenRanges.getVarLocs()) - if (VarLocIDs[ID].isDescribedByReg() == *RAI) - KillSet.set(ID); + // FIXME: Can we break out of this loop early if no insertion occurs? + DeadRegs.insert(*RAI); } else if (MO.isRegMask()) { // Remove ranges of all clobbered registers. Register masks don't usually // list SP as preserved. While the debug info may be off for an // instruction or two around callee-cleanup calls, transferring the - // DEBUG_VALUE across the call is still a better user experience. - for (unsigned ID : OpenRanges.getVarLocs()) { - unsigned Reg = VarLocIDs[ID].isDescribedByReg(); - if (Reg && Reg != SP && MO.clobbersPhysReg(Reg)) - KillSet.set(ID); - } + // DEBUG_VALUE across the call is still a better user experience. Note: + // the actual removal is done after we finish visiting MachineOperands, + // for performance reasons. + DeadRegMasks.push_back(MO.getRegMask()); + } + } + // For performance reasons, it's critical to iterate over the open var locs + // at most once. + for (unsigned ID : OpenRanges.getVarLocs()) { + unsigned Reg = VarLocIDs[ID].isDescribedByReg(); + if (!Reg) + continue; + + if (DeadRegs.count(Reg)) { + KillSet.set(ID); + continue; } + + if (Reg == SP) + continue; + bool AnyRegMaskKillsReg = + any_of(DeadRegMasks, [Reg](const uint32_t *RegMask) { + return MachineOperand::clobbersPhysReg(RegMask, Reg); + }); + if (AnyRegMaskKillsReg) + KillSet.set(ID); } OpenRanges.erase(KillSet, VarLocIDs); -- 2.7.4