From 477e3fe4f874b1c4d5896f3bfaf7b3b8a6d38103 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 3 Feb 2021 13:19:09 -0500 Subject: [PATCH] Revert "AMDGPU: Don't consider global pressure when bundling soft clauses" This reverts commit 1e377a273f59375d8e6a424f66f069b3adfa1ca4. A regression was reported. --- llvm/lib/Target/AMDGPU/GCNRegPressure.h | 15 -- llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp | 52 ++-- .../AMDGPU/soft-clause-bundle-local-pressure.mir | 271 --------------------- 3 files changed, 19 insertions(+), 319 deletions(-) delete mode 100644 llvm/test/CodeGen/AMDGPU/soft-clause-bundle-local-pressure.mir diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h index d07da90..ba8c85a 100644 --- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h +++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h @@ -127,21 +127,6 @@ public: return Res; } - /// Return the VGPR pressure at the current point - unsigned getCurrentNumVGPR() const { - return CurPressure.getVGPRNum(); - } - - /// Return the SGPR pressure at the current point - unsigned getCurrentNumSGPR() const { - return CurPressure.getSGPRNum(); - } - - /// Return the occupancy at the current point - unsigned getCurrentOccupancy(const GCNSubtarget &ST) const { - return CurPressure.getOccupancy(ST); - } - decltype(LiveRegs) moveLiveRegs() { return std::move(LiveRegs); } diff --git a/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp b/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp index acaa3b3..59ce080 100644 --- a/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp +++ b/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp @@ -64,11 +64,10 @@ private: bool canBundle(const MachineInstr &MI, const RegUse &Defs, const RegUse &Uses) const; - bool checkPressure(const MachineInstr &MI, GCNDownwardRPTracker &RPT, - bool IsVMEMClause); + bool checkPressure(const MachineInstr &MI, GCNDownwardRPTracker &RPT); void collectRegUses(const MachineInstr &MI, RegUse &Defs, RegUse &Uses) const; bool processRegUses(const MachineInstr &MI, RegUse &Defs, RegUse &Uses, - GCNDownwardRPTracker &RPT, bool IsVMEMClause); + GCNDownwardRPTracker &RPT); const GCNSubtarget *ST; const SIRegisterInfo *TRI; @@ -244,33 +243,21 @@ bool SIFormMemoryClauses::canBundle(const MachineInstr &MI, const RegUse &Defs, // Function returns false if pressure would hit the limit if instruction is // bundled into a memory clause. bool SIFormMemoryClauses::checkPressure(const MachineInstr &MI, - GCNDownwardRPTracker &RPT, - bool IsVMEMClause) { - unsigned OldOccupancy = RPT.getCurrentOccupancy(*ST); - + GCNDownwardRPTracker &RPT) { // NB: skip advanceBeforeNext() call. Since all defs will be marked // early-clobber they will all stay alive at least to the end of the - // clause. Therefore we should not decrease pressure even if load pointer - // becomes dead and could otherwise be reused for destination. + // clause. Therefor we should not decrease pressure even if load + // pointer becomes dead and could otherwise be reused for destination. RPT.advanceToNext(); - - unsigned NewOccupancy = RPT.getCurrentOccupancy(*ST); - if (NewOccupancy < OldOccupancy && - NewOccupancy < MFI->getMinAllowedOccupancy()) - return false; - - // Scalar loads/clauses cannot def VGPRs, and vector loads/clauses cannot def - // SGPRs (ignoring artificial implicit operands). - if (IsVMEMClause) { - if (RPT.getCurrentNumVGPR() > MaxVGPRs) - return false; - } else { - if (RPT.getCurrentNumSGPR() > MaxSGPRs) - return false; + GCNRegPressure MaxPressure = RPT.moveMaxPressure(); + unsigned Occupancy = MaxPressure.getOccupancy(*ST); + if (Occupancy >= MFI->getMinAllowedOccupancy() && + MaxPressure.getVGPRNum() <= MaxVGPRs && + MaxPressure.getSGPRNum() <= MaxSGPRs) { + LastRecordedOccupancy = Occupancy; + return true; } - - LastRecordedOccupancy = NewOccupancy; - return true; + return false; } // Collect register defs and uses along with their lane masks and states. @@ -302,14 +289,13 @@ void SIFormMemoryClauses::collectRegUses(const MachineInstr &MI, // Check register def/use conflicts, occupancy limits and collect def/use maps. // Return true if instruction can be bundled with previous. It it cannot // def/use maps are not updated. -bool SIFormMemoryClauses::processRegUses(const MachineInstr &MI, RegUse &Defs, - RegUse &Uses, - GCNDownwardRPTracker &RPT, - bool IsVMEMClause) { +bool SIFormMemoryClauses::processRegUses(const MachineInstr &MI, + RegUse &Defs, RegUse &Uses, + GCNDownwardRPTracker &RPT) { if (!canBundle(MI, Defs, Uses)) return false; - if (!checkPressure(MI, RPT, IsVMEMClause)) + if (!checkPressure(MI, RPT)) return false; collectRegUses(MI, Defs, Uses); @@ -363,7 +349,7 @@ bool SIFormMemoryClauses::runOnMachineFunction(MachineFunction &MF) { const GCNRPTracker::LiveRegSet LiveRegsCopy(RPT.getLiveRegs()); RegUse Defs, Uses; - if (!processRegUses(MI, Defs, Uses, RPT, IsVMEM)) { + if (!processRegUses(MI, Defs, Uses, RPT)) { RPT.reset(MI, &LiveRegsCopy); continue; } @@ -381,7 +367,7 @@ bool SIFormMemoryClauses::runOnMachineFunction(MachineFunction &MF) { // A load from pointer which was loaded inside the same bundle is an // impossible clause because we will need to write and read the same // register inside. In this case processRegUses will return false. - if (!processRegUses(*Next, Defs, Uses, RPT, IsVMEM)) + if (!processRegUses(*Next, Defs, Uses, RPT)) break; ++Length; diff --git a/llvm/test/CodeGen/AMDGPU/soft-clause-bundle-local-pressure.mir b/llvm/test/CodeGen/AMDGPU/soft-clause-bundle-local-pressure.mir deleted file mode 100644 index a9409dd..0000000 --- a/llvm/test/CodeGen/AMDGPU/soft-clause-bundle-local-pressure.mir +++ /dev/null @@ -1,271 +0,0 @@ -# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+xnack -run-pass=si-form-memory-clauses -verify-machineinstrs -o - %s | FileCheck %s - -# The VGPR pressure at the point the scalar clause would form decreases the -# occupancy. However, this occupancy decrease is not due to the introduction of -# the bundle so the bundle should still be inserted. - ---- -name: form_sgpr_clause_high_vgpr_pressure -tracksRegLiveness: true -machineFunctionInfo: - isEntryFunction: true - waveLimiter: false - memoryBound: false - scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' - stackPtrOffsetReg: '$sgpr32' - occupancy: 10 - -body: | - bb.0: - liveins: $sgpr4_sgpr5, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24 - ; CHECK-LABEL: name: form_sgpr_clause_high_vgpr_pressure - ; CHECK: early-clobber %26:sreg_32_xm0_xexec, early-clobber %27:sreg_32_xm0_xexec = BUNDLE [[REG:%[0-9]+]] { - ; CHECK: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG]], 0, 0, 0 :: (load 4, addrspace 4) - ; CHECK: [[S_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG]], 8, 0, 0 :: (load 4, addrspace 4) - ; CHECK: } - %0:sreg_64 = COPY $sgpr4_sgpr5 - %1:vgpr_32 = COPY $vgpr0 - %2:vgpr_32 = COPY $vgpr1 - %3:vgpr_32 = COPY $vgpr2 - %4:vgpr_32 = COPY $vgpr3 - %5:vgpr_32 = COPY $vgpr4 - %6:vgpr_32 = COPY $vgpr5 - %7:vgpr_32 = COPY $vgpr6 - %8:vgpr_32 = COPY $vgpr7 - %9:vgpr_32 = COPY $vgpr8 - %10:vgpr_32 = COPY $vgpr9 - %11:vgpr_32 = COPY $vgpr10 - %12:vgpr_32 = COPY $vgpr11 - %13:vgpr_32 = COPY $vgpr12 - %14:vgpr_32 = COPY $vgpr13 - %15:vgpr_32 = COPY $vgpr14 - %16:vgpr_32 = COPY $vgpr15 - %17:vgpr_32 = COPY $vgpr16 - %18:vgpr_32 = COPY $vgpr17 - %19:vgpr_32 = COPY $vgpr18 - %20:vgpr_32 = COPY $vgpr19 - %21:vgpr_32 = COPY $vgpr20 - %22:vgpr_32 = COPY $vgpr21 - %23:vgpr_32 = COPY $vgpr22 - %24:vgpr_32 = COPY $vgpr23 - %27:vgpr_32 = COPY $vgpr24 - %25:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 0, 0, 0 :: (load 4, align 4, addrspace 4) - %26:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 8, 0, 0 :: (load 4, align 4, addrspace 4) - S_NOP 0, implicit %1 - S_NOP 0, implicit %2 - S_NOP 0, implicit %3 - S_NOP 0, implicit %4 - S_NOP 0, implicit %5 - S_NOP 0, implicit %6 - S_NOP 0, implicit %7 - S_NOP 0, implicit %8 - S_NOP 0, implicit %9 - S_NOP 0, implicit %10 - S_NOP 0, implicit %11 - S_NOP 0, implicit %12 - S_NOP 0, implicit %13 - S_NOP 0, implicit %14 - S_NOP 0, implicit %15 - S_NOP 0, implicit %16 - S_NOP 0, implicit %17 - S_NOP 0, implicit %18 - S_NOP 0, implicit %19 - S_NOP 0, implicit %20 - S_NOP 0, implicit %21 - S_NOP 0, implicit %22 - S_NOP 0, implicit %23 - S_NOP 0, implicit %24 - S_NOP 0, implicit %27 - S_ENDPGM 0, implicit %25, implicit %26 - -... - -# The SGPR pressure at the point the vector clause would form decreases the -# occupancy. However, this occupancy decrease is not due to the introduction of -# the bundle so the bundle should still be inserted. ---- -name: form_vgpr_clause_high_sgpr_pressure -tracksRegLiveness: true -machineFunctionInfo: - isEntryFunction: true - waveLimiter: false - memoryBound: false - scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' - stackPtrOffsetReg: '$sgpr32' - occupancy: 10 - -body: | - bb.0: - liveins: $vgpr0_vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr32, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80 - ; CHECK-LABEL: name: form_vgpr_clause_high_sgpr_pressure - ; CHECK: early-clobber %82:vgpr_32, early-clobber %83:vgpr_32 = BUNDLE [[REG:%[0-9]+]], implicit $exec { - ; CHECK: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG]], 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1) - ; CHECK: [[GLOBAL_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG]], 8, 0, 0, 0, implicit $exec :: (load 4, addrspace 1) - ; CHECK: } - %0:vreg_64 = COPY $vgpr0_vgpr1 - %1:sgpr_32 = COPY $sgpr0 - %2:sgpr_32 = COPY $sgpr1 - %3:sgpr_32 = COPY $sgpr2 - %4:sgpr_32 = COPY $sgpr3 - %5:sgpr_32 = COPY $sgpr4 - %6:sgpr_32 = COPY $sgpr5 - %7:sgpr_32 = COPY $sgpr6 - %8:sgpr_32 = COPY $sgpr7 - %9:sgpr_32 = COPY $sgpr8 - %10:sgpr_32 = COPY $sgpr9 - %11:sgpr_32 = COPY $sgpr10 - %12:sgpr_32 = COPY $sgpr11 - %13:sgpr_32 = COPY $sgpr12 - %14:sgpr_32 = COPY $sgpr13 - %15:sgpr_32 = COPY $sgpr14 - %16:sgpr_32 = COPY $sgpr15 - %17:sgpr_32 = COPY $sgpr16 - %18:sgpr_32 = COPY $sgpr17 - %19:sgpr_32 = COPY $sgpr18 - %20:sgpr_32 = COPY $sgpr19 - %21:sgpr_32 = COPY $sgpr20 - %22:sgpr_32 = COPY $sgpr21 - %23:sgpr_32 = COPY $sgpr22 - %24:sgpr_32 = COPY $sgpr23 - %25:sgpr_32 = COPY $sgpr24 - %26:sgpr_32 = COPY $sgpr25 - %27:sgpr_32 = COPY $sgpr26 - %28:sgpr_32 = COPY $sgpr27 - %29:sgpr_32 = COPY $sgpr28 - %30:sgpr_32 = COPY $sgpr29 - %31:sgpr_32 = COPY $sgpr30 - %32:sgpr_32 = COPY $sgpr31 - %33:sgpr_32 = COPY $sgpr32 - %34:sgpr_32 = COPY $sgpr33 - %35:sgpr_32 = COPY $sgpr34 - %36:sgpr_32 = COPY $sgpr35 - %37:sgpr_32 = COPY $sgpr36 - %38:sgpr_32 = COPY $sgpr37 - %39:sgpr_32 = COPY $sgpr38 - %40:sgpr_32 = COPY $sgpr39 - %41:sgpr_32 = COPY $sgpr40 - %42:sgpr_32 = COPY $sgpr41 - %43:sgpr_32 = COPY $sgpr42 - %44:sgpr_32 = COPY $sgpr43 - %45:sgpr_32 = COPY $sgpr44 - %46:sgpr_32 = COPY $sgpr45 - %47:sgpr_32 = COPY $sgpr46 - %48:sgpr_32 = COPY $sgpr47 - %49:sgpr_32 = COPY $sgpr48 - %50:sgpr_32 = COPY $sgpr49 - %51:sgpr_32 = COPY $sgpr50 - %52:sgpr_32 = COPY $sgpr51 - %53:sgpr_32 = COPY $sgpr52 - %54:sgpr_32 = COPY $sgpr53 - %55:sgpr_32 = COPY $sgpr54 - %56:sgpr_32 = COPY $sgpr55 - %57:sgpr_32 = COPY $sgpr56 - %58:sgpr_32 = COPY $sgpr57 - %59:sgpr_32 = COPY $sgpr58 - %60:sgpr_32 = COPY $sgpr59 - %61:sgpr_32 = COPY $sgpr60 - %62:sgpr_32 = COPY $sgpr61 - %63:sgpr_32 = COPY $sgpr62 - %64:sgpr_32 = COPY $sgpr63 - %65:sgpr_32 = COPY $sgpr64 - %66:sgpr_32 = COPY $sgpr65 - %67:sgpr_32 = COPY $sgpr66 - %68:sgpr_32 = COPY $sgpr67 - %69:sgpr_32 = COPY $sgpr68 - %70:sgpr_32 = COPY $sgpr69 - %71:sgpr_32 = COPY $sgpr70 - %72:sgpr_32 = COPY $sgpr71 - %73:sgpr_32 = COPY $sgpr72 - %74:sgpr_32 = COPY $sgpr73 - %75:sgpr_32 = COPY $sgpr74 - %76:sgpr_32 = COPY $sgpr75 - %77:sgpr_32 = COPY $sgpr76 - %78:sgpr_32 = COPY $sgpr77 - %79:sgpr_32 = COPY $sgpr78 - %80:sgpr_32 = COPY $sgpr79 - %81:sgpr_32 = COPY $sgpr80 - %82:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec :: (load 4, align 4, addrspace 1) - %83:vgpr_32 = GLOBAL_LOAD_DWORD %0, 8, 0, 0, 0, implicit $exec :: (load 4, align 4, addrspace 1) - S_NOP 0, implicit %1 - S_NOP 0, implicit %2 - S_NOP 0, implicit %3 - S_NOP 0, implicit %4 - S_NOP 0, implicit %5 - S_NOP 0, implicit %6 - S_NOP 0, implicit %7 - S_NOP 0, implicit %8 - S_NOP 0, implicit %9 - S_NOP 0, implicit %10 - S_NOP 0, implicit %11 - S_NOP 0, implicit %12 - S_NOP 0, implicit %13 - S_NOP 0, implicit %14 - S_NOP 0, implicit %15 - S_NOP 0, implicit %16 - S_NOP 0, implicit %17 - S_NOP 0, implicit %18 - S_NOP 0, implicit %19 - S_NOP 0, implicit %20 - S_NOP 0, implicit %21 - S_NOP 0, implicit %22 - S_NOP 0, implicit %23 - S_NOP 0, implicit %24 - S_NOP 0, implicit %25 - S_NOP 0, implicit %26 - S_NOP 0, implicit %27 - S_NOP 0, implicit %28 - S_NOP 0, implicit %29 - S_NOP 0, implicit %30 - S_NOP 0, implicit %31 - S_NOP 0, implicit %32 - S_NOP 0, implicit %33 - S_NOP 0, implicit %34 - S_NOP 0, implicit %35 - S_NOP 0, implicit %36 - S_NOP 0, implicit %37 - S_NOP 0, implicit %38 - S_NOP 0, implicit %39 - S_NOP 0, implicit %40 - S_NOP 0, implicit %41 - S_NOP 0, implicit %42 - S_NOP 0, implicit %43 - S_NOP 0, implicit %44 - S_NOP 0, implicit %45 - S_NOP 0, implicit %46 - S_NOP 0, implicit %47 - S_NOP 0, implicit %48 - S_NOP 0, implicit %49 - S_NOP 0, implicit %50 - S_NOP 0, implicit %51 - S_NOP 0, implicit %52 - S_NOP 0, implicit %53 - S_NOP 0, implicit %54 - S_NOP 0, implicit %55 - S_NOP 0, implicit %56 - S_NOP 0, implicit %57 - S_NOP 0, implicit %58 - S_NOP 0, implicit %59 - S_NOP 0, implicit %60 - S_NOP 0, implicit %61 - S_NOP 0, implicit %62 - S_NOP 0, implicit %63 - S_NOP 0, implicit %64 - S_NOP 0, implicit %65 - S_NOP 0, implicit %66 - S_NOP 0, implicit %67 - S_NOP 0, implicit %68 - S_NOP 0, implicit %69 - S_NOP 0, implicit %70 - S_NOP 0, implicit %71 - S_NOP 0, implicit %72 - S_NOP 0, implicit %73 - S_NOP 0, implicit %74 - S_NOP 0, implicit %75 - S_NOP 0, implicit %76 - S_NOP 0, implicit %77 - S_NOP 0, implicit %78 - S_NOP 0, implicit %79 - S_NOP 0, implicit %80 - S_NOP 0, implicit %81 - S_ENDPGM 0, implicit %82, implicit %83 -... -- 2.7.4