From 7442f8635b4d7363b07152e5304c9a0c660eead4 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Fri, 3 Mar 2023 12:12:15 +0000 Subject: [PATCH] [AMDGPU] Fix invalid instid value in s_delay_alu instruction Differential Revision: https://reviews.llvm.org/D145232 --- llvm/lib/Target/AMDGPU/AMDGPUInsertDelayAlu.cpp | 9 ++++++++- llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInsertDelayAlu.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInsertDelayAlu.cpp index 29d6dfe..b7e23fa 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInsertDelayAlu.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInsertDelayAlu.cpp @@ -83,6 +83,10 @@ public: // an s_delay_alu instruction. static const unsigned TRANS_MAX = 4; + // The maximum number of SALU cycles we can encode in an s_delay_alu + // instruction. + static const unsigned SALU_CYCLES_MAX = 3; + // If it was written by a (non-TRANS) VALU, remember how many clock cycles // are left until it completes, and how many other (non-TRANS) VALU we have // seen since it was issued. @@ -120,7 +124,9 @@ public: TRANSNumVALU = 0; break; case SALU: - SALUCycles = Cycles; + // Guard against pseudo-instructions like SI_CALL which are marked as + // SALU but with a very high latency. + SALUCycles = std::min(Cycles, SALU_CYCLES_MAX); break; } } @@ -349,6 +355,7 @@ public: if (instructionWaitsForVALU(MI)) { // Forget about all outstanding VALU delays. + // TODO: This is overkill since it also forgets about SALU delays. State = DelayState(); } else if (Type != OTHER) { DelayInfo Delay; diff --git a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll index a10ae8a..c6d162c 100644 --- a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll +++ b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll @@ -119,7 +119,7 @@ define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg ; GFX11-NEXT: s_mov_b32 s3, 0 ; GFX11-NEXT: v_readfirstlane_b32 s2, v0 ; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s25 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(/* invalid instid value */) +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX11-NEXT: s_or_b32 s2, s2, 1 ; GFX11-NEXT: s_lshr_b32 s2, s2, s30 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -- 2.7.4