From: Marek Olšák
Date: Mon, 7 Aug 2023 04:59:05 +0000 (-0400)
Subject: winsys/amdgpu: pad gfx and compute IBs with a single NOP packet
X-Git-Tag: upstream/23.3.3~1431
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=43e72850697e42cdf6d794407a5523a5e481bd41;p=platform%2Fupstream%2Fmesa.git

winsys/amdgpu: pad gfx and compute IBs with a single NOP packet

to minimize CP overhead

Reviewed-by: Timur Kristóf
Part-of:
---

diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index fd48e6c..ff042ad 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -1806,6 +1806,7 @@ static int amdgpu_cs_flush(struct radeon_cmdbuf *rcs,
    struct amdgpu_winsys *ws = cs->ws;
    int error_code = 0;
    uint32_t ib_pad_dw_mask = ws->info.ib_pad_dw_mask[cs->ip_type];
+   unsigned alignment = ws->info.ip[cs->ip_type].ib_size_alignment / 4;
 
    rcs->current.max_dw += amdgpu_cs_epilog_dws(cs);
 
@@ -1822,13 +1823,23 @@ static int amdgpu_cs_flush(struct radeon_cmdbuf *rcs,
       break;
    case AMD_IP_GFX:
    case AMD_IP_COMPUTE:
-      if (ws->info.gfx_ib_pad_with_type2) {
-         while (rcs->current.cdw & ib_pad_dw_mask)
+      if (rcs->current.cdw % alignment) {
+         int remaining = alignment - rcs->current.cdw % alignment;
+
+         /* Only pad by 1 dword with the type-2 NOP if necessary. */
+         if (remaining == 1 && ws->info.gfx_ib_pad_with_type2) {
             radeon_emit(rcs, PKT2_NOP_PAD);
-      } else {
-         while (rcs->current.cdw & ib_pad_dw_mask)
-            radeon_emit(rcs, PKT3_NOP_PAD);
+         } else {
+            /* Pad with a single NOP packet to minimize CP overhead because NOP is a variable-sized
+             * packet. The size of the packet body after the header is always count + 1.
+             * If count == -1, there is no packet body. NOP is the only packet that can have
+             * count == -1, which is the definition of PKT3_NOP_PAD (count == 0x3fff means -1).
+             */
+            radeon_emit(rcs, PKT3(PKT3_NOP, remaining - 2, 0));
+            rcs->current.cdw += remaining - 1;
+         }
       }
+      assert(rcs->current.cdw % alignment == 0);
       if (cs->ip_type == AMD_IP_GFX)
          ws->gfx_ib_size_counter += (rcs->prev_dw + rcs->current.cdw) * 4;
       break;