winsys/amdgpu: pad gfx and compute IBs with a single NOP packet

author Marek Olšák <marek.olsak@amd.com>

Mon, 7 Aug 2023 04:59:05 +0000 (00:59 -0400)

committer Marge Bot <emma+marge@anholt.net>

Sun, 1 Oct 2023 08:45:22 +0000 (08:45 +0000)
author Marek Olšák <marek.olsak@amd.com>
Mon, 7 Aug 2023 04:59:05 +0000 (00:59 -0400)
committer Marge Bot <emma+marge@anholt.net>
Sun, 1 Oct 2023 08:45:22 +0000 (08:45 +0000)
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c

index fd48e6c..ff042ad 100644 (file)
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -1806,6 +1806,7 @@ static int amdgpu_cs_flush(struct radeon_cmdbuf *rcs,
     struct amdgpu_winsys *ws = cs->ws;
     int error_code = 0;
     uint32_t ib_pad_dw_mask = ws->info.ib_pad_dw_mask[cs->ip_type];
+   unsigned alignment = ws->info.ip[cs->ip_type].ib_size_alignment / 4;
  
     rcs->current.max_dw += amdgpu_cs_epilog_dws(cs);
  
@@ -1822,13 +1823,23 @@ static int amdgpu_cs_flush(struct radeon_cmdbuf *rcs,
        break;
     case AMD_IP_GFX:
     case AMD_IP_COMPUTE:
-      if (ws->info.gfx_ib_pad_with_type2) {
-         while (rcs->current.cdw & ib_pad_dw_mask)
+      if (rcs->current.cdw % alignment) {
+         int remaining = alignment - rcs->current.cdw % alignment;
+
+         /* Use the type-2 NOP only when exactly 1 dword of padding is needed. */
+         if (remaining == 1 && ws->info.gfx_ib_pad_with_type2) {
              radeon_emit(rcs, PKT2_NOP_PAD);
-      } else {
-         while (rcs->current.cdw & ib_pad_dw_mask)
-            radeon_emit(rcs, PKT3_NOP_PAD);
+         } else {
+            /* NOP is a variable-sized packet, so pad with a single NOP packet to minimize CP
+             * overhead. The size of the packet body after the header is always count + 1.
+             * If count == -1, there is no packet body. NOP is the only packet that can have
+             * count == -1, which is the definition of PKT3_NOP_PAD (count == 0x3fff means -1).
+             */
+            radeon_emit(rcs, PKT3(PKT3_NOP, remaining - 2, 0));
+            rcs->current.cdw += remaining - 1;
+         }
        }
+      assert(rcs->current.cdw % alignment == 0);
        if (cs->ip_type == AMD_IP_GFX)
           ws->gfx_ib_size_counter += (rcs->prev_dw + rcs->current.cdw) * 4;
        break;
author	Marek Olšák <marek.olsak@amd.com>
	Mon, 7 Aug 2023 04:59:05 +0000 (00:59 -0400)
committer	Marge Bot <emma+marge@anholt.net>
	Sun, 1 Oct 2023 08:45:22 +0000 (08:45 +0000)