nvk: Implement CS invocations statistics queries
author: Faith Ekstrand <faith.ekstrand@collabora.com>
Tue, 31 Jan 2023 02:12:02 +0000 (20:12 -0600)
committer: Marge Bot <emma+marge@anholt.net>
Fri, 4 Aug 2023 21:32:01 +0000 (21:32 +0000)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24326>

src/nouveau/vulkan/nvk_cmd_dispatch.c
src/nouveau/vulkan/nvk_mme.c
src/nouveau/vulkan/nvk_mme.h
src/nouveau/vulkan/nvk_query_pool.c

index 770166d..09ae88a 100644 (file)
@@ -71,6 +71,18 @@ nvk_cmd_bind_compute_pipeline(struct nvk_cmd_buffer *cmd,
    cmd->state.cs.pipeline = pipeline;
 }
 
+static uint32_t /* Per-workgroup factor of the CS invocation count. */
+nvk_compute_local_size(struct nvk_cmd_buffer *cmd)
+{
+   const struct nvk_compute_pipeline *pipeline = cmd->state.cs.pipeline; /* pipeline recorded by nvk_cmd_bind_compute_pipeline() */
+   const struct nvk_shader *shader =
+      &pipeline->base.shaders[MESA_SHADER_COMPUTE];
+
+   return shader->cp.block_size[0] * /* product of all three block_size components, computed in 32 bits */
+          shader->cp.block_size[1] *
+          shader->cp.block_size[2];
+}
+
 static uint64_t
 nvk_flush_compute_state(struct nvk_cmd_buffer *cmd,
                         uint64_t *root_desc_addr_out)
@@ -121,6 +133,36 @@ nvk_flush_compute_state(struct nvk_cmd_buffer *cmd,
    return qmd_addr;
 }
 
+static void /* Emits MME code that adds `count` to the 64-bit CS invocation accumulator kept in MME shadow scratch. */
+nvk_build_mme_add_cs_invocations(struct mme_builder *b,
+                                 struct mme_value64 count)
+{
+   struct mme_value accum_hi = mme_state(b, /* current high word of the accumulator */
+      NVC597_SET_MME_SHADOW_SCRATCH(NVK_MME_SCRATCH_CS_INVOCATIONS_HI));
+   struct mme_value accum_lo = mme_state(b, /* current low word of the accumulator */
+      NVC597_SET_MME_SHADOW_SCRATCH(NVK_MME_SCRATCH_CS_INVOCATIONS_LO));
+   struct mme_value64 accum = mme_value64(accum_lo, accum_hi); /* note argument order: (lo, hi) */
+
+   accum = mme_add64(b, accum, count);
+
+   STATIC_ASSERT(NVK_MME_SCRATCH_CS_INVOCATIONS_HI + 1 == /* the write-back below targets HI once, then emits hi and lo back to back */
+                 NVK_MME_SCRATCH_CS_INVOCATIONS_LO);
+
+   mme_mthd(b, NVC597_SET_MME_SHADOW_SCRATCH(NVK_MME_SCRATCH_CS_INVOCATIONS_HI)); /* NOTE(review): presumably the method address advances per emit, so the second emit lands in LO -- confirm */
+   mme_emit(b, accum.hi);
+   mme_emit(b, accum.lo);
+}
+
+void /* Builder for NVK_MME_ADD_CS_INVOCATIONS: adds a 64-bit count supplied as two macro parameters to the accumulator. */
+nvk_mme_add_cs_invocations(struct nvk_device *dev, struct mme_builder *b) /* `dev` is not used here */
+{
+   struct mme_value count_hi = mme_load(b); /* high word is loaded first, matching the P_INLINE_DATA order in nvk_CmdDispatch() */
+   struct mme_value count_lo = mme_load(b);
+   struct mme_value64 count = mme_value64(count_lo, count_hi); /* note argument order: (lo, hi) */
+
+   nvk_build_mme_add_cs_invocations(b, count);
+}
+
 VKAPI_ATTR void VKAPI_CALL
 nvk_CmdDispatch(VkCommandBuffer commandBuffer,
                 uint32_t groupCountX,
@@ -138,7 +180,16 @@ nvk_CmdDispatch(VkCommandBuffer commandBuffer,
    if (unlikely(qmd_addr == 0))
       return;
 
-   struct nv_push *p = nvk_cmd_buffer_push(cmd, 6);
+   const uint32_t local_size = nvk_compute_local_size(cmd);
+   const uint64_t cs_invocations =
+      (uint64_t)local_size * (uint64_t)groupCountX *
+      (uint64_t)groupCountY * (uint64_t)groupCountZ;
+
+   struct nv_push *p = nvk_cmd_buffer_push(cmd, 9);
+
+   P_1INC(p, NV9097, CALL_MME_MACRO(NVK_MME_ADD_CS_INVOCATIONS));
+   P_INLINE_DATA(p, cs_invocations >> 32);
+   P_INLINE_DATA(p, cs_invocations);
 
    P_MTHD(p, NVA0C0, INVALIDATE_SHADER_CACHES_NO_WFI);
    P_NVA0C0_INVALIDATE_SHADER_CACHES_NO_WFI(p, {
@@ -189,6 +240,7 @@ mme_store_global_vec3(struct mme_builder *b,
 void
 nvk_mme_dispatch_indirect(struct nvk_device *dev, struct mme_builder *b)
 {
+   struct mme_value local_size = mme_load(b);
    struct mme_value64 dispatch_addr = mme_load_addr64(b);
    struct mme_value64 root_desc_addr = mme_load_addr64(b);
    struct mme_value64 qmd_addr = mme_load_addr64(b);
@@ -203,6 +255,10 @@ nvk_mme_dispatch_indirect(struct nvk_device *dev, struct mme_builder *b)
    struct mme_value group_count_y = mme_load(b);
    struct mme_value group_count_z = mme_load(b);
 
+   struct mme_value64 cs1 = mme_umul_32x32_64(b, local_size, group_count_x);
+   struct mme_value64 cs2 = mme_umul_32x32_64(b, group_count_y, group_count_z);
+   nvk_build_mme_add_cs_invocations(b, mme_mul64(b, cs1, cs2));
+
    mme_store_global_vec3(b, qmd_addr, qmd_size_offset,
                          group_count_x, group_count_y, group_count_z);
    mme_store_global_vec3(b, root_desc_addr, root_desc_size_offset,
@@ -224,10 +280,11 @@ nvk_CmdDispatchIndirect(VkCommandBuffer commandBuffer,
    if (unlikely(qmd_addr == 0))
       return;
 
-   struct nv_push *p = nvk_cmd_buffer_push(cmd, 15);
+   struct nv_push *p = nvk_cmd_buffer_push(cmd, 16);
 
    P_IMMD(p, NVC597, SET_MME_DATA_FIFO_CONFIG, FIFO_SIZE_SIZE_4KB);
    P_1INC(p, NV9097, CALL_MME_MACRO(NVK_MME_DISPATCH_INDIRECT));
+   P_INLINE_DATA(p, nvk_compute_local_size(cmd));
    P_INLINE_DATA(p, dispatch_addr >> 32);
    P_INLINE_DATA(p, dispatch_addr);
    P_INLINE_DATA(p, root_desc_addr >> 32);
index 92d63da..75319f4 100644 (file)
@@ -9,7 +9,9 @@ static const nvk_mme_builder_func mme_builders[NVK_MME_COUNT] = {
    [NVK_MME_DRAW_INDEXED]           = nvk_mme_draw_indexed,
    [NVK_MME_DRAW_INDIRECT]          = nvk_mme_draw_indirect,
    [NVK_MME_DRAW_INDEXED_INDIRECT]  = nvk_mme_draw_indexed_indirect,
+   [NVK_MME_ADD_CS_INVOCATIONS]     = nvk_mme_add_cs_invocations,
    [NVK_MME_DISPATCH_INDIRECT]      = nvk_mme_dispatch_indirect,
+   [NVK_MME_WRITE_CS_INVOCATIONS]   = nvk_mme_write_cs_invocations,
    [NVK_MME_COPY_QUERIES]           = nvk_mme_copy_queries,
 };
 
index 2371e47..1ac53e3 100644 (file)
@@ -12,11 +12,18 @@ enum nvk_mme {
    NVK_MME_DRAW_INDEXED,
    NVK_MME_DRAW_INDIRECT,
    NVK_MME_DRAW_INDEXED_INDIRECT,
+   NVK_MME_ADD_CS_INVOCATIONS,
    NVK_MME_DISPATCH_INDIRECT,
+   NVK_MME_WRITE_CS_INVOCATIONS,
    NVK_MME_COPY_QUERIES,
    NVK_MME_COUNT,
 };
 
+enum nvk_mme_scratch { /* indices passed to NVC597_SET_MME_SHADOW_SCRATCH() */
+   NVK_MME_SCRATCH_CS_INVOCATIONS_HI, /* high word of the 64-bit CS invocation accumulator */
+   NVK_MME_SCRATCH_CS_INVOCATIONS_LO, /* low word; must remain HI + 1 (statically asserted in nvk_build_mme_add_cs_invocations()) */
+};
+
 typedef void (*nvk_mme_builder_func)(struct nvk_device *dev,
                                      struct mme_builder *b);
 
@@ -30,7 +37,9 @@ void nvk_mme_draw_indexed(struct nvk_device *dev, struct mme_builder *b);
 void nvk_mme_draw_indirect(struct nvk_device *dev, struct mme_builder *b);
 void nvk_mme_draw_indexed_indirect(struct nvk_device *dev,
                                    struct mme_builder *b);
+void nvk_mme_add_cs_invocations(struct nvk_device *dev, struct mme_builder *b);
 void nvk_mme_dispatch_indirect(struct nvk_device *dev, struct mme_builder *b);
+void nvk_mme_write_cs_invocations(struct nvk_device *dev, struct mme_builder *b);
 void nvk_mme_copy_queries(struct nvk_device *dev, struct mme_builder *b);
 
 #endif /* NVK_MME_H */
index a35da9a..3bda1ad 100644 (file)
@@ -277,6 +277,32 @@ static const struct nvk_3d_stat_query nvk_3d_stat_queries[] = {{
 }};
 
 static void
+mme_store_global(struct mme_builder *b, /* Emits MME code that stores the single value `v` at the 64-bit address `addr`. */
+                 struct mme_value64 addr,
+                 struct mme_value v)
+{
+   mme_mthd(b, NV9097_SET_REPORT_SEMAPHORE_A); /* start of the report-semaphore method sequence (A, B, C, D) */
+   mme_emit_addr64(b, addr); /* destination address */
+   mme_emit(b, v); /* value to store */
+   mme_emit(b, mme_imm(0x10000000)); /* NOTE(review): raw word for the final method of the sequence; presumably selects a one-word write -- confirm against the class header */
+}
+
+void /* Builder for NVK_MME_WRITE_CS_INVOCATIONS: stores the accumulated CS invocation count at a caller-supplied address. */
+nvk_mme_write_cs_invocations(struct nvk_device *dev, struct mme_builder *b) /* `dev` is not used here */
+{
+   struct mme_value64 dst_addr = mme_load_addr64(b); /* destination address comes from the macro parameters */
+
+   struct mme_value accum_hi = mme_state(b, /* high word of the accumulator in shadow scratch */
+      NVC597_SET_MME_SHADOW_SCRATCH(NVK_MME_SCRATCH_CS_INVOCATIONS_HI));
+   struct mme_value accum_lo = mme_state(b, /* low word of the accumulator in shadow scratch */
+      NVC597_SET_MME_SHADOW_SCRATCH(NVK_MME_SCRATCH_CS_INVOCATIONS_LO));
+   struct mme_value64 accum = mme_value64(accum_lo, accum_hi); /* note argument order: (lo, hi) */
+
+   mme_store_global(b, dst_addr, accum.lo); /* low word at dst_addr */
+   mme_store_global(b, mme_add64(b, dst_addr, mme_imm64(4)), accum.hi); /* high word at dst_addr + 4 */
+}
+
+static void
 nvk_cmd_begin_end_query(struct nvk_cmd_buffer *cmd,
                         struct nvk_query_pool *pool,
                         uint32_t query, uint32_t index,
@@ -317,16 +343,22 @@ nvk_cmd_begin_end_query(struct nvk_cmd_buffer *cmd,
          /* The 3D stat queries array MUST be sorted */
          assert(!(stats_left & (sq->flag - 1)));
 
-         P_MTHD(p, NV9097, SET_REPORT_SEMAPHORE_A);
-         P_NV9097_SET_REPORT_SEMAPHORE_A(p, report_addr >> 32);
-         P_NV9097_SET_REPORT_SEMAPHORE_B(p, report_addr);
-         P_NV9097_SET_REPORT_SEMAPHORE_C(p, 0);
-         P_NV9097_SET_REPORT_SEMAPHORE_D(p, {
-            .operation = OPERATION_REPORT_ONLY,
-            .pipeline_location = sq->loc,
-            .report = sq->report,
-            .structure_size = STRUCTURE_SIZE_FOUR_WORDS,
-         });
+         if (sq->flag == VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT) {
+            P_1INC(p, NVC597, CALL_MME_MACRO(NVK_MME_WRITE_CS_INVOCATIONS));
+            P_INLINE_DATA(p, report_addr >> 32);
+            P_INLINE_DATA(p, report_addr);
+         } else {
+            P_MTHD(p, NV9097, SET_REPORT_SEMAPHORE_A);
+            P_NV9097_SET_REPORT_SEMAPHORE_A(p, report_addr >> 32);
+            P_NV9097_SET_REPORT_SEMAPHORE_B(p, report_addr);
+            P_NV9097_SET_REPORT_SEMAPHORE_C(p, 0);
+            P_NV9097_SET_REPORT_SEMAPHORE_D(p, {
+               .operation = OPERATION_REPORT_ONLY,
+               .pipeline_location = sq->loc,
+               .report = sq->report,
+               .structure_size = STRUCTURE_SIZE_FOUR_WORDS,
+            });
+         }
 
          report_addr += 2 * sizeof(struct nvk_query_report);
          stats_left &= ~sq->flag;
@@ -500,17 +532,6 @@ nvk_GetQueryPoolResults(VkDevice device,
    return status;
 }
 
-static void
-mme_store_global(struct mme_builder *b,
-                 struct mme_value64 addr,
-                 struct mme_value v)
-{
-   mme_mthd(b, NV9097_SET_REPORT_SEMAPHORE_A);
-   mme_emit_addr64(b, addr);
-   mme_emit(b, v);
-   mme_emit(b, mme_imm(0x10000000));
-}
-
 void
 nvk_mme_copy_queries(struct nvk_device *dev, struct mme_builder *b)
 {