cmd->state.cs.pipeline = pipeline;
}
+/* Returns the number of shader invocations in a single compute workgroup:
+ * the product of the bound compute shader's local workgroup dimensions
+ * (cp.block_size[0..2]).
+ */
+static uint32_t
+nvk_compute_local_size(struct nvk_cmd_buffer *cmd)
+{
+ const struct nvk_compute_pipeline *pipeline = cmd->state.cs.pipeline;
+ const struct nvk_shader *cs =
+ &pipeline->base.shaders[MESA_SHADER_COMPUTE];
+
+ uint32_t local_size = 1;
+ for (unsigned i = 0; i < 3; i++)
+ local_size *= cs->cp.block_size[i];
+
+ return local_size;
+}
+
static uint64_t
nvk_flush_compute_state(struct nvk_cmd_buffer *cmd,
uint64_t *root_desc_addr_out)
return qmd_addr;
}
+/* Emits MME code that adds a 64-bit invocation count to the running
+ * compute-shader-invocations accumulator kept in the MME shadow scratch
+ * registers NVK_MME_SCRATCH_CS_INVOCATIONS_HI/LO.
+ *
+ * @param b     MME builder to emit code into
+ * @param count 64-bit number of invocations to add to the accumulator
+ */
+static void
+nvk_build_mme_add_cs_invocations(struct mme_builder *b,
+ struct mme_value64 count)
+{
+ /* Read the current 64-bit accumulator out of shadow scratch state. */
+ struct mme_value accum_hi = mme_state(b,
+ NVC597_SET_MME_SHADOW_SCRATCH(NVK_MME_SCRATCH_CS_INVOCATIONS_HI));
+ struct mme_value accum_lo = mme_state(b,
+ NVC597_SET_MME_SHADOW_SCRATCH(NVK_MME_SCRATCH_CS_INVOCATIONS_LO));
+ struct mme_value64 accum = mme_value64(accum_lo, accum_hi);
+
+ accum = mme_add64(b, accum, count);
+
+ /* The two mme_emit()s below write back-to-back SET_MME_SHADOW_SCRATCH
+  * methods starting at the HI index, so HI must sit immediately before LO
+  * in enum nvk_mme_scratch.
+  */
+ STATIC_ASSERT(NVK_MME_SCRATCH_CS_INVOCATIONS_HI + 1 ==
+ NVK_MME_SCRATCH_CS_INVOCATIONS_LO);
+
+ mme_mthd(b, NVC597_SET_MME_SHADOW_SCRATCH(NVK_MME_SCRATCH_CS_INVOCATIONS_HI));
+ mme_emit(b, accum.hi);
+ mme_emit(b, accum.lo);
+}
+
+/* MME macro body for NVK_MME_ADD_CS_INVOCATIONS.
+ *
+ * Macro parameters (FIFO order): high dword, then low dword of the number
+ * of compute shader invocations to add to the accumulated counter.  The
+ * mme_load() order below must match the P_INLINE_DATA() order used by the
+ * caller that invokes this macro.
+ */
+void
+nvk_mme_add_cs_invocations(struct nvk_device *dev, struct mme_builder *b)
+{
+ struct mme_value count_hi = mme_load(b);
+ struct mme_value count_lo = mme_load(b);
+ struct mme_value64 count = mme_value64(count_lo, count_hi);
+
+ nvk_build_mme_add_cs_invocations(b, count);
+}
+
VKAPI_ATTR void VKAPI_CALL
nvk_CmdDispatch(VkCommandBuffer commandBuffer,
uint32_t groupCountX,
if (unlikely(qmd_addr == 0))
return;
- struct nv_push *p = nvk_cmd_buffer_push(cmd, 6);
+ const uint32_t local_size = nvk_compute_local_size(cmd);
+ const uint64_t cs_invocations =
+ (uint64_t)local_size * (uint64_t)groupCountX *
+ (uint64_t)groupCountY * (uint64_t)groupCountZ;
+
+ struct nv_push *p = nvk_cmd_buffer_push(cmd, 9);
+
+ P_1INC(p, NV9097, CALL_MME_MACRO(NVK_MME_ADD_CS_INVOCATIONS));
+ P_INLINE_DATA(p, cs_invocations >> 32);
+ P_INLINE_DATA(p, cs_invocations);
P_MTHD(p, NVA0C0, INVALIDATE_SHADER_CACHES_NO_WFI);
P_NVA0C0_INVALIDATE_SHADER_CACHES_NO_WFI(p, {
void
nvk_mme_dispatch_indirect(struct nvk_device *dev, struct mme_builder *b)
{
+ struct mme_value local_size = mme_load(b);
struct mme_value64 dispatch_addr = mme_load_addr64(b);
struct mme_value64 root_desc_addr = mme_load_addr64(b);
struct mme_value64 qmd_addr = mme_load_addr64(b);
struct mme_value group_count_y = mme_load(b);
struct mme_value group_count_z = mme_load(b);
+ struct mme_value64 cs1 = mme_umul_32x32_64(b, local_size, group_count_x);
+ struct mme_value64 cs2 = mme_umul_32x32_64(b, group_count_y, group_count_z);
+ nvk_build_mme_add_cs_invocations(b, mme_mul64(b, cs1, cs2));
+
mme_store_global_vec3(b, qmd_addr, qmd_size_offset,
group_count_x, group_count_y, group_count_z);
mme_store_global_vec3(b, root_desc_addr, root_desc_size_offset,
if (unlikely(qmd_addr == 0))
return;
- struct nv_push *p = nvk_cmd_buffer_push(cmd, 15);
+ struct nv_push *p = nvk_cmd_buffer_push(cmd, 16);
P_IMMD(p, NVC597, SET_MME_DATA_FIFO_CONFIG, FIFO_SIZE_SIZE_4KB);
P_1INC(p, NV9097, CALL_MME_MACRO(NVK_MME_DISPATCH_INDIRECT));
+ P_INLINE_DATA(p, nvk_compute_local_size(cmd));
P_INLINE_DATA(p, dispatch_addr >> 32);
P_INLINE_DATA(p, dispatch_addr);
P_INLINE_DATA(p, root_desc_addr >> 32);
[NVK_MME_DRAW_INDEXED] = nvk_mme_draw_indexed,
[NVK_MME_DRAW_INDIRECT] = nvk_mme_draw_indirect,
[NVK_MME_DRAW_INDEXED_INDIRECT] = nvk_mme_draw_indexed_indirect,
+ [NVK_MME_ADD_CS_INVOCATIONS] = nvk_mme_add_cs_invocations,
[NVK_MME_DISPATCH_INDIRECT] = nvk_mme_dispatch_indirect,
+ [NVK_MME_WRITE_CS_INVOCATIONS] = nvk_mme_write_cs_invocations,
[NVK_MME_COPY_QUERIES] = nvk_mme_copy_queries,
};
NVK_MME_DRAW_INDEXED,
NVK_MME_DRAW_INDIRECT,
NVK_MME_DRAW_INDEXED_INDIRECT,
+ NVK_MME_ADD_CS_INVOCATIONS,
NVK_MME_DISPATCH_INDIRECT,
+ NVK_MME_WRITE_CS_INVOCATIONS,
NVK_MME_COPY_QUERIES,
NVK_MME_COUNT,
};
+/* Indices into the NVC597_SET_MME_SHADOW_SCRATCH register space used by NVK
+ * MME macros to hold persistent state.
+ *
+ * CS_INVOCATIONS_HI/LO together form a 64-bit compute-shader-invocations
+ * counter.  HI is deliberately placed immediately before LO: the MME
+ * builders STATIC_ASSERT this ordering so they can write both halves with
+ * consecutive methods.  Do not reorder.
+ */
+enum nvk_mme_scratch {
+ NVK_MME_SCRATCH_CS_INVOCATIONS_HI,
+ NVK_MME_SCRATCH_CS_INVOCATIONS_LO,
+};
+
typedef void (*nvk_mme_builder_func)(struct nvk_device *dev,
struct mme_builder *b);
void nvk_mme_draw_indirect(struct nvk_device *dev, struct mme_builder *b);
void nvk_mme_draw_indexed_indirect(struct nvk_device *dev,
struct mme_builder *b);
+void nvk_mme_add_cs_invocations(struct nvk_device *dev, struct mme_builder *b);
void nvk_mme_dispatch_indirect(struct nvk_device *dev, struct mme_builder *b);
+void nvk_mme_write_cs_invocations(struct nvk_device *dev, struct mme_builder *b);
void nvk_mme_copy_queries(struct nvk_device *dev, struct mme_builder *b);
#endif /* NVK_MME_H */
}};
+/* Emits MME code that stores the 32-bit value "v" to the GPU virtual
+ * address "addr" using a one-word report-semaphore write.
+ *
+ * NOTE(review): 0x10000000 is the raw SET_REPORT_SEMAPHORE_D payload —
+ * presumably OPERATION_RELEASE with STRUCTURE_SIZE_ONE_WORD; confirm
+ * against the clc597 class headers.
+ */
static void
+mme_store_global(struct mme_builder *b,
+ struct mme_value64 addr,
+ struct mme_value v)
+{
+ mme_mthd(b, NV9097_SET_REPORT_SEMAPHORE_A);
+ mme_emit_addr64(b, addr);
+ mme_emit(b, v);
+ mme_emit(b, mme_imm(0x10000000));
+}
+
+/* MME macro body for NVK_MME_WRITE_CS_INVOCATIONS.
+ *
+ * Macro parameters (FIFO order): 64-bit destination GPU address.
+ * Writes the accumulated compute-shader-invocations counter from the MME
+ * shadow scratch registers to that address as two 32-bit stores: low dword
+ * at dst_addr, high dword at dst_addr + 4 (little-endian uint64 layout).
+ */
+void
+nvk_mme_write_cs_invocations(struct nvk_device *dev, struct mme_builder *b)
+{
+ struct mme_value64 dst_addr = mme_load_addr64(b);
+
+ /* Read the 64-bit accumulator out of shadow scratch state. */
+ struct mme_value accum_hi = mme_state(b,
+ NVC597_SET_MME_SHADOW_SCRATCH(NVK_MME_SCRATCH_CS_INVOCATIONS_HI));
+ struct mme_value accum_lo = mme_state(b,
+ NVC597_SET_MME_SHADOW_SCRATCH(NVK_MME_SCRATCH_CS_INVOCATIONS_LO));
+ struct mme_value64 accum = mme_value64(accum_lo, accum_hi);
+
+ mme_store_global(b, dst_addr, accum.lo);
+ mme_store_global(b, mme_add64(b, dst_addr, mme_imm64(4)), accum.hi);
+}
+
+static void
nvk_cmd_begin_end_query(struct nvk_cmd_buffer *cmd,
struct nvk_query_pool *pool,
uint32_t query, uint32_t index,
/* The 3D stat queries array MUST be sorted */
assert(!(stats_left & (sq->flag - 1)));
- P_MTHD(p, NV9097, SET_REPORT_SEMAPHORE_A);
- P_NV9097_SET_REPORT_SEMAPHORE_A(p, report_addr >> 32);
- P_NV9097_SET_REPORT_SEMAPHORE_B(p, report_addr);
- P_NV9097_SET_REPORT_SEMAPHORE_C(p, 0);
- P_NV9097_SET_REPORT_SEMAPHORE_D(p, {
- .operation = OPERATION_REPORT_ONLY,
- .pipeline_location = sq->loc,
- .report = sq->report,
- .structure_size = STRUCTURE_SIZE_FOUR_WORDS,
- });
+ if (sq->flag == VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT) {
+ P_1INC(p, NVC597, CALL_MME_MACRO(NVK_MME_WRITE_CS_INVOCATIONS));
+ P_INLINE_DATA(p, report_addr >> 32);
+ P_INLINE_DATA(p, report_addr);
+ } else {
+ P_MTHD(p, NV9097, SET_REPORT_SEMAPHORE_A);
+ P_NV9097_SET_REPORT_SEMAPHORE_A(p, report_addr >> 32);
+ P_NV9097_SET_REPORT_SEMAPHORE_B(p, report_addr);
+ P_NV9097_SET_REPORT_SEMAPHORE_C(p, 0);
+ P_NV9097_SET_REPORT_SEMAPHORE_D(p, {
+ .operation = OPERATION_REPORT_ONLY,
+ .pipeline_location = sq->loc,
+ .report = sq->report,
+ .structure_size = STRUCTURE_SIZE_FOUR_WORDS,
+ });
+ }
report_addr += 2 * sizeof(struct nvk_query_report);
stats_left &= ~sq->flag;
return status;
}
-static void
-mme_store_global(struct mme_builder *b,
- struct mme_value64 addr,
- struct mme_value v)
-{
- mme_mthd(b, NV9097_SET_REPORT_SEMAPHORE_A);
- mme_emit_addr64(b, addr);
- mme_emit(b, v);
- mme_emit(b, mme_imm(0x10000000));
-}
-
void
nvk_mme_copy_queries(struct nvk_device *dev, struct mme_builder *b)
{