anv,iris: program the maximum number of threads on compute queue init
author Rohan Garg <rohan.garg@intel.com>
Thu, 29 Jun 2023 12:24:55 +0000 (14:24 +0200)
committer Marge Bot <emma+marge@anholt.net>
Fri, 21 Jul 2023 10:46:08 +0000 (10:46 +0000)
Fixes: 90a39cac87 ("intel/blorp: Emit compute program based on BLORP_BATCH_USE_COMPUTE")
Signed-off-by: Rohan Garg <rohan.garg@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23934>
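
In short: on Gfx 12.5+ the CFE_STATE MaximumNumberofThreads field is now programmed once when a compute-capable context or queue is initialized (the iris compute context and the anv render and compute queues), instead of being emitted by blorp before every internal compute dispatch; the value itself, devinfo->max_cs_threads * devinfo->subslice_total, is unchanged. A minimal sketch of that computation, using a stand-in struct rather than the real intel_device_info:

/* Minimal sketch with illustrative values; the struct below is a stand-in,
 * not Mesa's intel_device_info. */
#include <stdio.h>

struct example_devinfo {
   unsigned max_cs_threads;   /* HW threads per subslice usable by compute */
   unsigned subslice_total;   /* total enabled subslices on the device */
};

static unsigned
cfe_max_threads(const struct example_devinfo *devinfo)
{
   return devinfo->max_cs_threads * devinfo->subslice_total;
}

int main(void)
{
   /* Example numbers only, not a real device's topology. */
   struct example_devinfo devinfo = { .max_cs_threads = 64, .subslice_total = 32 };
   printf("CFE MaximumNumberofThreads = %u\n", cfe_max_threads(&devinfo));
   return 0;
}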

src/gallium/drivers/iris/iris_state.c
src/intel/blorp/blorp_genX_exec.h
src/intel/vulkan/genX_state.c

diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c
index 881a534..644bb54 100644
@@ -1392,6 +1392,13 @@ iris_init_compute_context(struct iris_batch *batch)
    init_aux_map_state(batch);
 #endif
 
+#if GFX_VERx10 >= 125
+   iris_emit_cmd(batch, GENX(CFE_STATE), cfe) {
+      cfe.MaximumNumberofThreads =
+         devinfo->max_cs_threads * devinfo->subslice_total;
+   }
+#endif
+
    iris_batch_sync_region_end(batch);
 }
 
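A note on the guard in that hunk: GFX_VERx10 is the graphics version times ten, so the >= 125 check restricts the new CFE_STATE emission to Gfx 12.5 parts (DG2/MTL class), the generations that have CFE_STATE. A trivial stand-alone sketch of how such a compile-time guard selects the path; the macro is defined locally here for illustration, whereas in Mesa it is supplied per generation by the build:

/* Stand-in guard for illustration; Mesa compiles each genX file once per
 * GFX_VERx10 value provided by the build system. */
#include <stdio.h>

#ifndef GFX_VERx10
#define GFX_VERx10 125   /* pretend this is the Gfx 12.5 compilation */
#endif

int main(void)
{
#if GFX_VERx10 >= 125
   puts("Gfx 12.5+: program CFE_STATE once at compute context init");
#else
   puts("pre-Gfx 12.5: CFE_STATE does not exist here, nothing to emit");
#endif
   return 0;
}
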
diff --git a/src/intel/blorp/blorp_genX_exec.h b/src/intel/blorp/blorp_genX_exec.h
index c352406..9d9e205 100644
@@ -2161,12 +2161,6 @@ blorp_exec_compute(struct blorp_batch *batch, const struct blorp_params *params)
 #endif /* GFX_VER >= 7 */
 
 #if GFX_VERx10 >= 125
-
-   blorp_emit(batch, GENX(CFE_STATE), cfe) {
-      cfe.MaximumNumberofThreads =
-         devinfo->max_cs_threads * devinfo->subslice_total;
-   }
-
    assert(cs_prog_data->push.per_thread.regs == 0);
    blorp_emit(batch, GENX(COMPUTE_WALKER), cw) {
       cw.SIMDSize                       = dispatch.simd_size / 16;
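
With the init-time programming added in the other hunks of this commit, blorp no longer emits CFE_STATE itself before each internal compute dispatch; only the COMPUTE_WALKER remains here. A toy sketch of the resulting command-stream shape, with stand-in names rather than the blorp/GENX API:

/* Toy model only: contrast per-dispatch CFE_STATE emission (old blorp
 * path) with one-time emission at context/queue init (this commit). */
#include <stdio.h>

static void emit_cfe_state(void)       { puts("  CFE_STATE"); }
static void emit_compute_walker(int i) { printf("  COMPUTE_WALKER #%d\n", i); }

int main(void)
{
   puts("before: CFE_STATE re-emitted for every blorp compute dispatch");
   for (int i = 0; i < 3; i++) {
      emit_cfe_state();
      emit_compute_walker(i);
   }

   puts("after: CFE_STATE programmed once at init");
   emit_cfe_state();
   for (int i = 0; i < 3; i++)
      emit_compute_walker(i);

   return 0;
}
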
diff --git a/src/intel/vulkan/genX_state.c b/src/intel/vulkan/genX_state.c
index 8cd7a6b..efd98d2 100644
@@ -343,6 +343,7 @@ static VkResult
 init_render_queue_state(struct anv_queue *queue)
 {
    struct anv_device *device = queue->device;
+   UNUSED const struct intel_device_info *devinfo = queue->device->info;
    uint32_t cmds[128];
    struct anv_batch batch = {
       .start = cmds,
@@ -586,6 +587,20 @@ init_render_queue_state(struct anv_queue *queue)
 #if GFX_VERx10 >= 125
    anv_batch_emit(&batch, GENX(3DSTATE_MESH_CONTROL), zero);
    anv_batch_emit(&batch, GENX(3DSTATE_TASK_CONTROL), zero);
+   genX(batch_emit_pipe_control_write)(&batch, device->info, NoWrite,
+                                       ANV_NULL_ADDRESS,
+                                       0,
+                                       ANV_PIPE_FLUSH_BITS | ANV_PIPE_INVALIDATE_BITS);
+   genX(emit_pipeline_select)(&batch, GPGPU);
+   anv_batch_emit(&batch, GENX(CFE_STATE), cfe) {
+      cfe.MaximumNumberofThreads =
+         devinfo->max_cs_threads * devinfo->subslice_total;
+   }
+   genX(batch_emit_pipe_control_write)(&batch, device->info, NoWrite,
+                                       ANV_NULL_ADDRESS,
+                                       0,
+                                       ANV_PIPE_FLUSH_BITS | ANV_PIPE_INVALIDATE_BITS);
+   genX(emit_pipeline_select)(&batch, _3D);
 #endif
 
    anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END), bbe);
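
In the render queue hunk above, CFE_STATE is a compute-pipeline command, so the init batch briefly selects the GPGPU pipeline to program it and then returns to 3D, with a flush-and-invalidate PIPE_CONTROL ahead of each PIPELINE_SELECT. A toy sketch of that ordering, using stand-in helpers rather than the anv/GENX emitters:

/* Toy ordering sketch; names are placeholders, not the anv API. */
#include <stdio.h>

enum pipeline { PIPELINE_3D, PIPELINE_GPGPU };

static void pipe_control_flush_invalidate(void)
{
   puts("PIPE_CONTROL: flush + invalidate");
}

static void pipeline_select(enum pipeline p)
{
   printf("PIPELINE_SELECT: %s\n", p == PIPELINE_GPGPU ? "GPGPU" : "3D");
}

static void cfe_state(unsigned max_threads)
{
   printf("CFE_STATE: MaximumNumberofThreads = %u\n", max_threads);
}

int main(void)
{
   unsigned max_threads = 64 * 32;   /* example: max_cs_threads * subslice_total */

   pipe_control_flush_invalidate();
   pipeline_select(PIPELINE_GPGPU);
   cfe_state(max_threads);
   pipe_control_flush_invalidate();
   pipeline_select(PIPELINE_3D);     /* the render queue stays in 3D mode */
   return 0;
}
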
@@ -599,6 +614,7 @@ static VkResult
 init_compute_queue_state(struct anv_queue *queue)
 {
    struct anv_batch batch;
+   UNUSED const struct intel_device_info *devinfo = queue->device->info;
 
    uint32_t cmds[64];
    batch.start = batch.next = cmds;
@@ -626,6 +642,13 @@ init_compute_queue_state(struct anv_queue *queue)
 
    init_common_queue_state(queue, &batch);
 
+#if GFX_VERx10 >= 125
+   anv_batch_emit(&batch, GENX(CFE_STATE), cfe) {
+      cfe.MaximumNumberofThreads =
+         devinfo->max_cs_threads * devinfo->subslice_total;
+   }
+#endif
+
    anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END), bbe);
 
    assert(batch.next <= batch.end);