From: Rohan Garg <rohan.garg@intel.com>
Date: Thu, 29 Jun 2023 12:24:55 +0000 (+0200)
Subject: anv,iris: program the maximum number of threads on compute queue init
X-Git-Tag: upstream/23.3.3~5244
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=e7e7042093cadf67996edcc515c41c0dc349ea76;p=platform%2Fupstream%2Fmesa.git

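anv,iris: program the maximum number of threads on compute queue init

On GFX_VERx10 >= 125, emit CFE_STATE with MaximumNumberofThreads set to
max_cs_threads * subslice_total when the iris compute context and the anv
render and compute queues are initialized, and drop the CFE_STATE emission
from blorp_exec_compute(). On the anv render queue the pipeline is
switched to GPGPU around the CFE_STATE emission and back to 3D afterwards.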

Fixes: 90a39cac87 ("intel/blorp: Emit compute program based on BLORP_BATCH_USE_COMPUTE")
Signed-off-by: Rohan Garg <rohan.garg@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23934>
---

diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c
index 881a534..644bb54 100644
--- a/src/gallium/drivers/iris/iris_state.c
+++ b/src/gallium/drivers/iris/iris_state.c
@@ -1392,6 +1392,13 @@ iris_init_compute_context(struct iris_batch *batch)
    init_aux_map_state(batch);
 #endif
 
+#if GFX_VERx10 >= 125
+   iris_emit_cmd(batch, GENX(CFE_STATE), cfe) {
+      cfe.MaximumNumberofThreads =
+         devinfo->max_cs_threads * devinfo->subslice_total;
+   }
+#endif
+
    iris_batch_sync_region_end(batch);
 }
 
diff --git a/src/intel/blorp/blorp_genX_exec.h b/src/intel/blorp/blorp_genX_exec.h
index c352406..9d9e205 100644
--- a/src/intel/blorp/blorp_genX_exec.h
+++ b/src/intel/blorp/blorp_genX_exec.h
@@ -2161,12 +2161,6 @@ blorp_exec_compute(struct blorp_batch *batch, const struct blorp_params *params)
 #endif /* GFX_VER >= 7 */
 
 #if GFX_VERx10 >= 125
-
-   blorp_emit(batch, GENX(CFE_STATE), cfe) {
-      cfe.MaximumNumberofThreads =
-         devinfo->max_cs_threads * devinfo->subslice_total;
-   }
-
    assert(cs_prog_data->push.per_thread.regs == 0);
    blorp_emit(batch, GENX(COMPUTE_WALKER), cw) {
       cw.SIMDSize                       = dispatch.simd_size / 16;
diff --git a/src/intel/vulkan/genX_state.c b/src/intel/vulkan/genX_state.c
index 8cd7a6b..efd98d2 100644
--- a/src/intel/vulkan/genX_state.c
+++ b/src/intel/vulkan/genX_state.c
@@ -343,6 +343,7 @@ static VkResult
 init_render_queue_state(struct anv_queue *queue)
 {
    struct anv_device *device = queue->device;
+   UNUSED const struct intel_device_info *devinfo = queue->device->info;
    uint32_t cmds[128];
    struct anv_batch batch = {
       .start = cmds,
@@ -586,6 +587,20 @@ init_render_queue_state(struct anv_queue *queue)
 #if GFX_VERx10 >= 125
    anv_batch_emit(&batch, GENX(3DSTATE_MESH_CONTROL), zero);
    anv_batch_emit(&batch, GENX(3DSTATE_TASK_CONTROL), zero);
+   genX(batch_emit_pipe_control_write)(&batch, device->info, NoWrite,
+                                       ANV_NULL_ADDRESS,
+                                       0,
+                                       ANV_PIPE_FLUSH_BITS | ANV_PIPE_INVALIDATE_BITS);
+   genX(emit_pipeline_select)(&batch, GPGPU);
+   anv_batch_emit(&batch, GENX(CFE_STATE), cfe) {
+      cfe.MaximumNumberofThreads =
+         devinfo->max_cs_threads * devinfo->subslice_total;
+   }
+   genX(batch_emit_pipe_control_write)(&batch, device->info, NoWrite,
+                                       ANV_NULL_ADDRESS,
+                                       0,
+                                       ANV_PIPE_FLUSH_BITS | ANV_PIPE_INVALIDATE_BITS);
+   genX(emit_pipeline_select)(&batch, _3D);
 #endif
 
    anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END), bbe);
@@ -599,6 +614,7 @@ static VkResult
 init_compute_queue_state(struct anv_queue *queue)
 {
    struct anv_batch batch;
+   UNUSED const struct intel_device_info *devinfo = queue->device->info;
 
    uint32_t cmds[64];
    batch.start = batch.next = cmds;
@@ -626,6 +642,13 @@ init_compute_queue_state(struct anv_queue *queue)
 
    init_common_queue_state(queue, &batch);
 
+#if GFX_VERx10 >= 125
+   anv_batch_emit(&batch, GENX(CFE_STATE), cfe) {
+      cfe.MaximumNumberofThreads =
+         devinfo->max_cs_threads * devinfo->subslice_total;
+   }
+#endif
+
    anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END), bbe);
 
    assert(batch.next <= batch.end);