clear_dirty_dmabuf_set(ice);
screen->vtbl.destroy_state(ice);
+
+ for (unsigned i = 0; i < ARRAY_SIZE(ice->shaders.scratch_surfs); i++)
+ pipe_resource_reference(&ice->shaders.scratch_surfs[i].res, NULL);
+
iris_destroy_program_cache(ice);
iris_destroy_border_color_pool(ice);
if (screen->measure.config)
* and shader stage.
*/
struct iris_bo *scratch_bos[1 << 4][MESA_SHADER_STAGES];
+
+ /**
+ * Scratch buffer surface states on Gfx12.5+
+ */
+ struct iris_state_ref scratch_surfs[1 << 4];
} shaders;
struct intel_perf_context *perf_ctx;
struct iris_bo *iris_get_scratch_space(struct iris_context *ice,
unsigned per_thread_scratch,
gl_shader_stage stage);
+const struct iris_state_ref *iris_get_scratch_surf(struct iris_context *ice,
+ unsigned per_thread_scratch);
uint32_t iris_group_index_to_bti(const struct iris_binding_table *bt,
enum iris_surface_group group,
uint32_t index);
unsigned encoded_size = ffs(per_thread_scratch) - 11;
assert(encoded_size < ARRAY_SIZE(ice->shaders.scratch_bos));
+ assert(per_thread_scratch == 1 << (encoded_size + 10));
+
+ /* On GFX version 12.5, scratch access changed to a surface-based model.
+ * Instead of each shader type having its own layout based on IDs passed
+ * from the relevant fixed-function unit, all scratch access is based on
+ * thread IDs like it always has been for compute.
+ */
+ if (devinfo->verx10 >= 125)
+ stage = MESA_SHADER_COMPUTE;
struct iris_bo **bop = &ice->shaders.scratch_bos[encoded_size][stage];
* in the base configuration.
*/
unsigned subslice_total = screen->subslice_total;
- if (devinfo->ver == 12)
+ if (devinfo->verx10 == 125)
+ subslice_total = 32;
+ else if (devinfo->ver == 12)
subslice_total = (devinfo->is_dg1 || devinfo->gt == 2 ? 6 : 2);
else if (devinfo->ver == 11)
subslice_total = 8;
return *bop;
}
+/**
+ * Return a scratch-buffer surface state for Gfx12.5+, creating and
+ * caching it on first use.
+ *
+ * On Gfx12.5+, scratch access is surface-based: shaders address scratch
+ * through a RENDER_SURFACE_STATE rather than a raw base pointer (see the
+ * comment in iris_get_scratch_space).  One surface is cached per encoded
+ * per-thread-scratch size in ice->shaders.scratch_surfs[].
+ *
+ * \param per_thread_scratch  Bytes of scratch per thread; must be a
+ *                            power of two >= 1KB (asserted below).
+ *
+ * Returns a pointer to a cached iris_state_ref owned by the context;
+ * callers must not free it.  ref->offset is the surface state's offset
+ * within the bindless uploader's buffer (ref->res).
+ */
+const struct iris_state_ref *
+iris_get_scratch_surf(struct iris_context *ice,
+ unsigned per_thread_scratch)
+{
+ struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
+ ASSERTED const struct intel_device_info *devinfo = &screen->devinfo;
+
+ /* Surface-based scratch only exists on Gfx12.5+. */
+ assert(devinfo->verx10 >= 125);
+
+ /* Encode the size as log2(per_thread_scratch) - 10, matching the
+ * scratch_bos indexing in iris_get_scratch_space; the round-trip
+ * assert guarantees per_thread_scratch was a power of two.
+ */
+ unsigned encoded_size = ffs(per_thread_scratch) - 11;
+ assert(encoded_size < ARRAY_SIZE(ice->shaders.scratch_surfs));
+ assert(per_thread_scratch == 1 << (encoded_size + 10));
+
+ struct iris_state_ref *ref = &ice->shaders.scratch_surfs[encoded_size];
+
+ /* Already uploaded a surface for this size?  Reuse it. */
+ if (ref->res)
+ return ref;
+
+ /* All stages share the compute-style thread-ID-based layout on
+ * Gfx12.5+, so the BO is always fetched for MESA_SHADER_COMPUTE.
+ */
+ struct iris_bo *scratch_bo =
+ iris_get_scratch_space(ice, per_thread_scratch, MESA_SHADER_COMPUTE);
+
+ /* Upload into the bindless heap: the ScratchSpaceBuffer field consumed
+ * by callers is an offset from IRIS_MEMZONE_BINDLESS_START.
+ */
+ void *map = upload_state(ice->state.bindless_uploader, ref,
+ screen->isl_dev.ss.size, 64);
+
+ /* RAW buffer surface over the whole scratch BO; stride_B is the
+ * per-thread size so the hardware can compute per-thread offsets.
+ */
+ isl_buffer_fill_state(&screen->isl_dev, map,
+ .address = scratch_bo->gtt_offset,
+ .size_B = scratch_bo->size,
+ .format = ISL_FORMAT_RAW,
+ .swizzle = ISL_SWIZZLE_IDENTITY,
+ .mocs = iris_mocs(scratch_bo, &screen->isl_dev, 0),
+ .stride_B = per_thread_scratch,
+ .is_scratch = true);
+
+ return ref;
+}
+
/* ------------------------------------------------------------------- */
/**
pkt.Enable = true; \
\
if (prog_data->total_scratch) { \
- pkt.PerThreadScratchSpace = ffs(prog_data->total_scratch) - 11; \
+ INIT_THREAD_SCRATCH_SIZE(pkt) \
}
+#if GFX_VERx10 >= 125
+#define INIT_THREAD_SCRATCH_SIZE(pkt)
+#define MERGE_SCRATCH_ADDR(name) \
+{ \
+ uint32_t pkt2[GENX(name##_length)] = {0}; \
+ _iris_pack_command(batch, GENX(name), pkt2, p) { \
+ p.ScratchSpaceBuffer = scratch_addr >> 4; \
+ } \
+ iris_emit_merge(batch, pkt, pkt2, GENX(name##_length)); \
+}
+#else
+#define INIT_THREAD_SCRATCH_SIZE(pkt) \
+ pkt.PerThreadScratchSpace = ffs(prog_data->total_scratch) - 11;
#define MERGE_SCRATCH_ADDR(name) \
{ \
uint32_t pkt2[GENX(name##_length)] = {0}; \
} \
iris_emit_merge(batch, pkt, pkt2, GENX(name##_length)); \
}
+#endif
/**
ps.PositionXYOffsetSelect =
wm_prog_data->uses_pos_offset ? POSOFFSET_SAMPLE : POSOFFSET_NONE;
- if (prog_data->total_scratch)
- ps.PerThreadScratchSpace = ffs(prog_data->total_scratch) - 11;
+ if (prog_data->total_scratch) {
+ INIT_THREAD_SCRATCH_SIZE(ps);
+ }
}
iris_pack_command(GENX(3DSTATE_PS_EXTRA), psx_state, psx) {
iris_get_scratch_space(ice, prog_data->total_scratch, stage);
iris_use_pinned_bo(batch, scratch_bo, true, IRIS_DOMAIN_NONE);
+#if GFX_VERx10 >= 125
+ const struct iris_state_ref *ref =
+ iris_get_scratch_surf(ice, prog_data->total_scratch);
+ iris_use_pinned_bo(batch, iris_resource_bo(ref->res),
+ false, IRIS_DOMAIN_NONE);
+ scratch_addr = ref->offset +
+ iris_resource_bo(ref->res)->gtt_offset -
+ IRIS_MEMZONE_BINDLESS_START;
+ assert((scratch_addr & 0x3f) == 0 && scratch_addr < (1 << 26));
+#else
scratch_addr = scratch_bo->gtt_offset;
+#endif
}
return scratch_addr;
ps.KernelStartPointer2 = KSP(shader) +
brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2);
+#if GFX_VERx10 >= 125
+ ps.ScratchSpaceBuffer = scratch_addr >> 4;
+#else
ps.ScratchSpaceBasePointer =
rw_bo(NULL, scratch_addr, IRIS_DOMAIN_NONE);
+#endif
}
uint32_t psx_state[GENX(3DSTATE_PS_EXTRA_length)] = {0};
if (stage_dirty & IRIS_STAGE_DIRTY_CS) {
iris_emit_cmd(batch, GENX(CFE_STATE), cfe) {
- /* TODO: Enable gfx12-hp scratch support*/
- assert(prog_data->total_scratch == 0);
-
cfe.MaximumNumberofThreads =
devinfo->max_cs_threads * screen->subslice_total - 1;
+ if (prog_data->total_scratch > 0) {
+ cfe.ScratchSpaceBuffer =
+ iris_get_scratch_surf(ice, prog_data->total_scratch)->offset >> 4;
+ }
}
}