anv: Add support for scratch on XeHP

author Jason Ekstrand <jason@jlekstrand.net>

Tue, 20 Oct 2020 21:11:45 +0000 (16:11 -0500)

committer Marge Bot <eric+marge@anholt.net>

Fri, 25 Jun 2021 00:18:29 +0000 (00:18 +0000)
author Jason Ekstrand <jason@jlekstrand.net>
Tue, 20 Oct 2020 21:11:45 +0000 (16:11 -0500)
committer Marge Bot <eric+marge@anholt.net>
Fri, 25 Jun 2021 00:18:29 +0000 (00:18 +0000)
diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c

index 17b2dc3..1be8f02 100644 (file)
--- a/src/intel/vulkan/anv_allocator.c
+++ b/src/intel/vulkan/anv_allocator.c
@@ -1420,6 +1420,13 @@ anv_scratch_pool_finish(struct anv_device *device, struct anv_scratch_pool *pool
              anv_device_release_bo(device, pool->bos[i][s]);
        }
     }
+
+   for (unsigned i = 0; i < 16; i++) {
+      if (pool->surf_states[i].map != NULL) {
+         anv_state_pool_free(&device->surface_state_pool,
+                             pool->surf_states[i]);
+      }
+   }
  }
  
  struct anv_bo *
@@ -1433,13 +1440,22 @@ anv_scratch_pool_alloc(struct anv_device *device, struct anv_scratch_pool *pool,
     assert(scratch_size_log2 < 16);
  
     assert(stage < ARRAY_SIZE(pool->bos));
+
+   const struct intel_device_info *devinfo = &device->info;
+
+   /* On GFX version 12.5, scratch access changed to a surface-based model.
+    * Instead of each shader type having its own layout based on IDs passed
+    * from the relevant fixed-function unit, all scratch access is based on
+    * thread IDs like it always has been for compute.
+    */
+   if (devinfo->verx10 >= 125)
+      stage = MESA_SHADER_COMPUTE;
+
     struct anv_bo *bo = p_atomic_read(&pool->bos[scratch_size_log2][stage]);
  
     if (bo != NULL)
        return bo;
  
-   const struct intel_device_info *devinfo = &device->info;
-
     unsigned subslices = MAX2(device->physical->subslice_total, 1);
  
     /* The documentation for 3DSTATE_PS "Scratch Space Base Pointer" says:
@@ -1456,7 +1472,9 @@ anv_scratch_pool_alloc(struct anv_device *device, struct anv_scratch_pool *pool,
      * For, Gfx11+, scratch space allocation is based on the number of threads
      * in the base configuration.
      */
-   if (devinfo->ver == 12)
+   if (devinfo->verx10 == 125)
+      subslices = 32;
+   else if (devinfo->ver == 12)
        subslices = (devinfo->is_dg1 || devinfo->gt == 2 ? 6 : 2);
     else if (devinfo->ver == 11)
        subslices = 8;
@@ -1552,6 +1570,50 @@ anv_scratch_pool_alloc(struct anv_device *device, struct anv_scratch_pool *pool,
     }
  }
  
+uint32_t
+anv_scratch_pool_get_surf(struct anv_device *device,
+                          struct anv_scratch_pool *pool,
+                          unsigned per_thread_scratch)
+{ /* Returns the surface state offset for this scratch size, creating and caching it on first use. */
+   if (per_thread_scratch == 0)
+      return 0; /* no scratch requested; 0 is also the "not created yet" value in surfs[] */
+   /* Bucket index: the same ffs(size / 2048) encoding get_scratch_space() computes. */
+   unsigned scratch_size_log2 = ffs(per_thread_scratch / 2048);
+   assert(scratch_size_log2 < 16); /* surfs[] and surf_states[] each have 16 entries */
+   /* Fast path: a surface for this bucket has already been published. */
+   uint32_t surf = p_atomic_read(&pool->surfs[scratch_size_log2]);
+   if (surf > 0)
+      return surf;
+   /* Slow path: fetch the backing BO; this function always asks for the compute-stage BO. */
+   struct anv_bo *bo =
+      anv_scratch_pool_alloc(device, pool, MESA_SHADER_COMPUTE,
+                             per_thread_scratch);
+   struct anv_address addr = { .bo = bo }; /* NOTE(review): bo is dereferenced below without a NULL check - confirm the alloc cannot fail */
+   /* Grab one ISL-surface-state-sized chunk from the device's surface state pool. */
+   struct anv_state state =
+      anv_state_pool_alloc(&device->surface_state_pool,
+                           device->isl_dev.ss.size, 64);
+   /* Describe the whole BO as a RAW scratch buffer whose stride is the per-thread size. */
+   isl_buffer_fill_state(&device->isl_dev, state.map,
+                         .address = anv_address_physical(addr),
+                         .size_B = bo->size,
+                         .mocs = anv_mocs(device, bo, 0),
+                         .format = ISL_FORMAT_RAW,
+                         .swizzle = ISL_SWIZZLE_IDENTITY,
+                         .stride_B = per_thread_scratch,
+                         .is_scratch = true);
+   /* Publish our offset only if the slot still holds 0; otherwise keep the value already there. */
+   uint32_t current = p_atomic_cmpxchg(&pool->surfs[scratch_size_log2],
+                                       0, state.offset);
+   if (current) {
+      anv_state_pool_free(&device->surface_state_pool, state); /* slot was already filled: release our copy */
+      return current;
+   } else {
+      pool->surf_states[scratch_size_log2] = state; /* remembered so anv_scratch_pool_finish() can free it */
+      return state.offset;
+   }
+}
+
  VkResult
  anv_bo_cache_init(struct anv_bo_cache *cache)
  {
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h

index abfa488..932bf8b 100644 (file)
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -823,6 +823,8 @@ void anv_bo_pool_free(struct anv_bo_pool *pool, struct anv_bo *bo);
  struct anv_scratch_pool {
     /* Indexed by Per-Thread Scratch Space number (the hardware value) and stage */
     struct anv_bo *bos[16][MESA_SHADER_STAGES];
+   uint32_t surfs[16]; /* scratch surface state offsets, same first index as bos[]; 0 = not created yet */
+   struct anv_state surf_states[16]; /* allocations behind surfs[]; released in anv_scratch_pool_finish() */
  };
  
  void anv_scratch_pool_init(struct anv_device *device,
@@ -833,6 +835,9 @@ struct anv_bo *anv_scratch_pool_alloc(struct anv_device *device,
                                        struct anv_scratch_pool *pool,
                                        gl_shader_stage stage,
                                        unsigned per_thread_scratch);
+uint32_t anv_scratch_pool_get_surf(struct anv_device *device,
+                                   struct anv_scratch_pool *pool,
+                                   unsigned per_thread_scratch);
  
  /** Implements a BO cache that ensures a 1-1 mapping of GEM BOs to anv_bos */
  struct anv_bo_cache {
diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c

index 005782f..c902ba0 100644 (file)
--- a/src/intel/vulkan/genX_pipeline.c
+++ b/src/intel/vulkan/genX_pipeline.c
@@ -1703,7 +1703,7 @@ get_sampler_count(const struct anv_shader_bin *bin)
     return MIN2(count_by_4, 4);
  }
  
-static struct anv_address
+static UNUSED struct anv_address
  get_scratch_address(struct anv_pipeline *pipeline,
                      gl_shader_stage stage,
                      const struct anv_shader_bin *bin)
@@ -1716,12 +1716,21 @@ get_scratch_address(struct anv_pipeline *pipeline,
     };
  }
  
-static uint32_t
+static UNUSED uint32_t
  get_scratch_space(const struct anv_shader_bin *bin)
  {
     return ffs(bin->prog_data->total_scratch / 2048);
  }
  
+static UNUSED uint32_t
+get_scratch_surf(struct anv_pipeline *pipeline,
+                 const struct anv_shader_bin *bin)
+{ /* Looks up the scratch surface for this shader's total_scratch in the device's scratch pool. */
+   return anv_scratch_pool_get_surf(pipeline->device,
+                                    &pipeline->device->scratch_pool,
+                                    bin->prog_data->total_scratch) >> 4; /* NOTE(review): the >> 4 presumably matches the ScratchSpaceBuffer field's units - confirm against genxml */
+}
+
  static void
  emit_3dstate_vs(struct anv_graphics_pipeline *pipeline)
  {
@@ -1792,9 +1801,13 @@ emit_3dstate_vs(struct anv_graphics_pipeline *pipeline)
           vs_prog_data->base.cull_distance_mask;
  #endif
  
+#if GFX_VERx10 >= 125
+      vs.ScratchSpaceBuffer = get_scratch_surf(&pipeline->base, vs_bin);
+#else
        vs.PerThreadScratchSpace   = get_scratch_space(vs_bin);
        vs.ScratchSpaceBasePointer =
           get_scratch_address(&pipeline->base, MESA_SHADER_VERTEX, vs_bin);
+#endif
     }
  }
  
@@ -1849,10 +1862,13 @@ emit_3dstate_hs_te_ds(struct anv_graphics_pipeline *pipeline,
           tcs_prog_data->base.base.dispatch_grf_start_reg >> 5;
  #endif
  
-
+#if GFX_VERx10 >= 125
+      hs.ScratchSpaceBuffer = get_scratch_surf(&pipeline->base, tcs_bin);
+#else
        hs.PerThreadScratchSpace = get_scratch_space(tcs_bin);
        hs.ScratchSpaceBasePointer =
           get_scratch_address(&pipeline->base, MESA_SHADER_TESS_CTRL, tcs_bin);
+#endif
  
  #if GFX_VER == 12
        /*  Patch Count threshold specifies the maximum number of patches that
@@ -1930,9 +1946,13 @@ emit_3dstate_hs_te_ds(struct anv_graphics_pipeline *pipeline,
           tes_prog_data->base.cull_distance_mask;
  #endif
  
+#if GFX_VERx10 >= 125
+      ds.ScratchSpaceBuffer = get_scratch_surf(&pipeline->base, tes_bin);
+#else
        ds.PerThreadScratchSpace = get_scratch_space(tes_bin);
        ds.ScratchSpaceBasePointer =
           get_scratch_address(&pipeline->base, MESA_SHADER_TESS_EVAL, tes_bin);
+#endif
     }
  }
  
@@ -1998,9 +2018,13 @@ emit_3dstate_gs(struct anv_graphics_pipeline *pipeline)
           gs_prog_data->base.cull_distance_mask;
  #endif
  
+#if GFX_VERx10 >= 125
+      gs.ScratchSpaceBuffer = get_scratch_surf(&pipeline->base, gs_bin);
+#else
        gs.PerThreadScratchSpace   = get_scratch_space(gs_bin);
        gs.ScratchSpaceBasePointer =
           get_scratch_address(&pipeline->base, MESA_SHADER_GEOMETRY, gs_bin);
+#endif
     }
  }
  
@@ -2266,9 +2290,13 @@ emit_3dstate_ps(struct anv_graphics_pipeline *pipeline,
        ps.DispatchGRFStartRegisterForConstantSetupData2 =
           brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2);
  
+#if GFX_VERx10 >= 125
+      ps.ScratchSpaceBuffer = get_scratch_surf(&pipeline->base, fs_bin);
+#else
        ps.PerThreadScratchSpace   = get_scratch_space(fs_bin);
        ps.ScratchSpaceBasePointer =
           get_scratch_address(&pipeline->base, MESA_SHADER_FRAGMENT, fs_bin);
+#endif
     }
  }
  
@@ -2558,8 +2586,7 @@ emit_compute_state(struct anv_compute_pipeline *pipeline,
     anv_batch_emit(&pipeline->base.batch, GENX(CFE_STATE), cfe) {
        cfe.MaximumNumberofThreads =
           devinfo->max_cs_threads * subslices - 1;
-      /* TODO: Enable gfx12-hp scratch support*/
-      assert(get_scratch_space(cs_bin) == 0);
+      cfe.ScratchSpaceBuffer = get_scratch_surf(&pipeline->base, cs_bin);
     }
  }
author	Jason Ekstrand <jason@jlekstrand.net>
	Tue, 20 Oct 2020 21:11:45 +0000 (16:11 -0500)
committer	Marge Bot <eric+marge@anholt.net>
	Fri, 25 Jun 2021 00:18:29 +0000 (00:18 +0000)
src/intel/vulkan/anv_allocator.c		patch \| blob \| history
src/intel/vulkan/anv_private.h		patch \| blob \| history
src/intel/vulkan/genX_pipeline.c		patch \| blob \| history