From eeeea5cb873b40418c738e25e4032fb1fd9b7ac9 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand
Date: Tue, 20 Oct 2020 16:11:45 -0500
Subject: [PATCH] anv: Add support for scratch on XeHP

Rework:
 * Jordan: Handle per_thread_scratch==0 in anv_scratch_pool_get_surf
 * Jordan: Update subslices in anv_scratch_pool_alloc
 * Jason: Clean up the patch a bit

Reviewed-by: Jason Ekstrand
Reviewed-by: Jordan Justen
Part-of:
---
/*
 * Reviewer notes.  This is free-form text placed after the "---" marker; it
 * is not part of the commit message and not part of any hunk.
 *
 * anv_allocator.c
 *  - anv_scratch_pool_finish() additionally walks pool->surf_states[0..15]
 *    and frees every entry whose .map is non-NULL back to
 *    device->surface_state_pool.
 *  - anv_scratch_pool_alloc() now reads devinfo before the pool lookup,
 *    overrides stage to MESA_SHADER_COMPUTE when devinfo->verx10 >= 125, and
 *    sets subslices = 32 when devinfo->verx10 == 125.
 *  - New anv_scratch_pool_get_surf():
 *      - returns 0 when per_thread_scratch is 0;
 *      - otherwise indexes pool->surfs[] by ffs(per_thread_scratch / 2048)
 *        and returns the stored value when it is non-zero;
 *      - otherwise obtains the BO through anv_scratch_pool_alloc(...,
 *        MESA_SHADER_COMPUTE, ...), allocates a surface state, fills it with
 *        isl_buffer_fill_state() (ISL_FORMAT_RAW, stride_B =
 *        per_thread_scratch, is_scratch = true) and publishes state.offset
 *        with p_atomic_cmpxchg(&pool->surfs[...], 0, state.offset);
 *      - when that compare-exchange returns non-zero, the newly allocated
 *        state is freed and the returned value is used instead; otherwise
 *        the state is recorded in pool->surf_states[] and state.offset is
 *        returned.
 *  - NOTE(review): anv_scratch_pool_get_surf() reads bo->size without
 *    checking the pointer returned by anv_scratch_pool_alloc().  Whether
 *    that call can return NULL on this path is not visible in these hunks;
 *    confirm against the full function.
 *
 * anv_private.h
 *  - struct anv_scratch_pool gains surfs[16] and surf_states[16].
 *  - Declares anv_scratch_pool_get_surf().
 *
 * genX_pipeline.c
 *  - get_scratch_address() and get_scratch_space() are marked UNUSED.
 *  - New get_scratch_surf() returns the anv_scratch_pool_get_surf() result
 *    for bin->prog_data->total_scratch, shifted right by 4.
 *  - Under "#if GFX_VERx10 >= 125" the vs/hs/ds/gs/ps packets set
 *    ScratchSpaceBuffer from get_scratch_surf(); the "#else" branch keeps
 *    the existing PerThreadScratchSpace / ScratchSpaceBasePointer setup.
 *  - In emit_compute_state(), the CFE_STATE packet sets
 *    cfe.ScratchSpaceBuffer in place of the removed TODO comment and
 *    assert(get_scratch_space(cs_bin) == 0).
 */
 src/intel/vulkan/anv_allocator.c | 68 ++++++++++++++++++++++++++++++++++++++--
 src/intel/vulkan/anv_private.h   |  5 +++
 src/intel/vulkan/genX_pipeline.c | 37 +++++++++++++++++++---
 3 files changed, 102 insertions(+), 8 deletions(-)

diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c
index 17b2dc3..1be8f02 100644
--- a/src/intel/vulkan/anv_allocator.c
+++ b/src/intel/vulkan/anv_allocator.c
@@ -1420,6 +1420,13 @@ anv_scratch_pool_finish(struct anv_device *device, struct anv_scratch_pool *pool
             anv_device_release_bo(device, pool->bos[i][s]);
       }
    }
+
+   for (unsigned i = 0; i < 16; i++) {
+      if (pool->surf_states[i].map != NULL) {
+         anv_state_pool_free(&device->surface_state_pool,
+                             pool->surf_states[i]);
+      }
+   }
 }
 
 struct anv_bo *
@@ -1433,13 +1440,22 @@ anv_scratch_pool_alloc(struct anv_device *device, struct anv_scratch_pool *pool,
    assert(scratch_size_log2 < 16);
 
    assert(stage < ARRAY_SIZE(pool->bos));
+
+   const struct intel_device_info *devinfo = &device->info;
+
+   /* On GFX version 12.5, scratch access changed to a surface-based model.
+    * Instead of each shader type having its own layout based on IDs passed
+    * from the relevant fixed-function unit, all scratch access is based on
+    * thread IDs like it always has been for compute.
+    */
+   if (devinfo->verx10 >= 125)
+      stage = MESA_SHADER_COMPUTE;
+
    struct anv_bo *bo = p_atomic_read(&pool->bos[scratch_size_log2][stage]);
 
    if (bo != NULL)
       return bo;
 
-   const struct intel_device_info *devinfo = &device->info;
-
    unsigned subslices = MAX2(device->physical->subslice_total, 1);
 
    /* The documentation for 3DSTATE_PS "Scratch Space Base Pointer" says:
@@ -1456,7 +1472,9 @@ anv_scratch_pool_alloc(struct anv_device *device, struct anv_scratch_pool *pool,
     * For, Gfx11+, scratch space allocation is based on the number of threads
     * in the base configuration.
     */
-   if (devinfo->ver == 12)
+   if (devinfo->verx10 == 125)
+      subslices = 32;
+   else if (devinfo->ver == 12)
       subslices = (devinfo->is_dg1 || devinfo->gt == 2 ? 6 : 2);
    else if (devinfo->ver == 11)
       subslices = 8;
@@ -1552,6 +1570,50 @@ anv_scratch_pool_alloc(struct anv_device *device, struct anv_scratch_pool *pool,
    }
 }
 
+uint32_t
+anv_scratch_pool_get_surf(struct anv_device *device,
+                          struct anv_scratch_pool *pool,
+                          unsigned per_thread_scratch)
+{
+   if (per_thread_scratch == 0)
+      return 0;
+
+   unsigned scratch_size_log2 = ffs(per_thread_scratch / 2048);
+   assert(scratch_size_log2 < 16);
+
+   uint32_t surf = p_atomic_read(&pool->surfs[scratch_size_log2]);
+   if (surf > 0)
+      return surf;
+
+   struct anv_bo *bo =
+      anv_scratch_pool_alloc(device, pool, MESA_SHADER_COMPUTE,
+                             per_thread_scratch);
+   struct anv_address addr = { .bo = bo };
+
+   struct anv_state state =
+      anv_state_pool_alloc(&device->surface_state_pool,
+                           device->isl_dev.ss.size, 64);
+
+   isl_buffer_fill_state(&device->isl_dev, state.map,
+                         .address = anv_address_physical(addr),
+                         .size_B = bo->size,
+                         .mocs = anv_mocs(device, bo, 0),
+                         .format = ISL_FORMAT_RAW,
+                         .swizzle = ISL_SWIZZLE_IDENTITY,
+                         .stride_B = per_thread_scratch,
+                         .is_scratch = true);
+
+   uint32_t current = p_atomic_cmpxchg(&pool->surfs[scratch_size_log2],
+                                       0, state.offset);
+   if (current) {
+      anv_state_pool_free(&device->surface_state_pool, state);
+      return current;
+   } else {
+      pool->surf_states[scratch_size_log2] = state;
+      return state.offset;
+   }
+}
+
 VkResult
 anv_bo_cache_init(struct anv_bo_cache *cache)
 {
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index abfa488..932bf8b 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -823,6 +823,8 @@ void anv_bo_pool_free(struct anv_bo_pool *pool, struct anv_bo *bo);
 struct anv_scratch_pool {
    /* Indexed by Per-Thread Scratch Space number (the hardware value) and stage */
    struct anv_bo *bos[16][MESA_SHADER_STAGES];
+   uint32_t surfs[16];
+   struct anv_state surf_states[16];
 };
 
 void anv_scratch_pool_init(struct anv_device *device,
@@ -833,6 +835,9 @@ struct anv_bo *anv_scratch_pool_alloc(struct anv_device *device,
                                       struct anv_scratch_pool *pool,
                                       gl_shader_stage stage,
                                       unsigned per_thread_scratch);
+uint32_t anv_scratch_pool_get_surf(struct anv_device *device,
+                                   struct anv_scratch_pool *pool,
+                                   unsigned per_thread_scratch);
 
 /** Implements a BO cache that ensures a 1-1 mapping of GEM BOs to anv_bos */
 struct anv_bo_cache {
diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c
index 005782f..c902ba0 100644
--- a/src/intel/vulkan/genX_pipeline.c
+++ b/src/intel/vulkan/genX_pipeline.c
@@ -1703,7 +1703,7 @@ get_sampler_count(const struct anv_shader_bin *bin)
    return MIN2(count_by_4, 4);
 }
 
-static struct anv_address
+static UNUSED struct anv_address
 get_scratch_address(struct anv_pipeline *pipeline,
                     gl_shader_stage stage,
                     const struct anv_shader_bin *bin)
@@ -1716,12 +1716,21 @@ get_scratch_address(struct anv_pipeline *pipeline,
    };
 }
 
-static uint32_t
+static UNUSED uint32_t
 get_scratch_space(const struct anv_shader_bin *bin)
 {
    return ffs(bin->prog_data->total_scratch / 2048);
 }
 
+static UNUSED uint32_t
+get_scratch_surf(struct anv_pipeline *pipeline,
+                 const struct anv_shader_bin *bin)
+{
+   return anv_scratch_pool_get_surf(pipeline->device,
+                                    &pipeline->device->scratch_pool,
+                                    bin->prog_data->total_scratch) >> 4;
+}
+
 static void
 emit_3dstate_vs(struct anv_graphics_pipeline *pipeline)
 {
@@ -1792,9 +1801,13 @@ emit_3dstate_vs(struct anv_graphics_pipeline *pipeline)
          vs_prog_data->base.cull_distance_mask;
 #endif
 
+#if GFX_VERx10 >= 125
+      vs.ScratchSpaceBuffer = get_scratch_surf(&pipeline->base, vs_bin);
+#else
       vs.PerThreadScratchSpace = get_scratch_space(vs_bin);
       vs.ScratchSpaceBasePointer =
          get_scratch_address(&pipeline->base, MESA_SHADER_VERTEX, vs_bin);
+#endif
    }
 }
 
@@ -1849,10 +1862,13 @@ emit_3dstate_hs_te_ds(struct anv_graphics_pipeline *pipeline,
          tcs_prog_data->base.base.dispatch_grf_start_reg >> 5;
 #endif
 
-
+#if GFX_VERx10 >= 125
+      hs.ScratchSpaceBuffer = get_scratch_surf(&pipeline->base, tcs_bin);
+#else
       hs.PerThreadScratchSpace = get_scratch_space(tcs_bin);
       hs.ScratchSpaceBasePointer =
          get_scratch_address(&pipeline->base, MESA_SHADER_TESS_CTRL, tcs_bin);
+#endif
 
 #if GFX_VER == 12
       /* Patch Count threshold specifies the maximum number of patches that
@@ -1930,9 +1946,13 @@ emit_3dstate_hs_te_ds(struct anv_graphics_pipeline *pipeline,
          tes_prog_data->base.cull_distance_mask;
 #endif
 
+#if GFX_VERx10 >= 125
+      ds.ScratchSpaceBuffer = get_scratch_surf(&pipeline->base, tes_bin);
+#else
       ds.PerThreadScratchSpace = get_scratch_space(tes_bin);
       ds.ScratchSpaceBasePointer =
          get_scratch_address(&pipeline->base, MESA_SHADER_TESS_EVAL, tes_bin);
+#endif
    }
 }
 
@@ -1998,9 +2018,13 @@ emit_3dstate_gs(struct anv_graphics_pipeline *pipeline)
          gs_prog_data->base.cull_distance_mask;
 #endif
 
+#if GFX_VERx10 >= 125
+      gs.ScratchSpaceBuffer = get_scratch_surf(&pipeline->base, gs_bin);
+#else
       gs.PerThreadScratchSpace = get_scratch_space(gs_bin);
       gs.ScratchSpaceBasePointer =
          get_scratch_address(&pipeline->base, MESA_SHADER_GEOMETRY, gs_bin);
+#endif
    }
 }
 
@@ -2266,9 +2290,13 @@ emit_3dstate_ps(struct anv_graphics_pipeline *pipeline,
       ps.DispatchGRFStartRegisterForConstantSetupData2 =
          brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2);
 
+#if GFX_VERx10 >= 125
+      ps.ScratchSpaceBuffer = get_scratch_surf(&pipeline->base, fs_bin);
+#else
       ps.PerThreadScratchSpace = get_scratch_space(fs_bin);
       ps.ScratchSpaceBasePointer =
          get_scratch_address(&pipeline->base, MESA_SHADER_FRAGMENT, fs_bin);
+#endif
    }
 }
 
@@ -2558,8 +2586,7 @@ emit_compute_state(struct anv_compute_pipeline *pipeline,
    anv_batch_emit(&pipeline->base.batch, GENX(CFE_STATE), cfe) {
       cfe.MaximumNumberofThreads =
          devinfo->max_cs_threads * subslices - 1;
-      /* TODO: Enable gfx12-hp scratch support*/
-      assert(get_scratch_space(cs_bin) == 0);
+      cfe.ScratchSpaceBuffer = get_scratch_surf(&pipeline->base, cs_bin);
    }
 }
 
-- 
2.7.4