From ae18e1e707c4be005256305a30b747ab1bdf55d8 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 20 Oct 2020 14:27:58 -0500 Subject: [PATCH] iris: Add support for scratch on XeHP Rework: * Jordan: Handle prog_data->total_scratch==0 in iris_upload_compute_walker * Jordan: Resolve iris_get_scratch_space conflict with e2c5ef6cd65 * Jordan: Rebase on 4256f7ed584. broken * Ken: Mostly fixed the rebase * Jordan: Fix two small compilation issues * Jordan: Rebase on Ken's ("iris: Make a pin_scratch_space() helper") * Lionel: Fix a few bugs with scratch handles * Jason: Tidy the patch up a bit Reviewed-by: Jason Ekstrand Reviewed-by: Jordan Justen Part-of: --- src/gallium/drivers/iris/iris_context.c | 4 +++ src/gallium/drivers/iris/iris_context.h | 7 +++++ src/gallium/drivers/iris/iris_program.c | 49 ++++++++++++++++++++++++++++++++- src/gallium/drivers/iris/iris_state.c | 43 +++++++++++++++++++++++++---- 4 files changed, 96 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/iris/iris_context.c b/src/gallium/drivers/iris/iris_context.c index 7bec71f..274d0c0 100644 --- a/src/gallium/drivers/iris/iris_context.c +++ b/src/gallium/drivers/iris/iris_context.c @@ -223,6 +223,10 @@ iris_destroy_context(struct pipe_context *ctx) clear_dirty_dmabuf_set(ice); screen->vtbl.destroy_state(ice); + + for (unsigned i = 0; i < ARRAY_SIZE(ice->shaders.scratch_surfs); i++) + pipe_resource_reference(&ice->shaders.scratch_surfs[i].res, NULL); + iris_destroy_program_cache(ice); iris_destroy_border_color_pool(ice); if (screen->measure.config) diff --git a/src/gallium/drivers/iris/iris_context.h b/src/gallium/drivers/iris/iris_context.h index 18a6ff6..1e7d4e9 100644 --- a/src/gallium/drivers/iris/iris_context.h +++ b/src/gallium/drivers/iris/iris_context.h @@ -655,6 +655,11 @@ struct iris_context { * and shader stage. */ struct iris_bo *scratch_bos[1 << 4][MESA_SHADER_STAGES]; + + /** + * Scratch buffer surface states on Gfx12.5+ + */ + struct iris_state_ref scratch_surfs[1 << 4]; } shaders; struct intel_perf_context *perf_ctx; @@ -895,6 +900,8 @@ const struct shader_info *iris_get_shader_info(const struct iris_context *ice, struct iris_bo *iris_get_scratch_space(struct iris_context *ice, unsigned per_thread_scratch, gl_shader_stage stage); +const struct iris_state_ref *iris_get_scratch_surf(struct iris_context *ice, + unsigned per_thread_scratch); uint32_t iris_group_index_to_bti(const struct iris_binding_table *bt, enum iris_surface_group group, uint32_t index); diff --git a/src/gallium/drivers/iris/iris_program.c b/src/gallium/drivers/iris/iris_program.c index 9e5ae7a..ddf252e 100644 --- a/src/gallium/drivers/iris/iris_program.c +++ b/src/gallium/drivers/iris/iris_program.c @@ -2150,6 +2150,15 @@ iris_get_scratch_space(struct iris_context *ice, unsigned encoded_size = ffs(per_thread_scratch) - 11; assert(encoded_size < ARRAY_SIZE(ice->shaders.scratch_bos)); + assert(per_thread_scratch == 1 << (encoded_size + 10)); + + /* On GFX version 12.5, scratch access changed to a surface-based model. + * Instead of each shader type having its own layout based on IDs passed + * from the relevant fixed-function unit, all scratch access is based on + * thread IDs like it always has been for compute. + */ + if (devinfo->verx10 >= 125) + stage = MESA_SHADER_COMPUTE; struct iris_bo **bop = &ice->shaders.scratch_bos[encoded_size][stage]; @@ -2168,7 +2177,9 @@ iris_get_scratch_space(struct iris_context *ice, * in the base configuration. */ unsigned subslice_total = screen->subslice_total; - if (devinfo->ver == 12) + if (devinfo->verx10 == 125) + subslice_total = 32; + else if (devinfo->ver == 12) subslice_total = (devinfo->is_dg1 || devinfo->gt == 2 ? 6 : 2); else if (devinfo->ver == 11) subslice_total = 8; @@ -2213,6 +2224,42 @@ iris_get_scratch_space(struct iris_context *ice, return *bop; } +const struct iris_state_ref * +iris_get_scratch_surf(struct iris_context *ice, + unsigned per_thread_scratch) +{ + struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen; + ASSERTED const struct intel_device_info *devinfo = &screen->devinfo; + + assert(devinfo->verx10 >= 125); + + unsigned encoded_size = ffs(per_thread_scratch) - 11; + assert(encoded_size < ARRAY_SIZE(ice->shaders.scratch_surfs)); + assert(per_thread_scratch == 1 << (encoded_size + 10)); + + struct iris_state_ref *ref = &ice->shaders.scratch_surfs[encoded_size]; + + if (ref->res) + return ref; + + struct iris_bo *scratch_bo = + iris_get_scratch_space(ice, per_thread_scratch, MESA_SHADER_COMPUTE); + + void *map = upload_state(ice->state.bindless_uploader, ref, + screen->isl_dev.ss.size, 64); + + isl_buffer_fill_state(&screen->isl_dev, map, + .address = scratch_bo->gtt_offset, + .size_B = scratch_bo->size, + .format = ISL_FORMAT_RAW, + .swizzle = ISL_SWIZZLE_IDENTITY, + .mocs = iris_mocs(scratch_bo, &screen->isl_dev, 0), + .stride_B = per_thread_scratch, + .is_scratch = true); + + return ref; +} + /* ------------------------------------------------------------------- */ /** diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c index 9e14fb2..f520ac3 100644 --- a/src/gallium/drivers/iris/iris_state.c +++ b/src/gallium/drivers/iris/iris_state.c @@ -4317,9 +4317,22 @@ KSP(const struct iris_compiled_shader *shader) pkt.Enable = true; \ \ if (prog_data->total_scratch) { \ - pkt.PerThreadScratchSpace = ffs(prog_data->total_scratch) - 11; \ + INIT_THREAD_SCRATCH_SIZE(pkt) \ } +#if GFX_VERx10 >= 125 +#define INIT_THREAD_SCRATCH_SIZE(pkt) +#define MERGE_SCRATCH_ADDR(name) \ +{ \ + uint32_t pkt2[GENX(name##_length)] = {0}; \ + _iris_pack_command(batch, GENX(name), pkt2, p) { \ + p.ScratchSpaceBuffer = scratch_addr >> 4; \ + } \ + iris_emit_merge(batch, pkt, pkt2, GENX(name##_length)); \ +} +#else +#define INIT_THREAD_SCRATCH_SIZE(pkt) \ + pkt.PerThreadScratchSpace = ffs(prog_data->total_scratch) - 11; #define MERGE_SCRATCH_ADDR(name) \ { \ uint32_t pkt2[GENX(name##_length)] = {0}; \ @@ -4329,6 +4342,7 @@ KSP(const struct iris_compiled_shader *shader) } \ iris_emit_merge(batch, pkt, pkt2, GENX(name##_length)); \ } +#endif /** @@ -4515,8 +4529,9 @@ iris_store_fs_state(const struct intel_device_info *devinfo, ps.PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ? POSOFFSET_SAMPLE : POSOFFSET_NONE; - if (prog_data->total_scratch) - ps.PerThreadScratchSpace = ffs(prog_data->total_scratch) - 11; + if (prog_data->total_scratch) { + INIT_THREAD_SCRATCH_SIZE(ps); + } } iris_pack_command(GENX(3DSTATE_PS_EXTRA), psx_state, psx) { @@ -5078,7 +5093,18 @@ pin_scratch_space(struct iris_context *ice, iris_get_scratch_space(ice, prog_data->total_scratch, stage); iris_use_pinned_bo(batch, scratch_bo, true, IRIS_DOMAIN_NONE); +#if GFX_VERx10 >= 125 + const struct iris_state_ref *ref = + iris_get_scratch_surf(ice, prog_data->total_scratch); + iris_use_pinned_bo(batch, iris_resource_bo(ref->res), + false, IRIS_DOMAIN_NONE); + scratch_addr = ref->offset + + iris_resource_bo(ref->res)->gtt_offset - + IRIS_MEMZONE_BINDLESS_START; + assert((scratch_addr & 0x3f) == 0 && scratch_addr < (1 << 26)); +#else scratch_addr = scratch_bo->gtt_offset; +#endif } return scratch_addr; @@ -5915,8 +5941,12 @@ iris_upload_dirty_render_state(struct iris_context *ice, ps.KernelStartPointer2 = KSP(shader) + brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2); +#if GFX_VERx10 >= 125 + ps.ScratchSpaceBuffer = scratch_addr >> 4; +#else ps.ScratchSpaceBasePointer = rw_bo(NULL, scratch_addr, IRIS_DOMAIN_NONE); +#endif } uint32_t psx_state[GENX(3DSTATE_PS_EXTRA_length)] = {0}; @@ -6829,11 +6859,12 @@ iris_upload_compute_walker(struct iris_context *ice, if (stage_dirty & IRIS_STAGE_DIRTY_CS) { iris_emit_cmd(batch, GENX(CFE_STATE), cfe) { - /* TODO: Enable gfx12-hp scratch support*/ - assert(prog_data->total_scratch == 0); - cfe.MaximumNumberofThreads = devinfo->max_cs_threads * screen->subslice_total - 1; + if (prog_data->total_scratch > 0) { + cfe.ScratchSpaceBuffer = + iris_get_scratch_surf(ice, prog_data->total_scratch)->offset >> 4; + } } } -- 2.7.4