clear_dirty_dmabuf_set(ice);
screen->vtbl.destroy_state(ice);
+
+ for (unsigned i = 0; i < ARRAY_SIZE(ice->shaders.scratch_surfs); i++)
+ pipe_resource_reference(&ice->shaders.scratch_surfs[i].res, NULL);
+
iris_destroy_program_cache(ice);
iris_destroy_border_color_pool(ice);
if (screen->measure.config)
* and shader stage.
*/
struct iris_bo *scratch_bos[1 << 4][MESA_SHADER_STAGES];
+
+ /**
+ * Scratch buffer surface states on Gfx12.5+
+ */
+ struct iris_state_ref scratch_surfs[1 << 4];
} shaders;
struct intel_perf_context *perf_ctx;
struct iris_bo *iris_get_scratch_space(struct iris_context *ice,
unsigned per_thread_scratch,
gl_shader_stage stage);
+const struct iris_state_ref *iris_get_scratch_surf(struct iris_context *ice,
+ unsigned per_thread_scratch);
uint32_t iris_group_index_to_bti(const struct iris_binding_table *bt,
enum iris_surface_group group,
uint32_t index);
unsigned encoded_size = ffs(per_thread_scratch) - 11;
assert(encoded_size < ARRAY_SIZE(ice->shaders.scratch_bos));
+ assert(per_thread_scratch == 1 << (encoded_size + 10));
+
+ /* On GFX version 12.5, scratch access changed to a surface-based model.
+ * Instead of each shader type having its own layout based on IDs passed
+ * from the relevant fixed-function unit, all scratch access is based on
+ * thread IDs like it always has been for compute.
+ */
+ if (devinfo->verx10 >= 125)
+ stage = MESA_SHADER_COMPUTE;
struct iris_bo **bop = &ice->shaders.scratch_bos[encoded_size][stage];
* in the base configuration.
*/
unsigned subslice_total = screen->subslice_total;
- if (devinfo->ver == 12)
+ if (devinfo->verx10 == 125)
+ subslice_total = 32;
+ else if (devinfo->ver == 12)
subslice_total = (devinfo->is_dg1 || devinfo->gt == 2 ? 6 : 2);
else if (devinfo->ver == 11)
subslice_total = 8;
return *bop;
}
+/**
+ * Return a scratch-buffer surface state for Gfx12.5+, creating and
+ * caching it on first use.
+ *
+ * On Gfx12.5+, scratch access is surface-based: shaders address scratch
+ * through a RENDER_SURFACE_STATE rather than a raw base pointer (see the
+ * comment in iris_get_scratch_space).  One surface is cached per encoded
+ * per-thread-scratch size in ice->shaders.scratch_surfs[].
+ *
+ * \param per_thread_scratch  Bytes of scratch per thread; must be a
+ *                            power of two >= 1KB (asserted below).
+ *
+ * Returns a pointer to a cached iris_state_ref owned by the context;
+ * callers must not free it.  ref->offset is the surface state's offset
+ * within the bindless uploader's buffer (ref->res).
+ */
+const struct iris_state_ref *
+iris_get_scratch_surf(struct iris_context *ice,
+ unsigned per_thread_scratch)
+{
+ struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
+ ASSERTED const struct intel_device_info *devinfo = &screen->devinfo;
+
+ /* Surface-based scratch only exists on Gfx12.5+. */
+ assert(devinfo->verx10 >= 125);
+
+ /* Encode the size as log2(per_thread_scratch) - 10, matching the
+ * scratch_bos indexing in iris_get_scratch_space; the round-trip
+ * assert guarantees per_thread_scratch was a power of two.
+ */
+ unsigned encoded_size = ffs(per_thread_scratch) - 11;
+ assert(encoded_size < ARRAY_SIZE(ice->shaders.scratch_surfs));
+ assert(per_thread_scratch == 1 << (encoded_size + 10));
+
+ struct iris_state_ref *ref = &ice->shaders.scratch_surfs[encoded_size];
+
+ /* Already uploaded a surface for this size?  Reuse it. */
+ if (ref->res)
+ return ref;
+
+ /* All stages share the compute-style thread-ID-based layout on
+ * Gfx12.5+, so the BO is always fetched for MESA_SHADER_COMPUTE.
+ */
+ struct iris_bo *scratch_bo =
+ iris_get_scratch_space(ice, per_thread_scratch, MESA_SHADER_COMPUTE);
+
+ /* Upload into the bindless heap: the ScratchSpaceBuffer field consumed
+ * by callers is an offset from IRIS_MEMZONE_BINDLESS_START.
+ */
+ void *map = upload_state(ice->state.bindless_uploader, ref,
+ screen->isl_dev.ss.size, 64);
+
+ /* RAW buffer surface over the whole scratch BO; stride_B is the
+ * per-thread size so the hardware can compute per-thread offsets.
+ */
+ isl_buffer_fill_state(&screen->isl_dev, map,
+ .address = scratch_bo->gtt_offset,
+ .size_B = scratch_bo->size,
+ .format = ISL_FORMAT_RAW,
+ .swizzle = ISL_SWIZZLE_IDENTITY,
+ .mocs = iris_mocs(scratch_bo, &screen->isl_dev, 0),
+ .stride_B = per_thread_scratch,
+ .is_scratch = true);
+
+ return ref;
+}
+
/* ------------------------------------------------------------------- */
/**
pkt.Enable = true; \
\
if (prog_data->total_scratch) { \
- pkt.PerThreadScratchSpace = ffs(prog_data->total_scratch) - 11; \
+ INIT_THREAD_SCRATCH_SIZE(pkt) \
}
+#if GFX_VERx10 >= 125
+#define INIT_THREAD_SCRATCH_SIZE(pkt)
+#define MERGE_SCRATCH_ADDR(name) \
+{ \
+ uint32_t pkt2[GENX(name##_length)] = {0}; \
+ _iris_pack_command(batch, GENX(name), pkt2, p) { \
+ p.ScratchSpaceBuffer = scratch_addr >> 4; \
+ } \
+ iris_emit_merge(batch, pkt, pkt2, GENX(name##_length)); \
+}
+#else
+#define INIT_THREAD_SCRATCH_SIZE(pkt) \
+ pkt.PerThreadScratchSpace = ffs(prog_data->total_scratch) - 11;
#define MERGE_SCRATCH_ADDR(name) \
{ \
uint32_t pkt2[GENX(name##_length)] = {0}; \
} \
iris_emit_merge(batch, pkt, pkt2, GENX(name##_length)); \
}
+#endif
/**
ps.PositionXYOffsetSelect =
wm_prog_data->uses_pos_offset ? POSOFFSET_SAMPLE : POSOFFSET_NONE;
- if (prog_data->total_scratch)
- ps.PerThreadScratchSpace = ffs(prog_data->total_scratch) - 11;
+ if (prog_data->total_scratch) {
+ INIT_THREAD_SCRATCH_SIZE(ps);
+ }
}
iris_pack_command(GENX(3DSTATE_PS_EXTRA), psx_state, psx) {
iris_get_scratch_space(ice, prog_data->total_scratch, stage);
iris_use_pinned_bo(batch, scratch_bo, true, IRIS_DOMAIN_NONE);
+#if GFX_VERx10 >= 125
+ const struct iris_state_ref *ref =
+ iris_get_scratch_surf(ice, prog_data->total_scratch);
+ iris_use_pinned_bo(batch, iris_resource_bo(ref->res),
+ false, IRIS_DOMAIN_NONE);
+ scratch_addr = ref->offset +
+ iris_resource_bo(ref->res)->gtt_offset -
+ IRIS_MEMZONE_BINDLESS_START;
+ assert((scratch_addr & 0x3f) == 0 && scratch_addr < (1 << 26));
+#else
scratch_addr = scratch_bo->gtt_offset;
+#endif
}
return scratch_addr;
ps.KernelStartPointer2 = KSP(shader) +
brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2);
+#if GFX_VERx10 >= 125
+ ps.ScratchSpaceBuffer = scratch_addr >> 4;
+#else
ps.ScratchSpaceBasePointer =
rw_bo(NULL, scratch_addr, IRIS_DOMAIN_NONE);
+#endif
}
uint32_t psx_state[GENX(3DSTATE_PS_EXTRA_length)] = {0};
if (stage_dirty & IRIS_STAGE_DIRTY_CS) {
iris_emit_cmd(batch, GENX(CFE_STATE), cfe) {
- /* TODO: Enable gfx12-hp scratch support*/
- assert(prog_data->total_scratch == 0);
-
cfe.MaximumNumberofThreads =
devinfo->max_cs_threads * screen->subslice_total - 1;
+ if (prog_data->total_scratch > 0) {
+ cfe.ScratchSpaceBuffer =
+ iris_get_scratch_surf(ice, prog_data->total_scratch)->offset >> 4;
+ }
}
}