From b4b17f8aaac83fa5ff9532533697fa643a8c5741 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Wed, 10 May 2023 08:16:59 +0300 Subject: [PATCH] Revert "intel/compiler: make uses_pos_offset a tri-state" This reverts commit 5489033fa8568ecacafe32ceab36f89f2e14f3dc. The problem I was trying to address is that we were programming the 3DSTATE_PS::PositionXYOffsetSelect bit differently with GPL (CENTROID) than without (NONE). I failed to understand that this bit also impacts the thread payload layout. GPL fragment shaders don't know ahead of time if pos_offset is going to be used. It'll be choosen at runtime base on push constant bits. So we need to program this bit different just to have a payload matching the compiled shader code. This fixes the freedoom replay with GPL FS shader in SIMD32. Signed-off-by: Lionel Landwerlin Acked-by: Ivan Briano Part-of: --- src/gallium/drivers/crocus/crocus_state.c | 12 +++++------- src/gallium/drivers/iris/iris_state.c | 4 +--- src/intel/compiler/brw_compiler.h | 22 +--------------------- src/intel/compiler/brw_fs.cpp | 16 ++++++---------- src/intel/vulkan/genX_pipeline.c | 6 ++---- src/intel/vulkan_hasvk/genX_pipeline.c | 6 ++---- 6 files changed, 17 insertions(+), 49 deletions(-) diff --git a/src/gallium/drivers/crocus/crocus_state.c b/src/gallium/drivers/crocus/crocus_state.c index 0bc05a2..28fa9e3 100644 --- a/src/gallium/drivers/crocus/crocus_state.c +++ b/src/gallium/drivers/crocus/crocus_state.c @@ -6505,9 +6505,7 @@ crocus_upload_dirty_render_state(struct crocus_context *ice, * look useful at the moment. We might need this in future. */ ps.PositionXYOffsetSelect = - brw_wm_prog_data_uses_position_xy_offset(wm_prog_data, - 0 /* msaa_flags */) ? - POSOFFSET_SAMPLE : POSOFFSET_NONE; + wm_prog_data->uses_pos_offset ? POSOFFSET_SAMPLE : POSOFFSET_NONE; if (wm_prog_data->base.total_scratch) { struct crocus_bo *bo = crocus_get_scratch_space(ice, wm_prog_data->base.total_scratch, MESA_SHADER_FRAGMENT); @@ -7274,10 +7272,10 @@ crocus_upload_dirty_render_state(struct crocus_context *ice, * We only require XY sample offsets. So, this recommendation doesn't * look useful at the moment. We might need this in future. */ - wm.PositionXYOffsetSelect = - brw_wm_prog_data_uses_position_xy_offset(wm_prog_data, - 0 /* msaa_flags */) ? - POSOFFSET_SAMPLE : POSOFFSET_NONE; + if (wm_prog_data->uses_pos_offset) + wm.PositionXYOffsetSelect = POSOFFSET_SAMPLE; + else + wm.PositionXYOffsetSelect = POSOFFSET_NONE; #endif wm.LineStippleEnable = cso->cso.line_stipple_enable; wm.PolygonStippleEnable = cso->cso.poly_stipple_enable; diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c index 748bea9..ea0271f 100644 --- a/src/gallium/drivers/iris/iris_state.c +++ b/src/gallium/drivers/iris/iris_state.c @@ -4936,9 +4936,7 @@ iris_store_fs_state(const struct intel_device_info *devinfo, * look useful at the moment. We might need this in future. */ ps.PositionXYOffsetSelect = - brw_wm_prog_data_uses_position_xy_offset(wm_prog_data, - 0 /* msaa_flags */) ? - POSOFFSET_SAMPLE : POSOFFSET_NONE; + wm_prog_data->uses_pos_offset ? POSOFFSET_SAMPLE : POSOFFSET_NONE; if (prog_data->total_scratch) { INIT_THREAD_SCRATCH_SIZE(ps); diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h index 7acffd6..f456acc 100644 --- a/src/intel/compiler/brw_compiler.h +++ b/src/intel/compiler/brw_compiler.h @@ -923,8 +923,7 @@ struct brw_wm_prog_data { bool dispatch_16; bool dispatch_32; bool dual_src_blend; - enum brw_sometimes uses_pos_offset; - bool read_pos_offset_input; + bool uses_pos_offset; bool uses_omask; bool uses_kill; bool uses_src_depth; @@ -1187,25 +1186,6 @@ brw_wm_prog_data_is_coarse(const struct brw_wm_prog_data *prog_data, return prog_data->coarse_pixel_dispatch; } -static inline bool -brw_wm_prog_data_uses_position_xy_offset(const struct brw_wm_prog_data *prog_data, - enum brw_wm_msaa_flags pushed_msaa_flags) -{ - bool per_sample; - if (pushed_msaa_flags & BRW_WM_MSAA_FLAG_ENABLE_DYNAMIC) { - per_sample = (pushed_msaa_flags & BRW_WM_MSAA_FLAG_PERSAMPLE_INTERP) != 0; - } else { - assert(prog_data->persample_dispatch == BRW_ALWAYS || - prog_data->persample_dispatch == BRW_NEVER); - per_sample = prog_data->persample_dispatch == BRW_ALWAYS; - } - - if (!per_sample) - return false; - - return prog_data->read_pos_offset_input; -} - struct brw_push_const_block { unsigned dwords; /* Dword count, not reg aligned */ unsigned regs; diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 0a0ed57..59637d3 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -7365,16 +7365,12 @@ brw_nir_populate_wm_prog_data(const nir_shader *shader, * per-sample dispatch. If we need gl_SamplePosition and we don't have * persample dispatch, we hard-code it to 0.5. */ - prog_data->read_pos_offset_input = - BITSET_TEST(shader->info.system_values_read, - SYSTEM_VALUE_SAMPLE_POS) || - BITSET_TEST(shader->info.system_values_read, - SYSTEM_VALUE_SAMPLE_POS_OR_CENTER); - - if (prog_data->read_pos_offset_input) - prog_data->uses_pos_offset = prog_data->persample_dispatch; - else - prog_data->uses_pos_offset = BRW_NEVER; + prog_data->uses_pos_offset = + prog_data->persample_dispatch != BRW_NEVER && + (BITSET_TEST(shader->info.system_values_read, + SYSTEM_VALUE_SAMPLE_POS) || + BITSET_TEST(shader->info.system_values_read, + SYSTEM_VALUE_SAMPLE_POS_OR_CENTER)); } prog_data->has_render_target_reads = shader->info.outputs_read != 0ull; diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c index 1e5a76d..5f4a56f 100644 --- a/src/intel/vulkan/genX_pipeline.c +++ b/src/intel/vulkan/genX_pipeline.c @@ -1535,10 +1535,8 @@ emit_3dstate_ps(struct anv_graphics_pipeline *pipeline, ps.PushConstantEnable = wm_prog_data->base.nr_params > 0 || wm_prog_data->base.ubo_ranges[0].length; ps.PositionXYOffsetSelect = - !brw_wm_prog_data_uses_position_xy_offset(wm_prog_data, - pipeline->fs_msaa_flags) ? - POSOFFSET_NONE : - (persample ? POSOFFSET_SAMPLE : POSOFFSET_CENTROID); + !wm_prog_data->uses_pos_offset ? POSOFFSET_NONE : + persample ? POSOFFSET_SAMPLE : POSOFFSET_CENTROID; ps.MaximumNumberofThreadsPerPSD = devinfo->max_threads_per_psd - 1; diff --git a/src/intel/vulkan_hasvk/genX_pipeline.c b/src/intel/vulkan_hasvk/genX_pipeline.c index 9a8483a..bd13d52 100644 --- a/src/intel/vulkan_hasvk/genX_pipeline.c +++ b/src/intel/vulkan_hasvk/genX_pipeline.c @@ -1695,10 +1695,8 @@ emit_3dstate_ps(struct anv_graphics_pipeline *pipeline, ps.BindingTableEntryCount = fs_bin->bind_map.surface_count; ps.PushConstantEnable = wm_prog_data->base.nr_params > 0 || wm_prog_data->base.ubo_ranges[0].length; - ps.PositionXYOffsetSelect = - brw_wm_prog_data_uses_position_xy_offset(wm_prog_data, - 0 /* msaa_flags */) ? - POSOFFSET_SAMPLE : POSOFFSET_NONE; + ps.PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ? + POSOFFSET_SAMPLE: POSOFFSET_NONE; #if GFX_VER < 8 ps.AttributeEnable = wm_prog_data->num_varying_inputs > 0; ps.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask; -- 2.7.4