From ac7664e493655e290783c23a0412b9c70936da50 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Mon, 13 Jul 2015 14:21:07 +0300 Subject: [PATCH] i965/gen7-8: Poke the 3DSTATE UAV access enable bits. v2: Set the PS UAV-only bit on HSW (Ken). Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_defines.h | 4 ++++ src/mesa/drivers/dri/i965/gen7_gs_state.c | 4 +++- src/mesa/drivers/dri/i965/gen7_vs_state.c | 13 ++++++++----- src/mesa/drivers/dri/i965/gen7_wm_state.c | 9 +++++++++ src/mesa/drivers/dri/i965/gen8_gs_state.c | 4 +++- src/mesa/drivers/dri/i965/gen8_ps_state.c | 3 ++- src/mesa/drivers/dri/i965/gen8_vs_state.c | 4 +++- 7 files changed, 32 insertions(+), 9 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 9c232c4..7fa7c5f 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1855,6 +1855,7 @@ enum brw_message_target { # define GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 # define GEN6_VS_FLOATING_POINT_MODE_IEEE_754 (0 << 16) # define GEN6_VS_FLOATING_POINT_MODE_ALT (1 << 16) +# define HSW_VS_UAV_ACCESS_ENABLE (1 << 12) /* DW4 */ # define GEN6_VS_DISPATCH_START_GRF_SHIFT 20 # define GEN6_VS_URB_READ_LENGTH_SHIFT 11 @@ -1880,6 +1881,7 @@ enum brw_message_target { # define GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 # define GEN6_GS_FLOATING_POINT_MODE_IEEE_754 (0 << 16) # define GEN6_GS_FLOATING_POINT_MODE_ALT (1 << 16) +# define HSW_GS_UAV_ACCESS_ENABLE (1 << 12) /* DW4 */ # define GEN7_GS_OUTPUT_VERTEX_SIZE_SHIFT 23 # define GEN7_GS_OUTPUT_TOPOLOGY_SHIFT 17 @@ -2406,6 +2408,7 @@ enum brw_wm_barycentric_interp_mode { /* DW2 */ # define GEN7_WM_MSDISPMODE_PERSAMPLE (0 << 31) # define GEN7_WM_MSDISPMODE_PERPIXEL (1 << 31) +# define HSW_WM_UAV_ONLY (1 << 30) #define _3DSTATE_PS 0x7820 /* GEN7+ */ /* DW1: kernel pointer */ @@ -2429,6 +2432,7 @@ enum brw_wm_barycentric_interp_mode { # define GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE (1 << 8) # define GEN7_PS_DUAL_SOURCE_BLEND_ENABLE (1 << 7) # define GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE (1 << 6) +# define HSW_PS_UAV_ACCESS_ENABLE (1 << 5) # define GEN7_PS_POSOFFSET_NONE (0 << 3) # define GEN7_PS_POSOFFSET_CENTROID (2 << 3) # define GEN7_PS_POSOFFSET_SAMPLE (3 << 3) diff --git a/src/mesa/drivers/dri/i965/gen7_gs_state.c b/src/mesa/drivers/dri/i965/gen7_gs_state.c index 8d6d3fe..497ecec 100644 --- a/src/mesa/drivers/dri/i965/gen7_gs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_gs_state.c @@ -59,7 +59,9 @@ upload_gs_state(struct brw_context *brw) OUT_BATCH(((ALIGN(stage_state->sampler_count, 4)/4) << GEN6_GS_SAMPLER_COUNT_SHIFT) | ((brw->gs.prog_data->base.base.binding_table.size_bytes / 4) << - GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); + GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT) | + (brw->is_haswell && prog_data->base.nr_image_params ? + HSW_GS_UAV_ACCESS_ENABLE : 0)); if (brw->gs.prog_data->base.base.total_scratch) { OUT_RELOC(stage_state->scratch_bo, diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c index 00bc6f2..b7e4858 100644 --- a/src/mesa/drivers/dri/i965/gen7_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c @@ -111,6 +111,7 @@ upload_vs_state(struct brw_context *brw) uint32_t floating_point_mode = 0; const int max_threads_shift = brw->is_haswell ? HSW_VS_MAX_THREADS_SHIFT : GEN6_VS_MAX_THREADS_SHIFT; + const struct brw_vue_prog_data *prog_data = &brw->vs.prog_data->base; if (!brw->is_haswell && !brw->is_baytrail) gen7_emit_vs_workaround_flush(brw); @@ -125,19 +126,21 @@ upload_vs_state(struct brw_context *brw) ((ALIGN(stage_state->sampler_count, 4)/4) << GEN6_VS_SAMPLER_COUNT_SHIFT) | ((brw->vs.prog_data->base.base.binding_table.size_bytes / 4) << - GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); + GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT) | + (brw->is_haswell && prog_data->base.nr_image_params ? + HSW_VS_UAV_ACCESS_ENABLE : 0)); - if (brw->vs.prog_data->base.base.total_scratch) { + if (prog_data->base.total_scratch) { OUT_RELOC(stage_state->scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - ffs(brw->vs.prog_data->base.base.total_scratch) - 11); + ffs(prog_data->base.total_scratch) - 11); } else { OUT_BATCH(0); } - OUT_BATCH((brw->vs.prog_data->base.base.dispatch_grf_start_reg << + OUT_BATCH((prog_data->base.dispatch_grf_start_reg << GEN6_VS_DISPATCH_START_GRF_SHIFT) | - (brw->vs.prog_data->base.urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) | + (prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) | (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT)); OUT_BATCH(((brw->max_vs_threads - 1) << max_threads_shift) | diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c index aa47421..285311e 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c @@ -107,6 +107,12 @@ upload_wm_state(struct brw_context *brw) dw1 |= GEN7_WM_USES_INPUT_COVERAGE_MASK; } + /* _NEW_BUFFERS | _NEW_COLOR */ + if (brw->is_haswell && + !(brw_color_buffer_write_enabled(brw) || writes_depth) && + prog_data->base.nr_image_params) + dw2 |= HSW_WM_UAV_ONLY; + BEGIN_BATCH(3); OUT_BATCH(_3DSTATE_WM << 16 | (3 - 2)); OUT_BATCH(dw1); @@ -209,6 +215,9 @@ gen7_upload_ps_state(struct brw_context *brw, _mesa_get_min_invocations_per_fragment(ctx, fp, false); assert(min_inv_per_frag >= 1); + if (brw->is_haswell && prog_data->base.nr_image_params) + dw4 |= HSW_PS_UAV_ACCESS_ENABLE; + if (prog_data->prog_offset_16 || prog_data->no_8) { dw4 |= GEN7_PS_16_DISPATCH_ENABLE; if (!prog_data->no_8 && min_inv_per_frag == 1) { diff --git a/src/mesa/drivers/dri/i965/gen8_gs_state.c b/src/mesa/drivers/dri/i965/gen8_gs_state.c index 26a02d3..81bd3b2 100644 --- a/src/mesa/drivers/dri/i965/gen8_gs_state.c +++ b/src/mesa/drivers/dri/i965/gen8_gs_state.c @@ -52,7 +52,9 @@ gen8_upload_gs_state(struct brw_context *brw) ((ALIGN(stage_state->sampler_count, 4)/4) << GEN6_GS_SAMPLER_COUNT_SHIFT) | ((prog_data->base.binding_table.size_bytes / 4) << - GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); + GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT) | + (prog_data->base.nr_image_params ? + HSW_GS_UAV_ACCESS_ENABLE : 0)); if (brw->gs.prog_data->base.base.total_scratch) { OUT_RELOC64(stage_state->scratch_bo, diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c b/src/mesa/drivers/dri/i965/gen8_ps_state.c index d544509..f84fbe1 100644 --- a/src/mesa/drivers/dri/i965/gen8_ps_state.c +++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c @@ -61,7 +61,8 @@ gen8_upload_ps_extra(struct brw_context *brw, if (brw->gen >= 9 && prog_data->pulls_bary) dw1 |= GEN9_PSX_SHADER_PULLS_BARY; - if (_mesa_active_fragment_shader_has_atomic_ops(&brw->ctx)) + if (_mesa_active_fragment_shader_has_atomic_ops(&brw->ctx) || + prog_data->base.nr_image_params) dw1 |= GEN8_PSX_SHADER_HAS_UAV; BEGIN_BATCH(2); diff --git a/src/mesa/drivers/dri/i965/gen8_vs_state.c b/src/mesa/drivers/dri/i965/gen8_vs_state.c index 28f5add..8b5048b 100644 --- a/src/mesa/drivers/dri/i965/gen8_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen8_vs_state.c @@ -53,7 +53,9 @@ upload_vs_state(struct brw_context *brw) ((ALIGN(stage_state->sampler_count, 4) / 4) << GEN6_VS_SAMPLER_COUNT_SHIFT) | ((prog_data->base.binding_table.size_bytes / 4) << - GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); + GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT) | + (prog_data->base.nr_image_params ? + HSW_VS_UAV_ACCESS_ENABLE : 0)); if (prog_data->base.total_scratch) { OUT_RELOC64(stage_state->scratch_bo, -- 2.7.4