From 4db98f8beb990676be0833dda6c37566d0283911 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Wed, 22 Jun 2016 00:33:46 -0700 Subject: [PATCH] i965: Combine 3DSTATE_STREAMOUT emitters and genX_sol_state atoms. They're basically the same. Let's avoid the code duplication. v2: Fix SO_BUFFER_ENABLE stuff to only happen on Gen < 8 (caught by Jason Ekstrand). Cc: mesa-stable@lists.freedesktop.org Signed-off-by: Kenneth Graunke Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/brw_state.h | 2 +- src/mesa/drivers/dri/i965/brw_state_upload.c | 2 +- src/mesa/drivers/dri/i965/gen7_sol_state.c | 42 ++++++++++--- src/mesa/drivers/dri/i965/gen8_sol_state.c | 90 +--------------------------- 4 files changed, 37 insertions(+), 99 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index b29412e..a16e876 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -166,7 +166,6 @@ extern const struct brw_tracked_state gen8_wm_state; extern const struct brw_tracked_state gen8_raster_state; extern const struct brw_tracked_state gen8_sbe_state; extern const struct brw_tracked_state gen8_sf_state; -extern const struct brw_tracked_state gen8_sol_state; extern const struct brw_tracked_state gen8_sf_clip_viewport; extern const struct brw_tracked_state gen8_vertices; extern const struct brw_tracked_state gen8_vf_topology; @@ -303,6 +302,7 @@ void gen8_upload_ps_extra(struct brw_context *brw, /* gen7_sol_state.c */ void gen7_upload_3dstate_so_decl_list(struct brw_context *brw, const struct brw_vue_map *vue_map); +void gen8_upload_3dstate_so_buffers(struct brw_context *brw); /* gen8_surface_state.c */ diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 0b47ebe..4a20821 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -337,7 +337,7 @@ static const struct brw_tracked_state *gen8_render_atoms[] = &gen7_te_state, &gen8_ds_state, &gen8_gs_state, - &gen8_sol_state, + &gen7_sol_state, &gen6_clip_state, &gen8_raster_state, &gen8_sbe_state, diff --git a/src/mesa/drivers/dri/i965/gen7_sol_state.c b/src/mesa/drivers/dri/i965/gen7_sol_state.c index 4749cc8..6f51040 100644 --- a/src/mesa/drivers/dri/i965/gen7_sol_state.c +++ b/src/mesa/drivers/dri/i965/gen7_sol_state.c @@ -222,7 +222,9 @@ upload_3dstate_streamout(struct brw_context *brw, bool active, /* BRW_NEW_TRANSFORM_FEEDBACK */ struct gl_transform_feedback_object *xfb_obj = ctx->TransformFeedback.CurrentObject; - uint32_t dw1 = 0, dw2 = 0; + const struct gl_transform_feedback_info *linked_xfb_info = + &xfb_obj->shader_program->LinkedTransformFeedback; + uint32_t dw1 = 0, dw2 = 0, dw3 = 0, dw4 = 0; int i; if (active) { @@ -237,10 +239,12 @@ upload_3dstate_streamout(struct brw_context *brw, bool active, if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) dw1 |= SO_REORDER_TRAILING; - for (i = 0; i < 4; i++) { - if (xfb_obj->Buffers[i]) { - dw1 |= SO_BUFFER_ENABLE(i); - } + if (brw->gen < 8) { + for (i = 0; i < 4; i++) { + if (xfb_obj->Buffers[i]) { + dw1 |= SO_BUFFER_ENABLE(i); + } + } } /* We always read the whole vertex. This could be reduced at some @@ -258,12 +262,30 @@ upload_3dstate_streamout(struct brw_context *brw, bool active, dw2 |= SET_FIELD(urb_entry_read_offset, SO_STREAM_3_VERTEX_READ_OFFSET); dw2 |= SET_FIELD(urb_entry_read_length - 1, SO_STREAM_3_VERTEX_READ_LENGTH); + + if (brw->gen >= 8) { + /* Set buffer pitches; 0 means unbound. */ + if (xfb_obj->Buffers[0]) + dw3 |= linked_xfb_info->Buffers[0].Stride * 4; + if (xfb_obj->Buffers[1]) + dw3 |= (linked_xfb_info->Buffers[1].Stride * 4) << 16; + if (xfb_obj->Buffers[2]) + dw4 |= linked_xfb_info->Buffers[2].Stride * 4; + if (xfb_obj->Buffers[3]) + dw4 |= (linked_xfb_info->Buffers[3].Stride * 4) << 16; + } } - BEGIN_BATCH(3); - OUT_BATCH(_3DSTATE_STREAMOUT << 16 | (3 - 2)); + const int dwords = brw->gen >= 8 ? 5 : 3; + + BEGIN_BATCH(dwords); + OUT_BATCH(_3DSTATE_STREAMOUT << 16 | (dwords - 2)); OUT_BATCH(dw1); OUT_BATCH(dw2); + if (dwords > 3) { + OUT_BATCH(dw3); + OUT_BATCH(dw4); + } ADVANCE_BATCH(); } @@ -275,7 +297,11 @@ upload_sol_state(struct brw_context *brw) bool active = _mesa_is_xfb_active_and_unpaused(ctx); if (active) { - upload_3dstate_so_buffers(brw); + if (brw->gen >= 8) + gen8_upload_3dstate_so_buffers(brw); + else + upload_3dstate_so_buffers(brw); + /* BRW_NEW_VUE_MAP_GEOM_OUT */ gen7_upload_3dstate_so_decl_list(brw, &brw->vue_map_geom_out); } diff --git a/src/mesa/drivers/dri/i965/gen8_sol_state.c b/src/mesa/drivers/dri/i965/gen8_sol_state.c index a72f5e1..21cc129 100644 --- a/src/mesa/drivers/dri/i965/gen8_sol_state.c +++ b/src/mesa/drivers/dri/i965/gen8_sol_state.c @@ -35,7 +35,7 @@ #include "intel_buffer_objects.h" #include "main/transformfeedback.h" -static void +void gen8_upload_3dstate_so_buffers(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; @@ -93,91 +93,3 @@ gen8_upload_3dstate_so_buffers(struct brw_context *brw) } brw_obj->zero_offsets = false; } - -static void -gen8_upload_3dstate_streamout(struct brw_context *brw, bool active, - struct brw_vue_map *vue_map) -{ - struct gl_context *ctx = &brw->ctx; - - /* BRW_NEW_TRANSFORM_FEEDBACK */ - struct gl_transform_feedback_object *xfb_obj = - ctx->TransformFeedback.CurrentObject; - const struct gl_transform_feedback_info *linked_xfb_info = - &xfb_obj->shader_program->LinkedTransformFeedback; - uint32_t dw1 = 0, dw2 = 0, dw3 = 0, dw4 = 0; - - if (active) { - int urb_entry_read_offset = 0; - int urb_entry_read_length = (vue_map->num_slots + 1) / 2 - - urb_entry_read_offset; - - dw1 |= SO_FUNCTION_ENABLE; - dw1 |= SO_STATISTICS_ENABLE; - - /* _NEW_LIGHT */ - if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) - dw1 |= SO_REORDER_TRAILING; - - /* We always read the whole vertex. This could be reduced at some - * point by reading less and offsetting the register index in the - * SO_DECLs. - */ - dw2 |= SET_FIELD(urb_entry_read_offset, SO_STREAM_0_VERTEX_READ_OFFSET); - dw2 |= SET_FIELD(urb_entry_read_length - 1, SO_STREAM_0_VERTEX_READ_LENGTH); - - dw2 |= SET_FIELD(urb_entry_read_offset, SO_STREAM_1_VERTEX_READ_OFFSET); - dw2 |= SET_FIELD(urb_entry_read_length - 1, SO_STREAM_1_VERTEX_READ_LENGTH); - - dw2 |= SET_FIELD(urb_entry_read_offset, SO_STREAM_2_VERTEX_READ_OFFSET); - dw2 |= SET_FIELD(urb_entry_read_length - 1, SO_STREAM_2_VERTEX_READ_LENGTH); - - dw2 |= SET_FIELD(urb_entry_read_offset, SO_STREAM_3_VERTEX_READ_OFFSET); - dw2 |= SET_FIELD(urb_entry_read_length - 1, SO_STREAM_3_VERTEX_READ_LENGTH); - - /* Set buffer pitches; 0 means unbound. */ - if (xfb_obj->Buffers[0]) - dw3 |= linked_xfb_info->Buffers[0].Stride * 4; - if (xfb_obj->Buffers[1]) - dw3 |= (linked_xfb_info->Buffers[1].Stride * 4) << 16; - if (xfb_obj->Buffers[2]) - dw4 |= linked_xfb_info->Buffers[2].Stride * 4; - if (xfb_obj->Buffers[3]) - dw4 |= (linked_xfb_info->Buffers[3].Stride * 4) << 16; - } - - BEGIN_BATCH(5); - OUT_BATCH(_3DSTATE_STREAMOUT << 16 | (5 - 2)); - OUT_BATCH(dw1); - OUT_BATCH(dw2); - OUT_BATCH(dw3); - OUT_BATCH(dw4); - ADVANCE_BATCH(); -} - -static void -upload_sol_state(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - /* BRW_NEW_TRANSFORM_FEEDBACK */ - bool active = _mesa_is_xfb_active_and_unpaused(ctx); - - if (active) { - gen8_upload_3dstate_so_buffers(brw); - /* BRW_NEW_VUE_MAP_GEOM_OUT */ - gen7_upload_3dstate_so_decl_list(brw, &brw->vue_map_geom_out); - } - - gen8_upload_3dstate_streamout(brw, active, &brw->vue_map_geom_out); -} - -const struct brw_tracked_state gen8_sol_state = { - .dirty = { - .mesa = _NEW_LIGHT, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_TRANSFORM_FEEDBACK | - BRW_NEW_VUE_MAP_GEOM_OUT, - }, - .emit = upload_sol_state, -}; -- 2.7.4