From bc1ff4509dea8fe0a324f85df1cc2c48a50b2484 Mon Sep 17 00:00:00 2001 From: Rafael Antognolli Date: Mon, 27 Mar 2017 08:42:45 -0700 Subject: [PATCH] i965: Port gen6+ state emitting code to genxml. Ported in this patch: - 3DSTATE_DS - 3DSTATE_GS - 3DSTATE_HS - 3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL v3: - Remove NEW_TRANSFORM blocks (Ken) - Bring back some comments and workaround for Ivybridge (Ken) Signed-off-by: Rafael Antognolli Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/Makefile.sources | 6 - src/mesa/drivers/dri/i965/brw_state.h | 18 +- src/mesa/drivers/dri/i965/gen6_gs_state.c | 129 +------ src/mesa/drivers/dri/i965/gen6_viewport_state.c | 147 -------- src/mesa/drivers/dri/i965/gen7_ds_state.c | 68 ---- src/mesa/drivers/dri/i965/gen7_gs_state.c | 167 --------- src/mesa/drivers/dri/i965/gen7_hs_state.c | 63 ---- src/mesa/drivers/dri/i965/gen7_viewport_state.c | 100 ----- src/mesa/drivers/dri/i965/gen8_ds_state.c | 116 ------ src/mesa/drivers/dri/i965/gen8_gs_state.c | 146 -------- src/mesa/drivers/dri/i965/gen8_hs_state.c | 93 ----- src/mesa/drivers/dri/i965/gen8_viewport_state.c | 120 ------ src/mesa/drivers/dri/i965/genX_state_upload.c | 478 +++++++++++++++++++++++- 13 files changed, 471 insertions(+), 1180 deletions(-) delete mode 100644 src/mesa/drivers/dri/i965/gen7_gs_state.c delete mode 100644 src/mesa/drivers/dri/i965/gen7_viewport_state.c delete mode 100644 src/mesa/drivers/dri/i965/gen8_ds_state.c delete mode 100644 src/mesa/drivers/dri/i965/gen8_gs_state.c delete mode 100644 src/mesa/drivers/dri/i965/gen8_hs_state.c delete mode 100644 src/mesa/drivers/dri/i965/gen8_viewport_state.c diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index 95d29ac..3f0c66a 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -92,24 +92,18 @@ i965_FILES = \ gen6_wm_state.c \ gen7_cs_state.c \ gen7_ds_state.c \ - gen7_gs_state.c \ gen7_hs_state.c \ 
gen7_l3_state.c \ gen7_misc_state.c \ gen7_sol_state.c \ gen7_te_state.c \ gen7_urb.c \ - gen7_viewport_state.c \ gen7_wm_surface_state.c \ gen8_blend_state.c \ gen8_depth_state.c \ gen8_draw_upload.c \ - gen8_ds_state.c \ - gen8_gs_state.c \ - gen8_hs_state.c \ gen8_multisample_state.c \ gen8_surface_state.c \ - gen8_viewport_state.c \ hsw_queryobj.c \ hsw_sol.c \ intel_batchbuffer.c \ diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 72d63f6..b6e8abc 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -109,9 +109,7 @@ extern const struct brw_tracked_state brw_cs_state; extern const struct brw_tracked_state gen7_cs_push_constants; extern const struct brw_tracked_state gen6_binding_table_pointers; extern const struct brw_tracked_state gen6_blend_state; -extern const struct brw_tracked_state gen6_sf_and_clip_viewports; extern const struct brw_tracked_state gen6_color_calc_state; -extern const struct brw_tracked_state gen6_gs_state; extern const struct brw_tracked_state gen6_gs_push_constants; extern const struct brw_tracked_state gen6_gs_binding_table; extern const struct brw_tracked_state gen6_multisample_state; @@ -125,26 +123,18 @@ extern const struct brw_tracked_state gen6_viewport_state; extern const struct brw_tracked_state gen6_vs_push_constants; extern const struct brw_tracked_state gen6_wm_push_constants; extern const struct brw_tracked_state gen7_depthbuffer; -extern const struct brw_tracked_state gen7_ds_state; -extern const struct brw_tracked_state gen7_gs_state; extern const struct brw_tracked_state gen7_tcs_push_constants; -extern const struct brw_tracked_state gen7_hs_state; extern const struct brw_tracked_state gen7_l3_state; extern const struct brw_tracked_state gen7_push_constant_space; -extern const struct brw_tracked_state gen7_sf_clip_viewport; extern const struct brw_tracked_state gen7_te_state; extern const struct brw_tracked_state 
gen7_tes_push_constants; extern const struct brw_tracked_state gen7_urb; extern const struct brw_tracked_state haswell_cut_index; extern const struct brw_tracked_state gen8_blend_state; -extern const struct brw_tracked_state gen8_ds_state; -extern const struct brw_tracked_state gen8_gs_state; -extern const struct brw_tracked_state gen8_hs_state; extern const struct brw_tracked_state gen8_index_buffer; extern const struct brw_tracked_state gen8_multisample_state; extern const struct brw_tracked_state gen8_pma_fix; extern const struct brw_tracked_state gen8_ps_blend; -extern const struct brw_tracked_state gen8_sf_clip_viewport; extern const struct brw_tracked_state gen8_vertices; extern const struct brw_tracked_state gen8_vf_topology; extern const struct brw_tracked_state brw_cs_work_groups_surface; @@ -383,12 +373,6 @@ use_state_point_size(const struct brw_context *brw) (brw->vue_map_geom_out.slots_valid & VARYING_BIT_PSIZ) == 0; } -void brw_calculate_guardband_size(const struct gen_device_info *devinfo, - uint32_t fb_width, uint32_t fb_height, - float m00, float m11, float m30, float m31, - float *xmin, float *xmax, - float *ymin, float *ymax); - void brw_copy_pipeline_atoms(struct brw_context *brw, enum brw_pipeline pipeline, const struct brw_tracked_state **atoms, @@ -402,6 +386,8 @@ void gen75_init_atoms(struct brw_context *brw); void gen8_init_atoms(struct brw_context *brw); void gen9_init_atoms(struct brw_context *brw); +void upload_gs_state_for_tf(struct brw_context *brw); + #ifdef __cplusplus } #endif diff --git a/src/mesa/drivers/dri/i965/gen6_gs_state.c b/src/mesa/drivers/dri/i965/gen6_gs_state.c index 0cdfcf5..6a9e951 100644 --- a/src/mesa/drivers/dri/i965/gen6_gs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_gs_state.c @@ -64,7 +64,7 @@ const struct brw_tracked_state gen6_gs_push_constants = { .emit = gen6_upload_gs_push_constants, }; -static void +void upload_gs_state_for_tf(struct brw_context *brw) { const struct gen_device_info *devinfo = 
&brw->screen->devinfo; @@ -87,130 +87,3 @@ upload_gs_state_for_tf(struct brw_context *brw) GEN6_GS_ENABLE); ADVANCE_BATCH(); } - -static void -upload_gs_state(struct brw_context *brw) -{ - const struct gen_device_info *devinfo = &brw->screen->devinfo; - /* BRW_NEW_GEOMETRY_PROGRAM */ - bool active = brw->geometry_program; - /* BRW_NEW_GS_PROG_DATA */ - const struct brw_stage_state *stage_state = &brw->gs.base; - const struct brw_stage_prog_data *prog_data = stage_state->prog_data; - const struct brw_vue_prog_data *vue_prog_data = - brw_vue_prog_data(stage_state->prog_data); - - if (!active || stage_state->push_const_size == 0) { - /* Disable the push constant buffers. */ - BEGIN_BATCH(5); - OUT_BATCH(_3DSTATE_CONSTANT_GS << 16 | (5 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } else { - BEGIN_BATCH(5); - OUT_BATCH(_3DSTATE_CONSTANT_GS << 16 | - GEN6_CONSTANT_BUFFER_0_ENABLE | - (5 - 2)); - /* Pointer to the GS constant buffer. Covered by the set of - * state flags from gen6_upload_vs_constants - */ - OUT_BATCH(stage_state->push_const_offset + - stage_state->push_const_size - 1); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } - - if (active) { - BEGIN_BATCH(7); - OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2)); - OUT_BATCH(stage_state->prog_offset); - - /* GEN6_GS_SPF_MODE and GEN6_GS_VECTOR_MASK_ENABLE are enabled as it - * was previously done for gen6. - * - * TODO: test with both disabled to see if the HW is behaving - * as expected, like in gen7. 
- */ - OUT_BATCH(GEN6_GS_SPF_MODE | GEN6_GS_VECTOR_MASK_ENABLE | - ((ALIGN(stage_state->sampler_count, 4)/4) << - GEN6_GS_SAMPLER_COUNT_SHIFT) | - ((prog_data->binding_table.size_bytes / 4) << - GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); - - if (prog_data->total_scratch) { - OUT_RELOC(stage_state->scratch_bo, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - ffs(stage_state->per_thread_scratch) - 11); - } else { - OUT_BATCH(0); /* no scratch space */ - } - - OUT_BATCH((vue_prog_data->urb_read_length << - GEN6_GS_URB_READ_LENGTH_SHIFT) | - (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT) | - (prog_data->dispatch_grf_start_reg << - GEN6_GS_DISPATCH_START_GRF_SHIFT)); - - OUT_BATCH(((devinfo->max_gs_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT) | - GEN6_GS_STATISTICS_ENABLE | - GEN6_GS_SO_STATISTICS_ENABLE | - GEN6_GS_RENDERING_ENABLE); - - if (brw->geometry_program->info.has_transform_feedback_varyings) { - /* GEN6_GS_REORDER is equivalent to GEN7_GS_REORDER_TRAILING - * in gen7. SNB and IVB specs are the same regarding the reordering of - * TRISTRIP/TRISTRIP_REV vertices and triangle orientation, so we do - * the same thing in both generations. For more details, see the - * comment in gen7_gs_state.c - */ - OUT_BATCH(GEN6_GS_REORDER | - GEN6_GS_SVBI_PAYLOAD_ENABLE | - GEN6_GS_ENABLE); - } else { - OUT_BATCH(GEN6_GS_REORDER | GEN6_GS_ENABLE); - } - ADVANCE_BATCH(); - } else if (brw->ff_gs.prog_active) { - /* In gen6, transform feedback for the VS stage is done with an ad-hoc GS - * program. This function provides the needed 3DSTATE_GS for this. 
- */ - upload_gs_state_for_tf(brw); - } else { - /* No GS function required */ - BEGIN_BATCH(7); - OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2)); - OUT_BATCH(0); /* prog_bo */ - OUT_BATCH((0 << GEN6_GS_SAMPLER_COUNT_SHIFT) | - (0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); - OUT_BATCH(0); /* scratch space base offset */ - OUT_BATCH((1 << GEN6_GS_DISPATCH_START_GRF_SHIFT) | - (0 << GEN6_GS_URB_READ_LENGTH_SHIFT) | - (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT)); - OUT_BATCH((0 << GEN6_GS_MAX_THREADS_SHIFT) | - GEN6_GS_STATISTICS_ENABLE | - GEN6_GS_RENDERING_ENABLE); - OUT_BATCH(0); - ADVANCE_BATCH(); - } - brw->gs.enabled = active; -} - -const struct brw_tracked_state gen6_gs_state = { - .dirty = { - .mesa = _NEW_PROGRAM_CONSTANTS | - _NEW_TRANSFORM, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_CONTEXT | - BRW_NEW_FF_GS_PROG_DATA | - BRW_NEW_GEOMETRY_PROGRAM | - BRW_NEW_GS_PROG_DATA | - BRW_NEW_PUSH_CONSTANT_ALLOCATION, - }, - .emit = upload_gs_state, -}; diff --git a/src/mesa/drivers/dri/i965/gen6_viewport_state.c b/src/mesa/drivers/dri/i965/gen6_viewport_state.c index 41cc459..e3968b1 100644 --- a/src/mesa/drivers/dri/i965/gen6_viewport_state.c +++ b/src/mesa/drivers/dri/i965/gen6_viewport_state.c @@ -33,153 +33,6 @@ #include "main/framebuffer.h" #include "main/viewport.h" -void -brw_calculate_guardband_size(const struct gen_device_info *devinfo, - uint32_t fb_width, uint32_t fb_height, - float m00, float m11, float m30, float m31, - float *xmin, float *xmax, - float *ymin, float *ymax) -{ - /* According to the "Vertex X,Y Clamping and Quantization" section of the - * Strips and Fans documentation: - * - * "The vertex X and Y screen-space coordinates are also /clamped/ to the - * fixed-point "guardband" range supported by the rasterization hardware" - * - * and - * - * "In almost all circumstances, if an object’s vertices are actually - * modified by this clamping (i.e., had X or Y coordinates outside of - * the guardband extent the rendered object will not match 
the intended - * result. Therefore software should take steps to ensure that this does - * not happen - e.g., by clipping objects such that they do not exceed - * these limits after the Drawing Rectangle is applied." - * - * I believe the fundamental restriction is that the rasterizer (in - * the SF/WM stages) have a limit on the number of pixels that can be - * rasterized. We need to ensure any coordinates beyond the rasterizer - * limit are handled by the clipper. So effectively that limit becomes - * the clipper's guardband size. - * - * It goes on to say: - * - * "In addition, in order to be correctly rendered, objects must have a - * screenspace bounding box not exceeding 8K in the X or Y direction. - * This additional restriction must also be comprehended by software, - * i.e., enforced by use of clipping." - * - * This makes no sense. Gen7+ hardware supports 16K render targets, - * and you definitely need to be able to draw polygons that fill the - * surface. Our assumption is that the rasterizer was limited to 8K - * on Sandybridge, which only supports 8K surfaces, and it was actually - * increased to 16K on Ivybridge and later. - * - * So, limit the guardband to 16K on Gen7+ and 8K on Sandybridge. - */ - const float gb_size = devinfo->gen >= 7 ? 
16384.0f : 8192.0f; - - if (m00 != 0 && m11 != 0) { - /* First, we compute the screen-space render area */ - const float ss_ra_xmin = MIN3( 0, m30 + m00, m30 - m00); - const float ss_ra_xmax = MAX3( fb_width, m30 + m00, m30 - m00); - const float ss_ra_ymin = MIN3( 0, m31 + m11, m31 - m11); - const float ss_ra_ymax = MAX3(fb_height, m31 + m11, m31 - m11); - - /* We want the guardband to be centered on that */ - const float ss_gb_xmin = (ss_ra_xmin + ss_ra_xmax) / 2 - gb_size; - const float ss_gb_xmax = (ss_ra_xmin + ss_ra_xmax) / 2 + gb_size; - const float ss_gb_ymin = (ss_ra_ymin + ss_ra_ymax) / 2 - gb_size; - const float ss_gb_ymax = (ss_ra_ymin + ss_ra_ymax) / 2 + gb_size; - - /* Now we need it in native device coordinates */ - const float ndc_gb_xmin = (ss_gb_xmin - m30) / m00; - const float ndc_gb_xmax = (ss_gb_xmax - m30) / m00; - const float ndc_gb_ymin = (ss_gb_ymin - m31) / m11; - const float ndc_gb_ymax = (ss_gb_ymax - m31) / m11; - - /* Thanks to Y-flipping and ORIGIN_UPPER_LEFT, the Y coordinates may be - * flipped upside-down. X should be fine though. - */ - assert(ndc_gb_xmin <= ndc_gb_xmax); - *xmin = ndc_gb_xmin; - *xmax = ndc_gb_xmax; - *ymin = MIN2(ndc_gb_ymin, ndc_gb_ymax); - *ymax = MAX2(ndc_gb_ymin, ndc_gb_ymax); - } else { - /* The viewport scales to 0, so nothing will be rendered. 
*/ - *xmin = 0.0f; - *xmax = 0.0f; - *ymin = 0.0f; - *ymax = 0.0f; - } -} - -static void -gen6_upload_sf_and_clip_viewports(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - const struct gen_device_info *devinfo = &brw->screen->devinfo; - struct gen6_sf_viewport *sfv; - struct brw_clipper_viewport *clv; - GLfloat y_scale, y_bias; - - /* BRW_NEW_VIEWPORT_COUNT */ - const unsigned viewport_count = brw->clip.viewport_count; - - /* _NEW_BUFFERS */ - struct gl_framebuffer *fb = ctx->DrawBuffer; - const bool render_to_fbo = _mesa_is_user_fbo(fb); - const uint32_t fb_width = _mesa_geometric_width(ctx->DrawBuffer); - const uint32_t fb_height = _mesa_geometric_height(ctx->DrawBuffer); - - sfv = brw_state_batch(brw, sizeof(*sfv) * viewport_count, - 32, &brw->sf.vp_offset); - memset(sfv, 0, sizeof(*sfv) * viewport_count); - - clv = brw_state_batch(brw, sizeof(*clv) * viewport_count, - 32, &brw->clip.vp_offset); - - if (render_to_fbo) { - y_scale = 1.0; - y_bias = 0.0; - } else { - y_scale = -1.0; - y_bias = (float)fb_height; - } - - for (unsigned i = 0; i < viewport_count; i++) { - float scale[3], translate[3]; - - /* _NEW_VIEWPORT */ - _mesa_get_viewport_xform(ctx, i, scale, translate); - sfv[i].m00 = scale[0]; - sfv[i].m11 = scale[1] * y_scale; - sfv[i].m22 = scale[2]; - sfv[i].m30 = translate[0]; - sfv[i].m31 = translate[1] * y_scale + y_bias; - sfv[i].m32 = translate[2]; - - brw_calculate_guardband_size(devinfo, fb_width, fb_height, - sfv[i].m00, sfv[i].m11, - sfv[i].m30, sfv[i].m31, - &clv[i].xmin, &clv[i].xmax, - &clv[i].ymin, &clv[i].ymax); - } - - brw->ctx.NewDriverState |= BRW_NEW_SF_VP | BRW_NEW_CLIP_VP; -} - -const struct brw_tracked_state gen6_sf_and_clip_viewports = { - .dirty = { - .mesa = _NEW_BUFFERS | - _NEW_VIEWPORT, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_VIEWPORT_COUNT, - }, - .emit = gen6_upload_sf_and_clip_viewports, -}; - static void upload_viewport_state_pointers(struct brw_context *brw) { BEGIN_BATCH(4); diff --git 
a/src/mesa/drivers/dri/i965/gen7_ds_state.c b/src/mesa/drivers/dri/i965/gen7_ds_state.c index 7491595..f6bb41d 100644 --- a/src/mesa/drivers/dri/i965/gen7_ds_state.c +++ b/src/mesa/drivers/dri/i965/gen7_ds_state.c @@ -55,71 +55,3 @@ const struct brw_tracked_state gen7_tes_push_constants = { }, .emit = gen7_upload_tes_push_constants, }; - -static void -gen7_upload_ds_state(struct brw_context *brw) -{ - const struct gen_device_info *devinfo = &brw->screen->devinfo; - const struct brw_stage_state *stage_state = &brw->tes.base; - /* BRW_NEW_TESS_PROGRAMS */ - bool active = brw->tess_eval_program; - - /* BRW_NEW_TES_PROG_DATA */ - const struct brw_stage_prog_data *prog_data = stage_state->prog_data; - const struct brw_vue_prog_data *vue_prog_data = - brw_vue_prog_data(stage_state->prog_data); - const struct brw_tes_prog_data *tes_prog_data = - brw_tes_prog_data(stage_state->prog_data); - - const unsigned thread_count = (devinfo->max_tes_threads - 1) << - (brw->is_haswell ? HSW_DS_MAX_THREADS_SHIFT : GEN7_DS_MAX_THREADS_SHIFT); - - if (active) { - BEGIN_BATCH(6); - OUT_BATCH(_3DSTATE_DS << 16 | (6 - 2)); - OUT_BATCH(stage_state->prog_offset); - OUT_BATCH(SET_FIELD(DIV_ROUND_UP(stage_state->sampler_count, 4), - GEN7_DS_SAMPLER_COUNT) | - SET_FIELD(prog_data->binding_table.size_bytes / 4, - GEN7_DS_BINDING_TABLE_ENTRY_COUNT)); - if (prog_data->total_scratch) { - OUT_RELOC(stage_state->scratch_bo, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - ffs(stage_state->per_thread_scratch) - 11); - } else { - OUT_BATCH(0); - } - OUT_BATCH(SET_FIELD(prog_data->dispatch_grf_start_reg, - GEN7_DS_DISPATCH_START_GRF) | - SET_FIELD(vue_prog_data->urb_read_length, - GEN7_DS_URB_READ_LENGTH)); - - OUT_BATCH(GEN7_DS_ENABLE | - GEN7_DS_STATISTICS_ENABLE | - thread_count | - (tes_prog_data->domain == BRW_TESS_DOMAIN_TRI ? 
- GEN7_DS_COMPUTE_W_COORDINATE_ENABLE : 0)); - ADVANCE_BATCH(); - } else { - BEGIN_BATCH(6); - OUT_BATCH(_3DSTATE_DS << 16 | (6 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } - brw->tes.enabled = active; -} - -const struct brw_tracked_state gen7_ds_state = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_TESS_PROGRAMS | - BRW_NEW_TES_PROG_DATA, - }, - .emit = gen7_upload_ds_state, -}; diff --git a/src/mesa/drivers/dri/i965/gen7_gs_state.c b/src/mesa/drivers/dri/i965/gen7_gs_state.c deleted file mode 100644 index dc5e006..0000000 --- a/src/mesa/drivers/dri/i965/gen7_gs_state.c +++ /dev/null @@ -1,167 +0,0 @@ -/* - * Copyright © 2013 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ - -#include "brw_context.h" -#include "brw_state.h" -#include "brw_defines.h" -#include "intel_batchbuffer.h" - -static void -upload_gs_state(struct brw_context *brw) -{ - const struct gen_device_info *devinfo = &brw->screen->devinfo; - const struct brw_stage_state *stage_state = &brw->gs.base; - const int max_threads_shift = brw->is_haswell ? - HSW_GS_MAX_THREADS_SHIFT : GEN6_GS_MAX_THREADS_SHIFT; - /* BRW_NEW_GEOMETRY_PROGRAM */ - bool active = brw->geometry_program; - /* BRW_NEW_GS_PROG_DATA */ - const struct brw_stage_prog_data *prog_data = stage_state->prog_data; - const struct brw_vue_prog_data *vue_prog_data = - brw_vue_prog_data(stage_state->prog_data); - const struct brw_gs_prog_data *gs_prog_data = - brw_gs_prog_data(stage_state->prog_data); - - /** - * From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages > - * Geometry > Geometry Shader > State: - * - * "Note: Because of corruption in IVB:GT2, software needs to flush the - * whole fixed function pipeline when the GS enable changes value in - * the 3DSTATE_GS." - * - * The hardware architects have clarified that in this context "flush the - * whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS - * Stall" bit set. 
- */ - if (!brw->is_haswell && brw->gt == 2 && brw->gs.enabled != active) - gen7_emit_cs_stall_flush(brw); - - if (active) { - BEGIN_BATCH(7); - OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2)); - OUT_BATCH(stage_state->prog_offset); - OUT_BATCH(((ALIGN(stage_state->sampler_count, 4)/4) << - GEN6_GS_SAMPLER_COUNT_SHIFT) | - ((prog_data->binding_table.size_bytes / 4) << - GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); - - if (prog_data->total_scratch) { - OUT_RELOC(stage_state->scratch_bo, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - ffs(stage_state->per_thread_scratch) - 11); - } else { - OUT_BATCH(0); - } - - uint32_t dw4 = - ((gs_prog_data->output_vertex_size_hwords * 2 - 1) << - GEN7_GS_OUTPUT_VERTEX_SIZE_SHIFT) | - (gs_prog_data->output_topology << GEN7_GS_OUTPUT_TOPOLOGY_SHIFT) | - (vue_prog_data->urb_read_length << - GEN6_GS_URB_READ_LENGTH_SHIFT) | - (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT) | - (prog_data->dispatch_grf_start_reg << - GEN6_GS_DISPATCH_START_GRF_SHIFT); - - /* Note: the meaning of the GEN7_GS_REORDER_TRAILING bit changes between - * Ivy Bridge and Haswell. - * - * On Ivy Bridge, setting this bit causes the vertices of a triangle - * strip to be delivered to the geometry shader in an order that does - * not strictly follow the OpenGL spec, but preserves triangle - * orientation. For example, if the vertices are (1, 2, 3, 4, 5), then - * the geometry shader sees triangles: - * - * (1, 2, 3), (2, 4, 3), (3, 4, 5) - * - * (Clearing the bit is even worse, because it fails to preserve - * orientation). - * - * Triangle strips with adjacency always ordered in a way that preserves - * triangle orientation but does not strictly follow the OpenGL spec, - * regardless of the setting of this bit. - * - * On Haswell, both triangle strips and triangle strips with adjacency - * are always ordered in a way that preserves triangle orientation. - * Setting this bit causes the ordering to strictly follow the OpenGL - * spec. 
- * - * So in either case we want to set the bit. Unfortunately on Ivy - * Bridge this will get the order close to correct but not perfect. - */ - uint32_t dw5 = - ((devinfo->max_gs_threads - 1) << max_threads_shift) | - (gs_prog_data->control_data_header_size_hwords << - GEN7_GS_CONTROL_DATA_HEADER_SIZE_SHIFT) | - ((gs_prog_data->invocations - 1) << - GEN7_GS_INSTANCE_CONTROL_SHIFT) | - SET_FIELD(vue_prog_data->dispatch_mode, GEN7_GS_DISPATCH_MODE) | - GEN6_GS_STATISTICS_ENABLE | - (gs_prog_data->include_primitive_id ? - GEN7_GS_INCLUDE_PRIMITIVE_ID : 0) | - GEN7_GS_REORDER_TRAILING | - GEN7_GS_ENABLE; - uint32_t dw6 = 0; - - if (brw->is_haswell) { - dw6 |= gs_prog_data->control_data_format << - HSW_GS_CONTROL_DATA_FORMAT_SHIFT; - } else { - dw5 |= gs_prog_data->control_data_format << - IVB_GS_CONTROL_DATA_FORMAT_SHIFT; - } - - OUT_BATCH(dw4); - OUT_BATCH(dw5); - OUT_BATCH(dw6); - ADVANCE_BATCH(); - } else { - BEGIN_BATCH(7); - OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2)); - OUT_BATCH(0); /* prog_bo */ - OUT_BATCH((0 << GEN6_GS_SAMPLER_COUNT_SHIFT) | - (0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); - OUT_BATCH(0); /* scratch space base offset */ - OUT_BATCH((1 << GEN6_GS_DISPATCH_START_GRF_SHIFT) | - (0 << GEN6_GS_URB_READ_LENGTH_SHIFT) | - GEN7_GS_INCLUDE_VERTEX_HANDLES | - (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT)); - OUT_BATCH((0 << GEN6_GS_MAX_THREADS_SHIFT) | - GEN6_GS_STATISTICS_ENABLE); - OUT_BATCH(0); - ADVANCE_BATCH(); - } - brw->gs.enabled = active; -} - -const struct brw_tracked_state gen7_gs_state = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_GEOMETRY_PROGRAM | - BRW_NEW_GS_PROG_DATA, - }, - .emit = upload_gs_state, -}; diff --git a/src/mesa/drivers/dri/i965/gen7_hs_state.c b/src/mesa/drivers/dri/i965/gen7_hs_state.c index 765253f..b4e325b 100644 --- a/src/mesa/drivers/dri/i965/gen7_hs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_hs_state.c @@ -58,66 +58,3 @@ const struct brw_tracked_state gen7_tcs_push_constants = { 
}, .emit = gen7_upload_tcs_push_constants, }; - -static void -gen7_upload_hs_state(struct brw_context *brw) -{ - const struct gen_device_info *devinfo = &brw->screen->devinfo; - const struct brw_stage_state *stage_state = &brw->tcs.base; - /* BRW_NEW_TESS_PROGRAMS */ - bool active = brw->tess_eval_program; - /* BRW_NEW_TCS_PROG_DATA */ - const struct brw_stage_prog_data *prog_data = stage_state->prog_data; - const struct brw_tcs_prog_data *tcs_prog_data = - brw_tcs_prog_data(stage_state->prog_data); - - if (active) { - BEGIN_BATCH(7); - OUT_BATCH(_3DSTATE_HS << 16 | (7 - 2)); - OUT_BATCH(SET_FIELD(DIV_ROUND_UP(stage_state->sampler_count, 4), - GEN7_HS_SAMPLER_COUNT) | - SET_FIELD(prog_data->binding_table.size_bytes / 4, - GEN7_HS_BINDING_TABLE_ENTRY_COUNT) | - (devinfo->max_tcs_threads - 1)); - OUT_BATCH(GEN7_HS_ENABLE | - GEN7_HS_STATISTICS_ENABLE | - SET_FIELD(tcs_prog_data->instances - 1, - GEN7_HS_INSTANCE_COUNT)); - OUT_BATCH(stage_state->prog_offset); - if (prog_data->total_scratch) { - OUT_RELOC(stage_state->scratch_bo, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - ffs(stage_state->per_thread_scratch) - 11); - } else { - OUT_BATCH(0); - } - OUT_BATCH(GEN7_HS_INCLUDE_VERTEX_HANDLES | - SET_FIELD(prog_data->dispatch_grf_start_reg, - GEN7_HS_DISPATCH_START_GRF)); - /* Ignore URB semaphores */ - OUT_BATCH(0); - ADVANCE_BATCH(); - } else { - BEGIN_BATCH(7); - OUT_BATCH(_3DSTATE_HS << 16 | (7 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } - brw->tcs.enabled = active; -} - -const struct brw_tracked_state gen7_hs_state = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_TCS_PROG_DATA | - BRW_NEW_TESS_PROGRAMS, - }, - .emit = gen7_upload_hs_state, -}; diff --git a/src/mesa/drivers/dri/i965/gen7_viewport_state.c b/src/mesa/drivers/dri/i965/gen7_viewport_state.c deleted file mode 100644 index a3cb454..0000000 --- 
a/src/mesa/drivers/dri/i965/gen7_viewport_state.c +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright © 2011 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include "brw_context.h" -#include "brw_state.h" -#include "brw_defines.h" -#include "intel_batchbuffer.h" -#include "main/fbobject.h" -#include "main/framebuffer.h" -#include "main/viewport.h" - -static void -gen7_upload_sf_clip_viewport(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - const struct gen_device_info *devinfo = &brw->screen->devinfo; - GLfloat y_scale, y_bias; - struct gen7_sf_clip_viewport *vp; - - /* BRW_NEW_VIEWPORT_COUNT */ - const unsigned viewport_count = brw->clip.viewport_count; - - /* _NEW_BUFFERS */ - struct gl_framebuffer *fb = ctx->DrawBuffer; - const bool render_to_fbo = _mesa_is_user_fbo(fb); - const uint32_t fb_width = _mesa_geometric_width(ctx->DrawBuffer); - const uint32_t fb_height = _mesa_geometric_height(ctx->DrawBuffer); - - vp = brw_state_batch(brw, - sizeof(*vp) * viewport_count, 64, - &brw->sf.vp_offset); - /* Also assign to clip.vp_offset in case something uses it. */ - brw->clip.vp_offset = brw->sf.vp_offset; - - /* _NEW_BUFFERS */ - if (render_to_fbo) { - y_scale = 1.0; - y_bias = 0.0; - } else { - y_scale = -1.0; - y_bias = (float)fb_height; - } - - for (unsigned i = 0; i < viewport_count; i++) { - float scale[3], translate[3]; - _mesa_get_viewport_xform(ctx, i, scale, translate); - - /* _NEW_VIEWPORT */ - vp[i].viewport.m00 = scale[0]; - vp[i].viewport.m11 = scale[1] * y_scale; - vp[i].viewport.m22 = scale[2]; - vp[i].viewport.m30 = translate[0]; - vp[i].viewport.m31 = translate[1] * y_scale + y_bias; - vp[i].viewport.m32 = translate[2]; - - brw_calculate_guardband_size(devinfo, fb_width, fb_height, - vp[i].viewport.m00, vp[i].viewport.m11, - vp[i].viewport.m30, vp[i].viewport.m31, - &vp[i].guardband.xmin, - &vp[i].guardband.xmax, - &vp[i].guardband.ymin, - &vp[i].guardband.ymax); - } - - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL << 16 | (2 - 2)); - OUT_BATCH(brw->sf.vp_offset); - ADVANCE_BATCH(); -} - -const struct brw_tracked_state gen7_sf_clip_viewport = { - .dirty = 
{ - .mesa = _NEW_BUFFERS | - _NEW_VIEWPORT, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_VIEWPORT_COUNT, - }, - .emit = gen7_upload_sf_clip_viewport, -}; diff --git a/src/mesa/drivers/dri/i965/gen8_ds_state.c b/src/mesa/drivers/dri/i965/gen8_ds_state.c deleted file mode 100644 index ee2f82e..0000000 --- a/src/mesa/drivers/dri/i965/gen8_ds_state.c +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ - -#include "brw_context.h" -#include "brw_state.h" -#include "brw_defines.h" -#include "intel_batchbuffer.h" - -static void -gen8_upload_ds_state(struct brw_context *brw) -{ - const struct gen_device_info *devinfo = &brw->screen->devinfo; - const struct brw_stage_state *stage_state = &brw->tes.base; - /* BRW_NEW_TESS_PROGRAMS */ - bool active = brw->tess_eval_program; - - /* BRW_NEW_TES_PROG_DATA */ - const struct brw_stage_prog_data *prog_data = stage_state->prog_data; - const struct brw_vue_prog_data *vue_prog_data = - brw_vue_prog_data(stage_state->prog_data); - const struct brw_tes_prog_data *tes_prog_data = - brw_tes_prog_data(stage_state->prog_data); - const int ds_pkt_len = brw->gen >= 9 ? 11 : 9; - - if (active) { - BEGIN_BATCH(ds_pkt_len); - OUT_BATCH(_3DSTATE_DS << 16 | (ds_pkt_len - 2)); - OUT_BATCH(stage_state->prog_offset); - OUT_BATCH(0); - OUT_BATCH(SET_FIELD(DIV_ROUND_UP(stage_state->sampler_count, 4), - GEN7_DS_SAMPLER_COUNT) | - SET_FIELD(prog_data->binding_table.size_bytes / 4, - GEN7_DS_BINDING_TABLE_ENTRY_COUNT)); - if (prog_data->total_scratch) { - OUT_RELOC64(stage_state->scratch_bo, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - ffs(stage_state->per_thread_scratch) - 11); - } else { - OUT_BATCH(0); - OUT_BATCH(0); - } - OUT_BATCH(SET_FIELD(prog_data->dispatch_grf_start_reg, - GEN7_DS_DISPATCH_START_GRF) | - SET_FIELD(vue_prog_data->urb_read_length, - GEN7_DS_URB_READ_LENGTH)); - - OUT_BATCH(GEN7_DS_ENABLE | - GEN7_DS_STATISTICS_ENABLE | - (devinfo->max_tes_threads - 1) << HSW_DS_MAX_THREADS_SHIFT | - (vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8 ? - GEN7_DS_SIMD8_DISPATCH_ENABLE : 0) | - (tes_prog_data->domain == BRW_TESS_DOMAIN_TRI ? 
- GEN7_DS_COMPUTE_W_COORDINATE_ENABLE : 0)); - OUT_BATCH(SET_FIELD(vue_prog_data->cull_distance_mask, - GEN8_DS_USER_CULL_DISTANCE)); - - - if (brw->gen >= 9) { - OUT_BATCH(0); - OUT_BATCH(0); - } - - ADVANCE_BATCH(); - } else { - BEGIN_BATCH(ds_pkt_len); - OUT_BATCH(_3DSTATE_DS << 16 | (ds_pkt_len - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - - if (brw->gen >= 9) { - OUT_BATCH(0); - OUT_BATCH(0); - } - - ADVANCE_BATCH(); - } - - brw->tes.enabled = active; -} - -const struct brw_tracked_state gen8_ds_state = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_TESS_PROGRAMS | - BRW_NEW_TES_PROG_DATA, - }, - .emit = gen8_upload_ds_state, -}; diff --git a/src/mesa/drivers/dri/i965/gen8_gs_state.c b/src/mesa/drivers/dri/i965/gen8_gs_state.c deleted file mode 100644 index 2b74f1b..0000000 --- a/src/mesa/drivers/dri/i965/gen8_gs_state.c +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Copyright © 2013 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include "brw_context.h" -#include "brw_state.h" -#include "brw_defines.h" -#include "intel_batchbuffer.h" - -static void -gen8_upload_gs_state(struct brw_context *brw) -{ - const struct gen_device_info *devinfo = &brw->screen->devinfo; - const struct brw_stage_state *stage_state = &brw->gs.base; - /* BRW_NEW_GEOMETRY_PROGRAM */ - bool active = brw->geometry_program; - /* BRW_NEW_GS_PROG_DATA */ - const struct brw_stage_prog_data *prog_data = stage_state->prog_data; - const struct brw_vue_prog_data *vue_prog_data = - brw_vue_prog_data(stage_state->prog_data); - const struct brw_gs_prog_data *gs_prog_data = - brw_gs_prog_data(stage_state->prog_data); - - if (active) { - int urb_entry_write_offset = 1; - uint32_t urb_entry_output_length = - ((vue_prog_data->vue_map.num_slots + 1) / 2 - urb_entry_write_offset); - - if (urb_entry_output_length == 0) - urb_entry_output_length = 1; - - BEGIN_BATCH(10); - OUT_BATCH(_3DSTATE_GS << 16 | (10 - 2)); - OUT_BATCH(stage_state->prog_offset); - OUT_BATCH(0); - OUT_BATCH(gs_prog_data->vertices_in | - ((ALIGN(stage_state->sampler_count, 4)/4) << - GEN6_GS_SAMPLER_COUNT_SHIFT) | - ((prog_data->binding_table.size_bytes / 4) << - GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); - - if (prog_data->total_scratch) { - OUT_RELOC64(stage_state->scratch_bo, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - ffs(stage_state->per_thread_scratch) - 11); - } else { - OUT_BATCH(0); - OUT_BATCH(0); - } - - /* DW6 */ - OUT_BATCH(((gs_prog_data->output_vertex_size_hwords * 2 - 1) << - GEN7_GS_OUTPUT_VERTEX_SIZE_SHIFT) | - (gs_prog_data->output_topology << - GEN7_GS_OUTPUT_TOPOLOGY_SHIFT) | - (vue_prog_data->include_vue_handles ? 
- GEN7_GS_INCLUDE_VERTEX_HANDLES : 0) | - (vue_prog_data->urb_read_length << - GEN6_GS_URB_READ_LENGTH_SHIFT) | - (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT) | - (prog_data->dispatch_grf_start_reg << - GEN6_GS_DISPATCH_START_GRF_SHIFT)); - - uint32_t dw7 = (gs_prog_data->control_data_header_size_hwords << - GEN7_GS_CONTROL_DATA_HEADER_SIZE_SHIFT) | - SET_FIELD(vue_prog_data->dispatch_mode, - GEN7_GS_DISPATCH_MODE) | - ((gs_prog_data->invocations - 1) << - GEN7_GS_INSTANCE_CONTROL_SHIFT) | - GEN6_GS_STATISTICS_ENABLE | - (gs_prog_data->include_primitive_id ? - GEN7_GS_INCLUDE_PRIMITIVE_ID : 0) | - GEN7_GS_REORDER_TRAILING | - GEN7_GS_ENABLE; - uint32_t dw8 = gs_prog_data->control_data_format << - HSW_GS_CONTROL_DATA_FORMAT_SHIFT; - - if (gs_prog_data->static_vertex_count != -1) { - dw8 |= GEN8_GS_STATIC_OUTPUT | - SET_FIELD(gs_prog_data->static_vertex_count, - GEN8_GS_STATIC_VERTEX_COUNT); - } - - if (brw->gen < 9) - dw7 |= (devinfo->max_gs_threads / 2 - 1) << HSW_GS_MAX_THREADS_SHIFT; - else - dw8 |= devinfo->max_gs_threads - 1; - - /* DW7 */ - OUT_BATCH(dw7); - - /* DW8 */ - OUT_BATCH(dw8); - - /* DW9 */ - OUT_BATCH(vue_prog_data->cull_distance_mask | - (urb_entry_output_length << GEN8_GS_URB_OUTPUT_LENGTH_SHIFT) | - (urb_entry_write_offset << - GEN8_GS_URB_ENTRY_OUTPUT_OFFSET_SHIFT)); - ADVANCE_BATCH(); - } else { - BEGIN_BATCH(10); - OUT_BATCH(_3DSTATE_GS << 16 | (10 - 2)); - OUT_BATCH(0); /* prog_bo */ - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); /* scratch space base offset */ - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(GEN6_GS_STATISTICS_ENABLE); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } -} - -const struct brw_tracked_state gen8_gs_state = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_CONTEXT | - BRW_NEW_GEOMETRY_PROGRAM | - BRW_NEW_GS_PROG_DATA, - }, - .emit = gen8_upload_gs_state, -}; diff --git a/src/mesa/drivers/dri/i965/gen8_hs_state.c b/src/mesa/drivers/dri/i965/gen8_hs_state.c deleted file mode 100644 
index ee47e5e..0000000 --- a/src/mesa/drivers/dri/i965/gen8_hs_state.c +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ - -#include "brw_context.h" -#include "brw_state.h" -#include "brw_defines.h" -#include "intel_batchbuffer.h" - -static void -gen8_upload_hs_state(struct brw_context *brw) -{ - const struct gen_device_info *devinfo = &brw->screen->devinfo; - const struct brw_stage_state *stage_state = &brw->tcs.base; - /* BRW_NEW_TESS_PROGRAMS */ - bool active = brw->tess_eval_program; - /* BRW_NEW_TCS_PROG_DATA */ - const struct brw_stage_prog_data *prog_data = stage_state->prog_data; - const struct brw_tcs_prog_data *tcs_prog_data = - brw_tcs_prog_data(stage_state->prog_data); - - if (active) { - BEGIN_BATCH(9); - OUT_BATCH(_3DSTATE_HS << 16 | (9 - 2)); - OUT_BATCH(SET_FIELD(DIV_ROUND_UP(stage_state->sampler_count, 4), - GEN7_HS_SAMPLER_COUNT) | - SET_FIELD(prog_data->binding_table.size_bytes / 4, - GEN7_HS_BINDING_TABLE_ENTRY_COUNT)); - OUT_BATCH(GEN7_HS_ENABLE | - GEN7_HS_STATISTICS_ENABLE | - (devinfo->max_tcs_threads - 1) << GEN8_HS_MAX_THREADS_SHIFT | - SET_FIELD(tcs_prog_data->instances - 1, - GEN7_HS_INSTANCE_COUNT)); - OUT_BATCH(stage_state->prog_offset); - OUT_BATCH(0); - if (prog_data->total_scratch) { - OUT_RELOC64(stage_state->scratch_bo, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - ffs(stage_state->per_thread_scratch) - 11); - } else { - OUT_BATCH(0); - OUT_BATCH(0); - } - OUT_BATCH(GEN7_HS_INCLUDE_VERTEX_HANDLES | - SET_FIELD(prog_data->dispatch_grf_start_reg, - GEN7_HS_DISPATCH_START_GRF)); - OUT_BATCH(0); /* MBZ */ - ADVANCE_BATCH(); - } else { - BEGIN_BATCH(9); - OUT_BATCH(_3DSTATE_HS << 16 | (9 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } - brw->tcs.enabled = active; -} - -const struct brw_tracked_state gen8_hs_state = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_TCS_PROG_DATA | - BRW_NEW_TESS_PROGRAMS, - }, - .emit = gen8_upload_hs_state, -}; diff --git a/src/mesa/drivers/dri/i965/gen8_viewport_state.c 
b/src/mesa/drivers/dri/i965/gen8_viewport_state.c deleted file mode 100644 index ffb1426..0000000 --- a/src/mesa/drivers/dri/i965/gen8_viewport_state.c +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Copyright © 2011 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include "brw_context.h" -#include "brw_state.h" -#include "brw_defines.h" -#include "intel_batchbuffer.h" -#include "main/fbobject.h" -#include "main/framebuffer.h" -#include "main/viewport.h" - -static void -gen8_upload_sf_clip_viewport(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - const struct gen_device_info *devinfo = &brw->screen->devinfo; - float y_scale, y_bias; - - /* BRW_NEW_VIEWPORT_COUNT */ - const unsigned viewport_count = brw->clip.viewport_count; - - /* _NEW_BUFFERS */ - struct gl_framebuffer *fb = ctx->DrawBuffer; - const bool render_to_fbo = _mesa_is_user_fbo(fb); - const uint32_t fb_width = _mesa_geometric_width(ctx->DrawBuffer); - const uint32_t fb_height = _mesa_geometric_height(ctx->DrawBuffer); - - float *vp = brw_state_batch(brw, - 16 * 4 * viewport_count, - 64, &brw->sf.vp_offset); - /* Also assign to clip.vp_offset in case something uses it. */ - brw->clip.vp_offset = brw->sf.vp_offset; - - /* _NEW_BUFFERS */ - if (render_to_fbo) { - y_scale = 1.0; - y_bias = 0; - } else { - y_scale = -1.0; - y_bias = (float)fb_height; - } - - for (unsigned i = 0; i < viewport_count; i++) { - float scale[3], translate[3]; - _mesa_get_viewport_xform(ctx, i, scale, translate); - - /* _NEW_VIEWPORT: Viewport Matrix Elements */ - vp[0] = scale[0]; /* m00 */ - vp[1] = scale[1] * y_scale; /* m11 */ - vp[2] = scale[2]; /* m22 */ - vp[3] = translate[0]; /* m30 */ - vp[4] = translate[1] * y_scale + y_bias; /* m31 */ - vp[5] = translate[2]; /* m32 */ - - /* Reserved */ - vp[6] = 0; - vp[7] = 0; - - brw_calculate_guardband_size(devinfo, fb_width, fb_height, - vp[0], vp[1], vp[3], vp[4], - &vp[8], &vp[9], &vp[10], &vp[11]); - - /* _NEW_VIEWPORT | _NEW_BUFFERS: Screen Space Viewport - * The hardware will take the intersection of the drawing rectangle, - * scissor rectangle, and the viewport extents. We don't need to be - * smart, and can therefore just program the viewport extents. 
- */ - float viewport_Xmax = ctx->ViewportArray[i].X + ctx->ViewportArray[i].Width; - float viewport_Ymax = ctx->ViewportArray[i].Y + ctx->ViewportArray[i].Height; - if (render_to_fbo) { - vp[12] = ctx->ViewportArray[i].X; - vp[13] = viewport_Xmax - 1; - vp[14] = ctx->ViewportArray[i].Y; - vp[15] = viewport_Ymax - 1; - } else { - vp[12] = ctx->ViewportArray[i].X; - vp[13] = viewport_Xmax - 1; - vp[14] = fb_height - viewport_Ymax; - vp[15] = fb_height - ctx->ViewportArray[i].Y - 1; - } - - vp += 16; - } - - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL << 16 | (2 - 2)); - OUT_BATCH(brw->sf.vp_offset); - ADVANCE_BATCH(); -} - -const struct brw_tracked_state gen8_sf_clip_viewport = { - .dirty = { - .mesa = _NEW_BUFFERS | - _NEW_VIEWPORT, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_VIEWPORT_COUNT, - }, - .emit = gen8_upload_sf_clip_viewport, -}; diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c index 2ff809f..dd7db7a 100644 --- a/src/mesa/drivers/dri/i965/genX_state_upload.c +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c @@ -42,6 +42,7 @@ #include "main/framebuffer.h" #include "main/stencil.h" #include "main/transformfeedback.h" +#include "main/viewport.h" UNUSED static void * emit_dwords(struct brw_context *brw, unsigned n) @@ -1075,6 +1076,379 @@ static const struct brw_tracked_state genX(vs_state) = { .emit = genX(upload_vs_state), }; +/* ---------------------------------------------------------------------- */ + +static void +brw_calculate_guardband_size(const struct gen_device_info *devinfo, + uint32_t fb_width, uint32_t fb_height, + float m00, float m11, float m30, float m31, + float *xmin, float *xmax, + float *ymin, float *ymax) +{ + /* According to the "Vertex X,Y Clamping and Quantization" section of the + * Strips and Fans documentation: + * + * "The vertex X and Y screen-space coordinates are also /clamped/ to the + * fixed-point "guardband" range supported by the 
rasterization hardware" + * + * and + * + * "In almost all circumstances, if an object’s vertices are actually + * modified by this clamping (i.e., had X or Y coordinates outside of + * the guardband extent the rendered object will not match the intended + * result. Therefore software should take steps to ensure that this does + * not happen - e.g., by clipping objects such that they do not exceed + * these limits after the Drawing Rectangle is applied." + * + * I believe the fundamental restriction is that the rasterizer (in + * the SF/WM stages) have a limit on the number of pixels that can be + * rasterized. We need to ensure any coordinates beyond the rasterizer + * limit are handled by the clipper. So effectively that limit becomes + * the clipper's guardband size. + * + * It goes on to say: + * + * "In addition, in order to be correctly rendered, objects must have a + * screenspace bounding box not exceeding 8K in the X or Y direction. + * This additional restriction must also be comprehended by software, + * i.e., enforced by use of clipping." + * + * This makes no sense. Gen7+ hardware supports 16K render targets, + * and you definitely need to be able to draw polygons that fill the + * surface. Our assumption is that the rasterizer was limited to 8K + * on Sandybridge, which only supports 8K surfaces, and it was actually + * increased to 16K on Ivybridge and later. + * + * So, limit the guardband to 16K on Gen7+ and 8K on Sandybridge. + */ + const float gb_size = devinfo->gen >= 7 ? 
16384.0f : 8192.0f; + + if (m00 != 0 && m11 != 0) { + /* First, we compute the screen-space render area */ + const float ss_ra_xmin = MIN3( 0, m30 + m00, m30 - m00); + const float ss_ra_xmax = MAX3( fb_width, m30 + m00, m30 - m00); + const float ss_ra_ymin = MIN3( 0, m31 + m11, m31 - m11); + const float ss_ra_ymax = MAX3(fb_height, m31 + m11, m31 - m11); + + /* We want the guardband to be centered on that */ + const float ss_gb_xmin = (ss_ra_xmin + ss_ra_xmax) / 2 - gb_size; + const float ss_gb_xmax = (ss_ra_xmin + ss_ra_xmax) / 2 + gb_size; + const float ss_gb_ymin = (ss_ra_ymin + ss_ra_ymax) / 2 - gb_size; + const float ss_gb_ymax = (ss_ra_ymin + ss_ra_ymax) / 2 + gb_size; + + /* Now we need it in native device coordinates */ + const float ndc_gb_xmin = (ss_gb_xmin - m30) / m00; + const float ndc_gb_xmax = (ss_gb_xmax - m30) / m00; + const float ndc_gb_ymin = (ss_gb_ymin - m31) / m11; + const float ndc_gb_ymax = (ss_gb_ymax - m31) / m11; + + /* Thanks to Y-flipping and ORIGIN_UPPER_LEFT, the Y coordinates may be + * flipped upside-down. X should be fine though. + */ + assert(ndc_gb_xmin <= ndc_gb_xmax); + *xmin = ndc_gb_xmin; + *xmax = ndc_gb_xmax; + *ymin = MIN2(ndc_gb_ymin, ndc_gb_ymax); + *ymax = MAX2(ndc_gb_ymin, ndc_gb_ymax); + } else { + /* The viewport scales to 0, so nothing will be rendered. 
*/ + *xmin = 0.0f; + *xmax = 0.0f; + *ymin = 0.0f; + *ymax = 0.0f; + } +} + +static void +genX(upload_sf_clip_viewport)(struct brw_context *brw) +{ + struct gl_context *ctx = &brw->ctx; + float y_scale, y_bias; + const struct gen_device_info *devinfo = &brw->screen->devinfo; + + /* BRW_NEW_VIEWPORT_COUNT */ + const unsigned viewport_count = brw->clip.viewport_count; + + /* _NEW_BUFFERS */ + const bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer); + const uint32_t fb_width = (float)_mesa_geometric_width(ctx->DrawBuffer); + const uint32_t fb_height = (float)_mesa_geometric_height(ctx->DrawBuffer); + +#if GEN_GEN >= 7 +#define clv sfv + struct GENX(SF_CLIP_VIEWPORT) sfv; + uint32_t sf_clip_vp_offset; + uint32_t *sf_clip_map = brw_state_batch(brw, 16 * 4 * viewport_count, + 64, &sf_clip_vp_offset); +#else + struct GENX(SF_VIEWPORT) sfv; + struct GENX(CLIP_VIEWPORT) clv; + uint32_t *sf_map = brw_state_batch(brw, 8 * 4 * viewport_count, + 32, &brw->sf.vp_offset); + uint32_t *clip_map = brw_state_batch(brw, 4 * 4 * viewport_count, + 32, &brw->clip.vp_offset); +#endif + + /* _NEW_BUFFERS */ + if (render_to_fbo) { + y_scale = 1.0; + y_bias = 0; + } else { + y_scale = -1.0; + y_bias = (float)fb_height; + } + + for (unsigned i = 0; i < brw->clip.viewport_count; i++) { + /* _NEW_VIEWPORT: Guardband Clipping */ + float scale[3], translate[3], gb_xmin, gb_xmax, gb_ymin, gb_ymax; + _mesa_get_viewport_xform(ctx, i, scale, translate); + + sfv.ViewportMatrixElementm00 = scale[0]; + sfv.ViewportMatrixElementm11 = scale[1] * y_scale, + sfv.ViewportMatrixElementm22 = scale[2], + sfv.ViewportMatrixElementm30 = translate[0], + sfv.ViewportMatrixElementm31 = translate[1] * y_scale + y_bias, + sfv.ViewportMatrixElementm32 = translate[2], + brw_calculate_guardband_size(devinfo, fb_width, fb_height, + sfv.ViewportMatrixElementm00, + sfv.ViewportMatrixElementm11, + sfv.ViewportMatrixElementm30, + sfv.ViewportMatrixElementm31, + &gb_xmin, &gb_xmax, &gb_ymin, &gb_ymax); + + + 
clv.XMinClipGuardband = gb_xmin; + clv.XMaxClipGuardband = gb_xmax; + clv.YMinClipGuardband = gb_ymin; + clv.YMaxClipGuardband = gb_ymax; + +#if GEN_GEN >= 8 + /* _NEW_VIEWPORT | _NEW_BUFFERS: Screen Space Viewport + * The hardware will take the intersection of the drawing rectangle, + * scissor rectangle, and the viewport extents. We don't need to be + * smart, and can therefore just program the viewport extents. + */ + const float viewport_Xmax = + ctx->ViewportArray[i].X + ctx->ViewportArray[i].Width; + const float viewport_Ymax = + ctx->ViewportArray[i].Y + ctx->ViewportArray[i].Height; + + if (render_to_fbo) { + sfv.XMinViewPort = ctx->ViewportArray[i].X; + sfv.XMaxViewPort = viewport_Xmax - 1; + sfv.YMinViewPort = ctx->ViewportArray[i].Y; + sfv.YMaxViewPort = viewport_Ymax - 1; + } else { + sfv.XMinViewPort = ctx->ViewportArray[i].X; + sfv.XMaxViewPort = viewport_Xmax - 1; + sfv.YMinViewPort = fb_height - viewport_Ymax; + sfv.YMaxViewPort = fb_height - ctx->ViewportArray[i].Y - 1; + } +#endif + +#if GEN_GEN >= 7 + GENX(SF_CLIP_VIEWPORT_pack)(NULL, sf_clip_map, &sfv); + sf_clip_map += 16; +#else + GENX(SF_VIEWPORT_pack)(NULL, sf_map, &sfv); + GENX(CLIP_VIEWPORT_pack)(NULL, clip_map, &clv); + sf_map += 8; + clip_map += 4; +#endif + } + +#if GEN_GEN >= 7 + brw_batch_emit(brw, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), ptr) { + ptr.SFClipViewportPointer = sf_clip_vp_offset; + } +#else + brw->ctx.NewDriverState |= BRW_NEW_SF_VP | BRW_NEW_CLIP_VP; +#endif +} + +static const struct brw_tracked_state genX(sf_clip_viewport) = { + .dirty = { + .mesa = _NEW_BUFFERS | + _NEW_VIEWPORT, + .brw = BRW_NEW_BATCH | + BRW_NEW_BLORP | + BRW_NEW_VIEWPORT_COUNT, + }, + .emit = genX(upload_sf_clip_viewport), +}; + +static void +genX(upload_gs_state)(struct brw_context *brw) +{ + const struct gen_device_info *devinfo = &brw->screen->devinfo; + const struct brw_stage_state *stage_state = &brw->gs.base; + /* BRW_NEW_GEOMETRY_PROGRAM */ + bool active = brw->geometry_program; + + /* 
BRW_NEW_GS_PROG_DATA */ + struct brw_stage_prog_data *stage_prog_data = stage_state->prog_data; + const struct brw_vue_prog_data *vue_prog_data = + brw_vue_prog_data(stage_prog_data); +#if GEN_GEN >= 7 + const struct brw_gs_prog_data *gs_prog_data = + brw_gs_prog_data(stage_prog_data); +#endif + +#if GEN_GEN < 7 + brw_batch_emit(brw, GENX(3DSTATE_CONSTANT_GS), cgs) { + if (active && stage_state->push_const_size != 0) { + cgs.Buffer0Valid = true; + cgs.PointertoGSConstantBuffer0 = stage_state->push_const_offset; + cgs.GSConstantBuffer0ReadLength = stage_state->push_const_size - 1; + } + } +#endif + +#if GEN_GEN == 7 && !GEN_IS_HASWELL + /** + * From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages > + * Geometry > Geometry Shader > State: + * + * "Note: Because of corruption in IVB:GT2, software needs to flush the + * whole fixed function pipeline when the GS enable changes value in + * the 3DSTATE_GS." + * + * The hardware architects have clarified that in this context "flush the + * whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS + * Stall" bit set. + */ + if (brw->gt == 2 && brw->gs.enabled != active) + gen7_emit_cs_stall_flush(brw); +#endif + + if (active) { + brw_batch_emit(brw, GENX(3DSTATE_GS), gs) { + INIT_THREAD_DISPATCH_FIELDS(gs, Vertex); + +#if GEN_GEN >= 7 + gs.OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1; + gs.OutputTopology = gs_prog_data->output_topology; + gs.ControlDataHeaderSize = + gs_prog_data->control_data_header_size_hwords; + + gs.InstanceControl = gs_prog_data->invocations - 1; + gs.DispatchMode = vue_prog_data->dispatch_mode; + + gs.IncludePrimitiveID = gs_prog_data->include_primitive_id; + + gs.ControlDataFormat = gs_prog_data->control_data_format; +#endif + + /* Note: the meaning of the GEN7_GS_REORDER_TRAILING bit changes between + * Ivy Bridge and Haswell. 
+ * + * On Ivy Bridge, setting this bit causes the vertices of a triangle + * strip to be delivered to the geometry shader in an order that does + * not strictly follow the OpenGL spec, but preserves triangle + * orientation. For example, if the vertices are (1, 2, 3, 4, 5), then + * the geometry shader sees triangles: + * + * (1, 2, 3), (2, 4, 3), (3, 4, 5) + * + * (Clearing the bit is even worse, because it fails to preserve + * orientation). + * + * Triangle strips with adjacency are always ordered in a way that preserves + * triangle orientation but does not strictly follow the OpenGL spec, + * regardless of the setting of this bit. + * + * On Haswell, both triangle strips and triangle strips with adjacency + * are always ordered in a way that preserves triangle orientation. + * Setting this bit causes the ordering to strictly follow the OpenGL + * spec. + * + * So in either case we want to set the bit. Unfortunately on Ivy + * Bridge this will get the order close to correct but not perfect. + */ + gs.ReorderMode = TRAILING; + gs.MaximumNumberofThreads = + GEN_GEN == 8 ? (devinfo->max_gs_threads / 2 - 1) + : (devinfo->max_gs_threads - 1); + +#if GEN_GEN < 7 + gs.SOStatisticsEnable = true; + gs.RenderingEnabled = 1; + if (brw->geometry_program->info.has_transform_feedback_varyings) + gs.SVBIPayloadEnable = true; + + /* GEN6_GS_SPF_MODE and GEN6_GS_VECTOR_MASK_ENABLE are enabled as it + * was previously done for gen6. + * + * TODO: test with both disabled to see if the HW is behaving + * as expected, like in gen7. 
+ */ + gs.SingleProgramFlow = true; + gs.VectorMaskEnable = true; +#endif + +#if GEN_GEN >= 8 + gs.ExpectedVertexCount = gs_prog_data->vertices_in; + + if (gs_prog_data->static_vertex_count != -1) { + gs.StaticOutput = true; + gs.StaticOutputVertexCount = gs_prog_data->static_vertex_count; + } + gs.IncludeVertexHandles = vue_prog_data->include_vue_handles; + + gs.UserClipDistanceCullTestEnableBitmask = + vue_prog_data->cull_distance_mask; + + const int urb_entry_write_offset = 1; + const uint32_t urb_entry_output_length = + DIV_ROUND_UP(vue_prog_data->vue_map.num_slots, 2) - + urb_entry_write_offset; + + gs.VertexURBEntryOutputReadOffset = urb_entry_write_offset; + gs.VertexURBEntryOutputLength = MAX2(urb_entry_output_length, 1); +#endif + } +#if GEN_GEN < 7 + } else if (brw->ff_gs.prog_active) { + /* In gen6, transform feedback for the VS stage is done with an ad-hoc GS + * program. This function provides the needed 3DSTATE_GS for this. + */ + upload_gs_state_for_tf(brw); +#endif + } else { + brw_batch_emit(brw, GENX(3DSTATE_GS), gs) { + gs.StatisticsEnable = true; +#if GEN_GEN < 7 + gs.RenderingEnabled = true; +#endif + +#if GEN_GEN < 8 + gs.DispatchGRFStartRegisterForURBData = 1; +#if GEN_GEN >= 7 + gs.IncludeVertexHandles = true; +#endif +#endif + } + } +#if GEN_GEN < 7 + brw->gs.enabled = active; +#endif +} + +static const struct brw_tracked_state genX(gs_state) = { + .dirty = { + .mesa = (GEN_GEN < 7 ? _NEW_PROGRAM_CONSTANTS : 0), + .brw = BRW_NEW_BATCH | + BRW_NEW_BLORP | + BRW_NEW_CONTEXT | + BRW_NEW_GEOMETRY_PROGRAM | + BRW_NEW_GS_PROG_DATA | + (GEN_GEN < 7 ? 
BRW_NEW_FF_GS_PROG_DATA | + BRW_NEW_PUSH_CONSTANT_ALLOCATION + : 0), + }, + .emit = genX(upload_gs_state), +}; + #endif /* ---------------------------------------------------------------------- */ @@ -1630,6 +2004,90 @@ static const struct brw_tracked_state genX(ps_state) = { .emit = genX(upload_ps), }; +/* ---------------------------------------------------------------------- */ + +static void +genX(upload_hs_state)(struct brw_context *brw) +{ + const struct gen_device_info *devinfo = &brw->screen->devinfo; + struct brw_stage_state *stage_state = &brw->tcs.base; + struct brw_stage_prog_data *stage_prog_data = stage_state->prog_data; + const struct brw_vue_prog_data *vue_prog_data = + brw_vue_prog_data(stage_prog_data); + + /* BRW_NEW_TCS_PROG_DATA */ + struct brw_tcs_prog_data *tcs_prog_data = + brw_tcs_prog_data(stage_prog_data); + + if (!tcs_prog_data) { + brw_batch_emit(brw, GENX(3DSTATE_HS), hs); + } else { + brw_batch_emit(brw, GENX(3DSTATE_HS), hs) { + INIT_THREAD_DISPATCH_FIELDS(hs, Vertex); + + hs.InstanceCount = tcs_prog_data->instances - 1; + hs.IncludeVertexHandles = true; + + hs.MaximumNumberofThreads = devinfo->max_tcs_threads - 1; + } + } +} + +static const struct brw_tracked_state genX(hs_state) = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_BATCH | + BRW_NEW_BLORP | + BRW_NEW_TCS_PROG_DATA | + BRW_NEW_TESS_PROGRAMS, + }, + .emit = genX(upload_hs_state), +}; + +static void +genX(upload_ds_state)(struct brw_context *brw) +{ + const struct gen_device_info *devinfo = &brw->screen->devinfo; + const struct brw_stage_state *stage_state = &brw->tes.base; + struct brw_stage_prog_data *stage_prog_data = stage_state->prog_data; + + /* BRW_NEW_TES_PROG_DATA */ + const struct brw_tes_prog_data *tes_prog_data = + brw_tes_prog_data(stage_prog_data); + const struct brw_vue_prog_data *vue_prog_data = + brw_vue_prog_data(stage_prog_data); + + if (!tes_prog_data) { + brw_batch_emit(brw, GENX(3DSTATE_DS), ds); + } else { + brw_batch_emit(brw, GENX(3DSTATE_DS), ds) { + 
INIT_THREAD_DISPATCH_FIELDS(ds, Patch); + + ds.MaximumNumberofThreads = devinfo->max_tes_threads - 1; + ds.ComputeWCoordinateEnable = + tes_prog_data->domain == BRW_TESS_DOMAIN_TRI; + +#if GEN_GEN >= 8 + if (vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8) + ds.DispatchMode = DISPATCH_MODE_SIMD8_SINGLE_PATCH; + ds.UserClipDistanceCullTestEnableBitmask = + vue_prog_data->cull_distance_mask; +#endif + } + } +} + +static const struct brw_tracked_state genX(ds_state) = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_BATCH | + BRW_NEW_BLORP | + BRW_NEW_TESS_PROGRAMS | + BRW_NEW_TES_PROG_DATA, + }, + .emit = genX(upload_ds_state), +}; + #endif /* ---------------------------------------------------------------------- */ @@ -1907,7 +2365,7 @@ genX(init_atoms)(struct brw_context *brw) #elif GEN_GEN == 6 static const struct brw_tracked_state *render_atoms[] = { - &gen6_sf_and_clip_viewports, + &genX(sf_clip_viewport), /* Command packets: */ @@ -1947,7 +2405,7 @@ genX(init_atoms)(struct brw_context *brw) &gen6_multisample_state, &genX(vs_state), - &gen6_gs_state, + &genX(gs_state), &genX(clip_state), &genX(sf_state), &genX(wm_state), @@ -1975,7 +2433,7 @@ genX(init_atoms)(struct brw_context *brw) /* Command packets: */ &brw_cc_vp, - &gen7_sf_clip_viewport, + &genX(sf_clip_viewport), &gen7_l3_state, &gen7_push_constant_space, @@ -2031,10 +2489,10 @@ genX(init_atoms)(struct brw_context *brw) &gen6_multisample_state, &genX(vs_state), - &gen7_hs_state, + &genX(hs_state), &gen7_te_state, - &gen7_ds_state, - &gen7_gs_state, + &genX(ds_state), + &genX(gs_state), &genX(sol_state), &genX(clip_state), &genX(sbe_state), @@ -2063,7 +2521,7 @@ genX(init_atoms)(struct brw_context *brw) static const struct brw_tracked_state *render_atoms[] = { &brw_cc_vp, - &gen8_sf_clip_viewport, + &genX(sf_clip_viewport), &gen7_l3_state, &gen7_push_constant_space, @@ -2118,10 +2576,10 @@ genX(init_atoms)(struct brw_context *brw) &gen8_multisample_state, &genX(vs_state), - &gen8_hs_state, + 
&genX(hs_state), &gen7_te_state, - &gen8_ds_state, - &gen8_gs_state, + &genX(ds_state), + &genX(gs_state), &genX(sol_state), &genX(clip_state), &genX(raster_state), -- 2.7.4