From 586b3894744819071bb1ad56383e3c0d9e5b7e1f Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Mon, 30 Apr 2012 14:29:35 -0700 Subject: [PATCH] i965: split gen{6,7}_blorp_exec functions into manageable chunks. This patch splits up the gen6_blorp_exec and gen7_blorp_exec functions, which were very long, into simple component functions. With a few exceptions, there is one function per state packet. This will allow blit functionality to be added without significantly complicating the code. Reviewed-by: Chad Versace v2: Rename the functions gen{6,7}_emit_wm_disable() to gen{6,7}_emit_wm_config() (since the WM is not actually disabled during HiZ ops; it simply doesn't have a program). Also, on gen7, split out the configration of 3DSTATE_PS to a separate function gen7_emit_ps_config(). --- src/mesa/drivers/dri/i965/brw_blorp.h | 23 +- src/mesa/drivers/dri/i965/gen6_blorp.cpp | 596 ++++++++++++++++++------------- src/mesa/drivers/dri/i965/gen7_blorp.cpp | 550 ++++++++++++++-------------- 3 files changed, 647 insertions(+), 522 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h b/src/mesa/drivers/dri/i965/brw_blorp.h index d250db5..c1c8334 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.h +++ b/src/mesa/drivers/dri/i965/brw_blorp.h @@ -108,6 +108,10 @@ void gen6_blorp_init(struct brw_context *brw); void +gen6_blorp_compute_tile_masks(const brw_blorp_params *params, + uint32_t *tile_mask_x, uint32_t *tile_mask_y); + +void gen6_blorp_emit_batch_head(struct brw_context *brw, const brw_blorp_params *params); @@ -116,7 +120,22 @@ gen6_blorp_emit_vertices(struct brw_context *brw, const brw_blorp_params *params); void +gen6_blorp_emit_vs_disable(struct brw_context *brw, + const brw_blorp_params *params); + +uint32_t gen6_blorp_emit_depth_stencil_state(struct brw_context *brw, - const brw_blorp_params *params, - uint32_t *out_offset); + const brw_blorp_params *params); + +void +gen6_blorp_emit_gs_disable(struct brw_context *brw, + const brw_blorp_params *params); + +void +gen6_blorp_emit_clip_disable(struct brw_context *brw, + const brw_blorp_params *params); + +void +gen6_blorp_emit_drawing_rectangle(struct brw_context *brw, + const brw_blorp_params *params); /** \} */ diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp b/src/mesa/drivers/dri/i965/gen6_blorp.cpp index 0b1620a..33a1035 100644 --- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp +++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp @@ -45,6 +45,31 @@ * sizeof(float)) /** \} */ + +/** + * Compute masks to determine how much of draw_x and draw_y should be + * performed using the fine adjustment of "depth coordinate offset X/Y" + * (dw5 of 3DSTATE_DEPTH_BUFFER). See the emit_depthbuffer() function for + * details. + */ +void +gen6_blorp_compute_tile_masks(const brw_blorp_params *params, + uint32_t *tile_mask_x, uint32_t *tile_mask_y) +{ + uint32_t depth_mask_x, depth_mask_y, hiz_mask_x, hiz_mask_y; + intel_region_get_tile_masks(params->depth.mt->region, + &depth_mask_x, &depth_mask_y); + intel_region_get_tile_masks(params->depth.mt->hiz_mt->region, + &hiz_mask_x, &hiz_mask_y); + + /* Each HiZ row represents 2 rows of pixels */ + hiz_mask_y = hiz_mask_y << 1 | 1; + + *tile_mask_x = depth_mask_x | hiz_mask_x; + *tile_mask_y = depth_mask_y | hiz_mask_y; +} + + void gen6_blorp_emit_batch_head(struct brw_context *brw, const brw_blorp_params *params) @@ -234,224 +259,268 @@ gen6_blorp_emit_vertices(struct brw_context *brw, } } -/** - * \brief Execute a blit or render pass operation. + +/* 3DSTATE_URB * - * To execute the operation, this function manually constructs and emits a - * batch to draw a rectangle primitive. The batchbuffer is flushed before - * constructing and after emitting the batch. + * Assign the entire URB to the VS. Even though the VS disabled, URB space + * is still needed because the clipper loads the VUE's from the URB. From + * the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE, + * Dword 1.15:0 "VS Number of URB Entries": + * This field is always used (even if VS Function Enable is DISABLED). * - * This function alters no GL state. + * The warning below appears in the PRM (Section 3DSTATE_URB), but we can + * safely ignore it because this batch contains only one draw call. + * Because of URB corruption caused by allocating a previous GS unit + * URB entry to the VS unit, software is required to send a “GS NULL + * Fence” (Send URB fence with VS URB size == 1 and GS URB size == 0) + * plus a dummy DRAW call before any case where VS will be taking over + * GS URB space. */ -void -gen6_blorp_exec(struct intel_context *intel, - const brw_blorp_params *params) +static void +gen6_blorp_emit_urb_config(struct brw_context *brw, + const brw_blorp_params *params) { - struct gl_context *ctx = &intel->ctx; - struct brw_context *brw = brw_context(ctx); - uint32_t draw_x, draw_y; - uint32_t tile_mask_x, tile_mask_y; - - params->depth.get_draw_offsets(&draw_x, &draw_y); + struct intel_context *intel = &brw->intel; - /* Compute masks to determine how much of draw_x and draw_y should be - * performed using the fine adjustment of "depth coordinate offset X/Y" - * (dw5 of 3DSTATE_DEPTH_BUFFER). See the emit_depthbuffer() function for - * details. - */ - { - uint32_t depth_mask_x, depth_mask_y, hiz_mask_x, hiz_mask_y; - intel_region_get_tile_masks(params->depth.mt->region, - &depth_mask_x, &depth_mask_y); - intel_region_get_tile_masks(params->depth.mt->hiz_mt->region, - &hiz_mask_x, &hiz_mask_y); + BEGIN_BATCH(3); + OUT_BATCH(_3DSTATE_URB << 16 | (3 - 2)); + OUT_BATCH(brw->urb.max_vs_entries << GEN6_URB_VS_ENTRIES_SHIFT); + OUT_BATCH(0); + ADVANCE_BATCH(); +} - /* Each HiZ row represents 2 rows of pixels */ - hiz_mask_y = hiz_mask_y << 1 | 1; - tile_mask_x = depth_mask_x | hiz_mask_x; - tile_mask_y = depth_mask_y | hiz_mask_y; - } +/** + * \param out_offset is relative to + * CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress. + */ +uint32_t +gen6_blorp_emit_depth_stencil_state(struct brw_context *brw, + const brw_blorp_params *params) +{ + uint32_t depthstencil_offset; - gen6_blorp_emit_batch_head(brw, params); - gen6_blorp_emit_vertices(brw, params); + struct gen6_depth_stencil_state *state; + state = (struct gen6_depth_stencil_state *) + brw_state_batch(brw, AUB_TRACE_DEPTH_STENCIL_STATE, + sizeof(*state), 64, + &depthstencil_offset); + memset(state, 0, sizeof(*state)); - /* 3DSTATE_URB - * - * Assign the entire URB to the VS. Even though the VS disabled, URB space - * is still needed because the clipper loads the VUE's from the URB. From - * the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE, - * Dword 1.15:0 "VS Number of URB Entries": - * This field is always used (even if VS Function Enable is DISABLED). - * - * The warning below appears in the PRM (Section 3DSTATE_URB), but we can - * safely ignore it because this batch contains only one draw call. - * Because of URB corruption caused by allocating a previous GS unit - * URB entry to the VS unit, software is required to send a “GS NULL - * Fence” (Send URB fence with VS URB size == 1 and GS URB size == 0) - * plus a dummy DRAW call before any case where VS will be taking over - * GS URB space. + /* See the following sections of the Sandy Bridge PRM, Volume 1, Part2: + * - 7.5.3.1 Depth Buffer Clear + * - 7.5.3.2 Depth Buffer Resolve + * - 7.5.3.3 Hierarchical Depth Buffer Resolve */ - { - BEGIN_BATCH(3); - OUT_BATCH(_3DSTATE_URB << 16 | (3 - 2)); - OUT_BATCH(brw->urb.max_vs_entries << GEN6_URB_VS_ENTRIES_SHIFT); - OUT_BATCH(0); - ADVANCE_BATCH(); + state->ds2.depth_write_enable = 1; + if (params->hiz_op == GEN6_HIZ_OP_DEPTH_RESOLVE) { + state->ds2.depth_test_enable = 1; + state->ds2.depth_test_func = COMPAREFUNC_NEVER; } - /* 3DSTATE_CC_STATE_POINTERS - * - * The pointer offsets are relative to - * CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress. - * - * The HiZ op doesn't use BLEND_STATE or COLOR_CALC_STATE. - */ - { - uint32_t depthstencil_offset; - gen6_blorp_emit_depth_stencil_state(brw, params, &depthstencil_offset); - - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (4 - 2)); - OUT_BATCH(1); /* BLEND_STATE offset */ - OUT_BATCH(depthstencil_offset | 1); /* DEPTH_STENCIL_STATE offset */ - OUT_BATCH(1); /* COLOR_CALC_STATE offset */ - ADVANCE_BATCH(); - } + return depthstencil_offset; +} - /* 3DSTATE_VS - * - * Disable vertex shader. - */ - { + +/* 3DSTATE_CC_STATE_POINTERS + * + * The pointer offsets are relative to + * CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress. + * + * The HiZ op doesn't use BLEND_STATE or COLOR_CALC_STATE. + */ +static void +gen6_blorp_emit_cc_state_pointers(struct brw_context *brw, + const brw_blorp_params *params, + uint32_t depthstencil_offset) +{ + struct intel_context *intel = &brw->intel; + + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (4 - 2)); + OUT_BATCH(1); /* BLEND_STATE offset */ + OUT_BATCH(depthstencil_offset | 1); /* DEPTH_STENCIL_STATE offset */ + OUT_BATCH(1); /* COLOR_CALC_STATE offset */ + ADVANCE_BATCH(); +} + + +/* 3DSTATE_VS + * + * Disable vertex shader. + */ +void +gen6_blorp_emit_vs_disable(struct brw_context *brw, + const brw_blorp_params *params) +{ + struct intel_context *intel = &brw->intel; + + if (intel->gen == 6) { /* From the BSpec, Volume 2a, Part 3 "Vertex Shader", Section * 3DSTATE_VS, Dword 5.0 "VS Function Enable": - * [DevSNB] A pipeline flush must be programmed prior to a 3DSTATE_VS - * command that causes the VS Function Enable to toggle. Pipeline - * flush can be executed by sending a PIPE_CONTROL command with CS - * stall bit set and a post sync operation. + * + * [DevSNB] A pipeline flush must be programmed prior to a + * 3DSTATE_VS command that causes the VS Function Enable to + * toggle. Pipeline flush can be executed by sending a PIPE_CONTROL + * command with CS stall bit set and a post sync operation. */ intel_emit_post_sync_nonzero_flush(intel); - - BEGIN_BATCH(6); - OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); } - /* 3DSTATE_GS - * - * Disable the geometry shader. - */ - { - BEGIN_BATCH(7); - OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } + BEGIN_BATCH(6); + OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); +} - /* 3DSTATE_CLIP - * - * Disable the clipper. - * - * The BLORP op emits a rectangle primitive, which requires clipping to - * be disabled. From page 10 of the Sandy Bridge PRM Volume 2 Part 1 - * Section 1.3 "3D Primitives Overview": - * RECTLIST: - * Either the CLIP unit should be DISABLED, or the CLIP unit's Clip - * Mode should be set to a value other than CLIPMODE_NORMAL. - * - * Also disable perspective divide. This doesn't change the clipper's - * output, but does spare a few electrons. - */ - { - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2)); - OUT_BATCH(0); - OUT_BATCH(GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE); + +/* 3DSTATE_GS + * + * Disable the geometry shader. + */ +void +gen6_blorp_emit_gs_disable(struct brw_context *brw, + const brw_blorp_params *params) +{ + struct intel_context *intel = &brw->intel; + + BEGIN_BATCH(7); + OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); +} + + +/* 3DSTATE_CLIP + * + * Disable the clipper. + * + * The BLORP op emits a rectangle primitive, which requires clipping to + * be disabled. From page 10 of the Sandy Bridge PRM Volume 2 Part 1 + * Section 1.3 "3D Primitives Overview": + * RECTLIST: + * Either the CLIP unit should be DISABLED, or the CLIP unit's Clip + * Mode should be set to a value other than CLIPMODE_NORMAL. + * + * Also disable perspective divide. This doesn't change the clipper's + * output, but does spare a few electrons. + */ +void +gen6_blorp_emit_clip_disable(struct brw_context *brw, + const brw_blorp_params *params) +{ + struct intel_context *intel = &brw->intel; + + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2)); + OUT_BATCH(0); + OUT_BATCH(GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE); + OUT_BATCH(0); + ADVANCE_BATCH(); +} + + +/* 3DSTATE_SF + * + * Disable ViewportTransformEnable (dw2.1) + * + * From the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D + * Primitives Overview": + * RECTLIST: Viewport Mapping must be DISABLED (as is typical with the + * use of screen- space coordinates). + * + * A solid rectangle must be rendered, so set FrontFaceFillMode (dw2.4:3) + * and BackFaceFillMode (dw2.5:6) to SOLID(0). + * + * From the Sandy Bridge PRM, Volume 2, Part 1, Section + * 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode: + * SOLID: Any triangle or rectangle object found to be front-facing + * is rendered as a solid object. This setting is required when + * (rendering rectangle (RECTLIST) objects. + */ +static void +gen6_blorp_emit_sf_config(struct brw_context *brw, + const brw_blorp_params *params) +{ + struct intel_context *intel = &brw->intel; + + BEGIN_BATCH(20); + OUT_BATCH(_3DSTATE_SF << 16 | (20 - 2)); + OUT_BATCH((1 - 1) << GEN6_SF_NUM_OUTPUTS_SHIFT | /* only position */ + 1 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT | + 0 << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT); + for (int i = 0; i < 18; ++i) OUT_BATCH(0); - ADVANCE_BATCH(); - } + ADVANCE_BATCH(); +} - /* 3DSTATE_SF - * - * Disable ViewportTransformEnable (dw2.1) - * - * From the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D - * Primitives Overview": - * RECTLIST: Viewport Mapping must be DISABLED (as is typical with the - * use of screen- space coordinates). - * - * A solid rectangle must be rendered, so set FrontFaceFillMode (dw2.4:3) - * and BackFaceFillMode (dw2.5:6) to SOLID(0). - * - * From the Sandy Bridge PRM, Volume 2, Part 1, Section - * 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode: - * SOLID: Any triangle or rectangle object found to be front-facing - * is rendered as a solid object. This setting is required when - * (rendering rectangle (RECTLIST) objects. - */ - { - BEGIN_BATCH(20); - OUT_BATCH(_3DSTATE_SF << 16 | (20 - 2)); - OUT_BATCH((1 - 1) << GEN6_SF_NUM_OUTPUTS_SHIFT | /* only position */ - 1 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT | - 0 << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT); - for (int i = 0; i < 18; ++i) - OUT_BATCH(0); - ADVANCE_BATCH(); - } - /* 3DSTATE_WM - * - * Disable thread dispatch (dw5.19) and enable the HiZ op. - * - * Even though thread dispatch is disabled, max threads (dw5.25:31) must be +/** + * Disable thread dispatch (dw5.19) and enable the HiZ op. + */ +static void +gen6_blorp_emit_wm_config(struct brw_context *brw, + const brw_blorp_params *params) +{ + struct intel_context *intel = &brw->intel; + + /* Even though thread dispatch is disabled, max threads (dw5.25:31) must be * nonzero to prevent the GPU from hanging. See the valid ranges in the * BSpec, Volume 2a.11 Windower, Section 3DSTATE_WM, Dword 5.25:31 * "Maximum Number Of Threads". */ - { - uint32_t dw4 = 0; - - switch (params->hiz_op) { - case GEN6_HIZ_OP_DEPTH_CLEAR: - assert(!"not implemented"); - dw4 |= GEN6_WM_DEPTH_CLEAR; - break; - case GEN6_HIZ_OP_DEPTH_RESOLVE: - dw4 |= GEN6_WM_DEPTH_RESOLVE; - break; - case GEN6_HIZ_OP_HIZ_RESOLVE: - dw4 |= GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE; - break; - default: - assert(0); - break; - } - - BEGIN_BATCH(9); - OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(dw4); - OUT_BATCH((brw->max_wm_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT); - OUT_BATCH((1 - 1) << GEN6_WM_NUM_SF_OUTPUTS_SHIFT); /* only position */ - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); + uint32_t dw4 = 0; + + switch (params->hiz_op) { + case GEN6_HIZ_OP_DEPTH_CLEAR: + assert(!"not implemented"); + dw4 |= GEN6_WM_DEPTH_CLEAR; + break; + case GEN6_HIZ_OP_DEPTH_RESOLVE: + dw4 |= GEN6_WM_DEPTH_RESOLVE; + break; + case GEN6_HIZ_OP_HIZ_RESOLVE: + dw4 |= GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE; + break; + default: + assert(0); + break; } + BEGIN_BATCH(9); + OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(dw4); + OUT_BATCH((brw->max_wm_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT); + OUT_BATCH((1 - 1) << GEN6_WM_NUM_SF_OUTPUTS_SHIFT); /* only position */ + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); +} + + +static void +gen6_blorp_emit_depth_stencil_config(struct brw_context *brw, + const brw_blorp_params *params) +{ + struct intel_context *intel = &brw->intel; + uint32_t draw_x, draw_y; + uint32_t tile_mask_x, tile_mask_y; + + gen6_blorp_compute_tile_masks(params, &tile_mask_x, &tile_mask_y); + params->depth.get_draw_offsets(&draw_x, &draw_y); + /* 3DSTATE_DEPTH_BUFFER */ { uint32_t width, height; @@ -534,81 +603,106 @@ gen6_blorp_exec(struct intel_context *intel, OUT_BATCH(0); ADVANCE_BATCH(); } +} - /* 3DSTATE_CLEAR_PARAMS - * - * From the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE_CLEAR_PARAMS: - * [DevSNB] 3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE - * packet when HiZ is enabled and the DEPTH_BUFFER_STATE changes. - */ - { - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 | (2 - 2)); - OUT_BATCH(0); - ADVANCE_BATCH(); - } - /* 3DSTATE_DRAWING_RECTANGLE */ - { - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2)); - OUT_BATCH(0); - OUT_BATCH(((params->x1 - 1) & 0xffff) | - ((params->y1 - 1) << 16)); - OUT_BATCH(0); - ADVANCE_BATCH(); - } +/* 3DSTATE_CLEAR_PARAMS + * + * From the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE_CLEAR_PARAMS: + * [DevSNB] 3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE + * packet when HiZ is enabled and the DEPTH_BUFFER_STATE changes. + */ +static void +gen6_blorp_emit_clear_params(struct brw_context *brw, + const brw_blorp_params *params) +{ + struct intel_context *intel = &brw->intel; - /* 3DPRIMITIVE */ - { - BEGIN_BATCH(6); - OUT_BATCH(CMD_3D_PRIM << 16 | (6 - 2) | - _3DPRIM_RECTLIST << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT | - GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL); - OUT_BATCH(3); /* vertex count per instance */ - OUT_BATCH(0); - OUT_BATCH(1); /* instance count */ - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 | (2 - 2)); + OUT_BATCH(0); + ADVANCE_BATCH(); +} - /* See comments above at first invocation of intel_flush() in - * gen6_blorp_emit_batch_head(). - */ - intel_flush(ctx); - /* Be safe. */ - brw->state.dirty.brw = ~0; - brw->state.dirty.cache = ~0; +/* 3DSTATE_DRAWING_RECTANGLE */ +void +gen6_blorp_emit_drawing_rectangle(struct brw_context *brw, + const brw_blorp_params *params) +{ + struct intel_context *intel = &brw->intel; + + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2)); + OUT_BATCH(0); + OUT_BATCH(((params->x1 - 1) & 0xffff) | + ((params->y1 - 1) << 16)); + OUT_BATCH(0); + ADVANCE_BATCH(); +} + + +/* 3DPRIMITIVE */ +static void +gen6_blorp_emit_primitive(struct brw_context *brw, + const brw_blorp_params *params) +{ + struct intel_context *intel = &brw->intel; + + BEGIN_BATCH(6); + OUT_BATCH(CMD_3D_PRIM << 16 | (6 - 2) | + _3DPRIM_RECTLIST << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT | + GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL); + OUT_BATCH(3); /* vertex count per instance */ + OUT_BATCH(0); + OUT_BATCH(1); /* instance count */ + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); } + /** - * \param out_offset is relative to - * CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress. + * \brief Execute a blit or render pass operation. + * + * To execute the operation, this function manually constructs and emits a + * batch to draw a rectangle primitive. The batchbuffer is flushed before + * constructing and after emitting the batch. + * + * This function alters no GL state. */ void -gen6_blorp_emit_depth_stencil_state(struct brw_context *brw, - const brw_blorp_params *params, - uint32_t *out_offset) +gen6_blorp_exec(struct intel_context *intel, + const brw_blorp_params *params) { - struct gen6_depth_stencil_state *state; - state = (struct gen6_depth_stencil_state *) - brw_state_batch(brw, AUB_TRACE_DEPTH_STENCIL_STATE, - sizeof(*state), 64, - out_offset); - memset(state, 0, sizeof(*state)); + struct gl_context *ctx = &intel->ctx; + struct brw_context *brw = brw_context(ctx); + uint32_t depthstencil_offset; - /* See the following sections of the Sandy Bridge PRM, Volume 1, Part2: - * - 7.5.3.1 Depth Buffer Clear - * - 7.5.3.2 Depth Buffer Resolve - * - 7.5.3.3 Hierarchical Depth Buffer Resolve + gen6_blorp_emit_batch_head(brw, params); + gen6_blorp_emit_vertices(brw, params); + gen6_blorp_emit_urb_config(brw, params); + depthstencil_offset = gen6_blorp_emit_depth_stencil_state(brw, params); + gen6_blorp_emit_cc_state_pointers(brw, params, depthstencil_offset); + gen6_blorp_emit_vs_disable(brw, params); + gen6_blorp_emit_gs_disable(brw, params); + gen6_blorp_emit_clip_disable(brw, params); + gen6_blorp_emit_sf_config(brw, params); + gen6_blorp_emit_wm_config(brw, params); + + gen6_blorp_emit_depth_stencil_config(brw, params); + gen6_blorp_emit_clear_params(brw, params); + gen6_blorp_emit_drawing_rectangle(brw, params); + gen6_blorp_emit_primitive(brw, params); + + /* See comments above at first invocation of intel_flush() in + * gen6_blorp_emit_batch_head(). */ - state->ds2.depth_write_enable = 1; - if (params->hiz_op == GEN6_HIZ_OP_DEPTH_RESOLVE) { - state->ds2.depth_test_enable = 1; - state->ds2.depth_test_func = COMPAREFUNC_NEVER; - } + intel_flush(ctx); + + /* Be safe. */ + brw->state.dirty.brw = ~0; + brw->state.dirty.cache = ~0; } /** \see intel_context::vtbl::resolve_hiz_slice */ diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp b/src/mesa/drivers/dri/i965/gen7_blorp.cpp index 3172c4f..64badcc 100644 --- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp +++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp @@ -34,205 +34,156 @@ #include "brw_blorp.h" #include "gen7_blorp.h" -/** - * \copydoc gen6_blorp_exec() + +/* 3DSTATE_URB_VS + * 3DSTATE_URB_HS + * 3DSTATE_URB_DS + * 3DSTATE_URB_GS + * + * If the 3DSTATE_URB_VS is emitted, than the others must be also. From the + * BSpec, Volume 2a "3D Pipeline Overview", Section 1.7.1 3DSTATE_URB_VS: + * 3DSTATE_URB_HS, 3DSTATE_URB_DS, and 3DSTATE_URB_GS must also be + * programmed in order for the programming of this state to be + * valid. */ -void -gen7_blorp_exec(struct intel_context *intel, - const brw_blorp_params *params) +static void +gen7_blorp_emit_urb_config(struct brw_context *brw, + const brw_blorp_params *params) { - struct gl_context *ctx = &intel->ctx; - struct brw_context *brw = brw_context(ctx); - uint32_t draw_x, draw_y; - uint32_t tile_mask_x, tile_mask_y; - - params->depth.get_draw_offsets(&draw_x, &draw_y); + struct intel_context *intel = &brw->intel; - /* Compute masks to determine how much of draw_x and draw_y should be - * performed using the fine adjustment of "depth coordinate offset X/Y" - * (dw5 of 3DSTATE_DEPTH_BUFFER). See the emit_depthbuffer() function for - * details. + /* The minimum valid value is 32. See 3DSTATE_URB_VS, + * Dword 1.15:0 "VS Number of URB Entries". */ - { - uint32_t depth_mask_x, depth_mask_y, hiz_mask_x, hiz_mask_y; - intel_region_get_tile_masks(params->depth.mt->region, - &depth_mask_x, &depth_mask_y); - intel_region_get_tile_masks(params->depth.mt->hiz_mt->region, - &hiz_mask_x, &hiz_mask_y); - - /* Each HiZ row represents 2 rows of pixels */ - hiz_mask_y = hiz_mask_y << 1 | 1; - - tile_mask_x = depth_mask_x | hiz_mask_x; - tile_mask_y = depth_mask_y | hiz_mask_y; - } + int num_vs_entries = 32; + + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_URB_VS << 16 | (2 - 2)); + OUT_BATCH(1 << GEN7_URB_ENTRY_SIZE_SHIFT | + 0 << GEN7_URB_STARTING_ADDRESS_SHIFT | + num_vs_entries); + ADVANCE_BATCH(); + + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_URB_GS << 16 | (2 - 2)); + OUT_BATCH(0); + ADVANCE_BATCH(); + + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_URB_HS << 16 | (2 - 2)); + OUT_BATCH(0); + ADVANCE_BATCH(); + + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_URB_DS << 16 | (2 - 2)); + OUT_BATCH(0); + ADVANCE_BATCH(); +} - gen6_blorp_emit_batch_head(brw, params); - gen6_blorp_emit_vertices(brw, params); - /* 3DSTATE_URB_VS - * 3DSTATE_URB_HS - * 3DSTATE_URB_DS - * 3DSTATE_URB_GS - * - * If the 3DSTATE_URB_VS is emitted, than the others must be also. From the - * BSpec, Volume 2a "3D Pipeline Overview", Section 1.7.1 3DSTATE_URB_VS: - * 3DSTATE_URB_HS, 3DSTATE_URB_DS, and 3DSTATE_URB_GS must also be - * programmed in order for the programming of this state to be - * valid. - */ - { - /* The minimum valid value is 32. See 3DSTATE_URB_VS, - * Dword 1.15:0 "VS Number of URB Entries". - */ - int num_vs_entries = 32; +/* 3DSTATE_DEPTH_STENCIL_STATE_POINTERS + * + * The offset is relative to CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress. + */ +static void +gen7_blorp_emit_depth_stencil_state_pointers(struct brw_context *brw, + const brw_blorp_params *params, + uint32_t depthstencil_offset) +{ + struct intel_context *intel = &brw->intel; - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_URB_VS << 16 | (2 - 2)); - OUT_BATCH(1 << GEN7_URB_ENTRY_SIZE_SHIFT | - 0 << GEN7_URB_STARTING_ADDRESS_SHIFT | - num_vs_entries); - ADVANCE_BATCH(); + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_DEPTH_STENCIL_STATE_POINTERS << 16 | (2 - 2)); + OUT_BATCH(depthstencil_offset | 1); + ADVANCE_BATCH(); +} - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_URB_GS << 16 | (2 - 2)); - OUT_BATCH(0); - ADVANCE_BATCH(); - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_URB_HS << 16 | (2 - 2)); - OUT_BATCH(0); - ADVANCE_BATCH(); +/* 3DSTATE_HS + * + * Disable the hull shader. + */ +static void +gen7_blorp_emit_hs_disable(struct brw_context *brw, + const brw_blorp_params *params) +{ + struct intel_context *intel = &brw->intel; + + BEGIN_BATCH(7); + OUT_BATCH(_3DSTATE_HS << 16 | (7 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); +} - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_URB_DS << 16 | (2 - 2)); - OUT_BATCH(0); - ADVANCE_BATCH(); - } - /* 3DSTATE_DEPTH_STENCIL_STATE_POINTERS - * - * The offset is relative to CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress. - */ - { - uint32_t depthstencil_offset; - gen6_blorp_emit_depth_stencil_state(brw, params, &depthstencil_offset); +/* 3DSTATE_TE + * + * Disable the tesselation engine. + */ +static void +gen7_blorp_emit_te_disable(struct brw_context *brw, + const brw_blorp_params *params) +{ + struct intel_context *intel = &brw->intel; + + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_TE << 16 | (4 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); +} - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_DEPTH_STENCIL_STATE_POINTERS << 16 | (2 - 2)); - OUT_BATCH(depthstencil_offset | 1); - ADVANCE_BATCH(); - } - /* 3DSTATE_VS - * - * Disable vertex shader. - */ - { - BEGIN_BATCH(6); - OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } - - /* 3DSTATE_HS - * - * Disable the hull shader. - */ - { - BEGIN_BATCH(7); - OUT_BATCH(_3DSTATE_HS << 16 | (7 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } +/* 3DSTATE_DS + * + * Disable the domain shader. + */ +static void +gen7_blorp_emit_ds_disable(struct brw_context *brw, + const brw_blorp_params *params) +{ + struct intel_context *intel = &brw->intel; + + BEGIN_BATCH(6); + OUT_BATCH(_3DSTATE_DS << 16 | (6 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); +} - /* 3DSTATE_TE - * - * Disable the tesselation engine. - */ - { - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_TE << 16 | (4 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } - /* 3DSTATE_DS - * - * Disable the domain shader. - */ - { - BEGIN_BATCH(6); - OUT_BATCH(_3DSTATE_DS << 16 | (6 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } +/* 3DSTATE_STREAMOUT + * + * Disable streamout. + */ +static void +gen7_blorp_emit_streamout_disable(struct brw_context *brw, + const brw_blorp_params *params) +{ + struct intel_context *intel = &brw->intel; - /* 3DSTATE_GS - * - * Disable the geometry shader. - */ - { - BEGIN_BATCH(7); - OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } + BEGIN_BATCH(3); + OUT_BATCH(_3DSTATE_STREAMOUT << 16 | (3 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); +} - /* 3DSTATE_STREAMOUT - * - * Disable streamout. - */ - { - BEGIN_BATCH(3); - OUT_BATCH(_3DSTATE_STREAMOUT << 16 | (3 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } - /* 3DSTATE_CLIP - * - * Disable the clipper. - * - * The BLORP op emits a rectangle primitive, which requires clipping to - * be disabled. From page 10 of the Sandy Bridge PRM Volume 2 Part 1 - * Section 1.3 "3D Primitives Overview": - * RECTLIST: - * Either the CLIP unit should be DISABLED, or the CLIP unit's Clip - * Mode should be set to a value other than CLIPMODE_NORMAL. - * - * Also disable perspective divide. This doesn't change the clipper's - * output, but does spare a few electrons. - */ - { - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2)); - OUT_BATCH(0); - OUT_BATCH(GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE); - OUT_BATCH(0); - ADVANCE_BATCH(); - } +static void +gen7_blorp_emit_sf_config(struct brw_context *brw, + const brw_blorp_params *params) +{ + struct intel_context *intel = &brw->intel; /* 3DSTATE_SF * @@ -276,60 +227,85 @@ gen7_blorp_exec(struct intel_context *intel, OUT_BATCH(0); ADVANCE_BATCH(); } +} - /* 3DSTATE_WM - * - * Disable PS thread dispatch (dw1.29) and enable the HiZ op. - */ - { - uint32_t dw1 = 0; - - switch (params->hiz_op) { - case GEN6_HIZ_OP_DEPTH_CLEAR: - assert(!"not implemented"); - dw1 |= GEN7_WM_DEPTH_CLEAR; - break; - case GEN6_HIZ_OP_DEPTH_RESOLVE: - dw1 |= GEN7_WM_DEPTH_RESOLVE; - break; - case GEN6_HIZ_OP_HIZ_RESOLVE: - dw1 |= GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE; - break; - default: - assert(0); - break; - } - BEGIN_BATCH(3); - OUT_BATCH(_3DSTATE_WM << 16 | (3 - 2)); - OUT_BATCH(dw1); - OUT_BATCH(0); - ADVANCE_BATCH(); +/** + * Disable thread dispatch (dw5.19) and enable the HiZ op. + */ +static void +gen7_blorp_emit_wm_config(struct brw_context *brw, + const brw_blorp_params *params) +{ + struct intel_context *intel = &brw->intel; + + uint32_t dw1 = 0; + + switch (params->hiz_op) { + case GEN6_HIZ_OP_DEPTH_CLEAR: + assert(!"not implemented"); + dw1 |= GEN7_WM_DEPTH_CLEAR; + break; + case GEN6_HIZ_OP_DEPTH_RESOLVE: + dw1 |= GEN7_WM_DEPTH_RESOLVE; + break; + case GEN6_HIZ_OP_HIZ_RESOLVE: + dw1 |= GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE; + break; + default: + assert(0); + break; } - /* 3DSTATE_PS - * - * Pixel shader dispatch is disabled above in 3DSTATE_WM, dw1.29. Despite - * that, thread dispatch info must still be specified. - * - Maximum Number of Threads (dw4.24:31) must be nonzero, as the BSpec - * states that the valid range for this field is [0x3, 0x2f]. - * - A dispatch mode must be given; that is, at least one of the - * "N Pixel Dispatch Enable" (N=8,16,32) fields must be set. This was - * discovered through simulator error messages. - */ - { - BEGIN_BATCH(8); - OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(((brw->max_wm_threads - 1) << IVB_PS_MAX_THREADS_SHIFT) | - GEN7_PS_32_DISPATCH_ENABLE); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } + BEGIN_BATCH(3); + OUT_BATCH(_3DSTATE_WM << 16 | (3 - 2)); + OUT_BATCH(dw1); + OUT_BATCH(0); + ADVANCE_BATCH(); +} + + +/** + * 3DSTATE_PS + * + * Pixel shader dispatch is disabled above in 3DSTATE_WM, dw1.29. Despite + * that, thread dispatch info must still be specified. + * - Maximum Number of Threads (dw4.24:31) must be nonzero, as the BSpec + * states that the valid range for this field is [0x3, 0x2f]. + * - A dispatch mode must be given; that is, at least one of the + * "N Pixel Dispatch Enable" (N=8,16,32) fields must be set. This was + * discovered through simulator error messages. + */ +static void +gen7_blorp_emit_ps_config(struct brw_context *brw, + const brw_blorp_params *params) +{ + struct intel_context *intel = &brw->intel; + + BEGIN_BATCH(8); + OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(((brw->max_wm_threads - 1) << IVB_PS_MAX_THREADS_SHIFT) | + GEN7_PS_32_DISPATCH_ENABLE); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); +} + + +static void +gen7_blorp_emit_depth_stencil_config(struct brw_context *brw, + const brw_blorp_params *params) +{ + struct intel_context *intel = &brw->intel; + uint32_t draw_x, draw_y; + uint32_t tile_mask_x, tile_mask_y; + + params->depth.get_draw_offsets(&draw_x, &draw_y); + gen6_blorp_compute_tile_masks(params, &tile_mask_x, &tile_mask_y); /* 3DSTATE_DEPTH_BUFFER */ { @@ -409,47 +385,83 @@ gen7_blorp_exec(struct intel_context *intel, OUT_BATCH(0); ADVANCE_BATCH(); } +} - /* 3DSTATE_CLEAR_PARAMS - * - * From the BSpec, Volume 2a.11 Windower, Section 1.5.6.3.2 - * 3DSTATE_CLEAR_PARAMS: - * [DevIVB] 3DSTATE_CLEAR_PARAMS must always be programmed in the along - * with the other Depth/Stencil state commands(i.e. 3DSTATE_DEPTH_BUFFER, - * 3DSTATE_STENCIL_BUFFER, or 3DSTATE_HIER_DEPTH_BUFFER). - */ - { - BEGIN_BATCH(3); - OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS << 16 | (3 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } - /* 3DSTATE_DRAWING_RECTANGLE */ - { - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2)); - OUT_BATCH(0); - OUT_BATCH(((params->x1 - 1) & 0xffff) | - ((params->y1 - 1) << 16)); - OUT_BATCH(0); - ADVANCE_BATCH(); - } +/* 3DSTATE_CLEAR_PARAMS + * + * From the BSpec, Volume 2a.11 Windower, Section 1.5.6.3.2 + * 3DSTATE_CLEAR_PARAMS: + * [DevIVB] 3DSTATE_CLEAR_PARAMS must always be programmed in the along + * with the other Depth/Stencil state commands(i.e. 3DSTATE_DEPTH_BUFFER, + * 3DSTATE_STENCIL_BUFFER, or 3DSTATE_HIER_DEPTH_BUFFER). + */ +static void +gen7_blorp_emit_clear_params(struct brw_context *brw, + const brw_blorp_params *params) +{ + struct intel_context *intel = &brw->intel; - /* 3DPRIMITIVE */ - { - BEGIN_BATCH(7); - OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2)); - OUT_BATCH(GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL | - _3DPRIM_RECTLIST); - OUT_BATCH(3); /* vertex count per instance */ - OUT_BATCH(0); - OUT_BATCH(1); /* instance count */ - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } + BEGIN_BATCH(3); + OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS << 16 | (3 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); +} + + +/* 3DPRIMITIVE */ +static void +gen7_blorp_emit_primitive(struct brw_context *brw, + const brw_blorp_params *params) +{ + struct intel_context *intel = &brw->intel; + + BEGIN_BATCH(7); + OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2)); + OUT_BATCH(GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL | + _3DPRIM_RECTLIST); + OUT_BATCH(3); /* vertex count per instance */ + OUT_BATCH(0); + OUT_BATCH(1); /* instance count */ + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); +} + + +/** + * \copydoc gen6_blorp_exec() + */ +void +gen7_blorp_exec(struct intel_context *intel, + const brw_blorp_params *params) +{ + struct gl_context *ctx = &intel->ctx; + struct brw_context *brw = brw_context(ctx); + uint32_t depthstencil_offset; + + gen6_blorp_emit_batch_head(brw, params); + gen6_blorp_emit_vertices(brw, params); + gen7_blorp_emit_urb_config(brw, params); + depthstencil_offset = gen6_blorp_emit_depth_stencil_state(brw, params); + gen7_blorp_emit_depth_stencil_state_pointers(brw, params, + depthstencil_offset); + gen6_blorp_emit_vs_disable(brw, params); + gen7_blorp_emit_hs_disable(brw, params); + gen7_blorp_emit_te_disable(brw, params); + gen7_blorp_emit_ds_disable(brw, params); + gen6_blorp_emit_gs_disable(brw, params); + gen7_blorp_emit_streamout_disable(brw, params); + gen6_blorp_emit_clip_disable(brw, params); + gen7_blorp_emit_sf_config(brw, params); + gen7_blorp_emit_wm_config(brw, params); + gen7_blorp_emit_ps_config(brw, params); + + gen7_blorp_emit_depth_stencil_config(brw, params); + gen7_blorp_emit_clear_params(brw, params); + gen6_blorp_emit_drawing_rectangle(brw, params); + gen7_blorp_emit_primitive(brw, params); /* See comments above at first invocation of intel_flush() in * gen6_blorp_emit_batch_head(). -- 2.7.4