From d883ec0400cca2bc40ee9da59d7dbd561bbb3913 Mon Sep 17 00:00:00 2001 From: Rafael Antognolli Date: Wed, 21 Jun 2017 11:13:48 -0700 Subject: [PATCH] i965: Convert WM_STATE to genxml on gen4-5. The code doesn't get exactly a lot simpler but at least it is in a single place, and we delete more than we add. Another good point is that you get rid of struct brw_wm_unit_state which was a third mechanism for encoding GEN state. We used to have GENXML, manual packing and these bitfield structs. Now we're down to just GENXML and some manual packing. (Khristian) Signed-off-by: Rafael Antognolli Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/Makefile.sources | 1 - src/mesa/drivers/dri/i965/brw_state.h | 1 - src/mesa/drivers/dri/i965/brw_structs.h | 121 ------------ src/mesa/drivers/dri/i965/brw_wm.h | 2 - src/mesa/drivers/dri/i965/brw_wm_state.c | 274 -------------------------- src/mesa/drivers/dri/i965/genX_state_upload.c | 191 ++++++++++++++---- 6 files changed, 153 insertions(+), 437 deletions(-) delete mode 100644 src/mesa/drivers/dri/i965/brw_wm_state.c diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index 60a41f8..431712f 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -60,7 +60,6 @@ i965_FILES = \ brw_vs_surface_state.c \ brw_wm.c \ brw_wm.h \ - brw_wm_state.c \ brw_wm_surface_state.c \ gen4_blorp_exec.h \ gen6_clip_state.c \ diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index b9fa19c..1432a68 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -83,7 +83,6 @@ extern const struct brw_tracked_state brw_wm_image_surfaces; extern const struct brw_tracked_state brw_cs_ubo_surfaces; extern const struct brw_tracked_state brw_cs_abo_surfaces; extern const struct brw_tracked_state brw_cs_image_surfaces; -extern const struct brw_tracked_state brw_wm_unit; extern const struct brw_tracked_state brw_psp_urb_cbs; diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h index 5a0d91d..fb592be 100644 --- a/src/mesa/drivers/dri/i965/brw_structs.h +++ b/src/mesa/drivers/dri/i965/brw_structs.h @@ -65,127 +65,6 @@ struct brw_urb_fence } bits1; }; -/* State structs for the various fixed function units: - */ - - -struct thread0 -{ - unsigned pad0:1; - unsigned grf_reg_count:3; - unsigned pad1:2; - unsigned kernel_start_pointer:26; /* Offset from GENERAL_STATE_BASE */ -}; - -struct thread1 -{ - unsigned ext_halt_exception_enable:1; - unsigned sw_exception_enable:1; - unsigned mask_stack_exception_enable:1; - unsigned timeout_exception_enable:1; - unsigned illegal_op_exception_enable:1; - unsigned pad0:3; - unsigned depth_coef_urb_read_offset:6; /* WM only */ - unsigned pad1:2; - unsigned floating_point_mode:1; - unsigned thread_priority:1; - unsigned binding_table_entry_count:8; - unsigned pad3:5; - unsigned single_program_flow:1; -}; - -struct thread2 -{ - unsigned per_thread_scratch_space:4; - unsigned pad0:6; - unsigned scratch_space_base_pointer:22; -}; - - -struct thread3 -{ - unsigned dispatch_grf_start_reg:4; - unsigned urb_entry_read_offset:6; - unsigned pad0:1; - unsigned urb_entry_read_length:6; - unsigned pad1:1; - unsigned const_urb_entry_read_offset:6; - unsigned pad2:1; - unsigned const_urb_entry_read_length:6; - unsigned pad3:1; -}; - -struct brw_wm_unit_state -{ - struct thread0 thread0; - struct thread1 thread1; - struct thread2 thread2; - struct thread3 thread3; - - struct { - unsigned stats_enable:1; - unsigned depth_buffer_clear:1; - unsigned sampler_count:3; - unsigned sampler_state_pointer:27; - } wm4; - - struct - { - unsigned enable_8_pix:1; - unsigned enable_16_pix:1; - unsigned enable_32_pix:1; - unsigned enable_con_32_pix:1; - unsigned enable_con_64_pix:1; - unsigned pad0:1; - - /* These next four bits are for Ironlake+ */ - unsigned fast_span_coverage_enable:1; - unsigned depth_buffer_clear:1; - unsigned depth_buffer_resolve_enable:1; - unsigned hierarchical_depth_buffer_resolve_enable:1; - - unsigned legacy_global_depth_bias:1; - unsigned line_stipple:1; - unsigned depth_offset:1; - unsigned polygon_stipple:1; - unsigned line_aa_region_width:2; - unsigned line_endcap_aa_region_width:2; - unsigned early_depth_test:1; - unsigned thread_dispatch_enable:1; - unsigned program_uses_depth:1; - unsigned program_computes_depth:1; - unsigned program_uses_killpixel:1; - unsigned legacy_line_rast: 1; - unsigned transposed_urb_read_enable:1; - unsigned max_threads:7; - } wm5; - - float global_depth_offset_constant; - float global_depth_offset_scale; - - /* for Ironlake only */ - struct { - unsigned pad0:1; - unsigned grf_reg_count_1:3; - unsigned pad1:2; - unsigned kernel_start_pointer_1:26; - } wm8; - - struct { - unsigned pad0:1; - unsigned grf_reg_count_2:3; - unsigned pad1:2; - unsigned kernel_start_pointer_2:26; - } wm9; - - struct { - unsigned pad0:1; - unsigned grf_reg_count_3:3; - unsigned pad1:2; - unsigned kernel_start_pointer_3:26; - } wm10; -}; - struct gen5_sampler_default_color { uint8_t ub[4]; float f[4]; diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index 613172a..113cdf3 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -41,8 +41,6 @@ extern "C" { #endif -bool brw_color_buffer_write_enabled(struct brw_context *brw); - void brw_upload_wm_prog(struct brw_context *brw); diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c deleted file mode 100644 index 69bbeb2..0000000 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ /dev/null @@ -1,274 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - - -#include "intel_batchbuffer.h" -#include "intel_fbo.h" -#include "brw_context.h" -#include "brw_state.h" -#include "brw_defines.h" -#include "brw_wm.h" -#include "compiler/nir/nir.h" - -/*********************************************************************** - * WM unit - fragment programs and rasterization - */ - -bool -brw_color_buffer_write_enabled(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - /* BRW_NEW_FRAGMENT_PROGRAM */ - const struct gl_program *fp = brw->fragment_program; - unsigned i; - - /* _NEW_BUFFERS */ - for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { - struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i]; - uint64_t outputs_written = fp->info.outputs_written; - - /* _NEW_COLOR */ - if (rb && (outputs_written & BITFIELD64_BIT(FRAG_RESULT_COLOR) || - outputs_written & BITFIELD64_BIT(FRAG_RESULT_DATA0 + i)) && - (ctx->Color.ColorMask[i][0] || - ctx->Color.ColorMask[i][1] || - ctx->Color.ColorMask[i][2] || - ctx->Color.ColorMask[i][3])) { - return true; - } - } - - return false; -} - -/** - * Setup wm hardware state. See page 225 of Volume 2 - */ -static void -brw_upload_wm_unit(struct brw_context *brw) -{ - const struct gen_device_info *devinfo = &brw->screen->devinfo; - struct gl_context *ctx = &brw->ctx; - /* BRW_NEW_FRAGMENT_PROGRAM */ - const struct gl_program *fp = brw->fragment_program; - /* BRW_NEW_FS_PROG_DATA */ - const struct brw_wm_prog_data *prog_data = - brw_wm_prog_data(brw->wm.base.prog_data); - struct brw_wm_unit_state *wm; - - wm = brw_state_batch(brw, sizeof(*wm), 32, &brw->wm.base.state_offset); - memset(wm, 0, sizeof(*wm)); - - if (prog_data->dispatch_8 && prog_data->dispatch_16) { - /* These two fields should be the same pre-gen6, which is why we - * only have one hardware field to program for both dispatch - * widths. - */ - assert(prog_data->base.dispatch_grf_start_reg == - prog_data->dispatch_grf_start_reg_2); - } - - /* BRW_NEW_PROGRAM_CACHE | BRW_NEW_FS_PROG_DATA */ - wm->wm5.enable_8_pix = prog_data->dispatch_8; - wm->wm5.enable_16_pix = prog_data->dispatch_16; - - if (prog_data->dispatch_8 || prog_data->dispatch_16) { - wm->thread0.grf_reg_count = prog_data->reg_blocks_0; - wm->thread0.kernel_start_pointer = - brw_program_reloc(brw, - brw->wm.base.state_offset + - offsetof(struct brw_wm_unit_state, thread0), - brw->wm.base.prog_offset + - (wm->thread0.grf_reg_count << 1)) >> 6; - } - - if (prog_data->prog_offset_2) { - wm->wm9.grf_reg_count_2 = prog_data->reg_blocks_2; - wm->wm9.kernel_start_pointer_2 = - brw_program_reloc(brw, - brw->wm.base.state_offset + - offsetof(struct brw_wm_unit_state, wm9), - brw->wm.base.prog_offset + - prog_data->prog_offset_2 + - (wm->wm9.grf_reg_count_2 << 1)) >> 6; - } - - wm->thread1.depth_coef_urb_read_offset = 1; - if (prog_data->base.use_alt_mode) - wm->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; - else - wm->thread1.floating_point_mode = BRW_FLOATING_POINT_IEEE_754; - - wm->thread1.binding_table_entry_count = - prog_data->base.binding_table.size_bytes / 4; - - if (prog_data->base.total_scratch != 0) { - wm->thread2.scratch_space_base_pointer = - brw->wm.base.scratch_bo->offset64 >> 10; /* reloc */ - wm->thread2.per_thread_scratch_space = - ffs(brw->wm.base.per_thread_scratch) - 11; - } else { - wm->thread2.scratch_space_base_pointer = 0; - wm->thread2.per_thread_scratch_space = 0; - } - - wm->thread3.dispatch_grf_start_reg = - prog_data->base.dispatch_grf_start_reg; - wm->thread3.urb_entry_read_length = - prog_data->num_varying_inputs * 2; - wm->thread3.urb_entry_read_offset = 0; - wm->thread3.const_urb_entry_read_length = - prog_data->base.curb_read_length; - /* BRW_NEW_PUSH_CONSTANT_ALLOCATION */ - wm->thread3.const_urb_entry_read_offset = brw->curbe.wm_start * 2; - - if (brw->gen == 5) - wm->wm4.sampler_count = 0; /* hardware requirement */ - else { - wm->wm4.sampler_count = (brw->wm.base.sampler_count + 1) / 4; - } - - if (brw->wm.base.sampler_count) { - /* BRW_NEW_SAMPLER_STATE_TABLE - reloc */ - wm->wm4.sampler_state_pointer = (brw->batch.bo->offset64 + - brw->wm.base.sampler_offset) >> 5; - } else { - wm->wm4.sampler_state_pointer = 0; - } - - /* BRW_NEW_FRAGMENT_PROGRAM */ - wm->wm5.program_uses_depth = prog_data->uses_src_depth; - wm->wm5.program_computes_depth = (fp->info.outputs_written & - BITFIELD64_BIT(FRAG_RESULT_DEPTH)) != 0; - /* _NEW_BUFFERS - * Override for NULL depthbuffer case, required by the Pixel Shader Computed - * Depth field. - */ - if (!intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH)) - wm->wm5.program_computes_depth = 0; - - /* _NEW_COLOR */ - wm->wm5.program_uses_killpixel = - prog_data->uses_kill || ctx->Color.AlphaEnabled; - - wm->wm5.max_threads = devinfo->max_wm_threads - 1; - - /* _NEW_BUFFERS | _NEW_COLOR */ - if (brw_color_buffer_write_enabled(brw) || - wm->wm5.program_uses_killpixel || - wm->wm5.program_computes_depth) { - wm->wm5.thread_dispatch_enable = 1; - } - - wm->wm5.legacy_line_rast = 0; - wm->wm5.legacy_global_depth_bias = 0; - wm->wm5.early_depth_test = 1; /* never need to disable */ - wm->wm5.line_aa_region_width = 0; - wm->wm5.line_endcap_aa_region_width = 1; - - /* _NEW_POLYGONSTIPPLE */ - wm->wm5.polygon_stipple = ctx->Polygon.StippleFlag; - - /* _NEW_POLYGON */ - if (ctx->Polygon.OffsetFill) { - wm->wm5.depth_offset = 1; - /* Something weird going on with legacy_global_depth_bias, - * offset_constant, scaling and MRD. This value passes glean - * but gives some odd results elsewere (eg. the - * quad-offset-units test). - */ - wm->global_depth_offset_constant = ctx->Polygon.OffsetUnits * 2; - - /* This is the only value that passes glean: - */ - wm->global_depth_offset_scale = ctx->Polygon.OffsetFactor; - } - - /* _NEW_LINE */ - wm->wm5.line_stipple = ctx->Line.StippleFlag; - - /* BRW_NEW_STATS_WM */ - if (brw->stats_wm) - wm->wm4.stats_enable = 1; - - /* Emit scratch space relocation */ - if (prog_data->base.total_scratch != 0) { - brw_emit_reloc(&brw->batch, - brw->wm.base.state_offset + - offsetof(struct brw_wm_unit_state, thread2), - brw->wm.base.scratch_bo, - wm->thread2.per_thread_scratch_space, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); - } - - /* Emit sampler state relocation */ - if (brw->wm.base.sampler_count != 0) { - brw_emit_reloc(&brw->batch, - brw->wm.base.state_offset + - offsetof(struct brw_wm_unit_state, wm4), - brw->batch.bo, - brw->wm.base.sampler_offset | wm->wm4.stats_enable | - (wm->wm4.sampler_count << 2), - I915_GEM_DOMAIN_INSTRUCTION, 0); - } - - brw->ctx.NewDriverState |= BRW_NEW_GEN4_UNIT_STATE; - - /* _NEW_POLGYON */ - if (brw->wm.offset_clamp != ctx->Polygon.OffsetClamp) { - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP << 16 | (2 - 2)); - OUT_BATCH_F(ctx->Polygon.OffsetClamp); - ADVANCE_BATCH(); - - brw->wm.offset_clamp = ctx->Polygon.OffsetClamp; - } -} - -const struct brw_tracked_state brw_wm_unit = { - .dirty = { - .mesa = _NEW_BUFFERS | - _NEW_COLOR | - _NEW_LINE | - _NEW_POLYGON | - _NEW_POLYGONSTIPPLE, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_PUSH_CONSTANT_ALLOCATION | - BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_FS_PROG_DATA | - BRW_NEW_PROGRAM_CACHE | - BRW_NEW_SAMPLER_STATE_TABLE | - BRW_NEW_STATS_WM, - }, - .emit = brw_upload_wm_unit, -}; diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c index aa87e9c..64bcc2f 100644 --- a/src/mesa/drivers/dri/i965/genX_state_upload.c +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c @@ -1738,7 +1738,33 @@ static const struct brw_tracked_state genX(sf_state) = { /* ---------------------------------------------------------------------- */ -#if GEN_GEN >= 6 +static bool +brw_color_buffer_write_enabled(struct brw_context *brw) +{ + struct gl_context *ctx = &brw->ctx; + /* BRW_NEW_FRAGMENT_PROGRAM */ + const struct gl_program *fp = brw->fragment_program; + unsigned i; + + /* _NEW_BUFFERS */ + for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { + struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i]; + uint64_t outputs_written = fp->info.outputs_written; + + /* _NEW_COLOR */ + if (rb && (outputs_written & BITFIELD64_BIT(FRAG_RESULT_COLOR) || + outputs_written & BITFIELD64_BIT(FRAG_RESULT_DATA0 + i)) && + (ctx->Color.ColorMask[i][0] || + ctx->Color.ColorMask[i][1] || + ctx->Color.ColorMask[i][2] || + ctx->Color.ColorMask[i][3])) { + return true; + } + } + + return false; +} + static void genX(upload_wm)(struct brw_context *brw) { @@ -1750,11 +1776,10 @@ genX(upload_wm)(struct brw_context *brw) UNUSED bool writes_depth = wm_prog_data->computed_depth_mode != BRW_PSCDEPTH_OFF; + UNUSED struct brw_stage_state *stage_state = &brw->wm.base; + UNUSED const struct gen_device_info *devinfo = &brw->screen->devinfo; -#if GEN_GEN < 7 - const struct brw_stage_state *stage_state = &brw->wm.base; - const struct gen_device_info *devinfo = &brw->screen->devinfo; - +#if GEN_GEN == 6 /* We can't fold this into gen6_upload_wm_push_constants(), because * according to the SNB PRM, vol 2 part 1 section 7.2.2 * (3DSTATE_CONSTANT_PS [DevSNB]): @@ -1773,27 +1798,94 @@ genX(upload_wm)(struct brw_context *brw) } #endif +#if GEN_GEN >= 6 brw_batch_emit(brw, GENX(3DSTATE_WM), wm) { - wm.StatisticsEnable = true; wm.LineAntialiasingRegionWidth = _10pixels; wm.LineEndCapAntialiasingRegionWidth = _05pixels; + wm.PointRasterizationRule = RASTRULE_UPPER_RIGHT; + wm.BarycentricInterpolationMode = wm_prog_data->barycentric_interp_modes; +#else + ctx->NewDriverState |= BRW_NEW_GEN4_UNIT_STATE; + brw_state_emit(brw, GENX(WM_STATE), 64, &stage_state->state_offset, wm) { + if (wm_prog_data->dispatch_8 && wm_prog_data->dispatch_16) { + /* These two fields should be the same pre-gen6, which is why we + * only have one hardware field to program for both dispatch + * widths. + */ + assert(wm_prog_data->base.dispatch_grf_start_reg == + wm_prog_data->dispatch_grf_start_reg_2); + } + + if (wm_prog_data->dispatch_8 || wm_prog_data->dispatch_16) + wm.GRFRegisterCount0 = wm_prog_data->reg_blocks_0; + + if (stage_state->sampler_count) + wm.SamplerStatePointer = + instruction_ro_bo(brw->batch.bo, stage_state->sampler_offset); +#if GEN_GEN == 5 + if (wm_prog_data->prog_offset_2) + wm.GRFRegisterCount2 = wm_prog_data->reg_blocks_2; +#endif + + wm.SetupURBEntryReadLength = wm_prog_data->num_varying_inputs * 2; + wm.ConstantURBEntryReadLength = wm_prog_data->base.curb_read_length; + /* BRW_NEW_PUSH_CONSTANT_ALLOCATION */ + wm.ConstantURBEntryReadOffset = brw->curbe.wm_start * 2; + wm.EarlyDepthTestEnable = true; + wm.LineAntialiasingRegionWidth = _05pixels; + wm.LineEndCapAntialiasingRegionWidth = _10pixels; + + /* _NEW_POLYGON */ + if (ctx->Polygon.OffsetFill) { + wm.GlobalDepthOffsetEnable = true; + /* Something weird going on with legacy_global_depth_bias, + * offset_constant, scaling and MRD. This value passes glean + * but gives some odd results elsewere (eg. the + * quad-offset-units test). + */ + wm.GlobalDepthOffsetConstant = ctx->Polygon.OffsetUnits * 2; + + /* This is the only value that passes glean: + */ + wm.GlobalDepthOffsetScale = ctx->Polygon.OffsetFactor; + } + + wm.DepthCoefficientURBReadOffset = 1; +#endif + + /* BRW_NEW_STATS_WM */ + wm.StatisticsEnable = GEN_GEN >= 6 || brw->stats_wm; + #if GEN_GEN < 7 if (wm_prog_data->base.use_alt_mode) - wm.FloatingPointMode = Alternate; + wm.FloatingPointMode = FLOATING_POINT_MODE_Alternate; - wm.SamplerCount = DIV_ROUND_UP(stage_state->sampler_count, 4); - wm.BindingTableEntryCount = wm_prog_data->base.binding_table.size_bytes / 4; + wm.SamplerCount = GEN_GEN == 5 ? + 0 : DIV_ROUND_UP(stage_state->sampler_count, 4); + + wm.BindingTableEntryCount = + wm_prog_data->base.binding_table.size_bytes / 4; wm.MaximumNumberofThreads = devinfo->max_wm_threads - 1; wm._8PixelDispatchEnable = wm_prog_data->dispatch_8; wm._16PixelDispatchEnable = wm_prog_data->dispatch_16; wm.DispatchGRFStartRegisterForConstantSetupData0 = wm_prog_data->base.dispatch_grf_start_reg; - wm.DispatchGRFStartRegisterForConstantSetupData2 = - wm_prog_data->dispatch_grf_start_reg_2; - wm.KernelStartPointer0 = stage_state->prog_offset; - wm.KernelStartPointer2 = stage_state->prog_offset + - wm_prog_data->prog_offset_2; + if (GEN_GEN == 6 || + wm_prog_data->dispatch_8 || wm_prog_data->dispatch_16) { + wm.KernelStartPointer0 = KSP_ro(brw, + stage_state->prog_offset); + } + +#if GEN_GEN >= 5 + if (GEN_GEN == 6 || wm_prog_data->prog_offset_2) { + wm.KernelStartPointer2 = + KSP_ro(brw, stage_state->prog_offset + + wm_prog_data->prog_offset_2); + } +#endif + +#if GEN_GEN == 6 wm.DualSourceBlendEnable = wm_prog_data->dual_src_blend && (ctx->Color.BlendEnabled & 1) && ctx->Color.Blend[0]._UsesDualSrc; @@ -1817,42 +1909,34 @@ genX(upload_wm)(struct brw_context *brw) else wm.PositionXYOffsetSelect = POSOFFSET_NONE; + wm.DispatchGRFStartRegisterForConstantSetupData2 = + wm_prog_data->dispatch_grf_start_reg_2; +#endif + if (wm_prog_data->base.total_scratch) { wm.ScratchSpaceBasePointer = - render_bo(stage_state->scratch_bo, - ffs(stage_state->per_thread_scratch) - 11); + render_bo(stage_state->scratch_bo, 0); + wm.PerThreadScratchSpace = + ffs(stage_state->per_thread_scratch) - 11; } wm.PixelShaderComputedDepth = writes_depth; #endif - wm.PointRasterizationRule = RASTRULE_UPPER_RIGHT; - /* _NEW_LINE */ wm.LineStippleEnable = ctx->Line.StippleFlag; /* _NEW_POLYGON */ wm.PolygonStippleEnable = ctx->Polygon.StippleFlag; - wm.BarycentricInterpolationMode = wm_prog_data->barycentric_interp_modes; #if GEN_GEN < 8 - /* _NEW_BUFFERS */ - const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1; - wm.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth; +#if GEN_GEN >= 6 wm.PixelShaderUsesSourceW = wm_prog_data->uses_src_w; - if (wm_prog_data->uses_kill || - _mesa_is_alpha_test_enabled(ctx) || - _mesa_is_alpha_to_coverage_enabled(ctx) || - wm_prog_data->uses_omask) { - wm.PixelShaderKillsPixel = true; - } - /* _NEW_BUFFERS | _NEW_COLOR */ - if (brw_color_buffer_write_enabled(brw) || writes_depth || - wm_prog_data->has_side_effects || wm.PixelShaderKillsPixel) { - wm.ThreadDispatchEnable = true; - } + /* _NEW_BUFFERS */ + const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1; + if (multisampled_fbo) { /* _NEW_MULTISAMPLE */ if (ctx->Multisample.Enabled) @@ -1868,6 +1952,21 @@ genX(upload_wm)(struct brw_context *brw) wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL; wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE; } +#endif + wm.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth; + if (wm_prog_data->uses_kill || + _mesa_is_alpha_test_enabled(ctx) || + _mesa_is_alpha_to_coverage_enabled(ctx) || + (GEN_GEN >= 6 && wm_prog_data->uses_omask)) { + wm.PixelShaderKillsPixel = true; + } + + /* _NEW_BUFFERS | _NEW_COLOR */ + if (brw_color_buffer_write_enabled(brw) || writes_depth || + wm.PixelShaderKillsPixel || + (GEN_GEN >= 6 && wm_prog_data->has_side_effects)) { + wm.ThreadDispatchEnable = true; + } #if GEN_GEN >= 7 wm.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode; @@ -1898,6 +1997,16 @@ genX(upload_wm)(struct brw_context *brw) wm.EarlyDepthStencilControl = EDSC_PSEXEC; #endif } + +#if GEN_GEN <= 5 + if (brw->wm.offset_clamp != ctx->Polygon.OffsetClamp) { + brw_batch_emit(brw, GENX(3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP), clamp) { + clamp.GlobalDepthOffsetClamp = ctx->Polygon.OffsetClamp; + } + + brw->wm.offset_clamp = ctx->Polygon.OffsetClamp; + } +#endif } static const struct brw_tracked_state genX(wm_state) = { @@ -1905,17 +2014,23 @@ static const struct brw_tracked_state genX(wm_state) = { .mesa = _NEW_LINE | _NEW_POLYGON | (GEN_GEN < 8 ? _NEW_BUFFERS | - _NEW_COLOR | - _NEW_MULTISAMPLE : + _NEW_COLOR : 0) | - (GEN_GEN < 7 ? _NEW_PROGRAM_CONSTANTS : 0), + (GEN_GEN == 6 ? _NEW_PROGRAM_CONSTANTS : 0) | + (GEN_GEN < 6 ? _NEW_POLYGONSTIPPLE : 0) | + (GEN_GEN < 8 && GEN_GEN >= 6 ? _NEW_MULTISAMPLE : 0), .brw = BRW_NEW_BLORP | BRW_NEW_FS_PROG_DATA | + (GEN_GEN < 6 ? BRW_NEW_PUSH_CONSTANT_ALLOCATION | + BRW_NEW_FRAGMENT_PROGRAM | + BRW_NEW_PROGRAM_CACHE | + BRW_NEW_SAMPLER_STATE_TABLE | + BRW_NEW_STATS_WM + : 0) | (GEN_GEN < 7 ? BRW_NEW_BATCH : BRW_NEW_CONTEXT), }, .emit = genX(upload_wm), }; -#endif /* ---------------------------------------------------------------------- */ @@ -5217,7 +5332,7 @@ genX(init_atoms)(struct brw_context *brw) &genX(vs_samplers), /* These set up state for brw_psp_urb_cbs */ - &brw_wm_unit, + &genX(wm_state), &genX(sf_clip_viewport), &genX(sf_state), &genX(vs_state), /* always required, enabled or not */ -- 2.7.4