Before the series ending in commit
3c9dc2d31b80fc73bffa1f40a91443a53229c8e2, which made us
dynamically assign our binding table indices, we didn't really track our
binding table count per shader, so we never filled in these fields.
Affects cairo-gl trace runtime by -2.47953% +/- 1.07281% (n=20)
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
*/
vs->thread1.single_program_flow = (brw->gen == 5);
- vs->thread1.binding_table_entry_count = 0;
+ vs->thread1.binding_table_entry_count =
+ brw->vs.prog_data->base.base.binding_table.size_bytes / 4;
if (brw->vs.prog_data->base.total_scratch != 0) {
vs->thread2.scratch_space_base_pointer =
else
wm->thread1.floating_point_mode = BRW_FLOATING_POINT_IEEE_754;
- wm->thread1.binding_table_entry_count = 0;
+ wm->thread1.binding_table_entry_count =
+ brw->wm.prog_data->base.binding_table.size_bytes / 4;
if (brw->wm.prog_data->total_scratch != 0) {
wm->thread2.scratch_space_base_pointer =
OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2));
OUT_BATCH(stage_state->prog_offset);
OUT_BATCH(floating_point_mode |
- ((ALIGN(stage_state->sampler_count, 4)/4) << GEN6_VS_SAMPLER_COUNT_SHIFT));
+ ((ALIGN(stage_state->sampler_count, 4)/4) << GEN6_VS_SAMPLER_COUNT_SHIFT) |
+ ((brw->vs.prog_data->base.base.binding_table.size_bytes / 4) <<
+ GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
if (brw->vs.prog_data->base.total_scratch) {
OUT_RELOC(stage_state->scratch_bo,
/* CACHE_NEW_SAMPLER */
dw2 |= (ALIGN(brw->wm.base.sampler_count, 4) / 4) <<
GEN6_WM_SAMPLER_COUNT_SHIFT;
+
+ /* CACHE_NEW_WM_PROG */
+ dw2 |= ((brw->wm.prog_data->base.binding_table.size_bytes / 4) <<
+ GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT);
+
dw4 |= (brw->wm.prog_data->first_curbe_grf <<
GEN6_WM_DISPATCH_START_GRF_SHIFT_0);
dw4 |= (brw->wm.prog_data->first_curbe_grf_16 <<
OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
OUT_BATCH(stage_state->prog_offset);
OUT_BATCH(((ALIGN(stage_state->sampler_count, 4)/4) <<
- GEN6_GS_SAMPLER_COUNT_SHIFT));
+ GEN6_GS_SAMPLER_COUNT_SHIFT) |
+ ((brw->gs.prog_data->base.base.binding_table.size_bytes / 4) <<
+ GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
if (brw->gs.prog_data->base.total_scratch) {
OUT_RELOC(stage_state->scratch_bo,
OUT_BATCH(stage_state->prog_offset);
OUT_BATCH(floating_point_mode |
((ALIGN(stage_state->sampler_count, 4)/4) <<
- GEN6_VS_SAMPLER_COUNT_SHIFT));
+ GEN6_VS_SAMPLER_COUNT_SHIFT) |
+ ((brw->vs.prog_data->base.base.binding_table.size_bytes / 4) <<
+ GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
if (brw->vs.prog_data->base.total_scratch) {
OUT_RELOC(stage_state->scratch_bo,
dw2 |=
(ALIGN(brw->wm.base.sampler_count, 4) / 4) << GEN7_PS_SAMPLER_COUNT_SHIFT;
+ /* CACHE_NEW_WM_PROG */
+ dw2 |= ((brw->wm.prog_data->base.binding_table.size_bytes / 4) <<
+ GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT);
+
/* Use ALT floating point mode for ARB fragment programs, because they
* require 0^0 == 1. Even though _CurrentFragmentProgram is used for
* rendering, CurrentFragmentProgram is used for this check to