From 9bfa987fb00a4e0471bcdb4948c8f416d7c5b562 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Thu, 28 May 2015 13:43:56 +0800 Subject: [PATCH] ilo: embed ilo_state_urb in ilo_state_vector --- src/gallium/drivers/ilo/core/ilo_builder_3d_top.h | 301 ++++++---------------- src/gallium/drivers/ilo/ilo_blitter.h | 2 + src/gallium/drivers/ilo/ilo_blitter_rectlist.c | 3 + src/gallium/drivers/ilo/ilo_render.c | 6 + src/gallium/drivers/ilo/ilo_render_gen.h | 2 + src/gallium/drivers/ilo/ilo_render_gen6.c | 65 +---- src/gallium/drivers/ilo/ilo_render_gen7.c | 98 ++----- src/gallium/drivers/ilo/ilo_state.c | 55 ++++ src/gallium/drivers/ilo/ilo_state.h | 3 + 9 files changed, 185 insertions(+), 350 deletions(-) diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h index 3a25221..2a475cb 100644 --- a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h +++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h @@ -38,290 +38,153 @@ #include "ilo_state_3d.h" #include "ilo_state_sampler.h" #include "ilo_state_sol.h" +#include "ilo_state_urb.h" #include "ilo_builder.h" static inline void gen6_3DSTATE_URB(struct ilo_builder *builder, - int vs_total_size, int gs_total_size, - int vs_entry_size, int gs_entry_size) + const struct ilo_state_urb *urb) { const uint8_t cmd_len = 3; - const int row_size = 128; /* 1024 bits */ - int vs_alloc_size, gs_alloc_size; - int vs_num_entries, gs_num_entries; uint32_t *dw; - ILO_DEV_ASSERT(builder->dev, 6, 6); - - /* in 1024-bit URB rows */ - vs_alloc_size = (vs_entry_size + row_size - 1) / row_size; - gs_alloc_size = (gs_entry_size + row_size - 1) / row_size; - - /* the valid range is [1, 5] */ - if (!vs_alloc_size) - vs_alloc_size = 1; - if (!gs_alloc_size) - gs_alloc_size = 1; - assert(vs_alloc_size <= 5 && gs_alloc_size <= 5); - - /* the valid range is [24, 256] in multiples of 4 */ - vs_num_entries = (vs_total_size / row_size / vs_alloc_size) & ~3; - if (vs_num_entries > 256) - vs_num_entries = 
256; - assert(vs_num_entries >= 24); - - /* the valid range is [0, 256] in multiples of 4 */ - gs_num_entries = (gs_total_size / row_size / gs_alloc_size) & ~3; - if (gs_num_entries > 256) - gs_num_entries = 256; - ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_URB) | (cmd_len - 2); - dw[1] = (vs_alloc_size - 1) << GEN6_URB_DW1_VS_ENTRY_SIZE__SHIFT | - vs_num_entries << GEN6_URB_DW1_VS_ENTRY_COUNT__SHIFT; - dw[2] = gs_num_entries << GEN6_URB_DW2_GS_ENTRY_COUNT__SHIFT | - (gs_alloc_size - 1) << GEN6_URB_DW2_GS_ENTRY_SIZE__SHIFT; + /* see urb_set_gen6_3DSTATE_URB() */ + dw[1] = urb->urb[0]; + dw[2] = urb->urb[1]; } static inline void -gen7_3dstate_push_constant_alloc(struct ilo_builder *builder, - int subop, int offset, int size) +gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(struct ilo_builder *builder, + const struct ilo_state_urb *urb) { - const uint32_t cmd = GEN6_RENDER_TYPE_RENDER | - GEN6_RENDER_SUBTYPE_3D | - subop; const uint8_t cmd_len = 2; - const int slice_count = ((ilo_dev_gen(builder->dev) == ILO_GEN(7.5) && - builder->dev->gt == 3) || - ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 2 : 1; uint32_t *dw; - int end; - - ILO_DEV_ASSERT(builder->dev, 7, 8); - - /* VS, HS, DS, GS, and PS variants */ - assert(subop >= GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_VS && - subop <= GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_PS); - - /* - * From the Ivy Bridge PRM, volume 2 part 1, page 68: - * - * "(A table that says the maximum size of each constant buffer is - * 16KB") - * - * From the Ivy Bridge PRM, volume 2 part 1, page 115: - * - * "The sum of the Constant Buffer Offset and the Constant Buffer Size - * may not exceed the maximum value of the Constant Buffer Size." - * - * Thus, the valid range of buffer end is [0KB, 16KB]. 
- */ - end = (offset + size) / 1024; - if (end > 16 * slice_count) { - assert(!"invalid constant buffer end"); - end = 16 * slice_count; - } - - /* the valid range of buffer offset is [0KB, 15KB] */ - offset = (offset + 1023) / 1024; - if (offset > 15 * slice_count) { - assert(!"invalid constant buffer offset"); - offset = 15 * slice_count; - } - - if (offset > end) { - assert(!size); - offset = end; - } - - /* the valid range of buffer size is [0KB, 15KB] */ - size = end - offset; - if (size > 15 * slice_count) { - assert(!"invalid constant buffer size"); - size = 15 * slice_count; - } - - assert(offset % slice_count == 0 && size % slice_count == 0); ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = cmd | (cmd_len - 2); - dw[1] = offset << GEN7_PCB_ALLOC_DW1_OFFSET__SHIFT | - size; -} - -static inline void -gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(struct ilo_builder *builder, - int offset, int size) -{ - gen7_3dstate_push_constant_alloc(builder, - GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_VS, offset, size); + dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PUSH_CONSTANT_ALLOC_VS) | + (cmd_len - 2); + /* see urb_set_gen7_3dstate_push_constant_alloc() */ + dw[1] = urb->pcb[0]; } static inline void gen7_3DSTATE_PUSH_CONSTANT_ALLOC_HS(struct ilo_builder *builder, - int offset, int size) + const struct ilo_state_urb *urb) { - gen7_3dstate_push_constant_alloc(builder, - GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_HS, offset, size); + const uint8_t cmd_len = 2; + uint32_t *dw; + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + + dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PUSH_CONSTANT_ALLOC_HS) | + (cmd_len - 2); + /* see urb_set_gen7_3dstate_push_constant_alloc() */ + dw[1] = urb->pcb[1]; } static inline void gen7_3DSTATE_PUSH_CONSTANT_ALLOC_DS(struct ilo_builder *builder, - int offset, int size) + const struct ilo_state_urb *urb) { - gen7_3dstate_push_constant_alloc(builder, - GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_DS, offset, size); + const uint8_t cmd_len = 2; 
+ uint32_t *dw; + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + + dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PUSH_CONSTANT_ALLOC_DS) | + (cmd_len - 2); + /* see urb_set_gen7_3dstate_push_constant_alloc() */ + dw[1] = urb->pcb[2]; } static inline void gen7_3DSTATE_PUSH_CONSTANT_ALLOC_GS(struct ilo_builder *builder, - int offset, int size) + const struct ilo_state_urb *urb) { - gen7_3dstate_push_constant_alloc(builder, - GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_GS, offset, size); -} + const uint8_t cmd_len = 2; + uint32_t *dw; -static inline void -gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(struct ilo_builder *builder, - int offset, int size) -{ - gen7_3dstate_push_constant_alloc(builder, - GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_PS, offset, size); + ilo_builder_batch_pointer(builder, cmd_len, &dw); + + dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PUSH_CONSTANT_ALLOC_GS) | + (cmd_len - 2); + /* see urb_set_gen7_3dstate_push_constant_alloc() */ + dw[1] = urb->pcb[3]; } static inline void -gen7_3dstate_urb(struct ilo_builder *builder, - int subop, int offset, int size, - int entry_size) +gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(struct ilo_builder *builder, + const struct ilo_state_urb *urb) { - const uint32_t cmd = GEN6_RENDER_TYPE_RENDER | - GEN6_RENDER_SUBTYPE_3D | - subop; const uint8_t cmd_len = 2; - const int row_size = 64; /* 512 bits */ - int alloc_size, num_entries, min_entries, max_entries; uint32_t *dw; - ILO_DEV_ASSERT(builder->dev, 7, 8); - - /* VS, HS, DS, and GS variants */ - assert(subop >= GEN7_RENDER_OPCODE_3DSTATE_URB_VS && - subop <= GEN7_RENDER_OPCODE_3DSTATE_URB_GS); - - /* in multiples of 8KB */ - assert(offset % 8192 == 0); - offset /= 8192; - - /* in multiple of 512-bit rows */ - alloc_size = (entry_size + row_size - 1) / row_size; - if (!alloc_size) - alloc_size = 1; - - /* - * From the Ivy Bridge PRM, volume 2 part 1, page 34: - * - * "VS URB Entry Allocation Size equal to 4(5 512-bit URB rows) may - * cause performance to decrease due to banking in 
the URB. Element - * sizes of 16 to 20 should be programmed with six 512-bit URB rows." - */ - if (subop == GEN7_RENDER_OPCODE_3DSTATE_URB_VS && alloc_size == 5) - alloc_size = 6; - - /* in multiples of 8 */ - num_entries = (size / row_size / alloc_size) & ~7; - - switch (subop) { - case GEN7_RENDER_OPCODE_3DSTATE_URB_VS: - switch (ilo_dev_gen(builder->dev)) { - case ILO_GEN(8): - max_entries = 2560; - min_entries = 64; - break; - case ILO_GEN(7.5): - max_entries = (builder->dev->gt >= 2) ? 1664 : 640; - min_entries = (builder->dev->gt >= 2) ? 64 : 32; - break; - case ILO_GEN(7): - default: - max_entries = (builder->dev->gt == 2) ? 704 : 512; - min_entries = 32; - break; - } - - assert(num_entries >= min_entries); - if (num_entries > max_entries) - num_entries = max_entries; - break; - case GEN7_RENDER_OPCODE_3DSTATE_URB_HS: - max_entries = (builder->dev->gt == 2) ? 64 : 32; - if (num_entries > max_entries) - num_entries = max_entries; - break; - case GEN7_RENDER_OPCODE_3DSTATE_URB_DS: - if (num_entries) - assert(num_entries >= 138); - break; - case GEN7_RENDER_OPCODE_3DSTATE_URB_GS: - switch (ilo_dev_gen(builder->dev)) { - case ILO_GEN(8): - max_entries = 960; - break; - case ILO_GEN(7.5): - max_entries = (builder->dev->gt >= 2) ? 640 : 256; - break; - case ILO_GEN(7): - default: - max_entries = (builder->dev->gt == 2) ? 
320 : 192; - break; - } - - if (num_entries > max_entries) - num_entries = max_entries; - break; - default: - break; - } - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = cmd | (cmd_len - 2); - dw[1] = offset << GEN7_URB_DW1_OFFSET__SHIFT | - (alloc_size - 1) << GEN7_URB_DW1_ENTRY_SIZE__SHIFT | - num_entries; + dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PUSH_CONSTANT_ALLOC_PS) | + (cmd_len - 2); + /* see urb_set_gen7_3dstate_push_constant_alloc() */ + dw[1] = urb->pcb[4]; } static inline void gen7_3DSTATE_URB_VS(struct ilo_builder *builder, - int offset, int size, int entry_size) + const struct ilo_state_urb *urb) { - gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_VS, - offset, size, entry_size); + const uint8_t cmd_len = 2; + uint32_t *dw; + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + + dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_URB_VS) | (cmd_len - 2); + /* see urb_set_gen7_3dstate_urb() */ + dw[1] = urb->urb[0]; } static inline void gen7_3DSTATE_URB_HS(struct ilo_builder *builder, - int offset, int size, int entry_size) + const struct ilo_state_urb *urb) { - gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_HS, - offset, size, entry_size); + const uint8_t cmd_len = 2; + uint32_t *dw; + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + + dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_URB_HS) | (cmd_len - 2); + /* see urb_set_gen7_3dstate_urb() */ + dw[1] = urb->urb[1]; } static inline void gen7_3DSTATE_URB_DS(struct ilo_builder *builder, - int offset, int size, int entry_size) + const struct ilo_state_urb *urb) { - gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_DS, - offset, size, entry_size); + const uint8_t cmd_len = 2; + uint32_t *dw; + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + + dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_URB_DS) | (cmd_len - 2); + /* see urb_set_gen7_3dstate_urb() */ + dw[1] = urb->urb[2]; } static inline void gen7_3DSTATE_URB_GS(struct ilo_builder *builder, - 
int offset, int size, int entry_size) + const struct ilo_state_urb *urb) { - gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_GS, - offset, size, entry_size); + const uint8_t cmd_len = 2; + uint32_t *dw; + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + + dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_URB_GS) | (cmd_len - 2); + /* see urb_set_gen7_3dstate_urb() */ + dw[1] = urb->urb[3]; } static inline void diff --git a/src/gallium/drivers/ilo/ilo_blitter.h b/src/gallium/drivers/ilo/ilo_blitter.h index 3d02063..c257c60 100644 --- a/src/gallium/drivers/ilo/ilo_blitter.h +++ b/src/gallium/drivers/ilo/ilo_blitter.h @@ -70,6 +70,8 @@ struct ilo_blitter { uint32_t depth_clear_value; + struct ilo_state_urb urb; + struct { struct ilo_surface_cso dst; unsigned width, height; diff --git a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c index b2b839c..b106e79 100644 --- a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c +++ b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c @@ -78,6 +78,9 @@ ilo_blitter_set_invariants(struct ilo_blitter *blitter) ilo_state_viewport_init_for_rectlist(&blitter->vp, blitter->ilo->dev, blitter->vp_data, sizeof(blitter->vp_data)); + ilo_state_urb_init_for_rectlist(&blitter->urb, blitter->ilo->dev, + blitter->ve.count + blitter->ve.prepend_nosrc_cso); + blitter->initialized = true; return true; diff --git a/src/gallium/drivers/ilo/ilo_render.c b/src/gallium/drivers/ilo/ilo_render.c index 6935138..0fd19e3 100644 --- a/src/gallium/drivers/ilo/ilo_render.c +++ b/src/gallium/drivers/ilo/ilo_render.c @@ -448,6 +448,8 @@ draw_session_prepare(struct ilo_render *render, session->prim_changed = true; session->primitive_restart_changed = true; + ilo_state_urb_full_delta(&vec->urb, render->dev, &session->urb_delta); + ilo_state_raster_full_delta(&vec->rasterizer->rs, render->dev, &session->rs_delta); @@ -462,6 +464,9 @@ draw_session_prepare(struct ilo_render *render, 
session->primitive_restart_changed = (render->state.primitive_restart != vec->draw->primitive_restart); + ilo_state_urb_get_delta(&vec->urb, render->dev, + &render->state.urb, &session->urb_delta); + if (vec->dirty & ILO_DIRTY_RASTERIZER) { ilo_state_raster_get_delta(&vec->rasterizer->rs, render->dev, &render->state.rs, &session->rs_delta); @@ -493,6 +498,7 @@ draw_session_end(struct ilo_render *render, render->state.reduced_prim = session->reduced_prim; render->state.primitive_restart = vec->draw->primitive_restart; + render->state.urb = vec->urb; render->state.rs = vec->rasterizer->rs; render->state.cc = vec->blend->cc; } diff --git a/src/gallium/drivers/ilo/ilo_render_gen.h b/src/gallium/drivers/ilo/ilo_render_gen.h index cc6f77d..74c1380 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen.h +++ b/src/gallium/drivers/ilo/ilo_render_gen.h @@ -90,6 +90,7 @@ struct ilo_render { int reduced_prim; int so_max_vertices; + struct ilo_state_urb urb; struct ilo_state_raster rs; struct ilo_state_cc cc; @@ -148,6 +149,7 @@ struct ilo_render_draw_session { bool prim_changed; bool primitive_restart_changed; + struct ilo_state_urb_delta urb_delta; struct ilo_state_raster_delta rs_delta; struct ilo_state_viewport_delta vp_delta; struct ilo_state_cc_delta cc_delta; diff --git a/src/gallium/drivers/ilo/ilo_render_gen6.c b/src/gallium/drivers/ilo/ilo_render_gen6.c index ff0bf2f..9d19995 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen6.c +++ b/src/gallium/drivers/ilo/ilo_render_gen6.c @@ -329,64 +329,19 @@ gen6_draw_common_urb(struct ilo_render *r, const struct ilo_state_vector *vec, struct ilo_render_draw_session *session) { - /* 3DSTATE_URB */ - if (DIRTY(VE) || DIRTY(VS) || DIRTY(GS)) { - const bool gs_active = (vec->gs || (vec->vs && - ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_GEN6_SO))); - int vs_entry_size, gs_entry_size; - int vs_total_size, gs_total_size; - - vs_entry_size = (vec->vs) ? 
- ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_OUTPUT_COUNT) : 0; - - /* - * As indicated by 2e712e41db0c0676e9f30fc73172c0e8de8d84d4, VF and VS - * share VUE handles. The VUE allocation size must be large enough to - * store either VF outputs (number of VERTEX_ELEMENTs) and VS outputs. - * - * I am not sure if the PRM explicitly states that VF and VS share VUE - * handles. But here is a citation that implies so: - * - * From the Sandy Bridge PRM, volume 2 part 1, page 44: - * - * "Once a FF stage that spawn threads has sufficient input to - * initiate a thread, it must guarantee that it is safe to request - * the thread initiation. For all these FF stages, this check is - * based on : - * - * - The availability of output URB entries: - * - VS: As the input URB entries are overwritten with the - * VS-generated output data, output URB availability isn't a - * factor." - */ - if (vs_entry_size < vec->ve->count + vec->ve->prepend_nosrc_cso) - vs_entry_size = vec->ve->count + vec->ve->prepend_nosrc_cso; + const bool gs_active = (vec->gs || (vec->vs && + ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_GEN6_SO))); - gs_entry_size = (vec->gs) ? - ilo_shader_get_kernel_param(vec->gs, ILO_KERNEL_OUTPUT_COUNT) : - (gs_active) ? 
vs_entry_size : 0; - - /* in bytes */ - vs_entry_size *= sizeof(float) * 4; - gs_entry_size *= sizeof(float) * 4; - vs_total_size = r->dev->urb_size; - - if (gs_active) { - vs_total_size /= 2; - gs_total_size = vs_total_size; - } - else { - gs_total_size = 0; - } - - gen6_3DSTATE_URB(r->builder, vs_total_size, gs_total_size, - vs_entry_size, gs_entry_size); + /* 3DSTATE_URB */ + if (session->urb_delta.dirty & (ILO_STATE_URB_3DSTATE_URB_VS | + ILO_STATE_URB_3DSTATE_URB_GS)) { + gen6_3DSTATE_URB(r->builder, &vec->urb); if (r->state.gs.active && !gs_active) gen6_wa_post_3dstate_urb_no_gs(r); - - r->state.gs.active = gs_active; } + + r->state.gs.active = gs_active; } static void @@ -920,9 +875,7 @@ ilo_render_emit_rectlist_commands_gen6(struct ilo_render *r, gen6_3DSTATE_VERTEX_ELEMENTS(r->builder, &blitter->ve); - gen6_3DSTATE_URB(r->builder, r->dev->urb_size, 0, - (blitter->ve.count + blitter->ve.prepend_nosrc_cso) * 4 * sizeof(float), - 0); + gen6_3DSTATE_URB(r->builder, &blitter->urb); if (r->state.gs.active) { gen6_wa_post_3dstate_urb_no_gs(r); diff --git a/src/gallium/drivers/ilo/ilo_render_gen7.c b/src/gallium/drivers/ilo/ilo_render_gen7.c index 95884a0..f5c1a82 100644 --- a/src/gallium/drivers/ilo/ilo_render_gen7.c +++ b/src/gallium/drivers/ilo/ilo_render_gen7.c @@ -200,40 +200,17 @@ gen7_draw_common_urb(struct ilo_render *r, struct ilo_render_draw_session *session) { /* 3DSTATE_URB_{VS,GS,HS,DS} */ - if (DIRTY(VE) || DIRTY(VS)) { - /* the first 16KB are reserved for VS and PS PCBs */ - const int offset = - (ilo_dev_gen(r->dev) >= ILO_GEN(8)) || - (ilo_dev_gen(r->dev) == ILO_GEN(7.5) && r->dev->gt == 3) ? - 32768 : 16384; - int vs_entry_size, vs_total_size; - - vs_entry_size = (vec->vs) ? 
- ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_OUTPUT_COUNT) : 0; - - /* - * From the Ivy Bridge PRM, volume 2 part 1, page 35: - * - * "Programming Restriction: As the VS URB entry serves as both the - * per-vertex input and output of the VS shader, the VS URB - * Allocation Size must be sized to the maximum of the vertex input - * and output structures." - */ - if (vs_entry_size < vec->ve->count + vec->ve->prepend_nosrc_cso) - vs_entry_size = vec->ve->count + vec->ve->prepend_nosrc_cso; - - vs_entry_size *= sizeof(float) * 4; - vs_total_size = r->dev->urb_size - offset; - + if (session->urb_delta.dirty & (ILO_STATE_URB_3DSTATE_URB_VS | + ILO_STATE_URB_3DSTATE_URB_HS | + ILO_STATE_URB_3DSTATE_URB_DS | + ILO_STATE_URB_3DSTATE_URB_GS)) { if (ilo_dev_gen(r->dev) == ILO_GEN(7)) gen7_wa_pre_vs(r); - gen7_3DSTATE_URB_VS(r->builder, - offset, vs_total_size, vs_entry_size); - - gen7_3DSTATE_URB_GS(r->builder, offset, 0, 0); - gen7_3DSTATE_URB_HS(r->builder, offset, 0, 0); - gen7_3DSTATE_URB_DS(r->builder, offset, 0, 0); + gen7_3DSTATE_URB_VS(r->builder, &vec->urb); + gen7_3DSTATE_URB_GS(r->builder, &vec->urb); + gen7_3DSTATE_URB_HS(r->builder, &vec->urb); + gen7_3DSTATE_URB_DS(r->builder, &vec->urb); } } @@ -243,22 +220,15 @@ gen7_draw_common_pcb_alloc(struct ilo_render *r, struct ilo_render_draw_session *session) { /* 3DSTATE_PUSH_CONSTANT_ALLOC_{VS,PS} */ - if (r->hw_ctx_changed) { - /* - * Push constant buffers are only allowed to take up at most the first - * 16KB of the URB. Split the space evenly for VS and FS. - */ - const int max_size = - (ilo_dev_gen(r->dev) >= ILO_GEN(8)) || - (ilo_dev_gen(r->dev) == ILO_GEN(7.5) && r->dev->gt == 3) ? 
- 32768 : 16384; - const int size = max_size / 2; - int offset = 0; - - gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(r->builder, offset, size); - offset += size; - - gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(r->builder, offset, size); + if (session->urb_delta.dirty & + (ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_VS | + ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_HS | + ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_DS | + ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_GS | + ILO_STATE_URB_3DSTATE_PUSH_CONSTANT_ALLOC_PS)) { + gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(r->builder, &vec->urb); + gen7_3DSTATE_PUSH_CONSTANT_ALLOC_GS(r->builder, &vec->urb); + gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(r->builder, &vec->urb); if (ilo_dev_gen(r->dev) == ILO_GEN(7)) gen7_wa_post_3dstate_push_constant_alloc_ps(r); @@ -671,21 +641,8 @@ static void gen7_rectlist_pcb_alloc(struct ilo_render *r, const struct ilo_blitter *blitter) { - /* - * Push constant buffers are only allowed to take up at most the first - * 16KB of the URB. Split the space evenly for VS and FS. - */ - const int max_size = - (ilo_dev_gen(r->dev) >= ILO_GEN(8)) || - (ilo_dev_gen(r->dev) == ILO_GEN(7.5) && r->dev->gt == 3) ? - 32768 : 16384; - const int size = max_size / 2; - int offset = 0; - - gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(r->builder, offset, size); - offset += size; - - gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(r->builder, offset, size); + gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(r->builder, &blitter->urb); + gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(r->builder, &blitter->urb); if (ilo_dev_gen(r->dev) == ILO_GEN(7)) gen7_wa_post_3dstate_push_constant_alloc_ps(r); @@ -695,19 +652,10 @@ static void gen7_rectlist_urb(struct ilo_render *r, const struct ilo_blitter *blitter) { - /* the first 16KB are reserved for VS and PS PCBs */ - const int offset = - (ilo_dev_gen(r->dev) >= ILO_GEN(8)) || - (ilo_dev_gen(r->dev) == ILO_GEN(7.5) && r->dev->gt == 3) ? 
- 32768 : 16384; - - gen7_3DSTATE_URB_VS(r->builder, offset, r->dev->urb_size - offset, - (blitter->ve.count + blitter->ve.prepend_nosrc_cso) * - 4 * sizeof(float)); - - gen7_3DSTATE_URB_GS(r->builder, offset, 0, 0); - gen7_3DSTATE_URB_HS(r->builder, offset, 0, 0); - gen7_3DSTATE_URB_DS(r->builder, offset, 0, 0); + gen7_3DSTATE_URB_VS(r->builder, &blitter->urb); + gen7_3DSTATE_URB_GS(r->builder, &blitter->urb); + gen7_3DSTATE_URB_HS(r->builder, &blitter->urb); + gen7_3DSTATE_URB_DS(r->builder, &blitter->urb); } static void diff --git a/src/gallium/drivers/ilo/ilo_state.c b/src/gallium/drivers/ilo/ilo_state.c index 0a568bf..896402c 100644 --- a/src/gallium/drivers/ilo/ilo_state.c +++ b/src/gallium/drivers/ilo/ilo_state.c @@ -478,6 +478,55 @@ finalize_vertex_elements(struct ilo_context *ilo) } static void +finalize_urb(struct ilo_context *ilo) +{ + const uint16_t attr_size = sizeof(uint32_t) * 4; + const struct ilo_dev *dev = ilo->dev; + struct ilo_state_vector *vec = &ilo->state_vector; + struct ilo_state_urb_info info; + + if (!(vec->dirty & (ILO_DIRTY_VE | ILO_DIRTY_VS | + ILO_DIRTY_GS | ILO_DIRTY_FS))) + return; + + memset(&info, 0, sizeof(info)); + + info.ve_entry_size = attr_size * + (vec->ve->count + vec->ve->prepend_nosrc_cso); + + if (vec->vs) { + info.vs_const_data = (bool) + (ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_PCB_CBUF0_SIZE) + + ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_PCB_UCP_SIZE)); + info.vs_entry_size = attr_size * + ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_OUTPUT_COUNT); + } + + if (vec->gs) { + info.gs_const_data = (bool) + ilo_shader_get_kernel_param(vec->gs, ILO_KERNEL_PCB_CBUF0_SIZE); + + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 189: + * + * "All outputs of a GS thread will be stored in the single GS + * thread output URB entry." 
+ * + * TODO + */ + info.gs_entry_size = attr_size * + ilo_shader_get_kernel_param(vec->gs, ILO_KERNEL_OUTPUT_COUNT); + } + + if (vec->fs) { + info.ps_const_data = (bool) + ilo_shader_get_kernel_param(vec->fs, ILO_KERNEL_PCB_CBUF0_SIZE); + } + + ilo_state_urb_set_info(&vec->urb, dev, &info); +} + +static void finalize_viewport(struct ilo_context *ilo) { const struct ilo_dev *dev = ilo->dev; @@ -680,6 +729,7 @@ ilo_finalize_3d_states(struct ilo_context *ilo, finalize_index_buffer(ilo); finalize_vertex_elements(ilo); + finalize_urb(ilo); finalize_rasterizer(ilo); finalize_viewport(ilo); finalize_blend(ilo); @@ -2065,6 +2115,8 @@ void ilo_state_vector_init(const struct ilo_dev *dev, struct ilo_state_vector *vec) { + struct ilo_state_urb_info urb_info; + vec->sample_mask = ~0u; ilo_state_viewport_init_data_only(&vec->viewport.vp, dev, @@ -2079,6 +2131,9 @@ ilo_state_vector_init(const struct ilo_dev *dev, ilo_state_sampler_init_disabled(&vec->disabled_sampler, dev); + memset(&urb_info, 0, sizeof(urb_info)); + ilo_state_urb_init(&vec->urb, dev, &urb_info); + util_dynarray_init(&vec->global_binding.bindings); vec->dirty = ILO_DIRTY_ALL; diff --git a/src/gallium/drivers/ilo/ilo_state.h b/src/gallium/drivers/ilo/ilo_state.h index ae4639f..908585a 100644 --- a/src/gallium/drivers/ilo/ilo_state.h +++ b/src/gallium/drivers/ilo/ilo_state.h @@ -34,6 +34,7 @@ #include "core/ilo_state_sampler.h" #include "core/ilo_state_sol.h" #include "core/ilo_state_surface.h" +#include "core/ilo_state_urb.h" #include "core/ilo_state_viewport.h" #include "core/ilo_state_zs.h" #include "pipe/p_state.h" @@ -275,6 +276,8 @@ struct ilo_state_vector { struct ilo_fb_state fb; + struct ilo_state_urb urb; + /* shader resources */ struct ilo_sampler_state sampler[PIPE_SHADER_TYPES]; struct ilo_view_state view[PIPE_SHADER_TYPES]; -- 2.7.4