ARRAY_SIZE(gfx11_generated_draws_spv_source) :
ARRAY_SIZE(gfx9_generated_draws_spv_source),
.send_count = device->info->ver >= 11 ?
- 11 /* 2 * (2 loads + 3 stores) + 1 store */ :
- 17 /* 2 * (2 loads + 6 stores) + 1 store */,
+ 12 /* 2 * (2 loads + 3 stores) + 1 load + 1 store */ :
+ 18 /* 2 * (2 loads + 6 stores) + 1 load + 1 store */,
.bind_map = {
- .num_bindings = 4,
+ .num_bindings = 5,
.bindings = {
{
.address_offset = offsetof(struct anv_generated_indirect_params,
draw_ids_addr),
},
{
+ .address_offset = offsetof(struct anv_generated_indirect_params,
+ draw_count_addr),
+ },
+ {
.push_constant = true,
},
},
/* This needs to match common_generated_draws.glsl :
*
- * layout(set = 0, binding = 3) uniform block
+ * layout(set = 0, binding = 4) uniform block
*/
struct anv_generated_indirect_draw_params {
/* Draw ID buffer address (only used on Gfx9) */
* gl_FragCoord
*/
uint32_t draw_base;
-
- /* Number of draws to generate */
- uint32_t draw_count;
-
/* Maximum number of draws (equals to draw_count for indirect draws without
* an indirect count)
*/
uint32_t max_draw_count;
-
/* Instance multiplier for multi view */
uint32_t instance_multiplier;
-
/* Address where to jump at after the generated draw (only used with
* indirect draw count variants)
*/
struct anv_generated_indirect_params {
struct anv_generated_indirect_draw_params draw;
+ /* Draw count value for the non-count variants of indirect draw commands
+ * (left at 0 when a count address is provided)
+ */
+ uint32_t draw_count;
+
/* Global address of binding 0 */
uint64_t indirect_data_addr;
/* Global address of binding 2 */
uint64_t draw_ids_addr;
+ /* Global address of binding 3 (points to the draw_count field above when
+ * no count address is provided, to the count address otherwise)
+ */
+ uint64_t draw_count_addr;
+
/* CPU side pointer to the previous item when number of draws has to be
* split into smaller chunks, see while loop in
* genX(cmd_buffer_emit_indirect_generated_draws)
bool indexed)
{
struct anv_device *device = cmd_buffer->device;
- struct anv_batch *batch = &cmd_buffer->generation_batch;
struct anv_state push_data_state =
genX(simple_shader_alloc_push)(&cmd_buffer->generation_shader_state,
struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
+ struct anv_address draw_count_addr;
+ if (anv_address_is_null(count_addr)) {
+ draw_count_addr = anv_address_add(
+ genX(simple_shader_push_state_address)(
+ &cmd_buffer->generation_shader_state, push_data_state),
+ offsetof(struct anv_generated_indirect_params, draw_count));
+ } else {
+ draw_count_addr = count_addr;
+ }
+
struct anv_generated_indirect_params *push_data = push_data_state.map;
*push_data = (struct anv_generated_indirect_params) {
.draw = {
ISL_SURF_USAGE_VERTEX_BUFFER_BIT) << 8) |
((generated_cmd_stride / 4) << 16),
.draw_base = item_base,
- /* If count_addr is not NULL, we'll edit it through a the command
- * streamer.
- */
- .draw_count = anv_address_is_null(count_addr) ? max_count : 0,
.max_draw_count = max_count,
.instance_multiplier = pipeline->instance_multiplier,
},
+ .draw_count = anv_address_is_null(count_addr) ? max_count : 0,
.indirect_data_addr = anv_address_physical(indirect_data_addr),
.generated_cmds_addr = anv_address_physical(generated_cmds_addr),
.draw_ids_addr = anv_address_physical(draw_id_addr),
+ .draw_count_addr = anv_address_physical(draw_count_addr),
};
- if (!anv_address_is_null(count_addr)) {
- /* Copy the draw count into the push constants so that the generation
- * gets the value straight away and doesn't even need to access memory.
- */
- struct mi_builder b;
- mi_builder_init(&b, device->info, batch);
- mi_memcpy(&b,
- anv_address_add(
- genX(simple_shader_push_state_address)(
- &cmd_buffer->generation_shader_state,
- push_data_state),
- offsetof(struct anv_generated_indirect_params, draw.draw_count)),
- count_addr, 4);
-
- /* Make sure the memcpy landed for the generating draw call to pick up
- * the value.
- */
- genx_batch_emit_pipe_control(batch, cmd_buffer->device->info,
- ANV_PIPE_CS_STALL_BIT);
- }
-
genX(emit_simple_shader_dispatch)(&cmd_buffer->generation_shader_state,
item_count, push_data_state);
uint draw_ids[];
};
+/* The draw count is read through a storage buffer rather than a uniform
+ * block because our compiler infrastructure relies on UBOs being 32-byte
+ * aligned so that we can push them into registers. This value can come
+ * directly from the indirect buffer given to indirect draw commands, and the
+ * requirement there is only 4-byte alignment.
+ *
+ * The variable name carries an underscore prefix as a reminder to copy it
+ * into a local once and use the copy, avoiding unnecessary accesses.
+ */
+layout(set = 0, binding = 3) buffer Storage3 {
+ uint _draw_count;
+};
+
/* This data will be provided through push constants. */
-layout(set = 0, binding = 3) uniform block {
+layout(set = 0, binding = 4) uniform block {
uint64_t draw_id_addr;
uint64_t indirect_data_addr;
uint indirect_data_stride;
uint flags;
uint draw_base;
- uint draw_count;
uint max_draw_count;
uint instance_multiplier;
uint64_t end_addr;
uint item_idx = uint(gl_FragCoord.y) * 8192 + uint(gl_FragCoord.x);
uint cmd_idx = item_idx * _3dprim_dw_size;
uint draw_id = draw_base + item_idx;
+ uint draw_count = _draw_count;
if (draw_id < draw_count)
write_draw(item_idx, cmd_idx, draw_id);
uint item_idx = uint(gl_FragCoord.y) * 8192 + uint(gl_FragCoord.x);
uint cmd_idx = item_idx * _3dprim_dw_size;
uint draw_id = draw_base + item_idx;
+ uint draw_count = _draw_count;
if (draw_id < draw_count)
write_draw(item_idx, cmd_idx, draw_id);