unsigned payload_entry_bytes;
unsigned draw_entry_bytes;
unsigned num_entries;
+
+ /* True if the lowering needs to insert shader query. */
+ bool has_query;
} lower_tsms_io_state;
static nir_def *
intrin->intrinsic == nir_intrinsic_load_task_payload;
}
+static void
+task_invocation_query(nir_builder *b, lower_tsms_io_state *s)
+{
+ if (!s->has_query)
+ return;
+
+ const unsigned invocations = b->shader->info.workgroup_size[0] *
+ b->shader->info.workgroup_size[1] *
+ b->shader->info.workgroup_size[2];
+
+ nir_if *if_pipeline_query = nir_push_if(b, nir_load_pipeline_stat_query_enabled_amd(b));
+ {
+ nir_atomic_add_shader_invocation_count_amd(b, nir_imm_int(b, invocations));
+ }
+ nir_pop_if(b, if_pipeline_query);
+}
+
static nir_def *
lower_task_launch_mesh_workgroups(nir_builder *b,
nir_intrinsic_instr *intrin,
nir_scoped_memory_barrier(b, SCOPE_INVOCATION, NIR_MEMORY_RELEASE, nir_var_shader_out);
/* Ready bit, only write the low 8 bits. */
task_write_draw_ring(b, task_draw_ready_bit(b, s), 12, s);
+
+ task_invocation_query(b, s);
}
nir_pop_if(b, if_invocation_index_zero);
void
ac_nir_lower_task_outputs_to_mem(nir_shader *shader,
unsigned task_payload_entry_bytes,
- unsigned task_num_entries)
+ unsigned task_num_entries,
+ bool has_query)
{
assert(util_is_power_of_two_nonzero(task_num_entries));
.draw_entry_bytes = 16,
.payload_entry_bytes = task_payload_entry_bytes,
.num_entries = task_num_entries,
+ .has_query = has_query,
};
nir_function_impl *impl = nir_shader_get_entrypoint(shader);
NIR_PASS_V(nir, ac_nir_lower_gs_inputs_to_mem, map_input, device->physical_device->rad_info.gfx_level, false);
return true;
} else if (nir->info.stage == MESA_SHADER_TASK) {
- ac_nir_lower_task_outputs_to_mem(nir, AC_TASK_PAYLOAD_ENTRY_BYTES,
- device->physical_device->task_info.num_entries);
+ ac_nir_lower_task_outputs_to_mem(nir, AC_TASK_PAYLOAD_ENTRY_BYTES, device->physical_device->task_info.num_entries,
+ false);
return true;
} else if (nir->info.stage == MESA_SHADER_MESH) {
ac_nir_lower_mesh_inputs_to_mem(nir, AC_TASK_PAYLOAD_ENTRY_BYTES, device->physical_device->task_info.num_entries);