The number of shader engines isn't always 4, so use rad_info->max_se when sizing the thread trace BO and computing the per-SE data offsets.
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9307>
}
uint64_t
-ac_thread_trace_get_data_offset(struct ac_thread_trace_data *data, unsigned se)
+ac_thread_trace_get_data_offset(const struct radeon_info *rad_info,
+ const struct ac_thread_trace_data *data, unsigned se)
{
+ unsigned max_se = rad_info->max_se;
uint64_t data_offset;
- data_offset = align64(sizeof(struct ac_thread_trace_info) * 4,
+ data_offset = align64(sizeof(struct ac_thread_trace_info) * max_se,
1 << SQTT_BUFFER_ALIGN_SHIFT);
data_offset += data->buffer_size * se;
}
uint64_t
-ac_thread_trace_get_data_va(struct ac_thread_trace_data *data, uint64_t va, unsigned se)
+ac_thread_trace_get_data_va(const struct radeon_info *rad_info,
+ const struct ac_thread_trace_data *data, uint64_t va, unsigned se)
{
- return va + ac_thread_trace_get_data_offset(data, se);
+ return va + ac_thread_trace_get_data_offset(rad_info, data, se);
}
bool
ac_thread_trace_get_info_offset(unsigned se);
uint64_t
-ac_thread_trace_get_data_offset(struct ac_thread_trace_data *data, unsigned se);
+ac_thread_trace_get_data_offset(const struct radeon_info *rad_info,
+ const struct ac_thread_trace_data *data, unsigned se);
uint64_t
ac_thread_trace_get_info_va(uint64_t va, unsigned se);
uint64_t
-ac_thread_trace_get_data_va(struct ac_thread_trace_data *data, uint64_t va, unsigned se);
+ac_thread_trace_get_data_va(const struct radeon_info *rad_info,
+ const struct ac_thread_trace_data *data, uint64_t va, unsigned se);
bool
ac_is_thread_trace_complete(struct radeon_info *rad_info, const struct ac_thread_trace_info *info);
uint32_t queue_family_index)
{
uint32_t shifted_size = device->thread_trace.buffer_size >> SQTT_BUFFER_ALIGN_SHIFT;
- unsigned max_se = device->physical_device->rad_info.max_se;
+ struct radeon_info *rad_info = &device->physical_device->rad_info;
+ unsigned max_se = rad_info->max_se;
assert(device->physical_device->rad_info.chip_class >= GFX8);
for (unsigned se = 0; se < max_se; se++) {
uint64_t va = radv_buffer_get_va(device->thread_trace.bo);
- uint64_t data_va = ac_thread_trace_get_data_va(&device->thread_trace, va, se);
+ uint64_t data_va = ac_thread_trace_get_data_va(rad_info, &device->thread_trace, va, se);
uint64_t shifted_va = data_va >> SQTT_BUFFER_ALIGN_SHIFT;
int first_active_cu = ffs(device->physical_device->rad_info.cu_mask[se][0]);
static bool
radv_thread_trace_init_bo(struct radv_device *device)
{
+ unsigned max_se = device->physical_device->rad_info.max_se;
struct radeon_winsys *ws = device->ws;
uint64_t size;
device->thread_trace.buffer_size = align64(device->thread_trace.buffer_size,
1u << SQTT_BUFFER_ALIGN_SHIFT);
- /* Compute total size of the thread trace BO for 4 SEs. */
- size = align64(sizeof(struct ac_thread_trace_info) * 4,
+ /* Compute total size of the thread trace BO for all SEs. */
+ size = align64(sizeof(struct ac_thread_trace_info) * max_se,
1 << SQTT_BUFFER_ALIGN_SHIFT);
- size += device->thread_trace.buffer_size * 4ll;
+ size += device->thread_trace.buffer_size * (uint64_t)max_se;
device->thread_trace.bo = ws->buffer_create(ws, size, 4096,
RADEON_DOMAIN_VRAM,
struct ac_thread_trace *thread_trace)
{
struct radv_device *device = queue->device;
- unsigned max_se = device->physical_device->rad_info.max_se;
+ struct radeon_info *rad_info = &device->physical_device->rad_info;
+ unsigned max_se = rad_info->max_se;
void *thread_trace_ptr = device->thread_trace.ptr;
memset(thread_trace, 0, sizeof(*thread_trace));
for (unsigned se = 0; se < max_se; se++) {
uint64_t info_offset = ac_thread_trace_get_info_offset(se);
- uint64_t data_offset = ac_thread_trace_get_data_offset(&device->thread_trace, se);
+ uint64_t data_offset = ac_thread_trace_get_data_offset(rad_info, &device->thread_trace, se);
void *info_ptr = (uint8_t *)thread_trace_ptr + info_offset;
void *data_ptr = (uint8_t *)thread_trace_ptr + data_offset;
struct ac_thread_trace_info *info =
static bool
si_thread_trace_init_bo(struct si_context *sctx)
{
+ unsigned max_se = sctx->screen->info.max_se;
struct radeon_winsys *ws = sctx->ws;
uint64_t size;
sctx->thread_trace->buffer_size = align64(sctx->thread_trace->buffer_size,
1u << SQTT_BUFFER_ALIGN_SHIFT);
- /* Compute total size of the thread trace BO for 4 SEs. */
- size = align64(sizeof(struct ac_thread_trace_info) * 4,
+ /* Compute total size of the thread trace BO for all SEs. */
+ size = align64(sizeof(struct ac_thread_trace_info) * max_se,
1 << SQTT_BUFFER_ALIGN_SHIFT);
- size += sctx->thread_trace->buffer_size * 4ll;
+ size += sctx->thread_trace->buffer_size * (uint64_t)max_se;
sctx->thread_trace->bo =
ws->buffer_create(ws, size, 4096,
for (unsigned se = 0; se < max_se; se++) {
uint64_t va = sctx->ws->buffer_get_virtual_address(sctx->thread_trace->bo);
- uint64_t data_va = ac_thread_trace_get_data_va(sctx->thread_trace, va, se);
+ uint64_t data_va = ac_thread_trace_get_data_va(&sctx->screen->info, sctx->thread_trace, va, se);
uint64_t shifted_va = data_va >> SQTT_BUFFER_ALIGN_SHIFT;
/* Target SEx and SH0. */
for (unsigned se = 0; se < max_se; se++) {
uint64_t info_offset = ac_thread_trace_get_info_offset(se);
- uint64_t data_offset = ac_thread_trace_get_data_offset(sctx->thread_trace, se);
+ uint64_t data_offset = ac_thread_trace_get_data_offset(&sctx->screen->info, sctx->thread_trace, se);
void *info_ptr = thread_trace_ptr + info_offset;
void *data_ptr = thread_trace_ptr + data_offset;
struct ac_thread_trace_info *info =