Fixes: ba02ed91a60 - ac/gfx11: fix the scratch buffer
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19477>
(cherry picked from commit bdfacd0a24e023515fb7b7fae4a279cff0fbac4e)
"description": "radeonsi/gfx11: fix compute scratch buffer - WAVES is always per SE",
"nominated": true,
"nomination_type": 1,
- "resolution": 0,
+ "resolution": 1,
"main_sha": null,
"because_sha": "ba02ed91a60839f2a6dc6a89fd9de1144b0788aa"
},
}
/* Return the register value and tune bytes_per_wave to increase scratch performance. */
-void ac_get_scratch_tmpring_size(const struct radeon_info *info, bool compute,
+void ac_get_scratch_tmpring_size(const struct radeon_info *info,
unsigned bytes_per_wave, unsigned *max_seen_bytes_per_wave,
uint32_t *tmpring_size)
{
*max_seen_bytes_per_wave = MAX2(*max_seen_bytes_per_wave, bytes_per_wave);
unsigned max_scratch_waves = info->max_scratch_waves;
- if (info->gfx_level >= GFX11 && !compute)
- max_scratch_waves /= info->num_se; /* WAVES is per SE for SPI_TMPRING_SIZE. */
+ if (info->gfx_level >= GFX11)
+ max_scratch_waves /= info->num_se; /* WAVES is per SE */
/* TODO: We could decrease WAVES to make the whole buffer fit into the infinity cache. */
*tmpring_size = S_0286E8_WAVES(max_scratch_waves) |
unsigned value_shift, const struct radeon_info *info,
void set_sh_reg(void*, unsigned, uint32_t));
-void ac_get_scratch_tmpring_size(const struct radeon_info *info, bool compute,
+void ac_get_scratch_tmpring_size(const struct radeon_info *info,
unsigned bytes_per_wave, unsigned *max_seen_bytes_per_wave,
uint32_t *tmpring_size);
}
unsigned tmpring_size;
- ac_get_scratch_tmpring_size(&sctx->screen->info, true,
+ ac_get_scratch_tmpring_size(&sctx->screen->info,
config->scratch_bytes_per_wave,
&sctx->max_seen_compute_scratch_bytes_per_wave, &tmpring_size);
bool si_update_spi_tmpring_size(struct si_context *sctx, unsigned bytes)
{
unsigned spi_tmpring_size;
- ac_get_scratch_tmpring_size(&sctx->screen->info, false, bytes,
+ ac_get_scratch_tmpring_size(&sctx->screen->info, bytes,
&sctx->max_seen_scratch_bytes_per_wave, &spi_tmpring_size);
unsigned scratch_needed_size = sctx->max_seen_scratch_bytes_per_wave *