From: Marek Olšák
Date: Wed, 8 Jun 2016 11:21:25 +0000 (+0200)
Subject: radeonsi: enable scratch coalescing
X-Git-Tag: upstream/17.1.0~8771
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=6e1b12c7881fe663cb500cb2f7374f4862bae179;p=platform%2Fupstream%2Fmesa.git

radeonsi: enable scratch coalescing

This makes one particular compute shader 8x faster.
Latest LLVM git is required.

Reviewed-by: Nicolai Hähnle
---

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 754b4af..f2bd337 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5903,8 +5903,16 @@ void si_shader_apply_scratch_relocs(struct si_context *sctx,
 	unsigned i;
 	uint32_t scratch_rsrc_dword0 = scratch_va;
 	uint32_t scratch_rsrc_dword1 =
-		S_008F04_BASE_ADDRESS_HI(scratch_va >> 32)
-		| S_008F04_STRIDE(config->scratch_bytes_per_wave / 64);
+		S_008F04_BASE_ADDRESS_HI(scratch_va >> 32);
+
+	/* Enable scratch coalescing if LLVM sets ELEMENT_SIZE & INDEX_STRIDE
+	 * correctly.
+	 */
+	if (HAVE_LLVM >= 0x0309)
+		scratch_rsrc_dword1 |= S_008F04_SWIZZLE_ENABLE(1);
+	else
+		scratch_rsrc_dword1 |=
+			S_008F04_STRIDE(config->scratch_bytes_per_wave / 64);
 
 	for (i = 0 ; i < shader->binary.reloc_count; i++) {
 		const struct radeon_shader_reloc *reloc =