From a71d068fd07f5d34561b28f2f57267031077033a Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Thu, 29 Sep 2022 14:22:16 +0100 Subject: [PATCH] radv/llvm: fix GS shaders on GFX8/9 6698753cdb6 switched our GS output stores to use MUBUF. The stride doesn't matter for the ESGS descriptor (because idxen=false and the index stride is 64), but this fixes it anyway. This also changes ACO to use a MUBUF store, since MTBUF doesn't seem to work correctly with an invalid data format in the descriptor. Signed-off-by: Rhys Perry Reviewed-by: Samuel Pitoiset Fixes: 6698753cdb6 ("ac/llvm: don't use tbuffer_store as a fallback for swizzled stores") Part-of: --- src/amd/compiler/aco_instruction_selection.cpp | 26 ++++++++++++-------------- src/amd/vulkan/radv_device.c | 8 ++++++++ 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 399fbc7..300e320 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -7823,20 +7823,18 @@ visit_emit_vertex_with_counter(isel_context* ctx, nir_intrinsic_instr* instr) const_offset %= 4096u; } - aco_ptr mtbuf{create_instruction( - aco_opcode::tbuffer_store_format_x, Format::MTBUF, 4, 0)}; - mtbuf->operands[0] = Operand(gsvs_ring); - mtbuf->operands[1] = vaddr_offset; - mtbuf->operands[2] = Operand(get_arg(ctx, ctx->args->ac.gs2vs_offset)); - mtbuf->operands[3] = Operand(ctx->outputs.temps[i * 4u + j]); - mtbuf->offen = !vaddr_offset.isUndefined(); - mtbuf->dfmt = V_008F0C_BUF_DATA_FORMAT_32; - mtbuf->nfmt = V_008F0C_BUF_NUM_FORMAT_UINT; - mtbuf->offset = const_offset; - mtbuf->glc = ctx->program->gfx_level < GFX11; - mtbuf->slc = true; - mtbuf->sync = memory_sync_info(storage_vmem_output, semantic_can_reorder); - bld.insert(std::move(mtbuf)); + aco_ptr mubuf{create_instruction( + aco_opcode::buffer_store_dword, Format::MUBUF, 4, 0)}; + mubuf->operands[0] = Operand(gsvs_ring); + 
mubuf->operands[1] = vaddr_offset; + mubuf->operands[2] = Operand(get_arg(ctx, ctx->args->ac.gs2vs_offset)); + mubuf->operands[3] = Operand(ctx->outputs.temps[i * 4u + j]); + mubuf->offen = !vaddr_offset.isUndefined(); + mubuf->offset = const_offset; + mubuf->glc = ctx->program->gfx_level < GFX11; + mubuf->slc = true; + mubuf->sync = memory_sync_info(storage_vmem_output, semantic_can_reorder); + bld.insert(std::move(mubuf)); } offset += ctx->shader->info.gs.vertices_out; diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 1a3bb84..7c44781 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -4061,6 +4061,10 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *map, bool add_sampl } else if (device->physical_device->rad_info.gfx_level >= GFX10) { desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1); + } else if (device->physical_device->rad_info.gfx_level >= GFX8) { + /* DATA_FORMAT is STRIDE[14:17] for MUBUF with ADD_TID_ENABLE=1 */ + desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | + S_008F0C_DATA_FORMAT(0) | S_008F0C_ELEMENT_SIZE(1); } else { desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | S_008F0C_ELEMENT_SIZE(1); @@ -4133,6 +4137,10 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *map, bool add_sampl } else if (device->physical_device->rad_info.gfx_level >= GFX10) { desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1); + } else if (device->physical_device->rad_info.gfx_level >= GFX8) { + /* DATA_FORMAT is STRIDE[14:17] for MUBUF with ADD_TID_ENABLE=1 */ + desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | + S_008F0C_DATA_FORMAT(0) | S_008F0C_ELEMENT_SIZE(1); } else { desc[7] |= 
S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | S_008F0C_ELEMENT_SIZE(1); -- 2.7.4