From 6571032af1d2c00150c4a6699a5fc385dd174ab9 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Wed, 29 May 2019 15:48:06 +0200 Subject: [PATCH] radeonsi/nir: Correctly handle double TCS/TES varyings ac expands the store to 32-bit components for us, but we still have to deal with storing up to 8 components, and when a varying is split across two vec4 slots we have to calculate the address again for the second slot, since they aren't adjacent in memory. I didn't do this at the ac level because handling it here lets us generate better indexing arithmetic for the LDS store, where slots are contiguous. Reviewed-by: Timothy Arceri --- src/gallium/drivers/radeonsi/si_shader.c | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index d2927d0..5bd65e0 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1253,8 +1253,20 @@ LLVMValueRef si_nir_load_input_tes(struct ac_shader_abi *abi, LLVMValueRef value[4]; for (unsigned i = 0; i < num_components; i++) { unsigned offset = i; - if (llvm_type_is_64bit(ctx, type)) + if (llvm_type_is_64bit(ctx, type)) { offset *= 2; + if (offset == 4) { + addr = get_tcs_tes_buffer_address_from_generic_indices(ctx, + vertex_index, + param_index, + driver_location + 1, + info->input_semantic_name, + info->input_semantic_index, + is_patch); + } + + offset = offset % 4; + } offset += component; value[i + component] = buffer_load(&ctx->bld_base, type, offset, @@ -1376,7 +1388,7 @@ static void si_nir_store_output_tcs(struct ac_shader_abi *abi, unsigned driver_location = var->data.driver_location; LLVMValueRef dw_addr, stride; LLVMValueRef buffer, base, addr; - LLVMValueRef values[4]; + LLVMValueRef values[8]; bool skip_lds_store; bool is_tess_factor = false, is_tess_inner = false; @@ -1438,11 +1450,22 @@ static void si_nir_store_output_tcs(struct ac_shader_abi *abi, 
info->output_semantic_index, is_patch); - for (unsigned chan = 0; chan < 4; chan++) { + for (unsigned chan = 0; chan < 8; chan++) { if (!(writemask & (1 << chan))) continue; LLVMValueRef value = ac_llvm_extract_elem(&ctx->ac, src, chan - component); + unsigned buffer_store_offset = chan % 4; + if (chan == 4) { + addr = get_tcs_tes_buffer_address_from_generic_indices(ctx, + vertex_index, + param_index, + driver_location + 1, + info->output_semantic_name, + info->output_semantic_index, + is_patch); + } + /* Skip LDS stores if there is no LDS read of this output. */ if (!skip_lds_store) lds_store(ctx, chan, dw_addr, value); @@ -1453,7 +1476,8 @@ static void si_nir_store_output_tcs(struct ac_shader_abi *abi, if (writemask != 0xF && !is_tess_factor) { ac_build_buffer_store_dword(&ctx->ac, buffer, value, 1, addr, base, - 4 * chan, 1, 0, true, false); + 4 * buffer_store_offset, + 1, 0, true, false); } /* Write tess factors into VGPRs for the epilog. */ -- 2.7.4