From 97e21cfa257292ea57b1d5f02a63908b5e373836 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 24 Feb 2017 02:09:47 +0100 Subject: [PATCH] ac: replace llvm.SI.tbuffer.store with llvm.amdgcn.buffer.store if ADD_TID=0 ADD_TID doesn't work. Needs more investigation. v2: remove leftover dead code Reviewed-by: Dave Airlie (v1) --- src/amd/common/ac_llvm_build.c | 58 +++++++++++++++++++++++++++++++- src/amd/common/ac_llvm_build.h | 4 ++- src/amd/common/ac_nir_to_llvm.c | 4 +-- src/gallium/drivers/radeonsi/si_shader.c | 22 ++++++------ 4 files changed, 73 insertions(+), 15 deletions(-) diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index 08fedc7..9435b18 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -551,8 +551,64 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx, LLVMValueRef soffset, unsigned inst_offset, bool glc, - bool slc) + bool slc, + bool writeonly_memory, + bool has_add_tid) { + /* TODO: Fix stores with ADD_TID and remove the "has_add_tid" flag. */ + if (HAVE_LLVM >= 0x0309 && !has_add_tid) { + /* Split 3 channel stores, because LLVM doesn't support 3-channel + * intrinsics. 
*/ + if (num_channels == 3) { + LLVMValueRef v[3], v01; + + for (int i = 0; i < 3; i++) { + v[i] = LLVMBuildExtractElement(ctx->builder, vdata, + LLVMConstInt(ctx->i32, i, 0), ""); + } + v01 = ac_build_gather_values(ctx, v, 2); + + ac_build_buffer_store_dword(ctx, rsrc, v01, 2, voffset, + soffset, inst_offset, glc, slc, + writeonly_memory, has_add_tid); + ac_build_buffer_store_dword(ctx, rsrc, v[2], 1, voffset, + soffset, inst_offset + 8, + glc, slc, + writeonly_memory, has_add_tid); + return; + } + + unsigned func = CLAMP(num_channels, 1, 3) - 1; + static const char *types[] = {"f32", "v2f32", "v4f32"}; + char name[256]; + LLVMValueRef offset = soffset; + + if (inst_offset) + offset = LLVMBuildAdd(ctx->builder, offset, + LLVMConstInt(ctx->i32, inst_offset, 0), ""); + if (voffset) + offset = LLVMBuildAdd(ctx->builder, offset, voffset, ""); + + LLVMValueRef args[] = { + bitcast_to_float(ctx, vdata), + LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""), + LLVMConstInt(ctx->i32, 0, 0), + offset, + LLVMConstInt(ctx->i1, glc, 0), + LLVMConstInt(ctx->i1, slc, 0), + }; + + snprintf(name, sizeof(name), "llvm.amdgcn.buffer.store.%s", + types[func]); + + ac_emit_llvm_intrinsic(ctx, name, ctx->voidt, + args, ARRAY_SIZE(args), + writeonly_memory ? 
+ AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY : + AC_FUNC_ATTR_WRITEONLY); + return; + } + static unsigned dfmt[] = { V_008F0C_BUF_DATA_FORMAT_32, V_008F0C_BUF_DATA_FORMAT_32_32, diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h index 78df441..aa99e92 100644 --- a/src/amd/common/ac_llvm_build.h +++ b/src/amd/common/ac_llvm_build.h @@ -130,7 +130,9 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx, LLVMValueRef soffset, unsigned inst_offset, bool glc, - bool slc); + bool slc, + bool writeonly_memory, + bool has_add_tid); LLVMValueRef ac_build_buffer_load(struct ac_llvm_context *ctx, LLVMValueRef rsrc, diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index c363470..2c9ef49 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -3159,7 +3159,7 @@ visit_emit_vertex(struct nir_to_llvm_context *ctx, ac_build_buffer_store_dword(&ctx->ac, ctx->gsvs_ring, out_val, 1, voffset, ctx->gs2vs_offset, 0, - 1, 1); + 1, 1, true, true); } idx += slot_inc; } @@ -4675,7 +4675,7 @@ handle_es_outputs_post(struct nir_to_llvm_context *ctx) out_val, 1, NULL, ctx->es2gs_offset, (4 * param_index + j + start) * 4, - 1, 1); + 1, 1, true, true); } } ctx->shader_info->vs.esgs_itemsize = (max_output_written + 1) * 16; diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index c9dab80..9538304 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1049,7 +1049,7 @@ static void store_output_tcs(struct lp_build_tgsi_context *bld_base, if (inst->Dst[0].Register.WriteMask != 0xF && !is_tess_factor) { ac_build_buffer_store_dword(&ctx->ac, buffer, value, 1, buf_addr, base, - 4 * chan_index, 1, 0); + 4 * chan_index, 1, 0, true, false); } } @@ -1057,7 +1057,7 @@ static void store_output_tcs(struct lp_build_tgsi_context *bld_base, LLVMValueRef value = lp_build_gather_values(bld_base->base.gallivm, values, 4); 
ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, buf_addr, - base, 0, 1, 0); + base, 0, 1, 0, true, false); } } @@ -2087,7 +2087,7 @@ static void emit_streamout_output(struct si_shader_context *ctx, vdata, num_comps, so_write_offsets[buf_idx], LLVMConstInt(ctx->i32, 0, 0), - stream_out->dst_offset * 4, 1, 1); + stream_out->dst_offset * 4, 1, 1, true, false); } /** @@ -2412,7 +2412,7 @@ static void si_copy_tcs_inputs(struct lp_build_tgsi_context *bld_base) lds_ptr); ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, buffer_addr, - buffer_offset, 0, 1, 0); + buffer_offset, 0, 1, 0, true, false); } } @@ -2527,18 +2527,18 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base, ac_build_buffer_store_dword(&ctx->ac, buffer, lp_build_const_int32(gallivm, 0x80000000), 1, lp_build_const_int32(gallivm, 0), tf_base, - 0, 1, 0); + 0, 1, 0, true, false); lp_build_endif(&inner_if_ctx); /* Store the tessellation factors. */ ac_build_buffer_store_dword(&ctx->ac, buffer, vec0, MIN2(stride, 4), byteoffset, tf_base, - 4, 1, 0); + 4, 1, 0, true, false); if (vec1) ac_build_buffer_store_dword(&ctx->ac, buffer, vec1, stride - 4, byteoffset, tf_base, - 20, 1, 0); + 20, 1, 0, true, false); /* Store the tess factors into the offchip buffer if TES reads them. 
*/ if (shader->key.part.tcs.epilog.tes_reads_tess_factors) { @@ -2560,7 +2560,7 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base, ac_build_buffer_store_dword(&ctx->ac, buf, outer_vec, outer_comps, tf_outer_offset, - base, 0, 1, 0); + base, 0, 1, 0, true, false); if (inner_comps) { param_inner = si_shader_io_get_unique_index( TGSI_SEMANTIC_TESSINNER, 0); @@ -2571,7 +2571,7 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base, lp_build_gather_values(gallivm, inner, inner_comps); ac_build_buffer_store_dword(&ctx->ac, buf, inner_vec, inner_comps, tf_inner_offset, - base, 0, 1, 0); + base, 0, 1, 0, true, false); } } @@ -2695,7 +2695,7 @@ static void si_llvm_emit_es_epilogue(struct lp_build_tgsi_context *bld_base) ctx->esgs_ring, out_val, 1, NULL, soffset, (4 * param_index + chan) * 4, - 1, 1); + 1, 1, true, true); } } } @@ -5063,7 +5063,7 @@ static void si_llvm_emit_vertex( ctx->gsvs_ring[stream], out_val, 1, voffset, soffset, 0, - 1, 1); + 1, 1, true, true); } } -- 2.7.4