From 7f6643ffd07702616b12c9f2a5c037eb78f09afc Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 23 Sep 2021 12:13:08 -0400 Subject: [PATCH] radeonsi: make get_thread_id_in_tg non-static for future work Reviewed-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/gallium/drivers/radeonsi/gfx10_shader_ngg.c | 22 +++++++++++----------- src/gallium/drivers/radeonsi/si_shader_internal.h | 1 + 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c index 9d2bb1a..4005642 100644 --- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c +++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c @@ -38,7 +38,7 @@ static LLVMValueRef get_tgsize(struct si_shader_context *ctx) return si_unpack_param(ctx, ctx->args.merged_wave_info, 28, 4); } -static LLVMValueRef get_thread_id_in_tg(struct si_shader_context *ctx) +LLVMValueRef gfx10_get_thread_id_in_tg(struct si_shader_context *ctx) { LLVMBuilderRef builder = ctx->ac.builder; LLVMValueRef tmp; @@ -277,7 +277,7 @@ static void build_streamout(struct si_shader_context *ctx, struct ngg_streamout struct pipe_stream_output_info *so = &ctx->shader->selector->so; LLVMBuilderRef builder = ctx->ac.builder; LLVMValueRef buf_ptr = ac_get_arg(&ctx->ac, ctx->internal_bindings); - LLVMValueRef tid = get_thread_id_in_tg(ctx); + LLVMValueRef tid = gfx10_get_thread_id_in_tg(ctx); LLVMValueRef tmp, tmp2; LLVMValueRef i32_2 = LLVMConstInt(ctx->ac.i32, 2, false); LLVMValueRef i32_4 = LLVMConstInt(ctx->ac.i32, 4, false); @@ -929,7 +929,7 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi) assert(sel->info.stage == MESA_SHADER_VERTEX || (sel->info.stage == MESA_SHADER_TESS_EVAL && !shader->key.ge.as_es)); - LLVMValueRef es_vtxptr = ngg_nogs_vertex_ptr(ctx, get_thread_id_in_tg(ctx)); + LLVMValueRef es_vtxptr = ngg_nogs_vertex_ptr(ctx, gfx10_get_thread_id_in_tg(ctx)); LLVMValueRef packed_data = ctx->ac.i32_0; LLVMValueRef position[4] = {}; unsigned pos_index = 0; @@ -1056,7 +1056,7 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi) for (unsigned i = 0; i < num_vertices; i++) gs_vtxptr[i] = ngg_nogs_vertex_ptr(ctx, vtxindex[i]); - es_vtxptr = ngg_nogs_vertex_ptr(ctx, get_thread_id_in_tg(ctx)); + es_vtxptr = ngg_nogs_vertex_ptr(ctx, gfx10_get_thread_id_in_tg(ctx)); /* Adding these optimization barriers improves the generated code as follows. Crazy right? * @@ -1419,7 +1419,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi) LLVMValueRef vertex_ptr = NULL; if (sel->so.num_outputs || gfx10_ngg_writes_user_edgeflags(ctx->shader)) - vertex_ptr = ngg_nogs_vertex_ptr(ctx, get_thread_id_in_tg(ctx)); + vertex_ptr = ngg_nogs_vertex_ptr(ctx, gfx10_get_thread_id_in_tg(ctx)); for (unsigned i = 0; i < info->num_outputs; i++) { outputs[i].semantic = info->output_semantic[i]; @@ -1596,7 +1596,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi) */ if (info->output_semantic[i] == VARYING_SLOT_POS && ctx->shader->key.ge.opt.ngg_culling) { - vertex_ptr = ngg_nogs_vertex_ptr(ctx, get_thread_id_in_tg(ctx)); + vertex_ptr = ngg_nogs_vertex_ptr(ctx, gfx10_get_thread_id_in_tg(ctx)); for (unsigned j = 0; j < 4; j++) { tmp = LLVMConstInt(ctx->ac.i32, lds_pos_x + j, 0); @@ -1618,7 +1618,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi) /* Wait for GS stores to finish. */ ac_build_s_barrier(&ctx->ac); - tmp = ngg_nogs_vertex_ptr(ctx, get_thread_id_in_tg(ctx)); + tmp = ngg_nogs_vertex_ptr(ctx, gfx10_get_thread_id_in_tg(ctx)); tmp = ac_build_gep0(&ctx->ac, tmp, ctx->ac.i32_0); outputs[i].values[0] = LLVMBuildLoad(builder, tmp, ""); } else { @@ -1758,7 +1758,7 @@ void gfx10_ngg_gs_emit_vertex(struct si_shader_context *ctx, unsigned stream, LL ac_build_ifcc(&ctx->ac, can_emit, 9001); - const LLVMValueRef vertexptr = ngg_gs_emit_vertex_ptr(ctx, get_thread_id_in_tg(ctx), vertexidx); + const LLVMValueRef vertexptr = ngg_gs_emit_vertex_ptr(ctx, gfx10_get_thread_id_in_tg(ctx), vertexidx); unsigned out_idx = 0; for (unsigned i = 0; i < info->num_outputs; i++) { for (unsigned chan = 0; chan < 4; chan++, out_idx++) { @@ -1816,7 +1816,7 @@ void gfx10_ngg_gs_emit_prologue(struct si_shader_context *ctx) */ LLVMBuilderRef builder = ctx->ac.builder; LLVMValueRef scratchptr = ctx->gs_ngg_scratch; - LLVMValueRef tid = get_thread_id_in_tg(ctx); + LLVMValueRef tid = gfx10_get_thread_id_in_tg(ctx); LLVMValueRef tmp; tmp = LLVMBuildICmp(builder, LLVMIntULT, tid, LLVMConstInt(ctx->ac.i32, 4, false), ""); @@ -1850,7 +1850,7 @@ void gfx10_ngg_gs_emit_epilogue(struct si_shader_context *ctx) if (!info->num_stream_output_components[stream]) continue; - const LLVMValueRef gsthread = get_thread_id_in_tg(ctx); + const LLVMValueRef gsthread = gfx10_get_thread_id_in_tg(ctx); ac_build_bgnloop(&ctx->ac, 5100); @@ -1893,7 +1893,7 @@ void gfx10_ngg_gs_emit_epilogue(struct si_shader_context *ctx) ac_build_s_barrier(&ctx->ac); - const LLVMValueRef tid = get_thread_id_in_tg(ctx); + const LLVMValueRef tid = gfx10_get_thread_id_in_tg(ctx); LLVMValueRef num_emit_threads = ngg_get_prim_cnt(ctx); /* Streamout */ diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index 4a7d216..e62ff52 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -185,6 +185,7 @@ void si_get_ps_epilog_key(struct si_shader *shader, union si_shader_part_key *ke void si_fix_resource_usage(struct si_screen *sscreen, struct si_shader *shader); /* gfx10_shader_ngg.c */ +LLVMValueRef gfx10_get_thread_id_in_tg(struct si_shader_context *ctx); bool gfx10_ngg_export_prim_early(struct si_shader *shader); void gfx10_ngg_build_sendmsg_gs_alloc_req(struct si_shader_context *ctx); void gfx10_ngg_build_export_prim(struct si_shader_context *ctx, LLVMValueRef user_edgeflags[3], -- 2.7.4