From 70919f30c1fabe6e171c689f06913ff43e61c0bf Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 13 Dec 2021 01:14:44 -0500 Subject: [PATCH] radeonsi: change si_shader_output_values::vertex_stream to a bitmask to match si_shader_info. Reviewed-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/gallium/drivers/radeonsi/gfx10_shader_ngg.c | 9 ++++----- src/gallium/drivers/radeonsi/si_shader_internal.h | 2 +- src/gallium/drivers/radeonsi/si_shader_llvm_gs.c | 7 ++----- src/gallium/drivers/radeonsi/si_shader_llvm_vs.c | 14 ++++++++------ 4 files changed, 15 insertions(+), 17 deletions(-) diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c index 0dcb62d..6c56722 100644 --- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c +++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c @@ -244,7 +244,7 @@ static void build_streamout_vertex(struct si_shader_context *ctx, LLVMValueRef * for (unsigned comp = 0; comp < 4; comp++) { tmp = ac_build_gep0(&ctx->ac, vertexptr, LLVMConstInt(ctx->ac.i32, 4 * reg + comp, false)); out.values[comp] = LLVMBuildLoad(builder, tmp, ""); - out.vertex_stream[comp] = (info->output_streams[reg] >> (2 * comp)) & 3; + out.vertex_streams = info->output_streams[reg]; } si_llvm_streamout_store_output(ctx, so_buffer, offset, &so->output[i], &out); @@ -1425,7 +1425,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi) outputs[i].semantic = info->output_semantic[i]; for (unsigned j = 0; j < 4; j++) { - outputs[i].vertex_stream[j] = (info->output_streams[i] >> (2 * j)) & 3; + outputs[i].vertex_streams = info->output_streams[i]; /* TODO: we may store more outputs than streamout needs, * but streamout performance isn't that important. @@ -1613,6 +1613,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi) if (ctx->shader->key.ge.mono.u.vs_export_prim_id) { outputs[i].semantic = VARYING_SLOT_PRIMITIVE_ID; + outputs[i].vertex_streams = 0; if (ctx->stage == MESA_SHADER_VERTEX) { /* Wait for GS stores to finish. */ @@ -1629,8 +1630,6 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi) outputs[i].values[0] = ac_to_float(&ctx->ac, outputs[i].values[0]); for (unsigned j = 1; j < 4; j++) outputs[i].values[j] = LLVMGetUndef(ctx->ac.f32); - - memset(outputs[i].vertex_stream, 0, sizeof(outputs[i].vertex_stream)); i++; } @@ -2148,7 +2147,7 @@ void gfx10_ngg_gs_emit_epilogue(struct si_shader_context *ctx) tmp = ngg_gs_get_emit_output_ptr(ctx, vertexptr, out_idx); tmp = LLVMBuildLoad(builder, tmp, ""); outputs[i].values[j] = ac_to_float(&ctx->ac, tmp); - outputs[i].vertex_stream[j] = (info->output_streams[i] >> (2 * j)) & 3; + outputs[i].vertex_streams = info->output_streams[i]; } } diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index fc923df..70f23d8 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -32,7 +32,7 @@ struct pipe_debug_callback; struct si_shader_output_values { LLVMValueRef values[4]; - ubyte vertex_stream[4]; + ubyte vertex_streams; ubyte semantic; }; diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c index 44ac3a5..5a980a59 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c @@ -469,10 +469,7 @@ struct si_shader *si_generate_gs_copy_shader(struct si_screen *sscreen, /* Fill in output information. */ for (i = 0; i < gsinfo->num_outputs; ++i) { outputs[i].semantic = gsinfo->output_semantic[i]; - - for (int chan = 0; chan < 4; chan++) { - outputs[i].vertex_stream[chan] = (gsinfo->output_streams[i] >> (2 * chan)) & 3; - } + outputs[i].vertex_streams = gsinfo->output_streams[i]; } LLVMBasicBlockRef end_bb; @@ -500,7 +497,7 @@ struct si_shader *si_generate_gs_copy_shader(struct si_screen *sscreen, for (i = 0; i < gsinfo->num_outputs; ++i) { for (unsigned chan = 0; chan < 4; chan++) { if (!(gsinfo->output_usagemask[i] & (1 << chan)) || - outputs[i].vertex_stream[chan] != stream) { + ((outputs[i].vertex_streams >> (chan * 2)) & 0x3) != stream) { outputs[i].values[chan] = LLVMGetUndef(ctx.ac.f32); continue; } diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c index 9b77c8b..e3a5a0e 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c @@ -284,7 +284,7 @@ void si_llvm_streamout_store_output(struct si_shader_context *ctx, LLVMValueRef /* Load the output as int. */ for (int j = 0; j < num_comps; j++) { - assert(stream_out->stream == shader_out->vertex_stream[start + j]); + assert(stream_out->stream == ((shader_out->vertex_streams >> ((start + j) * 2)) & 0x3)); out[j] = ac_to_integer(&ctx->ac, shader_out->values[start + j]); } @@ -452,8 +452,11 @@ static void si_prepare_param_exports(struct si_shader_context *ctx, for (unsigned i = 0; i < noutput; i++) { unsigned semantic = outputs[i].semantic; - if (outputs[i].vertex_stream[0] != 0 && outputs[i].vertex_stream[1] != 0 && - outputs[i].vertex_stream[2] != 0 && outputs[i].vertex_stream[3] != 0) + /* Skip if no channel writes to stream 0. */ + if (outputs[i].vertex_streams & 0x03 && + outputs[i].vertex_streams & 0x0c && + outputs[i].vertex_streams & 0x30 && + outputs[i].vertex_streams & 0xc0) continue; switch (semantic) { @@ -768,7 +771,7 @@ void si_llvm_emit_vs_epilogue(struct ac_shader_abi *abi) for (j = 0; j < 4; j++) { outputs[i].values[j] = LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + j], ""); - outputs[i].vertex_stream[j] = (info->output_streams[i] >> (2 * j)) & 3; + outputs[i].vertex_streams = info->output_streams[i]; } } @@ -778,11 +781,10 @@ void si_llvm_emit_vs_epilogue(struct ac_shader_abi *abi) /* Export PrimitiveID. */ if (ctx->shader->key.ge.mono.u.vs_export_prim_id) { outputs[i].semantic = VARYING_SLOT_PRIMITIVE_ID; + outputs[i].vertex_streams = 0; outputs[i].values[0] = ac_to_float(&ctx->ac, si_get_primitive_id(ctx, 0)); for (j = 1; j < 4; j++) outputs[i].values[j] = LLVMConstReal(ctx->ac.f32, 0); - - memset(outputs[i].vertex_stream, 0, sizeof(outputs[i].vertex_stream)); i++; } -- 2.7.4