From b739376cffec19870804b1ebd4bef3c2f654e943 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Thu, 11 Apr 2013 02:22:06 -0700 Subject: [PATCH] gallivm/gs: fix the end primitive calls The issue with SOA execution and the end_primitive opcode is that it can be executed both when we haven't emitted any vertices, in which case we don't want to emit an empty primitive, and when the execution mask is zero and the execution should be skipped. We handled only the latter of those conditions. Now we're combining the execution mask with a mask created from emitted vertices to handle both cases. As a result, we don't need the pending_end_primitive flag, which was broken because it was static and could be affected by both of the above-mentioned conditions at run-time. Signed-off-by: Zack Rusin Reviewed-by: Jose Fonseca --- src/gallium/auxiliary/gallivm/lp_bld_tgsi.h | 6 --- src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 71 +++++++++++++++++-------- 2 files changed, 50 insertions(+), 27 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h index 23ccacc..f1b1d79 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h @@ -396,12 +396,6 @@ struct lp_build_tgsi_soa_context LLVMValueRef emitted_prims_vec_ptr; LLVMValueRef total_emitted_vertices_vec_ptr; LLVMValueRef emitted_vertices_vec_ptr; - /* if a shader doesn't have ENDPRIM instruction but it has - * a number of EMIT instructions it means the END instruction - * implicitly invokes ENDPRIM. 
handle this via a flag here - * in the future maybe we can enforce TGSI to always have - * an explicit ENDPRIM */ - boolean pending_end_primitive; LLVMValueRef consts_ptr; const LLVMValueRef *pos; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index 7396536..9822f72 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -65,6 +65,7 @@ #include "lp_bld_sample.h" #include "lp_bld_struct.h" +#define DUMP_GS_EMITS 0 static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld) { @@ -2278,27 +2279,25 @@ emit_vertex( increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr, masked_ones); #if DUMP_GS_EMITS - lp_build_print_value(bld->bld_base.base.gallivm, " +++ emit vertex masked ones = ", + lp_build_print_value(bld->bld_base.base.gallivm, + " +++ emit vertex masked ones = ", masked_ones); - lp_build_print_value(bld->bld_base.base.gallivm, " +++ emit vertex emitted = ", + lp_build_print_value(bld->bld_base.base.gallivm, + " +++ emit vertex emitted = ", total_emitted_vertices_vec); #endif - bld->pending_end_primitive = TRUE; } } static void -end_primitive( - const struct lp_build_tgsi_action * action, - struct lp_build_tgsi_context * bld_base, - struct lp_build_emit_data * emit_data) +end_primitive_masked(struct lp_build_tgsi_context * bld_base, + LLVMValueRef masked_ones) { struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; if (bld->gs_iface->end_primitive) { - LLVMValueRef masked_ones = mask_to_one_vec(bld_base); LLVMValueRef emitted_vertices_vec = LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, ""); LLVMValueRef emitted_prims_vec = @@ -2309,23 +2308,55 @@ end_primitive( emitted_prims_vec); #if DUMP_GS_EMITS - lp_build_print_value(bld->bld_base.base.gallivm, " +++ end prim masked ones = ", + 
lp_build_print_value(bld->bld_base.base.gallivm, + " +++ end prim masked ones = ", masked_ones); - lp_build_print_value(bld->bld_base.base.gallivm, " +++ end prim emitted verts1 = ", + lp_build_print_value(bld->bld_base.base.gallivm, + " +++ end prim emitted verts1 = ", emitted_vertices_vec); - lp_build_print_value(bld->bld_base.base.gallivm, " +++ end prim emitted prims1 = ", - LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "")); + lp_build_print_value(bld->bld_base.base.gallivm, + " +++ end prim emitted prims1 = ", + LLVMBuildLoad(builder, + bld->emitted_prims_vec_ptr, "")); #endif increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr, masked_ones); clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr, masked_ones); #if DUMP_GS_EMITS - lp_build_print_value(bld->bld_base.base.gallivm, " +++ end prim emitted verts2 = ", - LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, "")); + lp_build_print_value(bld->bld_base.base.gallivm, + " +++ end prim emitted verts2 = ", + LLVMBuildLoad(builder, + bld->emitted_vertices_vec_ptr, "")); #endif + } - bld->pending_end_primitive = FALSE; +} + +static void +end_primitive( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); + + if (bld->gs_iface->end_primitive) { + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + LLVMValueRef masked_ones = mask_to_one_vec(bld_base); + struct lp_build_context *uint_bld = &bld_base->uint_bld; + LLVMValueRef emitted_verts = LLVMBuildLoad( + builder, bld->emitted_vertices_vec_ptr, ""); + LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL, + emitted_verts, + uint_bld->zero); + /* We need to combine the current execution mask with the mask + telling us which, if any, execution slots actually have + unemitted primitives, this way we make sure that end_primitives + executes only on the paths that 
have unflushed vertices */ + masked_ones = LLVMBuildAnd(builder, masked_ones, emitted_mask, ""); + + end_primitive_masked(bld_base, masked_ones); } } @@ -2670,11 +2701,10 @@ static void emit_epilogue(struct lp_build_tgsi_context * bld_base) if (bld->gs_iface) { LLVMValueRef total_emitted_vertices_vec; LLVMValueRef emitted_prims_vec; - /* flush the accumulated vertices as a primitive */ - if (bld->pending_end_primitive) { - end_primitive(NULL, bld_base, NULL); - bld->pending_end_primitive = FALSE; - } + /* implicit end_primitives, needed in case there are any unflushed + vertices in the cache */ + end_primitive(NULL, bld_base, NULL); + total_emitted_vertices_vec = LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, ""); emitted_prims_vec = @@ -2785,7 +2815,6 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm, /* inputs are always indirect with gs */ bld.indirect_files |= (1 << TGSI_FILE_INPUT); bld.gs_iface = gs_iface; - bld.pending_end_primitive = FALSE; bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input; bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex; bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive; -- 2.7.4