From 46b5bd6cadd13f47c10aafe9194c90234db91a2a Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 11 Jan 2010 12:59:36 -0700 Subject: [PATCH] llvmpipe: do the all-in test on the scalar c0 instead of vector c0 This still isn't faster, but committing it for posterity. --- src/gallium/drivers/llvmpipe/lp_state_fs.c | 45 ++++++++++++++---------------- 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 7ce7202..6816db4 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -217,10 +217,7 @@ generate_tri_edge_mask(LLVMBuilderRef builder, struct lp_build_if_state ifctx; struct lp_type i32_type; LLVMTypeRef i32vec4_type, mask_type; - LLVMValueRef c0_vec, c1_vec, c2_vec; - - LLVMValueRef int_min_vec; LLVMValueRef not_draw_all; LLVMValueRef in_out_mask; @@ -238,21 +235,6 @@ generate_tri_edge_mask(LLVMBuilderRef builder, mask_type = LLVMIntType(32 * 4); - /* int_min_vec = {INT_MIN, INT_MIN, INT_MIN, INT_MIN} */ - int_min_vec = lp_build_int_const_scalar(i32_type, INT_MIN); - - - /* c0_vec = {c0, c0, c0, c0} - * Note that we emit this code four times but LLVM optimizes away - * three instances of it. - */ - c0_vec = lp_build_broadcast(builder, i32vec4_type, c0); - c1_vec = lp_build_broadcast(builder, i32vec4_type, c1); - c2_vec = lp_build_broadcast(builder, i32vec4_type, c2); - lp_build_name(c0_vec, "edgeconst0vec"); - lp_build_name(c1_vec, "edgeconst1vec"); - lp_build_name(c2_vec, "edgeconst2vec"); - /* * Use a conditional here to do detailed pixel in/out testing. * We only have to do this if c0 != {INT_MIN, INT_MIN, INT_MIN, INT_MIN} @@ -260,17 +242,19 @@ generate_tri_edge_mask(LLVMBuilderRef builder, flow = lp_build_flow_create(builder); lp_build_flow_scope_begin(flow); -#define OPTIMIZE_IN_OUT_TEST 0 + { +#define OPTIMIZE_IN_OUT_TEST 1 #if OPTIMIZE_IN_OUT_TEST - in_out_mask = lp_build_compare(builder, i32_type, PIPE_FUNC_EQUAL, c0_vec, int_min_vec); - lp_build_name(in_out_mask, "inoutmaskvec"); not_draw_all = LLVMBuildICmp(builder, - LLVMIntEQ, - LLVMBuildBitCast(builder, in_out_mask, mask_type, ""), - LLVMConstNull(mask_type), + LLVMIntNE, + c0, + LLVMConstInt(LLVMInt32Type(), INT_MIN, 0), ""); + in_out_mask = lp_build_int_const_scalar(i32_type, ~0); + + lp_build_flow_scope_declare(flow, &in_out_mask); lp_build_if(&ifctx, flow, builder, not_draw_all); @@ -280,6 +264,18 @@ generate_tri_edge_mask(LLVMBuilderRef builder, LLVMValueRef m0_vec, m1_vec, m2_vec; LLVMValueRef index, m; + /* c0_vec = {c0, c0, c0, c0} + * Note that we emit this code four times but LLVM optimizes away + * three instances of it. + */ + c0_vec = lp_build_broadcast(builder, i32vec4_type, c0); + c1_vec = lp_build_broadcast(builder, i32vec4_type, c1); + c2_vec = lp_build_broadcast(builder, i32vec4_type, c2); + lp_build_name(c0_vec, "edgeconst0vec"); + lp_build_name(c1_vec, "edgeconst1vec"); + lp_build_name(c2_vec, "edgeconst2vec"); + + index = LLVMConstInt(LLVMInt32Type(), i, 0); step0_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step0_ptr, &index, 1, ""), ""); step1_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step1_ptr, &index, 1, ""), ""); @@ -305,6 +301,7 @@ generate_tri_edge_mask(LLVMBuilderRef builder, lp_build_endif(&ifctx); #endif + } lp_build_flow_scope_end(flow); lp_build_flow_destroy(flow); -- 2.7.4