From 018c69ac56ee0579b5d4f138340444b326c4e6dc Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Thu, 18 Jul 2013 03:05:01 -0400 Subject: [PATCH] gallivm: export unordered/ordered cmp to a common function Only the floating point operators change; everything else is the same, so it makes sense to share the code. Signed-off-by: Zack Rusin Reviewed-by: Jose Fonseca Reviewed-by: Roland Scheidegger --- src/gallium/auxiliary/gallivm/lp_bld_logic.c | 441 ++++++++++----------------- 1 file changed, 158 insertions(+), 283 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c index 8b800cf..322d385 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c @@ -68,14 +68,17 @@ /** * Build code to compare two values 'a' and 'b' of 'type' using the given func. * \param func one of PIPE_FUNC_x + * If the ordered argument is true the function will use LLVM's ordered + * comparisons, otherwise unordered comparisons will be used. * The result values will be 0 for false or ~0 for true. */ -LLVMValueRef -lp_build_compare(struct gallivm_state *gallivm, - const struct lp_type type, - unsigned func, - LLVMValueRef a, - LLVMValueRef b) +static LLVMValueRef +lp_build_compare_ext(struct gallivm_state *gallivm, + const struct lp_type type, + unsigned func, + LLVMValueRef a, + LLVMValueRef b, + boolean ordered) { LLVMBuilderRef builder = gallivm->builder; LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type); @@ -94,153 +97,6 @@ lp_build_compare(struct gallivm_state *gallivm, if(func == PIPE_FUNC_ALWAYS) return ones; -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - /* - * There are no unsigned integer comparison instructions in SSE. 
- */ - - if (!type.floating && !type.sign && - type.width * type.length == 128 && - util_cpu_caps.has_sse2 && - (func == PIPE_FUNC_LESS || - func == PIPE_FUNC_LEQUAL || - func == PIPE_FUNC_GREATER || - func == PIPE_FUNC_GEQUAL) && - (gallivm_debug & GALLIVM_DEBUG_PERF)) { - debug_printf("%s: inefficient <%u x i%u> unsigned comparison\n", - __FUNCTION__, type.length, type.width); - } -#endif - -#if HAVE_LLVM < 0x0207 -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - if(type.width * type.length == 128) { - if(type.floating && util_cpu_caps.has_sse) { - /* float[4] comparison */ - LLVMTypeRef vec_type = lp_build_vec_type(gallivm, type); - LLVMValueRef args[3]; - unsigned cc; - boolean swap; - - swap = FALSE; - switch(func) { - case PIPE_FUNC_EQUAL: - cc = 0; - break; - case PIPE_FUNC_NOTEQUAL: - cc = 4; - break; - case PIPE_FUNC_LESS: - cc = 1; - break; - case PIPE_FUNC_LEQUAL: - cc = 2; - break; - case PIPE_FUNC_GREATER: - cc = 1; - swap = TRUE; - break; - case PIPE_FUNC_GEQUAL: - cc = 2; - swap = TRUE; - break; - default: - assert(0); - return lp_build_undef(gallivm, type); - } - - if(swap) { - args[0] = b; - args[1] = a; - } - else { - args[0] = a; - args[1] = b; - } - - args[2] = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), cc, 0); - res = lp_build_intrinsic(builder, - "llvm.x86.sse.cmp.ps", - vec_type, - args, 3); - res = LLVMBuildBitCast(builder, res, int_vec_type, ""); - return res; - } - else if(util_cpu_caps.has_sse2) { - /* int[4] comparison */ - static const struct { - unsigned swap:1; - unsigned eq:1; - unsigned gt:1; - unsigned not:1; - } table[] = { - {0, 0, 0, 1}, /* PIPE_FUNC_NEVER */ - {1, 0, 1, 0}, /* PIPE_FUNC_LESS */ - {0, 1, 0, 0}, /* PIPE_FUNC_EQUAL */ - {0, 0, 1, 1}, /* PIPE_FUNC_LEQUAL */ - {0, 0, 1, 0}, /* PIPE_FUNC_GREATER */ - {0, 1, 0, 1}, /* PIPE_FUNC_NOTEQUAL */ - {1, 0, 1, 1}, /* PIPE_FUNC_GEQUAL */ - {0, 0, 0, 0} /* PIPE_FUNC_ALWAYS */ - }; - const char *pcmpeq; - const char *pcmpgt; - LLVMValueRef args[2]; - 
LLVMValueRef res; - LLVMTypeRef vec_type = lp_build_vec_type(gallivm, type); - - switch (type.width) { - case 8: - pcmpeq = "llvm.x86.sse2.pcmpeq.b"; - pcmpgt = "llvm.x86.sse2.pcmpgt.b"; - break; - case 16: - pcmpeq = "llvm.x86.sse2.pcmpeq.w"; - pcmpgt = "llvm.x86.sse2.pcmpgt.w"; - break; - case 32: - pcmpeq = "llvm.x86.sse2.pcmpeq.d"; - pcmpgt = "llvm.x86.sse2.pcmpgt.d"; - break; - default: - assert(0); - return lp_build_undef(gallivm, type); - } - - /* There are no unsigned comparison instructions. So flip the sign bit - * so that the results match. - */ - if (table[func].gt && !type.sign) { - LLVMValueRef msb = lp_build_const_int_vec(gallivm, type, (unsigned long long)1 << (type.width - 1)); - a = LLVMBuildXor(builder, a, msb, ""); - b = LLVMBuildXor(builder, b, msb, ""); - } - - if(table[func].swap) { - args[0] = b; - args[1] = a; - } - else { - args[0] = a; - args[1] = b; - } - - if(table[func].eq) - res = lp_build_intrinsic(builder, pcmpeq, vec_type, args, 2); - else if (table[func].gt) - res = lp_build_intrinsic(builder, pcmpgt, vec_type, args, 2); - else - res = LLVMConstNull(vec_type); - - if(table[func].not) - res = LLVMBuildNot(builder, res, ""); - - return res; - } - } /* if (type.width * type.length == 128) */ -#endif -#endif /* HAVE_LLVM < 0x0207 */ - if(type.floating) { LLVMRealPredicate op; switch(func) { @@ -251,22 +107,22 @@ lp_build_compare(struct gallivm_state *gallivm, op = LLVMRealPredicateTrue; break; case PIPE_FUNC_EQUAL: - op = LLVMRealUEQ; + op = ordered ? LLVMRealOEQ : LLVMRealUEQ; break; case PIPE_FUNC_NOTEQUAL: - op = LLVMRealUNE; + op = ordered ? LLVMRealONE : LLVMRealUNE; break; case PIPE_FUNC_LESS: - op = LLVMRealULT; + op = ordered ? LLVMRealOLT : LLVMRealULT; break; case PIPE_FUNC_LEQUAL: - op = LLVMRealULE; + op = ordered ? LLVMRealOLE : LLVMRealULE; break; case PIPE_FUNC_GREATER: - op = LLVMRealUGT; + op = ordered ? LLVMRealOGT : LLVMRealUGT; break; case PIPE_FUNC_GEQUAL: - op = LLVMRealUGE; + op = ordered ? 
LLVMRealOGE : LLVMRealUGE; break; default: assert(0); @@ -367,29 +223,20 @@ lp_build_compare(struct gallivm_state *gallivm, } /** - * Build code to compare two values 'a' and 'b' using the given func. + * Build code to compare two values 'a' and 'b' of 'type' using the given func. * \param func one of PIPE_FUNC_x - * If the operands are floating point numbers, the function will use - * ordered comparison which means that it will return true if both - * operands are not a NaN and the specified condition evaluates to true. * The result values will be 0 for false or ~0 for true. */ LLVMValueRef -lp_build_cmp_ordered(struct lp_build_context *bld, - unsigned func, - LLVMValueRef a, - LLVMValueRef b) +lp_build_compare(struct gallivm_state *gallivm, + const struct lp_type type, + unsigned func, + LLVMValueRef a, + LLVMValueRef b) { - struct gallivm_state *gallivm = bld->gallivm; - const struct lp_type type = bld->type; - - - LLVMBuilderRef builder = gallivm->builder; LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type); LLVMValueRef zeros = LLVMConstNull(int_vec_type); LLVMValueRef ones = LLVMConstAllOnes(int_vec_type); - LLVMValueRef cond; - LLVMValueRef res; assert(func >= PIPE_FUNC_NEVER); assert(func <= PIPE_FUNC_ALWAYS); @@ -418,129 +265,157 @@ lp_build_cmp_ordered(struct lp_build_context *bld, __FUNCTION__, type.length, type.width); } #endif - if(type.floating) { - LLVMRealPredicate op; - switch(func) { - case PIPE_FUNC_NEVER: - op = LLVMRealPredicateFalse; - break; - case PIPE_FUNC_ALWAYS: - op = LLVMRealPredicateTrue; - break; - case PIPE_FUNC_EQUAL: - op = LLVMRealOEQ; - break; - case PIPE_FUNC_NOTEQUAL: - op = LLVMRealONE; - break; - case PIPE_FUNC_LESS: - op = LLVMRealOLT; - break; - case PIPE_FUNC_LEQUAL: - op = LLVMRealOLE; - break; - case PIPE_FUNC_GREATER: - op = LLVMRealOGT; - break; - case PIPE_FUNC_GEQUAL: - op = LLVMRealOGE; - break; - default: - assert(0); - return lp_build_undef(gallivm, type); - } -#if HAVE_LLVM >= 0x0207 - cond = 
LLVMBuildFCmp(builder, op, a, b, ""); - res = LLVMBuildSExt(builder, cond, int_vec_type, ""); -#else - if (type.length == 1) { - cond = LLVMBuildFCmp(builder, op, a, b, ""); - res = LLVMBuildSExt(builder, cond, int_vec_type, ""); - } - else { - unsigned i; +#if HAVE_LLVM < 0x0207 +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) + if(type.width * type.length == 128) { + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef cond; + LLVMValueRef res; + if(type.floating && util_cpu_caps.has_sse) { + /* float[4] comparison */ + LLVMTypeRef vec_type = lp_build_vec_type(gallivm, type); + LLVMValueRef args[3]; + unsigned cc; + boolean swap; - res = LLVMGetUndef(int_vec_type); + swap = FALSE; + switch(func) { + case PIPE_FUNC_EQUAL: + cc = 0; + break; + case PIPE_FUNC_NOTEQUAL: + cc = 4; + break; + case PIPE_FUNC_LESS: + cc = 1; + break; + case PIPE_FUNC_LEQUAL: + cc = 2; + break; + case PIPE_FUNC_GREATER: + cc = 1; + swap = TRUE; + break; + case PIPE_FUNC_GEQUAL: + cc = 2; + swap = TRUE; + break; + default: + assert(0); + return lp_build_undef(gallivm, type); + } - debug_printf("%s: warning: using slow element-wise float" - " vector comparison\n", __FUNCTION__); - for (i = 0; i < type.length; ++i) { - LLVMValueRef index = lp_build_const_int32(gallivm, i); - cond = LLVMBuildFCmp(builder, op, - LLVMBuildExtractElement(builder, a, index, ""), - LLVMBuildExtractElement(builder, b, index, ""), - ""); - cond = LLVMBuildSelect(builder, cond, - LLVMConstExtractElement(ones, index), - LLVMConstExtractElement(zeros, index), - ""); - res = LLVMBuildInsertElement(builder, res, cond, index, ""); + if(swap) { + args[0] = b; + args[1] = a; + } + else { + args[0] = a; + args[1] = b; } - } -#endif - } - else { - LLVMIntPredicate op; - switch(func) { - case PIPE_FUNC_EQUAL: - op = LLVMIntEQ; - break; - case PIPE_FUNC_NOTEQUAL: - op = LLVMIntNE; - break; - case PIPE_FUNC_LESS: - op = type.sign ? LLVMIntSLT : LLVMIntULT; - break; - case PIPE_FUNC_LEQUAL: - op = type.sign ? 
LLVMIntSLE : LLVMIntULE; - break; - case PIPE_FUNC_GREATER: - op = type.sign ? LLVMIntSGT : LLVMIntUGT; - break; - case PIPE_FUNC_GEQUAL: - op = type.sign ? LLVMIntSGE : LLVMIntUGE; - break; - default: - assert(0); - return lp_build_undef(gallivm, type); - } -#if HAVE_LLVM >= 0x0207 - cond = LLVMBuildICmp(builder, op, a, b, ""); - res = LLVMBuildSExt(builder, cond, int_vec_type, ""); -#else - if (type.length == 1) { - cond = LLVMBuildICmp(builder, op, a, b, ""); - res = LLVMBuildSExt(builder, cond, int_vec_type, ""); + args[2] = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), cc, 0); + res = lp_build_intrinsic(builder, + "llvm.x86.sse.cmp.ps", + vec_type, + args, 3); + res = LLVMBuildBitCast(builder, res, int_vec_type, ""); + return res; } - else { - unsigned i; + else if(util_cpu_caps.has_sse2) { + /* int[4] comparison */ + static const struct { + unsigned swap:1; + unsigned eq:1; + unsigned gt:1; + unsigned not:1; + } table[] = { + {0, 0, 0, 1}, /* PIPE_FUNC_NEVER */ + {1, 0, 1, 0}, /* PIPE_FUNC_LESS */ + {0, 1, 0, 0}, /* PIPE_FUNC_EQUAL */ + {0, 0, 1, 1}, /* PIPE_FUNC_LEQUAL */ + {0, 0, 1, 0}, /* PIPE_FUNC_GREATER */ + {0, 1, 0, 1}, /* PIPE_FUNC_NOTEQUAL */ + {1, 0, 1, 1}, /* PIPE_FUNC_GEQUAL */ + {0, 0, 0, 0} /* PIPE_FUNC_ALWAYS */ + }; + const char *pcmpeq; + const char *pcmpgt; + LLVMValueRef args[2]; + LLVMValueRef res; + LLVMTypeRef vec_type = lp_build_vec_type(gallivm, type); - res = LLVMGetUndef(int_vec_type); + switch (type.width) { + case 8: + pcmpeq = "llvm.x86.sse2.pcmpeq.b"; + pcmpgt = "llvm.x86.sse2.pcmpgt.b"; + break; + case 16: + pcmpeq = "llvm.x86.sse2.pcmpeq.w"; + pcmpgt = "llvm.x86.sse2.pcmpgt.w"; + break; + case 32: + pcmpeq = "llvm.x86.sse2.pcmpeq.d"; + pcmpgt = "llvm.x86.sse2.pcmpgt.d"; + break; + default: + assert(0); + return lp_build_undef(gallivm, type); + } - if (gallivm_debug & GALLIVM_DEBUG_PERF) { - debug_printf("%s: using slow element-wise int" - " vector comparison\n", __FUNCTION__); + /* There are no unsigned comparison 
instructions. So flip the sign bit + * so that the results match. + */ + if (table[func].gt && !type.sign) { + LLVMValueRef msb = lp_build_const_int_vec(gallivm, type, (unsigned long long)1 << (type.width - 1)); + a = LLVMBuildXor(builder, a, msb, ""); + b = LLVMBuildXor(builder, b, msb, ""); } - for(i = 0; i < type.length; ++i) { - LLVMValueRef index = lp_build_const_int32(gallivm, i); - cond = LLVMBuildICmp(builder, op, - LLVMBuildExtractElement(builder, a, index, ""), - LLVMBuildExtractElement(builder, b, index, ""), - ""); - cond = LLVMBuildSelect(builder, cond, - LLVMConstExtractElement(ones, index), - LLVMConstExtractElement(zeros, index), - ""); - res = LLVMBuildInsertElement(builder, res, cond, index, ""); + if(table[func].swap) { + args[0] = b; + args[1] = a; } + else { + args[0] = a; + args[1] = b; + } + + if(table[func].eq) + res = lp_build_intrinsic(builder, pcmpeq, vec_type, args, 2); + else if (table[func].gt) + res = lp_build_intrinsic(builder, pcmpgt, vec_type, args, 2); + else + res = LLVMConstNull(vec_type); + + if(table[func].not) + res = LLVMBuildNot(builder, res, ""); + + return res; } + } /* if (type.width * type.length == 128) */ #endif - } +#endif /* HAVE_LLVM < 0x0207 */ - return res; + return lp_build_compare_ext(gallivm, type, func, a, b, FALSE); +} + +/** + * Build code to compare two values 'a' and 'b' using the given func. + * \param func one of PIPE_FUNC_x + * If the operands are floating point numbers, the function will use + * ordered comparison which means that it will return true if both + * operands are not a NaN and the specified condition evaluates to true. + * The result values will be 0 for false or ~0 for true. + */ +LLVMValueRef +lp_build_cmp_ordered(struct lp_build_context *bld, + unsigned func, + LLVMValueRef a, + LLVMValueRef b) +{ + return lp_build_compare_ext(bld->gallivm, bld->type, func, a, b, TRUE); } /** -- 2.7.4