From: Ilia Mirkin Date: Fri, 21 Aug 2015 01:55:52 +0000 (-0400) Subject: glsl: provide the option of using BFE for unpack builtin lowering X-Git-Tag: upstream/17.1.0~16493 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=275c5810ca7e38560b2a77281e7a0498c50126f8;p=platform%2Fupstream%2Fmesa.git glsl: provide the option of using BFE for unpack builtin lowering This greatly improves generated code, especially for the snorm variants, since it is able to get rid of the lshift/rshift for sext, as well as replacing each shift + mask with a single op. Signed-off-by: Ilia Mirkin Reviewed-by: Matt Turner --- diff --git a/src/glsl/ir_builder.cpp b/src/glsl/ir_builder.cpp index cd03859c..c9cf124 100644 --- a/src/glsl/ir_builder.cpp +++ b/src/glsl/ir_builder.cpp @@ -567,6 +567,12 @@ csel(operand a, operand b, operand c) } ir_expression * +bitfield_extract(operand a, operand b, operand c) +{ + return expr(ir_triop_bitfield_extract, a, b, c); +} + +ir_expression * bitfield_insert(operand a, operand b, operand c, operand d) { void *mem_ctx = ralloc_parent(a.val); diff --git a/src/glsl/ir_builder.h b/src/glsl/ir_builder.h index f76453f..b483ebf 100644 --- a/src/glsl/ir_builder.h +++ b/src/glsl/ir_builder.h @@ -200,6 +200,7 @@ ir_expression *interpolate_at_sample(operand a, operand b); ir_expression *fma(operand a, operand b, operand c); ir_expression *lrp(operand x, operand y, operand a); ir_expression *csel(operand a, operand b, operand c); +ir_expression *bitfield_extract(operand a, operand b, operand c); ir_expression *bitfield_insert(operand a, operand b, operand c, operand d); ir_swizzle *swizzle(operand a, int swizzle, int components); diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h index b955874..265b223 100644 --- a/src/glsl/ir_optimization.h +++ b/src/glsl/ir_optimization.h @@ -69,6 +69,7 @@ enum lower_packing_builtins_op { LOWER_UNPACK_UNORM_4x8 = 0x0800, LOWER_PACK_USE_BFI = 0x1000, + LOWER_PACK_USE_BFE = 0x2000, }; bool 
do_common_optimization(exec_list *ir, bool linked, diff --git a/src/glsl/lower_packing_builtins.cpp b/src/glsl/lower_packing_builtins.cpp index 1d76ebf..c8bf68b 100644 --- a/src/glsl/lower_packing_builtins.cpp +++ b/src/glsl/lower_packing_builtins.cpp @@ -119,6 +119,7 @@ public: break; case LOWER_PACK_UNPACK_NONE: case LOWER_PACK_USE_BFI: + case LOWER_PACK_USE_BFE: assert(!"not reached"); break; } @@ -306,6 +307,39 @@ private: } /** + * \brief Unpack a uint32 into two int16's. + * + * Specifically each 16-bit value is sign-extended to the full width of an + * int32 on return. + */ + ir_rvalue * + unpack_uint_to_ivec2(ir_rvalue *uint_rval) + { + assert(uint_rval->type == glsl_type::uint_type); + + if (!(op_mask & LOWER_PACK_USE_BFE)) { + return rshift(lshift(u2i(unpack_uint_to_uvec2(uint_rval)), + constant(16u)), + constant(16u)); + } + + ir_variable *i = factory.make_temp(glsl_type::int_type, + "tmp_unpack_uint_to_ivec2_i"); + factory.emit(assign(i, u2i(uint_rval))); + + /* ivec2 i2; */ + ir_variable *i2 = factory.make_temp(glsl_type::ivec2_type, + "tmp_unpack_uint_to_ivec2_i2"); + + factory.emit(assign(i2, bitfield_extract(i, constant(0), constant(16)), + WRITEMASK_X)); + factory.emit(assign(i2, bitfield_extract(i, constant(16), constant(16)), + WRITEMASK_Y)); + + return deref(i2).val; + } + + /** * \brief Unpack a uint32 into four uint8's. 
* * Interpret the given uint32 as a uint8 4-tuple where the uint32's least @@ -329,13 +363,23 @@ private: /* u4.x = u & 0xffu; */ factory.emit(assign(u4, bit_and(u, constant(0xffu)), WRITEMASK_X)); - /* u4.y = (u >> 8u) & 0xffu; */ - factory.emit(assign(u4, bit_and(rshift(u, constant(8u)), - constant(0xffu)), WRITEMASK_Y)); - - /* u4.z = (u >> 16u) & 0xffu; */ - factory.emit(assign(u4, bit_and(rshift(u, constant(16u)), - constant(0xffu)), WRITEMASK_Z)); + if (op_mask & LOWER_PACK_USE_BFE) { + /* u4.y = bitfield_extract(u, 8, 8); */ + factory.emit(assign(u4, bitfield_extract(u, constant(8), constant(8)), + WRITEMASK_Y)); + + /* u4.z = bitfield_extract(u, 16, 8); */ + factory.emit(assign(u4, bitfield_extract(u, constant(16), constant(8)), + WRITEMASK_Z)); + } else { + /* u4.y = (u >> 8u) & 0xffu; */ + factory.emit(assign(u4, bit_and(rshift(u, constant(8u)), + constant(0xffu)), WRITEMASK_Y)); + + /* u4.z = (u >> 16u) & 0xffu; */ + factory.emit(assign(u4, bit_and(rshift(u, constant(16u)), + constant(0xffu)), WRITEMASK_Z)); + } /* u4.w = (u >> 24u) */ factory.emit(assign(u4, rshift(u, constant(24u)), WRITEMASK_W)); @@ -344,6 +388,43 @@ private: } /** + * \brief Unpack a uint32 into four int8's. + * + * Specifically each 8-bit value is sign-extended to the full width of an + * int32 on return. 
+ */ + ir_rvalue * + unpack_uint_to_ivec4(ir_rvalue *uint_rval) + { + assert(uint_rval->type == glsl_type::uint_type); + + if (!(op_mask & LOWER_PACK_USE_BFE)) { + return rshift(lshift(u2i(unpack_uint_to_uvec4(uint_rval)), + constant(24u)), + constant(24u)); + } + + ir_variable *i = factory.make_temp(glsl_type::int_type, + "tmp_unpack_uint_to_ivec4_i"); + factory.emit(assign(i, u2i(uint_rval))); + + /* ivec4 i4; */ + ir_variable *i4 = factory.make_temp(glsl_type::ivec4_type, + "tmp_unpack_uint_to_ivec4_i4"); + + factory.emit(assign(i4, bitfield_extract(i, constant(0), constant(8)), + WRITEMASK_X)); + factory.emit(assign(i4, bitfield_extract(i, constant(8), constant(8)), + WRITEMASK_Y)); + factory.emit(assign(i4, bitfield_extract(i, constant(16), constant(8)), + WRITEMASK_Z)); + factory.emit(assign(i4, bitfield_extract(i, constant(24), constant(8)), + WRITEMASK_W)); + + return deref(i4).val; + } + + /** * \brief Lower a packSnorm2x16 expression. * * \param vec2_rval is packSnorm2x16's input @@ -489,9 +570,7 @@ private: assert(uint_rval->type == glsl_type::uint_type); ir_rvalue *result = - clamp(div(i2f(rshift(lshift(u2i(unpack_uint_to_uvec2(uint_rval)), - constant(16)), - constant(16u))), + clamp(div(i2f(unpack_uint_to_ivec2(uint_rval)), constant(32767.0f)), constant(-1.0f), constant(1.0f)); @@ -548,9 +627,7 @@ private: assert(uint_rval->type == glsl_type::uint_type); ir_rvalue *result = - clamp(div(i2f(rshift(lshift(u2i(unpack_uint_to_uvec4(uint_rval)), - constant(24u)), - constant(24u))), + clamp(div(i2f(unpack_uint_to_ivec4(uint_rval)), constant(127.0f)), constant(-1.0f), constant(1.0f)); diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 7a8c4e1..95a25c1 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -6020,7 +6020,8 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) LOWER_UNPACK_HALF_2x16; if 
(ctx->Extensions.ARB_gpu_shader5) - lower_inst |= LOWER_PACK_USE_BFI; + lower_inst |= LOWER_PACK_USE_BFI | + LOWER_PACK_USE_BFE; lower_packing_builtins(ir, lower_inst); }