From e79f8a4926dc79e32531f705b2db3bbd2d3984f4 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke
Date: Tue, 19 Jan 2016 17:40:58 -0800
Subject: [PATCH] nir: Lower ldexp to arithmetic.

This is a port of Matt's GLSL IR lowering pass to NIR. It's required
because we translate SPIR-V directly to NIR, bypassing GLSL IR.

I haven't introduced a lower_ldexp flag, as I believe all current NIR
consumers would set the flag. i965 wants this, vc4 doesn't implement
this feature, and st_glsl_to_tgsi currently lowers ldexp
unconditionally anyway.

Signed-off-by: Kenneth Graunke
---
Review note: the biased exponent is now extracted with fabs instead of
iabs. iabs negates the whole bit pattern of a negative float (two's
complement) and corrupts the exponent field; fabs only clears the sign
bit, matching the abs(x) in the docstring. The post-image blob id on the
index line below predates this change.

 src/glsl/nir/nir_opt_algebraic.py | 63 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 63 insertions(+)

diff --git a/src/glsl/nir/nir_opt_algebraic.py b/src/glsl/nir/nir_opt_algebraic.py
index 1e80ba7..188c5b1 100644
--- a/src/glsl/nir/nir_opt_algebraic.py
+++ b/src/glsl/nir/nir_opt_algebraic.py
@@ -1,4 +1,5 @@
 #! /usr/bin/env python
+# -*- encoding: utf-8 -*-
 #
 # Copyright (C) 2014 Intel Corporation
 #
@@ -267,6 +268,68 @@ for op in ['flt', 'fge', 'feq', 'fne',
        ('bcsel', 'a', (op, 'd', 'b'), (op, 'd', 'c'))),
    ]
 
+def ldexp_to_arith(x, exp):
+   """
+   Translates
+      ldexp x exp
+   into
+
+      extracted_biased_exp = rshift(bitcast_f2i(abs(x)), exp_shift);
+      resulting_biased_exp = extracted_biased_exp + exp;
+
+      if (resulting_biased_exp < 1) {
+         return copysign(0.0, x);
+      }
+
+      return bitcast_u2f((bitcast_f2u(x) & sign_mantissa_mask) |
+                         lshift(i2u(resulting_biased_exp), exp_shift));
+
+   which we can't actually implement as such, since NIR doesn't have
+   vectorized if-statements. We actually implement it without branches
+   using conditional-select:
+
+      extracted_biased_exp = rshift(bitcast_f2i(abs(x)), exp_shift);
+      resulting_biased_exp = extracted_biased_exp + exp;
+
+      is_not_zero_or_underflow = gequal(resulting_biased_exp, 1);
+      x = csel(is_not_zero_or_underflow, x, copysign(0.0f, x));
+      resulting_biased_exp = csel(is_not_zero_or_underflow,
+                                  resulting_biased_exp, 0);
+
+      return bitcast_u2f((bitcast_f2u(x) & sign_mantissa_mask) |
+                         lshift(i2u(resulting_biased_exp), exp_shift));
+   """
+
+   sign_mask = 0x80000000
+   exp_shift = 23
+   exp_width = 8
+
+   # Extract the biased exponent from x; fabs clears the sign bit first.
+   extracted_biased_exp = ('ushr', ('fabs', x), exp_shift)
+   resulting_biased_exp = ('iadd', extracted_biased_exp, exp)
+
+   # Test if result is ±0.0, subnormal, or underflow by checking if the
+   # resulting biased exponent would be less than 0x1. If so, the result is
+   # 0.0 with the sign of x. (Actually, invert the conditions so that
+   # immediate values are the second arguments, which is better for i965)
+   zero_sign_x = ('iand', x, sign_mask)
+
+   is_not_zero_or_underflow = ('ige', resulting_biased_exp, 0x1)
+
+   # We could test for overflows by checking if the resulting biased exponent
+   # would be greater than 0xFE. Turns out we don't need to because the GLSL
+   # spec says:
+   #
+   #    "If this product is too large to be represented in the
+   #     floating-point type, the result is undefined."
+
+   return ('bitfield_insert',
+           ('bcsel', is_not_zero_or_underflow, x, zero_sign_x),
+           ('bcsel', is_not_zero_or_underflow, resulting_biased_exp, 0),
+           exp_shift, exp_width)
+
+optimizations += [(('ldexp', 'x', 'exp'), ldexp_to_arith('x', 'exp'))]
+
 # This section contains "late" optimizations that should be run after the
 # regular optimizations have finished. Optimizations should go here if
 # they help code generation but do not necessarily produce code that is
-- 
2.7.4