From 50d335804fb9cfeb20f20b1c031e39a6d239791b Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 5 Sep 2020 21:09:38 -0400 Subject: [PATCH] nir/algebraic: add late optimizations that optimize out mediump conversions (v3) v2: move *2*mp patterns to the end of late_optimizations v3: remove ftrunc from the optimizations to fix: dEQP-GLES3.functional.shaders.builtin_functions.common.modf.vec2_lowp_vertex Reviewed-by: Rob Clark (v1) Part-of: --- .gitlab-ci/traces-freedreno.yml | 2 +- src/compiler/nir/nir_opt_algebraic.py | 50 +++++++++++++++++++++++++++-------- 2 files changed, 40 insertions(+), 12 deletions(-) diff --git a/.gitlab-ci/traces-freedreno.yml b/.gitlab-ci/traces-freedreno.yml index a22e20f..163833a 100644 --- a/.gitlab-ci/traces-freedreno.yml +++ b/.gitlab-ci/traces-freedreno.yml @@ -230,7 +230,7 @@ traces: - path: glmark2/terrain.rdc expectations: - device: freedreno-a630 - checksum: 2368b3132a8768bc3a98b3fda0a4830e + checksum: 114f7dfe97768d9c565a29f656c8f9cf - path: glmark2/texture-texture-filter=linear.rdc expectations: - device: freedreno-a630 diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index f90c1df..c948faf 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -2126,17 +2126,6 @@ late_optimizations = [ (('~fadd', ('ffma(is_used_once)', a, b, ('fmul', 'c(is_not_const_and_not_fsign)', 'd(is_not_const_and_not_fsign)') ), 'e(is_not_const)'), ('ffma', a, b, ('ffma', c, d, e)), '(info->stage != MESA_SHADER_VERTEX && info->stage != MESA_SHADER_GEOMETRY) && !options->intel_vec4'), - # Convert *2*mp instructions to concrete *2*16 instructions. At this point - # any conversions that could have been removed will have been removed in - # nir_opt_algebraic so any remaining ones are required. 
- (('f2fmp', a), ('f2f16', a)),
- (('f2imp', a), ('f2i16', a)),
- (('f2ump', a), ('f2u16', a)),
- (('i2imp', a), ('i2i16', a)),
- (('i2fmp', a), ('i2f16', a)),
- (('i2imp', a), ('u2u16', a)),
- (('u2fmp', a), ('u2f16', a)),
- # Section 8.8 (Integer Functions) of the GLSL 4.60 spec says: # # If bits is zero, the result will be zero. @@ -2199,6 +2188,45 @@ for op in ['ffma']: (('bcsel', a, (op, b, c, d), (op + '(is_used_once)', b, e, d)), (op, b, ('bcsel', a, c, e), d)), ] +# mediump: If an opcode is surrounded by conversions, remove the conversions. +# The rationale is that type conversions + the low precision opcode are more +# expensive than the same arithmetic opcode at higher precision. +# +# This must be done in late optimizations, because we need normal optimizations to +# first eliminate temporary up-conversions such as in op1(f2fmp(f2f32(op2()))). +# +# Unary opcodes +for op in ['fabs', 'fceil', 'fcos', 'fddx', 'fddx_coarse', 'fddx_fine', 'fddy', + 'fddy_coarse', 'fddy_fine', 'fexp2', 'ffloor', 'ffract', 'flog2', 'fneg', + 'frcp', 'fround_even', 'frsq', 'fsat', 'fsign', 'fsin', 'fsqrt']: + late_optimizations += [(('~f2f32', (op, ('f2fmp', a))), (op, a))] + +# Binary opcodes +for op in ['fadd', 'fdiv', 'fmax', 'fmin', 'fmod', 'fmul', 'fpow', 'frem']: + late_optimizations += [(('~f2f32', (op, ('f2fmp', a), ('f2fmp', b))), (op, a, b))] + +# Ternary opcodes +for op in ['ffma', 'flrp']: + late_optimizations += [(('~f2f32', (op, ('f2fmp', a), ('f2fmp', b), ('f2fmp', c))), (op, a, b, c))] + +# Comparison opcodes +for op in ['feq', 'fge', 'flt', 'fneu']: + late_optimizations += [(('~' + op, ('f2fmp', a), ('f2fmp', b)), (op, a, b))] + +# Do this last, so that the f2fmp patterns above have effect. +late_optimizations += [ + # Convert *2*mp instructions to concrete *2*16 instructions. At this point + # any conversions that could have been removed will have been removed in + # nir_opt_algebraic so any remaining ones are required.
+ (('f2fmp', a), ('f2f16', a)),
+ (('f2imp', a), ('f2i16', a)),
+ (('f2ump', a), ('f2u16', a)),
+ (('i2imp', a), ('i2i16', a)),
+ (('i2fmp', a), ('i2f16', a)),
+ (('u2ump', a), ('u2u16', a)),
+ (('u2fmp', a), ('u2f16', a)),
+]
+
 distribute_src_mods = [ # Try to remove some spurious negations rather than pushing them down. (('fmul', ('fneg', a), ('fneg', b)), ('fmul', a, b)), -- 2.7.4