From 195952786b08fc76855b5b3015cf1e1d2c862944 Mon Sep 17 00:00:00 2001 From: Gert Wollny Date: Sat, 6 Mar 2021 16:10:55 +0100 Subject: [PATCH] r600/sfn: Add algebraic lowering for fsin and fcos * fsin and fcos require normalization of the input * bitfield_insert requires an additional shift of the insert value v2: drop bitfield_insert lowering code, it is already available as a compiler option (Rhys Perry) Signed-off-by: Gert Wollny Reviewed-by: Kristian H. Kristensen Part-of: --- src/gallium/drivers/r600/Android.mk | 9 ++ src/gallium/drivers/r600/Makefile.sources | 3 +- src/gallium/drivers/r600/meson.build | 15 ++- .../drivers/r600/sfn/sfn_emitaluinstruction.cpp | 119 +-------------------- .../drivers/r600/sfn/sfn_emitaluinstruction.h | 2 - src/gallium/drivers/r600/sfn/sfn_nir.cpp | 1 + src/gallium/drivers/r600/sfn/sfn_nir.h | 1 + src/gallium/drivers/r600/sfn/sfn_nir_algebraic.py | 49 +++++++++ 8 files changed, 81 insertions(+), 118 deletions(-) create mode 100644 src/gallium/drivers/r600/sfn/sfn_nir_algebraic.py diff --git a/src/gallium/drivers/r600/Android.mk b/src/gallium/drivers/r600/Android.mk index 296aa3a..9d6b4c3 100644 --- a/src/gallium/drivers/r600/Android.mk +++ b/src/gallium/drivers/r600/Android.mk @@ -49,6 +49,15 @@ $(intermediates)/egd_tables.h: $(MESA_TOP)/src/gallium/drivers/r600/egd_tables.p @echo "Gen Header: $(PRIVATE_MODULE) <= $(notdir $(@))" $(hide) $(MESA_PYTHON2) $(MESA_TOP)/src/gallium/drivers/r600/egd_tables.py $(MESA_TOP)/src/gallium/drivers/r600/evergreend.h > $@ +r600_nir_algebraic_gen := $(LOCAL_PATH)/sfn/r600_nir_algebraic.py +r600_nir_algebraic_deps := \ + $(LOCAL_PATH)/sfn/r600_nir_algebraic.py \ + $(MESA_TOP)/src/compiler/nir/nir_algebraic.py + +$(intermediates)/sfn_nir_algebraic.c: $(r600_nir_algebraic_deps) + @mkdir -p $(dir $@) + $(hide) $(MESA_PYTHON2) $(nir_opt_algebraic_gen) $< > $@ + ifeq ($(MESA_ENABLE_LLVM),true) $(call mesa-build-with-llvm) endif diff --git a/src/gallium/drivers/r600/Makefile.sources 
b/src/gallium/drivers/r600/Makefile.sources index 6307960..52563b2 100644 --- a/src/gallium/drivers/r600/Makefile.sources +++ b/src/gallium/drivers/r600/Makefile.sources @@ -163,4 +163,5 @@ CXX_SOURCES = \ sfn/sfn_vertexstageexport.h R600_GENERATED_FILES = \ - egd_tables.h + egd_tables.h \ + sfn_nir_algebraic.c diff --git a/src/gallium/drivers/r600/meson.build b/src/gallium/drivers/r600/meson.build index c5c8a99..424ac3c 100644 --- a/src/gallium/drivers/r600/meson.build +++ b/src/gallium/drivers/r600/meson.build @@ -188,6 +188,19 @@ egd_tables_h = custom_target( capture : true, ) +sfn_nir_algebraic_c = custom_target( + 'sfn_nir_algebraic.c', + input : 'sfn/sfn_nir_algebraic.py', + output : 'sfn_nir_algebraic.c', + command : [ + prog_python, '@INPUT@', + '-p', join_paths(meson.source_root(), 'src/compiler/nir/'), + ], + capture : true, + depend_files : nir_algebraic_py, +) + + r600_c_args = [] if with_gallium_opencl r600_c_args += '-DHAVE_OPENCL' @@ -195,7 +208,7 @@ endif libr600 = static_library( 'r600', - [files_r600, egd_tables_h], + [files_r600, egd_tables_h, sfn_nir_algebraic_c], c_args : [r600_c_args, '-Wstrict-overflow=0'], gnu_symbol_visibility : 'hidden', include_directories : [ diff --git a/src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.cpp b/src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.cpp index 49e839a..80d78c0 100644 --- a/src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_emitaluinstruction.cpp @@ -83,10 +83,9 @@ bool EmitAluInstruction::do_emit(nir_instr* ir) case nir_op_bany_inequal3: return emit_any_all_icomp(instr, op2_setne_int, 3, false); case nir_op_bany_inequal4: return emit_any_all_icomp(instr, op2_setne_int, 4, false); case nir_op_bcsel: return emit_alu_op3(instr, op3_cnde_int, {0, 2, 1}); - case nir_op_bfi: return emit_alu_op3(instr, op3_bfi_int); case nir_op_bfm: return emit_alu_op2_int(instr, op2_bfm_int); case nir_op_bit_count: return emit_alu_op1(instr, op1_bcnt_int); - case 
nir_op_bitfield_insert: return emit_bitfield_insert(instr); + case nir_op_bitfield_reverse: return emit_alu_op1(instr, op1_bfrev_int); case nir_op_bitfield_select: return emit_alu_op3(instr, op3_bfi_int); case nir_op_cube_r600: return emit_cube(instr); @@ -97,7 +96,9 @@ bool EmitAluInstruction::do_emit(nir_instr* ir) case nir_op_fabs: return emit_alu_op1(instr, op1_mov, {1 << alu_src0_abs}); case nir_op_fadd: return emit_alu_op2(instr, op2_add); case nir_op_fceil: return emit_alu_op1(instr, op1_ceil); - case nir_op_fcos: return emit_alu_trig_op1(instr, op1_cos); + case nir_op_fcos_r600: return emit_alu_trans_op1(instr, op1_cos); + + /* These are in the ALU instruction list, but they should be texture instructions */ case nir_op_fddx: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, false); case nir_op_fddx_coarse: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, false); case nir_op_fddx_fine: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, true); @@ -130,7 +131,7 @@ bool EmitAluInstruction::do_emit(nir_instr* ir) case nir_op_fround_even: return emit_alu_op1(instr, op1_rndne); case nir_op_frsq: return emit_alu_trans_op1(instr, op1_recipsqrt_ieee1); case nir_op_fsat: return emit_alu_op1(instr, op1_mov, {1 << alu_dst_clamp}); - case nir_op_fsin: return emit_alu_trig_op1(instr, op1_sin); + case nir_op_fsin_r600: return emit_alu_trans_op1(instr, op1_sin); case nir_op_fsqrt: return emit_alu_trans_op1(instr, op1_sqrt_ieee); case nir_op_fsub: return emit_alu_op2(instr, op2_add, op2_opt_neg_src1); case nir_op_ftrunc: return emit_alu_op1(instr, op1_trunc); @@ -385,57 +386,6 @@ bool EmitAluInstruction::emit_mov(const nir_alu_instr& instr) } } -bool EmitAluInstruction::emit_alu_trig_op1(const nir_alu_instr& instr, EAluOp opcode) -{ - // normalize by dividing by 2*PI, shift by 0.5, take fraction, and - // then shift back - - const float inv_2_pi = 0.15915494f; - - PValue v[4]; // this might need some additional temp register creation - for 
(unsigned i = 0; i < 4 ; ++i) - v[i] = from_nir(instr.dest, i); - - PValue inv_pihalf = PValue(new LiteralValue(inv_2_pi, 0)); - AluInstruction *ir = nullptr; - for (unsigned i = 0; i < 4 ; ++i) { - if (!(instr.dest.write_mask & (1 << i))) - continue; - ir = new AluInstruction(op3_muladd_ieee, v[i], - {m_src[0][i], inv_pihalf, Value::zero_dot_5}, - {alu_write}); - if (instr.src[0].negate) ir->set_flag(alu_src0_neg); - emit_instruction(ir); - } - make_last(ir); - - for (unsigned i = 0; i < 4 ; ++i) { - if (!(instr.dest.write_mask & (1 << i))) - continue; - ir = new AluInstruction(op1_fract, v[i], v[i], {alu_write}); - emit_instruction(ir); - } - make_last(ir); - - for (unsigned i = 0; i < 4 ; ++i) { - if (!(instr.dest.write_mask & (1 << i))) - continue; - ir = new AluInstruction(op2_add, v[i], v[i], Value::zero_dot_5, write); - ir->set_flag(alu_src1_neg); - emit_instruction(ir); - } - make_last(ir); - - for (unsigned i = 0; i < 4 ; ++i) { - if (!(instr.dest.write_mask & (1 << i))) - continue; - - ir = new AluInstruction(opcode, v[i], v[i], last_write); - emit_instruction(ir); - } - return true; -} - bool EmitAluInstruction::emit_alu_trans_op1(const nir_alu_instr& instr, EAluOp opcode, bool absolute) { @@ -1032,65 +982,6 @@ bool EmitAluInstruction::emit_tex_fdd(const nir_alu_instr& instr, TexInstruction return true; } -bool EmitAluInstruction::emit_bitfield_insert(const nir_alu_instr& instr) -{ - auto t0 = get_temp_vec4(); - auto t1 = get_temp_vec4(); - auto t2 = get_temp_vec4(); - auto t3 = get_temp_vec4(); - - PValue l32(new LiteralValue(32)); - unsigned write_mask = instr.dest.write_mask; - if (!write_mask) return true; - - AluInstruction *ir = nullptr; - for (int i = 0; i < 4; i++) { - if (!(write_mask & (1< reorder={0,1,2}); bool emit_alu_trans_op1(const nir_alu_instr& instr, EAluOp opcode, bool absolute = false); - bool emit_alu_trig_op1(const nir_alu_instr& instr, EAluOp opcode); bool emit_alu_b2f(const nir_alu_instr& instr); bool emit_alu_i2orf2_b1(const 
nir_alu_instr& instr, EAluOp op); @@ -88,7 +87,6 @@ private: bool emit_unpack_64_2x32_split(const nir_alu_instr& instr, unsigned comp); bool emit_tex_fdd(const nir_alu_instr& instr, TexInstruction::Opcode op, bool fine); - bool emit_bitfield_insert(const nir_alu_instr& instr); bool emit_unpack_32_2x16_split_y(const nir_alu_instr& instr); bool emit_unpack_32_2x16_split_x(const nir_alu_instr& instr); bool emit_pack_32_2x16_split(const nir_alu_instr& instr); diff --git a/src/gallium/drivers/r600/sfn/sfn_nir.cpp b/src/gallium/drivers/r600/sfn/sfn_nir.cpp index 01eb875..bb54660 100644 --- a/src/gallium/drivers/r600/sfn/sfn_nir.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_nir.cpp @@ -878,6 +878,7 @@ int r600_shader_from_nir(struct r600_context *rctx, NIR_PASS_V(sel->nir, nir_lower_idiv, sel->nir->info.stage == MESA_SHADER_COMPUTE ? nir_lower_idiv_precise : nir_lower_idiv_fast); + NIR_PASS_V(sel->nir, r600_lower_alu); NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar); if (lower_64bit) diff --git a/src/gallium/drivers/r600/sfn/sfn_nir.h b/src/gallium/drivers/r600/sfn/sfn_nir.h index 7c30937..d13accb 100644 --- a/src/gallium/drivers/r600/sfn/sfn_nir.h +++ b/src/gallium/drivers/r600/sfn/sfn_nir.h @@ -151,6 +151,7 @@ int r600_shader_from_nir(struct r600_context *rctx, struct r600_pipe_shader *pipeshader, union r600_shader_key *key); +bool r600_lower_alu(nir_shader *sh); #ifdef __cplusplus } diff --git a/src/gallium/drivers/r600/sfn/sfn_nir_algebraic.py b/src/gallium/drivers/r600/sfn/sfn_nir_algebraic.py new file mode 100644 index 0000000..2ef0641 --- /dev/null +++ b/src/gallium/drivers/r600/sfn/sfn_nir_algebraic.py @@ -0,0 +1,49 @@ +# +# Copyright (C) 2021 Collabora Ltd. 
+# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. + +import argparse +import sys + +lower_alu = [ + # The r600 family needs the argument normalized: ffract(a / (2*pi) + 0.5) - 0.5 + (('fsin', "a@32"), ('fsin_r600', ('fadd', ('ffract', ('ffma', 'a', 0.15915494, 0.5)), -0.5))), + (('fcos', "a@32"), ('fcos_r600', ('fadd', ('ffract', ('ffma', 'a', 0.15915494, 0.5)), -0.5))), +] + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('-p', '--import-path', required=True) + args = parser.parse_args() + sys.path.insert(0, args.import_path) + run() + + +def run(): + import nir_algebraic # pylint: disable=import-error + + print('#include "sfn/sfn_nir.h"') + + print(nir_algebraic.AlgebraicPass("r600_lower_alu", + lower_alu).render()) + +if __name__ == '__main__': + main() -- 2.7.4